|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 1, |
|
"global_step": 26, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"abs_diff": 0.4453125, |
|
"all_logps_1": -644.9681396484375, |
|
"all_logps_1_values": -644.9680786132812, |
|
"all_logps_2": 424.9236145019531, |
|
"all_logps_2_values": 424.9236145019531, |
|
"epoch": 0.038461538461538464, |
|
"grad_norm": 20.744102687471287, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": 5.625, |
|
"logits/rejected": 5.625, |
|
"logps/chosen": -2.03125, |
|
"logps/rejected": -1.9375, |
|
"loss": 1.9612, |
|
"original_losses": 2.0625, |
|
"rewards/accuracies": 0.4270833432674408, |
|
"rewards/chosen": -5.0625, |
|
"rewards/margins": -0.2421875, |
|
"rewards/rejected": -4.84375, |
|
"step": 1, |
|
"weight": 1.0 |
|
}, |
|
{ |
|
"epoch": 0.038461538461538464, |
|
"eval_abs_diff": 0.44921875, |
|
"eval_all_logps_1": -657.8338623046875, |
|
"eval_all_logps_1_values": -657.8338012695312, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.96875, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7894011735916138, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.4404762089252472, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.189453125, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 18.7618, |
|
"eval_samples_per_second": 104.521, |
|
"eval_steps_per_second": 0.373, |
|
"eval_weight": 1.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07692307692307693, |
|
"eval_abs_diff": 0.453125, |
|
"eval_all_logps_1": -657.5560913085938, |
|
"eval_all_logps_1_values": -657.5560302734375, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.6875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7886760234832764, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.4444444477558136, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.189453125, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6106, |
|
"eval_samples_per_second": 257.667, |
|
"eval_steps_per_second": 0.92, |
|
"eval_weight": 1.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.11538461538461539, |
|
"eval_abs_diff": 0.451171875, |
|
"eval_all_logps_1": -657.2574462890625, |
|
"eval_all_logps_1_values": -657.2574462890625, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.96875, |
|
"eval_logps/rejected": -2.046875, |
|
"eval_loss": 1.7886680364608765, |
|
"eval_original_losses": 1.8203125, |
|
"eval_rewards/accuracies": 0.4444444477558136, |
|
"eval_rewards/chosen": -4.9375, |
|
"eval_rewards/margins": 0.1884765625, |
|
"eval_rewards/rejected": -5.125, |
|
"eval_runtime": 8.5255, |
|
"eval_samples_per_second": 230.016, |
|
"eval_steps_per_second": 0.821, |
|
"eval_weight": 1.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"eval_abs_diff": 0.451171875, |
|
"eval_all_logps_1": -657.5513916015625, |
|
"eval_all_logps_1_values": -657.5513305664062, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.96875, |
|
"eval_logps/rejected": -2.046875, |
|
"eval_loss": 1.789082407951355, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.4365079700946808, |
|
"eval_rewards/chosen": -4.9375, |
|
"eval_rewards/margins": 0.1806640625, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6165, |
|
"eval_samples_per_second": 257.468, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"abs_diff": 0.466796875, |
|
"all_logps_1": -640.5847778320312, |
|
"all_logps_1_values": -640.584716796875, |
|
"all_logps_2": 413.3194274902344, |
|
"all_logps_2_values": 413.3194580078125, |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 21.578765352244158, |
|
"learning_rate": 9.814586436738997e-07, |
|
"logits/chosen": 5.6875, |
|
"logits/rejected": 5.6875, |
|
"logps/chosen": -1.9765625, |
|
"logps/rejected": -2.0, |
|
"loss": 1.868, |
|
"original_losses": 1.9140625, |
|
"rewards/accuracies": 0.4366319179534912, |
|
"rewards/chosen": -4.9375, |
|
"rewards/margins": 0.0712890625, |
|
"rewards/rejected": -5.0, |
|
"step": 5, |
|
"weight": 1.0 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"eval_abs_diff": 0.447265625, |
|
"eval_all_logps_1": -656.76513671875, |
|
"eval_all_logps_1_values": -656.76513671875, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.788102388381958, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.432539701461792, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.181640625, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6214, |
|
"eval_samples_per_second": 257.303, |
|
"eval_steps_per_second": 0.918, |
|
"eval_weight": 1.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.23076923076923078, |
|
"eval_abs_diff": 0.451171875, |
|
"eval_all_logps_1": -658.1024169921875, |
|
"eval_all_logps_1_values": -658.1024169921875, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9765625, |
|
"eval_logps/rejected": -2.046875, |
|
"eval_loss": 1.7911142110824585, |
|
"eval_original_losses": 1.8203125, |
|
"eval_rewards/accuracies": 0.4523809254169464, |
|
"eval_rewards/chosen": -4.9375, |
|
"eval_rewards/margins": 0.1669921875, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6157, |
|
"eval_samples_per_second": 257.493, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.2692307692307692, |
|
"eval_abs_diff": 0.451171875, |
|
"eval_all_logps_1": -657.3370361328125, |
|
"eval_all_logps_1_values": -657.3370361328125, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7869629859924316, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.44841268658638, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.1845703125, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6132, |
|
"eval_samples_per_second": 257.579, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"eval_abs_diff": 0.447265625, |
|
"eval_all_logps_1": -657.35888671875, |
|
"eval_all_logps_1_values": -657.35888671875, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.6875, |
|
"eval_logits/rejected": 5.65625, |
|
"eval_logps/chosen": -1.96875, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7835049629211426, |
|
"eval_original_losses": 1.8203125, |
|
"eval_rewards/accuracies": 0.4404762089252472, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.1728515625, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6173, |
|
"eval_samples_per_second": 257.44, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.34615384615384615, |
|
"eval_abs_diff": 0.4453125, |
|
"eval_all_logps_1": -657.4702758789062, |
|
"eval_all_logps_1_values": -657.47021484375, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.785959005355835, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.4404761791229248, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.185546875, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6175, |
|
"eval_samples_per_second": 257.433, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 9 |
|
}, |
|
{ |
|
"abs_diff": 0.427734375, |
|
"all_logps_1": -622.0468139648438, |
|
"all_logps_1_values": -622.0468139648438, |
|
"all_logps_2": 399.6798400878906, |
|
"all_logps_2_values": 399.67987060546875, |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 18.220030847238334, |
|
"learning_rate": 7.883401610574336e-07, |
|
"logits/chosen": 5.6875, |
|
"logits/rejected": 5.6875, |
|
"logps/chosen": -1.953125, |
|
"logps/rejected": -1.96875, |
|
"loss": 1.886, |
|
"original_losses": 1.8671875, |
|
"rewards/accuracies": 0.47291669249534607, |
|
"rewards/chosen": -4.875, |
|
"rewards/margins": 0.025146484375, |
|
"rewards/rejected": -4.90625, |
|
"step": 10, |
|
"weight": 1.0 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"eval_abs_diff": 0.4453125, |
|
"eval_all_logps_1": -657.2244873046875, |
|
"eval_all_logps_1_values": -657.2244262695312, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.789680004119873, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.4325396716594696, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.185546875, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6199, |
|
"eval_samples_per_second": 257.352, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.4230769230769231, |
|
"eval_abs_diff": 0.447265625, |
|
"eval_all_logps_1": -657.7448120117188, |
|
"eval_all_logps_1_values": -657.7448120117188, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.785233974456787, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.44841268658638, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.1806640625, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6147, |
|
"eval_samples_per_second": 257.527, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"eval_abs_diff": 0.44921875, |
|
"eval_all_logps_1": -657.9037475585938, |
|
"eval_all_logps_1_values": -657.9037475585938, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7826603651046753, |
|
"eval_original_losses": 1.8203125, |
|
"eval_rewards/accuracies": 0.4603174328804016, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.1796875, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6147, |
|
"eval_samples_per_second": 257.529, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_abs_diff": 0.451171875, |
|
"eval_all_logps_1": -657.7488403320312, |
|
"eval_all_logps_1_values": -657.748779296875, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7843893766403198, |
|
"eval_original_losses": 1.8203125, |
|
"eval_rewards/accuracies": 0.4365079402923584, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.1689453125, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6141, |
|
"eval_samples_per_second": 257.548, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.5384615384615384, |
|
"eval_abs_diff": 0.439453125, |
|
"eval_all_logps_1": -657.5706787109375, |
|
"eval_all_logps_1_values": -657.5706787109375, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.953125, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.782787799835205, |
|
"eval_original_losses": 1.8046875, |
|
"eval_rewards/accuracies": 0.4404762089252472, |
|
"eval_rewards/chosen": -4.875, |
|
"eval_rewards/margins": 0.1884765625, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6188, |
|
"eval_samples_per_second": 257.39, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 14 |
|
}, |
|
{ |
|
"abs_diff": 0.4375, |
|
"all_logps_1": -654.1092529296875, |
|
"all_logps_1_values": -654.1092529296875, |
|
"all_logps_2": 426.70001220703125, |
|
"all_logps_2_values": 426.70001220703125, |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 20.281380041912414, |
|
"learning_rate": 4.6587879331766457e-07, |
|
"logits/chosen": 5.6875, |
|
"logits/rejected": 5.6875, |
|
"logps/chosen": -2.015625, |
|
"logps/rejected": -2.03125, |
|
"loss": 1.8572, |
|
"original_losses": 1.875, |
|
"rewards/accuracies": 0.43194445967674255, |
|
"rewards/chosen": -5.03125, |
|
"rewards/margins": 0.03857421875, |
|
"rewards/rejected": -5.0625, |
|
"step": 15, |
|
"weight": 1.0 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"eval_abs_diff": 0.4453125, |
|
"eval_all_logps_1": -657.2753295898438, |
|
"eval_all_logps_1_values": -657.2752685546875, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.785233974456787, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.4365079402923584, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.1767578125, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6233, |
|
"eval_samples_per_second": 257.239, |
|
"eval_steps_per_second": 0.918, |
|
"eval_weight": 1.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"eval_abs_diff": 0.44140625, |
|
"eval_all_logps_1": -657.5228271484375, |
|
"eval_all_logps_1_values": -657.5228271484375, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.953125, |
|
"eval_logps/rejected": -2.015625, |
|
"eval_loss": 1.7797918319702148, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.4246031939983368, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.1708984375, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6127, |
|
"eval_samples_per_second": 257.596, |
|
"eval_steps_per_second": 0.92, |
|
"eval_weight": 1.0, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.6538461538461539, |
|
"eval_abs_diff": 0.44140625, |
|
"eval_all_logps_1": -657.8072509765625, |
|
"eval_all_logps_1_values": -657.8073120117188, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.953125, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7796564102172852, |
|
"eval_original_losses": 1.8046875, |
|
"eval_rewards/accuracies": 0.4484127163887024, |
|
"eval_rewards/chosen": -4.875, |
|
"eval_rewards/margins": 0.181640625, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6162, |
|
"eval_samples_per_second": 257.477, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.6923076923076923, |
|
"eval_abs_diff": 0.4375, |
|
"eval_all_logps_1": -657.43701171875, |
|
"eval_all_logps_1_values": -657.43701171875, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7829551696777344, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.4404762089252472, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.1630859375, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6171, |
|
"eval_samples_per_second": 257.447, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.7307692307692307, |
|
"eval_abs_diff": 0.44140625, |
|
"eval_all_logps_1": -657.5411376953125, |
|
"eval_all_logps_1_values": -657.5411987304688, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.783098578453064, |
|
"eval_original_losses": 1.8046875, |
|
"eval_rewards/accuracies": 0.4523809552192688, |
|
"eval_rewards/chosen": -4.875, |
|
"eval_rewards/margins": 0.1787109375, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6155, |
|
"eval_samples_per_second": 257.501, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 19 |
|
}, |
|
{ |
|
"abs_diff": 0.392578125, |
|
"all_logps_1": -622.8844604492188, |
|
"all_logps_1_values": -622.8843994140625, |
|
"all_logps_2": 406.131591796875, |
|
"all_logps_2_values": 406.131591796875, |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 25.7996031816865, |
|
"learning_rate": 1.5872342839067304e-07, |
|
"logits/chosen": 5.75, |
|
"logits/rejected": 5.71875, |
|
"logps/chosen": -1.9609375, |
|
"logps/rejected": -1.96875, |
|
"loss": 1.8374, |
|
"original_losses": 1.828125, |
|
"rewards/accuracies": 0.44930553436279297, |
|
"rewards/chosen": -4.90625, |
|
"rewards/margins": 0.0208740234375, |
|
"rewards/rejected": -4.9375, |
|
"step": 20, |
|
"weight": 1.0 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"eval_abs_diff": 0.451171875, |
|
"eval_all_logps_1": -657.5830078125, |
|
"eval_all_logps_1_values": -657.5830688476562, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.953125, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7812340259552002, |
|
"eval_original_losses": 1.8046875, |
|
"eval_rewards/accuracies": 0.4523809552192688, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.197265625, |
|
"eval_rewards/rejected": -5.09375, |
|
"eval_runtime": 7.6104, |
|
"eval_samples_per_second": 257.674, |
|
"eval_steps_per_second": 0.92, |
|
"eval_weight": 1.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.8076923076923077, |
|
"eval_abs_diff": 0.44140625, |
|
"eval_all_logps_1": -657.6909790039062, |
|
"eval_all_logps_1_values": -657.6909790039062, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7849550247192383, |
|
"eval_original_losses": 1.8125, |
|
"eval_rewards/accuracies": 0.4444444477558136, |
|
"eval_rewards/chosen": -4.875, |
|
"eval_rewards/margins": 0.171875, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6199, |
|
"eval_samples_per_second": 257.354, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.8461538461538461, |
|
"eval_abs_diff": 0.443359375, |
|
"eval_all_logps_1": -657.1679077148438, |
|
"eval_all_logps_1_values": -657.1679077148438, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.953125, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7851064205169678, |
|
"eval_original_losses": 1.8046875, |
|
"eval_rewards/accuracies": 0.4404761791229248, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.18359375, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6179, |
|
"eval_samples_per_second": 257.42, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.8846153846153846, |
|
"eval_abs_diff": 0.4375, |
|
"eval_all_logps_1": -658.0194091796875, |
|
"eval_all_logps_1_values": -658.0193481445312, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7782222032546997, |
|
"eval_original_losses": 1.8046875, |
|
"eval_rewards/accuracies": 0.436507910490036, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.1748046875, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6206, |
|
"eval_samples_per_second": 257.33, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"eval_abs_diff": 0.4375, |
|
"eval_all_logps_1": -657.4482421875, |
|
"eval_all_logps_1_values": -657.4481811523438, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9609375, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7799512147903442, |
|
"eval_original_losses": 1.8046875, |
|
"eval_rewards/accuracies": 0.4523809552192688, |
|
"eval_rewards/chosen": -4.90625, |
|
"eval_rewards/margins": 0.1708984375, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6139, |
|
"eval_samples_per_second": 257.556, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"abs_diff": 0.396484375, |
|
"all_logps_1": -644.0272216796875, |
|
"all_logps_1_values": -644.0272216796875, |
|
"all_logps_2": 413.7660217285156, |
|
"all_logps_2_values": 413.7659606933594, |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 18.38859727265469, |
|
"learning_rate": 4.657026981834622e-09, |
|
"logits/chosen": 5.71875, |
|
"logits/rejected": 5.6875, |
|
"logps/chosen": -1.984375, |
|
"logps/rejected": -1.9609375, |
|
"loss": 1.8714, |
|
"original_losses": 1.90625, |
|
"rewards/accuracies": 0.4229166507720947, |
|
"rewards/chosen": -4.96875, |
|
"rewards/margins": -0.0693359375, |
|
"rewards/rejected": -4.90625, |
|
"step": 25, |
|
"weight": 1.0 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"eval_abs_diff": 0.4375, |
|
"eval_all_logps_1": -657.451171875, |
|
"eval_all_logps_1_values": -657.4511108398438, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.953125, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.778795838356018, |
|
"eval_original_losses": 1.796875, |
|
"eval_rewards/accuracies": 0.4325396716594696, |
|
"eval_rewards/chosen": -4.875, |
|
"eval_rewards/margins": 0.181640625, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6217, |
|
"eval_samples_per_second": 257.293, |
|
"eval_steps_per_second": 0.918, |
|
"eval_weight": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_abs_diff": 0.4453125, |
|
"eval_all_logps_1": -656.8973388671875, |
|
"eval_all_logps_1_values": -656.8973388671875, |
|
"eval_all_logps_2": 434.6329040527344, |
|
"eval_all_logps_2_values": 434.6329345703125, |
|
"eval_logits/chosen": 5.71875, |
|
"eval_logits/rejected": 5.6875, |
|
"eval_logps/chosen": -1.9453125, |
|
"eval_logps/rejected": -2.03125, |
|
"eval_loss": 1.7800946235656738, |
|
"eval_original_losses": 1.796875, |
|
"eval_rewards/accuracies": 0.4404762089252472, |
|
"eval_rewards/chosen": -4.875, |
|
"eval_rewards/margins": 0.2001953125, |
|
"eval_rewards/rejected": -5.0625, |
|
"eval_runtime": 7.6164, |
|
"eval_samples_per_second": 257.472, |
|
"eval_steps_per_second": 0.919, |
|
"eval_weight": 1.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 26, |
|
"total_flos": 0.0, |
|
"train_loss": 1.8681734525240385, |
|
"train_runtime": 997.9993, |
|
"train_samples_per_second": 59.996, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 26, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 36, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|