{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9997382884061764,
  "eval_steps": 100,
  "global_step": 955,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 5.208333333333333e-09,
      "logits/chosen": -2.899709463119507,
      "logits/rejected": -2.879509687423706,
      "logps/chosen": -314.8815612792969,
      "logps/rejected": -239.785888671875,
      "loss": 0.6931,
      "pred_label": 0.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1,
      "use_label": 18.0
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.976717112922002e-07,
      "logits/chosen": -2.8282251358032227,
      "logits/rejected": -2.8269264698028564,
      "logps/chosen": -285.5703430175781,
      "logps/rejected": -267.9831237792969,
      "loss": 0.6805,
      "pred_label": 479.7752380371094,
      "rewards/accuracies": 0.5012626051902771,
      "rewards/chosen": 0.0007524320390075445,
      "rewards/margins": 0.0014539315598085523,
      "rewards/rejected": -0.0007014995790086687,
      "step": 100,
      "use_label": 1138.2247314453125
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.3946449359720607e-07,
      "logits/chosen": -2.829744338989258,
      "logits/rejected": -2.8234996795654297,
      "logps/chosen": -283.5379333496094,
      "logps/rejected": -264.45965576171875,
      "loss": 0.6797,
      "pred_label": 1464.762451171875,
      "rewards/accuracies": 0.5268750190734863,
      "rewards/chosen": 0.0027550552040338516,
      "rewards/margins": 0.004276593215763569,
      "rewards/rejected": -0.0015215380117297173,
      "step": 200,
      "use_label": 3337.237548828125
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.812572759022118e-07,
      "logits/chosen": -2.822391986846924,
      "logits/rejected": -2.821011543273926,
      "logps/chosen": -290.09552001953125,
      "logps/rejected": -260.20050048828125,
      "loss": 0.678,
      "pred_label": 2522.33740234375,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": 0.004526687320321798,
      "rewards/margins": 0.008910334669053555,
      "rewards/rejected": -0.004383646883070469,
      "step": 300,
      "use_label": 5479.66259765625
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.230500582072177e-07,
      "logits/chosen": -2.8350696563720703,
      "logits/rejected": -2.8237721920013428,
      "logps/chosen": -284.8573303222656,
      "logps/rejected": -260.8428039550781,
      "loss": 0.6752,
      "pred_label": 3636.47509765625,
      "rewards/accuracies": 0.5778124928474426,
      "rewards/chosen": 0.00710176769644022,
      "rewards/margins": 0.013848603703081608,
      "rewards/rejected": -0.006746836472302675,
      "step": 400,
      "use_label": 7565.52490234375
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.648428405122235e-07,
      "logits/chosen": -2.8218374252319336,
      "logits/rejected": -2.810873508453369,
      "logps/chosen": -281.2003173828125,
      "logps/rejected": -257.1551818847656,
      "loss": 0.6707,
      "pred_label": 4911.896484375,
      "rewards/accuracies": 0.5731250047683716,
      "rewards/chosen": 0.008172390051186085,
      "rewards/margins": 0.017216255888342857,
      "rewards/rejected": -0.009043867699801922,
      "step": 500,
      "use_label": 9490.103515625
    },
    {
      "epoch": 0.63,
      "learning_rate": 2.0663562281722933e-07,
      "logits/chosen": -2.8341524600982666,
      "logits/rejected": -2.8230907917022705,
      "logps/chosen": -284.7864685058594,
      "logps/rejected": -262.0230712890625,
      "loss": 0.6665,
      "pred_label": 6349.55859375,
      "rewards/accuracies": 0.6025000214576721,
      "rewards/chosen": 0.012129506096243858,
      "rewards/margins": 0.025582188740372658,
      "rewards/rejected": -0.0134526826441288,
      "step": 600,
      "use_label": 11252.44140625
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.4842840512223514e-07,
      "logits/chosen": -2.8274898529052734,
      "logits/rejected": -2.811511516571045,
      "logps/chosen": -282.0050048828125,
      "logps/rejected": -252.4735565185547,
      "loss": 0.6639,
      "pred_label": 7854.15869140625,
      "rewards/accuracies": 0.6112499833106995,
      "rewards/chosen": 0.013030249625444412,
      "rewards/margins": 0.026329634711146355,
      "rewards/rejected": -0.013299385085701942,
      "step": 700,
      "use_label": 12947.8408203125
    },
    {
      "epoch": 0.84,
      "learning_rate": 9.022118742724097e-08,
      "logits/chosen": -2.832928419113159,
      "logits/rejected": -2.8318238258361816,
      "logps/chosen": -285.62213134765625,
      "logps/rejected": -259.8959045410156,
      "loss": 0.662,
      "pred_label": 9441.02734375,
      "rewards/accuracies": 0.6253125071525574,
      "rewards/chosen": 0.01489347591996193,
      "rewards/margins": 0.030955424532294273,
      "rewards/rejected": -0.016061950474977493,
      "step": 800,
      "use_label": 14560.97265625
    },
    {
      "epoch": 0.94,
      "learning_rate": 3.20139697322468e-08,
      "logits/chosen": -2.824517250061035,
      "logits/rejected": -2.830662250518799,
      "logps/chosen": -276.052490234375,
      "logps/rejected": -262.5692138671875,
      "loss": 0.6628,
      "pred_label": 10994.255859375,
      "rewards/accuracies": 0.6143749952316284,
      "rewards/chosen": 0.012237527407705784,
      "rewards/margins": 0.026263901963829994,
      "rewards/rejected": -0.01402637455612421,
      "step": 900,
      "use_label": 16207.744140625
    },
    {
      "epoch": 1.0,
      "eval_logits/chosen": -2.841262102127075,
      "eval_logits/rejected": -2.8343887329101562,
      "eval_logps/chosen": -281.85919189453125,
      "eval_logps/rejected": -262.4202880859375,
      "eval_loss": 0.6618225574493408,
      "eval_pred_label": 12855.98046875,
      "eval_rewards/accuracies": 0.6150000095367432,
      "eval_rewards/chosen": 0.011613711714744568,
      "eval_rewards/margins": 0.02489962987601757,
      "eval_rewards/rejected": -0.013285920023918152,
      "eval_runtime": 826.7995,
      "eval_samples_per_second": 2.419,
      "eval_steps_per_second": 0.302,
      "eval_use_label": 18206.01953125,
      "step": 955
    },
    {
      "epoch": 1.0,
      "step": 955,
      "total_flos": 0.0,
      "train_loss": 0.6705795382954063,
      "train_runtime": 45840.6595,
      "train_samples_per_second": 1.334,
      "train_steps_per_second": 0.021
    }
  ],
  "logging_steps": 100,
  "max_steps": 955,
  "num_train_epochs": 1,
  "save_steps": 10,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}