zephyr-ds / trainer_state.json
jikaixuan's picture
Model save
ccb0f19 verified
raw
history blame
6.81 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.208333333333333e-09,
"logits/chosen": -2.899709463119507,
"logits/rejected": -2.879509687423706,
"logps/chosen": -314.8815612792969,
"logps/rejected": -239.785888671875,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 18.0
},
{
"epoch": 0.1,
"learning_rate": 4.976717112922002e-07,
"logits/chosen": -2.8282251358032227,
"logits/rejected": -2.8269264698028564,
"logps/chosen": -285.5703430175781,
"logps/rejected": -267.9831237792969,
"loss": 0.6805,
"pred_label": 479.7752380371094,
"rewards/accuracies": 0.5012626051902771,
"rewards/chosen": 0.0007524320390075445,
"rewards/margins": 0.0014539315598085523,
"rewards/rejected": -0.0007014995790086687,
"step": 100,
"use_label": 1138.2247314453125
},
{
"epoch": 0.21,
"learning_rate": 4.3946449359720607e-07,
"logits/chosen": -2.829744338989258,
"logits/rejected": -2.8234996795654297,
"logps/chosen": -283.5379333496094,
"logps/rejected": -264.45965576171875,
"loss": 0.6797,
"pred_label": 1464.762451171875,
"rewards/accuracies": 0.5268750190734863,
"rewards/chosen": 0.0027550552040338516,
"rewards/margins": 0.004276593215763569,
"rewards/rejected": -0.0015215380117297173,
"step": 200,
"use_label": 3337.237548828125
},
{
"epoch": 0.31,
"learning_rate": 3.812572759022118e-07,
"logits/chosen": -2.822391986846924,
"logits/rejected": -2.821011543273926,
"logps/chosen": -290.09552001953125,
"logps/rejected": -260.20050048828125,
"loss": 0.678,
"pred_label": 2522.33740234375,
"rewards/accuracies": 0.546875,
"rewards/chosen": 0.004526687320321798,
"rewards/margins": 0.008910334669053555,
"rewards/rejected": -0.004383646883070469,
"step": 300,
"use_label": 5479.66259765625
},
{
"epoch": 0.42,
"learning_rate": 3.230500582072177e-07,
"logits/chosen": -2.8350696563720703,
"logits/rejected": -2.8237721920013428,
"logps/chosen": -284.8573303222656,
"logps/rejected": -260.8428039550781,
"loss": 0.6752,
"pred_label": 3636.47509765625,
"rewards/accuracies": 0.5778124928474426,
"rewards/chosen": 0.00710176769644022,
"rewards/margins": 0.013848603703081608,
"rewards/rejected": -0.006746836472302675,
"step": 400,
"use_label": 7565.52490234375
},
{
"epoch": 0.52,
"learning_rate": 2.648428405122235e-07,
"logits/chosen": -2.8218374252319336,
"logits/rejected": -2.810873508453369,
"logps/chosen": -281.2003173828125,
"logps/rejected": -257.1551818847656,
"loss": 0.6707,
"pred_label": 4911.896484375,
"rewards/accuracies": 0.5731250047683716,
"rewards/chosen": 0.008172390051186085,
"rewards/margins": 0.017216255888342857,
"rewards/rejected": -0.009043867699801922,
"step": 500,
"use_label": 9490.103515625
},
{
"epoch": 0.63,
"learning_rate": 2.0663562281722933e-07,
"logits/chosen": -2.8341524600982666,
"logits/rejected": -2.8230907917022705,
"logps/chosen": -284.7864685058594,
"logps/rejected": -262.0230712890625,
"loss": 0.6665,
"pred_label": 6349.55859375,
"rewards/accuracies": 0.6025000214576721,
"rewards/chosen": 0.012129506096243858,
"rewards/margins": 0.025582188740372658,
"rewards/rejected": -0.0134526826441288,
"step": 600,
"use_label": 11252.44140625
},
{
"epoch": 0.73,
"learning_rate": 1.4842840512223514e-07,
"logits/chosen": -2.8274898529052734,
"logits/rejected": -2.811511516571045,
"logps/chosen": -282.0050048828125,
"logps/rejected": -252.4735565185547,
"loss": 0.6639,
"pred_label": 7854.15869140625,
"rewards/accuracies": 0.6112499833106995,
"rewards/chosen": 0.013030249625444412,
"rewards/margins": 0.026329634711146355,
"rewards/rejected": -0.013299385085701942,
"step": 700,
"use_label": 12947.8408203125
},
{
"epoch": 0.84,
"learning_rate": 9.022118742724097e-08,
"logits/chosen": -2.832928419113159,
"logits/rejected": -2.8318238258361816,
"logps/chosen": -285.62213134765625,
"logps/rejected": -259.8959045410156,
"loss": 0.662,
"pred_label": 9441.02734375,
"rewards/accuracies": 0.6253125071525574,
"rewards/chosen": 0.01489347591996193,
"rewards/margins": 0.030955424532294273,
"rewards/rejected": -0.016061950474977493,
"step": 800,
"use_label": 14560.97265625
},
{
"epoch": 0.94,
"learning_rate": 3.20139697322468e-08,
"logits/chosen": -2.824517250061035,
"logits/rejected": -2.830662250518799,
"logps/chosen": -276.052490234375,
"logps/rejected": -262.5692138671875,
"loss": 0.6628,
"pred_label": 10994.255859375,
"rewards/accuracies": 0.6143749952316284,
"rewards/chosen": 0.012237527407705784,
"rewards/margins": 0.026263901963829994,
"rewards/rejected": -0.01402637455612421,
"step": 900,
"use_label": 16207.744140625
},
{
"epoch": 1.0,
"eval_logits/chosen": -2.841262102127075,
"eval_logits/rejected": -2.8343887329101562,
"eval_logps/chosen": -281.85919189453125,
"eval_logps/rejected": -262.4202880859375,
"eval_loss": 0.6618225574493408,
"eval_pred_label": 12855.98046875,
"eval_rewards/accuracies": 0.6150000095367432,
"eval_rewards/chosen": 0.011613711714744568,
"eval_rewards/margins": 0.02489962987601757,
"eval_rewards/rejected": -0.013285920023918152,
"eval_runtime": 826.7995,
"eval_samples_per_second": 2.419,
"eval_steps_per_second": 0.302,
"eval_use_label": 18206.01953125,
"step": 955
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.6705795382954063,
"train_runtime": 45840.6595,
"train_samples_per_second": 1.334,
"train_steps_per_second": 0.021
}
],
"logging_steps": 100,
"max_steps": 955,
"num_train_epochs": 1,
"save_steps": 10,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}