zephyr-ds / trainer_state.json
jikaixuan's picture
Model save
dc83d7c verified
raw
history blame
6.53 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -2.980285167694092,
"logits/rejected": -2.87275767326355,
"logps/chosen": -313.4390563964844,
"logps/rejected": -236.1754150390625,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 0.0
},
{
"epoch": 0.1,
"learning_rate": 9.953434225844005e-06,
"logits/chosen": -2.8180909156799316,
"logits/rejected": -2.8273613452911377,
"logps/chosen": -285.13623046875,
"logps/rejected": -271.66839599609375,
"loss": 0.6624,
"pred_label": 0.0,
"rewards/accuracies": 0.6041666865348816,
"rewards/chosen": 0.014434419572353363,
"rewards/margins": 0.08631344139575958,
"rewards/rejected": -0.07187902927398682,
"step": 100,
"use_label": 0.0
},
{
"epoch": 0.21,
"learning_rate": 8.789289871944122e-06,
"logits/chosen": -2.8230364322662354,
"logits/rejected": -2.8086395263671875,
"logps/chosen": -278.2524108886719,
"logps/rejected": -263.9921569824219,
"loss": 0.5868,
"pred_label": 0.0,
"rewards/accuracies": 0.6949999928474426,
"rewards/chosen": 0.05295524746179581,
"rewards/margins": 0.39829620718955994,
"rewards/rejected": -0.3453409671783447,
"step": 200,
"use_label": 0.0
},
{
"epoch": 0.31,
"learning_rate": 7.625145518044238e-06,
"logits/chosen": -2.803905725479126,
"logits/rejected": -2.802032232284546,
"logps/chosen": -284.01385498046875,
"logps/rejected": -259.5546569824219,
"loss": 0.562,
"pred_label": 0.0,
"rewards/accuracies": 0.7056249976158142,
"rewards/chosen": 0.005377008114010096,
"rewards/margins": 0.5735920667648315,
"rewards/rejected": -0.5682151317596436,
"step": 300,
"use_label": 0.0
},
{
"epoch": 0.42,
"learning_rate": 6.461001164144355e-06,
"logits/chosen": -2.8141045570373535,
"logits/rejected": -2.7911813259124756,
"logps/chosen": -284.3139953613281,
"logps/rejected": -269.4837951660156,
"loss": 0.5527,
"pred_label": 0.0,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.011648621410131454,
"rewards/margins": 0.6873368620872498,
"rewards/rejected": -0.6756882667541504,
"step": 400,
"use_label": 0.0
},
{
"epoch": 0.52,
"learning_rate": 5.2968568102444705e-06,
"logits/chosen": -2.7915823459625244,
"logits/rejected": -2.776299476623535,
"logps/chosen": -269.73016357421875,
"logps/rejected": -257.2498474121094,
"loss": 0.5556,
"pred_label": 0.0,
"rewards/accuracies": 0.6862499713897705,
"rewards/chosen": 0.03227977454662323,
"rewards/margins": 0.5588020086288452,
"rewards/rejected": -0.5265222191810608,
"step": 500,
"use_label": 0.0
},
{
"epoch": 0.63,
"learning_rate": 4.132712456344587e-06,
"logits/chosen": -2.8199498653411865,
"logits/rejected": -2.8022332191467285,
"logps/chosen": -284.0947265625,
"logps/rejected": -271.3774108886719,
"loss": 0.5422,
"pred_label": 0.0,
"rewards/accuracies": 0.7143750190734863,
"rewards/chosen": 0.11361943930387497,
"rewards/margins": 0.7251341938972473,
"rewards/rejected": -0.6115147471427917,
"step": 600,
"use_label": 0.0
},
{
"epoch": 0.73,
"learning_rate": 2.9685681024447033e-06,
"logits/chosen": -2.8110527992248535,
"logits/rejected": -2.788975477218628,
"logps/chosen": -280.3959045410156,
"logps/rejected": -254.49673461914062,
"loss": 0.5404,
"pred_label": 0.0,
"rewards/accuracies": 0.7212499976158142,
"rewards/chosen": 0.11637673527002335,
"rewards/margins": 0.6999369263648987,
"rewards/rejected": -0.5835601687431335,
"step": 700,
"use_label": 0.0
},
{
"epoch": 0.84,
"learning_rate": 1.8044237485448196e-06,
"logits/chosen": -2.8146722316741943,
"logits/rejected": -2.812129020690918,
"logps/chosen": -287.4331359863281,
"logps/rejected": -267.59161376953125,
"loss": 0.5343,
"pred_label": 0.0,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": 0.13239255547523499,
"rewards/margins": 0.7503484487533569,
"rewards/rejected": -0.6179558634757996,
"step": 800,
"use_label": 0.0
},
{
"epoch": 0.94,
"learning_rate": 6.402793946449361e-07,
"logits/chosen": -2.8043179512023926,
"logits/rejected": -2.8079681396484375,
"logps/chosen": -276.8100891113281,
"logps/rejected": -262.690673828125,
"loss": 0.545,
"pred_label": 0.0,
"rewards/accuracies": 0.7193750143051147,
"rewards/chosen": 0.06981150805950165,
"rewards/margins": 0.7052963972091675,
"rewards/rejected": -0.6354848742485046,
"step": 900,
"use_label": 0.0
},
{
"epoch": 1.0,
"eval_logits/chosen": -2.827404737472534,
"eval_logits/rejected": -2.818655014038086,
"eval_logps/chosen": -283.37945556640625,
"eval_logps/rejected": -265.9969787597656,
"eval_loss": 0.5459502935409546,
"eval_pred_label": 0.0,
"eval_rewards/accuracies": 0.7139999866485596,
"eval_rewards/chosen": 0.08778975158929825,
"eval_rewards/margins": 0.7575166821479797,
"eval_rewards/rejected": -0.6697269678115845,
"eval_runtime": 479.5351,
"eval_samples_per_second": 4.171,
"eval_steps_per_second": 0.261,
"eval_use_label": 0.0,
"step": 955
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.5628191218950361,
"train_runtime": 25746.1298,
"train_samples_per_second": 2.375,
"train_steps_per_second": 0.037
}
],
"logging_steps": 100,
"max_steps": 955,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}