zephyr-ds / trainer_state.json
jikaixuan's picture
Model save
7d24dc6 verified
raw
history blame
6.54 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997382884061764,
"eval_steps": 100,
"global_step": 955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": -2.980285167694092,
"logits/rejected": -2.87275767326355,
"logps/chosen": -313.4390563964844,
"logps/rejected": -236.1754150390625,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 0.0
},
{
"epoch": 0.1,
"learning_rate": 4.9767171129220025e-06,
"logits/chosen": -2.8189077377319336,
"logits/rejected": -2.828260660171509,
"logps/chosen": -285.1784362792969,
"logps/rejected": -271.1445617675781,
"loss": 0.6803,
"pred_label": 0.0,
"rewards/accuracies": 0.5738636255264282,
"rewards/chosen": 0.01021653600037098,
"rewards/margins": 0.029710056260228157,
"rewards/rejected": -0.019493522122502327,
"step": 100,
"use_label": 0.0
},
{
"epoch": 0.21,
"learning_rate": 4.394644935972061e-06,
"logits/chosen": -2.8255398273468018,
"logits/rejected": -2.813203811645508,
"logps/chosen": -278.2582092285156,
"logps/rejected": -262.1144714355469,
"loss": 0.6236,
"pred_label": 0.0,
"rewards/accuracies": 0.6868749856948853,
"rewards/chosen": 0.05237884446978569,
"rewards/margins": 0.20995216071605682,
"rewards/rejected": -0.15757331252098083,
"step": 200,
"use_label": 0.0
},
{
"epoch": 0.31,
"learning_rate": 3.812572759022119e-06,
"logits/chosen": -2.809633255004883,
"logits/rejected": -2.80940580368042,
"logps/chosen": -283.6641540527344,
"logps/rejected": -257.23779296875,
"loss": 0.5901,
"pred_label": 0.0,
"rewards/accuracies": 0.6956250071525574,
"rewards/chosen": 0.04034877195954323,
"rewards/margins": 0.3768764138221741,
"rewards/rejected": -0.33652764558792114,
"step": 300,
"use_label": 0.0
},
{
"epoch": 0.42,
"learning_rate": 3.2305005820721774e-06,
"logits/chosen": -2.821267604827881,
"logits/rejected": -2.8011868000030518,
"logps/chosen": -284.2022399902344,
"logps/rejected": -267.48358154296875,
"loss": 0.5757,
"pred_label": 0.0,
"rewards/accuracies": 0.7081249952316284,
"rewards/chosen": 0.022827474400401115,
"rewards/margins": 0.4984941780567169,
"rewards/rejected": -0.47566673159599304,
"step": 400,
"use_label": 0.0
},
{
"epoch": 0.52,
"learning_rate": 2.6484284051222353e-06,
"logits/chosen": -2.7993886470794678,
"logits/rejected": -2.7845959663391113,
"logps/chosen": -270.09637451171875,
"logps/rejected": -256.13458251953125,
"loss": 0.5798,
"pred_label": 0.0,
"rewards/accuracies": 0.6612499952316284,
"rewards/chosen": -0.004339172504842281,
"rewards/margins": 0.4106566905975342,
"rewards/rejected": -0.41499578952789307,
"step": 500,
"use_label": 0.0
},
{
"epoch": 0.63,
"learning_rate": 2.0663562281722936e-06,
"logits/chosen": -2.824444532394409,
"logits/rejected": -2.808506965637207,
"logps/chosen": -284.5413513183594,
"logps/rejected": -270.1356201171875,
"loss": 0.5643,
"pred_label": 0.0,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 0.06895674020051956,
"rewards/margins": 0.5562920570373535,
"rewards/rejected": -0.4873352348804474,
"step": 600,
"use_label": 0.0
},
{
"epoch": 0.73,
"learning_rate": 1.4842840512223516e-06,
"logits/chosen": -2.8160746097564697,
"logits/rejected": -2.795208692550659,
"logps/chosen": -280.79766845703125,
"logps/rejected": -253.41058349609375,
"loss": 0.5631,
"pred_label": 0.0,
"rewards/accuracies": 0.7231249809265137,
"rewards/chosen": 0.07620371133089066,
"rewards/margins": 0.5511507391929626,
"rewards/rejected": -0.474947065114975,
"step": 700,
"use_label": 0.0
},
{
"epoch": 0.84,
"learning_rate": 9.022118742724098e-07,
"logits/chosen": -2.8168938159942627,
"logits/rejected": -2.8180816173553467,
"logps/chosen": -287.77679443359375,
"logps/rejected": -266.3511657714844,
"loss": 0.5548,
"pred_label": 0.0,
"rewards/accuracies": 0.7112500071525574,
"rewards/chosen": 0.09802371263504028,
"rewards/margins": 0.5919383764266968,
"rewards/rejected": -0.49391472339630127,
"step": 800,
"use_label": 0.0
},
{
"epoch": 0.94,
"learning_rate": 3.2013969732246806e-07,
"logits/chosen": -2.808311700820923,
"logits/rejected": -2.812298536300659,
"logps/chosen": -276.976806640625,
"logps/rejected": -261.3187561035156,
"loss": 0.5689,
"pred_label": 0.0,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": 0.05313897505402565,
"rewards/margins": 0.551435649394989,
"rewards/rejected": -0.4982966482639313,
"step": 900,
"use_label": 0.0
},
{
"epoch": 1.0,
"eval_logits/chosen": -2.8302695751190186,
"eval_logits/rejected": -2.8291618824005127,
"eval_logps/chosen": -283.5841369628906,
"eval_logps/rejected": -264.64422607421875,
"eval_loss": 0.5655555129051208,
"eval_pred_label": 0.0,
"eval_rewards/accuracies": 0.699999988079071,
"eval_rewards/chosen": 0.06732505559921265,
"eval_rewards/margins": 0.6017746329307556,
"eval_rewards/rejected": -0.534449577331543,
"eval_runtime": 457.335,
"eval_samples_per_second": 4.373,
"eval_steps_per_second": 0.273,
"eval_use_label": 0.0,
"step": 955
},
{
"epoch": 1.0,
"step": 955,
"total_flos": 0.0,
"train_loss": 0.5870625535855118,
"train_runtime": 25384.4334,
"train_samples_per_second": 2.408,
"train_steps_per_second": 0.038
}
],
"logging_steps": 100,
"max_steps": 955,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}