sft-mistral-v1-original-data / trainer_state.json
hllj's picture
Model save
c523564
raw
history blame
3.73 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.394648829431438,
"eval_steps": 200,
"global_step": 236,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.8728,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.8168,
"step": 10
},
{
"epoch": 0.07,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.6667,
"step": 20
},
{
"epoch": 0.1,
"learning_rate": 5e-05,
"loss": 0.5432,
"step": 30
},
{
"epoch": 0.13,
"learning_rate": 4.996177016978633e-05,
"loss": 0.4616,
"step": 40
},
{
"epoch": 0.17,
"learning_rate": 4.984719760073877e-05,
"loss": 0.4572,
"step": 50
},
{
"epoch": 0.2,
"learning_rate": 4.9656632700046265e-05,
"loss": 0.4327,
"step": 60
},
{
"epoch": 0.23,
"learning_rate": 4.9390658288812675e-05,
"loss": 0.401,
"step": 70
},
{
"epoch": 0.27,
"learning_rate": 4.90500878195646e-05,
"loss": 0.4179,
"step": 80
},
{
"epoch": 0.3,
"learning_rate": 4.8635962888399254e-05,
"loss": 0.4091,
"step": 90
},
{
"epoch": 0.33,
"learning_rate": 4.820140360457198e-05,
"loss": 0.4178,
"step": 100
},
{
"epoch": 0.37,
"learning_rate": 4.7651197369406566e-05,
"loss": 0.4046,
"step": 110
},
{
"epoch": 1.01,
"learning_rate": 4.703171501987564e-05,
"loss": 0.396,
"step": 120
},
{
"epoch": 1.04,
"learning_rate": 4.6344851172382647e-05,
"loss": 0.3232,
"step": 130
},
{
"epoch": 1.07,
"learning_rate": 4.5592706521989154e-05,
"loss": 0.3301,
"step": 140
},
{
"epoch": 1.11,
"learning_rate": 4.477758141767761e-05,
"loss": 0.333,
"step": 150
},
{
"epoch": 1.14,
"learning_rate": 4.390196882699528e-05,
"loss": 0.3361,
"step": 160
},
{
"epoch": 1.17,
"learning_rate": 4.296854671159614e-05,
"loss": 0.3169,
"step": 170
},
{
"epoch": 1.21,
"learning_rate": 4.198016983699933e-05,
"loss": 0.3168,
"step": 180
},
{
"epoch": 1.24,
"learning_rate": 4.0939861041613107e-05,
"loss": 0.3351,
"step": 190
},
{
"epoch": 1.27,
"learning_rate": 3.9850801991726846e-05,
"loss": 0.3103,
"step": 200
},
{
"epoch": 1.27,
"eval_loss": 0.5224232077598572,
"eval_runtime": 6.8718,
"eval_samples_per_second": 20.373,
"eval_steps_per_second": 5.093,
"step": 200
},
{
"epoch": 1.31,
"learning_rate": 3.871632345074615e-05,
"loss": 0.3372,
"step": 210
},
{
"epoch": 1.34,
"learning_rate": 3.753989509243122e-05,
"loss": 0.3065,
"step": 220
},
{
"epoch": 1.37,
"learning_rate": 3.632511488929382e-05,
"loss": 0.3254,
"step": 230
},
{
"epoch": 1.39,
"step": 236,
"total_flos": 4.241630717752115e+16,
"train_loss": 0.40742398091291976,
"train_runtime": 500.0224,
"train_samples_per_second": 4.784,
"train_steps_per_second": 1.196
}
],
"logging_steps": 10,
"max_steps": 598,
"num_train_epochs": 2,
"save_steps": 200,
"total_flos": 4.241630717752115e+16,
"trial_name": null,
"trial_params": null
}