the-the-pigs / trainer_state.json
AlekseyKorshuk's picture
huggingartists
97e4998
{
"best_metric": 1.621703028678894,
"best_model_checkpoint": "output/the-the-pigs/checkpoint-132",
"epoch": 11.0,
"global_step": 132,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.38,
"learning_rate": 9.292589525111794e-05,
"loss": 2.5049,
"step": 5
},
{
"epoch": 0.77,
"learning_rate": 1.725216267546246e-05,
"loss": 2.2501,
"step": 10
},
{
"epoch": 1.0,
"eval_loss": 1.966219425201416,
"eval_runtime": 0.848,
"eval_samples_per_second": 22.405,
"eval_steps_per_second": 3.538,
"step": 13
},
{
"epoch": 1.15,
"learning_rate": 7.857716640189785e-06,
"loss": 2.2537,
"step": 15
},
{
"epoch": 1.54,
"learning_rate": 7.686881626551516e-05,
"loss": 2.1305,
"step": 20
},
{
"epoch": 1.92,
"learning_rate": 0.00013520660867542716,
"loss": 2.0792,
"step": 25
},
{
"epoch": 2.0,
"eval_loss": 1.9108868837356567,
"eval_runtime": 0.8428,
"eval_samples_per_second": 22.544,
"eval_steps_per_second": 3.56,
"step": 26
},
{
"epoch": 2.31,
"learning_rate": 0.00010756924162575734,
"loss": 2.1293,
"step": 30
},
{
"epoch": 2.69,
"learning_rate": 2.9630758374242683e-05,
"loss": 2.0062,
"step": 35
},
{
"epoch": 3.0,
"eval_loss": 1.830873966217041,
"eval_runtime": 0.8441,
"eval_samples_per_second": 22.51,
"eval_steps_per_second": 3.554,
"step": 39
},
{
"epoch": 3.08,
"learning_rate": 1.9933913245728396e-06,
"loss": 1.9324,
"step": 40
},
{
"epoch": 3.46,
"learning_rate": 6.033118373448485e-05,
"loss": 1.9227,
"step": 45
},
{
"epoch": 3.85,
"learning_rate": 0.00012934228335981018,
"loss": 1.9492,
"step": 50
},
{
"epoch": 4.0,
"eval_loss": 1.8247166872024536,
"eval_runtime": 0.8513,
"eval_samples_per_second": 22.319,
"eval_steps_per_second": 3.524,
"step": 52
},
{
"epoch": 4.23,
"learning_rate": 0.00011994783732453755,
"loss": 1.8601,
"step": 55
},
{
"epoch": 4.62,
"learning_rate": 4.42741047488822e-05,
"loss": 1.7486,
"step": 60
},
{
"epoch": 5.0,
"learning_rate": 0.0,
"loss": 1.8994,
"step": 65
},
{
"epoch": 5.0,
"eval_loss": 1.7911409139633179,
"eval_runtime": 0.8376,
"eval_samples_per_second": 22.685,
"eval_steps_per_second": 3.582,
"step": 65
},
{
"epoch": 5.38,
"learning_rate": 4.4274104748882125e-05,
"loss": 1.7558,
"step": 70
},
{
"epoch": 5.77,
"learning_rate": 0.00011994783732453749,
"loss": 1.8425,
"step": 75
},
{
"epoch": 6.0,
"eval_loss": 1.780040979385376,
"eval_runtime": 0.8465,
"eval_samples_per_second": 22.444,
"eval_steps_per_second": 3.544,
"step": 78
},
{
"epoch": 6.15,
"learning_rate": 0.00012934228335981018,
"loss": 1.72,
"step": 80
},
{
"epoch": 6.54,
"learning_rate": 6.033118373448493e-05,
"loss": 1.7991,
"step": 85
},
{
"epoch": 6.92,
"learning_rate": 1.9933913245728244e-06,
"loss": 1.7086,
"step": 90
},
{
"epoch": 7.0,
"eval_loss": 1.7639576196670532,
"eval_runtime": 0.8412,
"eval_samples_per_second": 22.587,
"eval_steps_per_second": 3.566,
"step": 91
},
{
"epoch": 7.31,
"learning_rate": 2.963075837424261e-05,
"loss": 1.6218,
"step": 95
},
{
"epoch": 7.69,
"learning_rate": 0.00010756924162575728,
"loss": 1.6723,
"step": 100
},
{
"epoch": 8.0,
"eval_loss": 1.7437282800674438,
"eval_runtime": 0.8437,
"eval_samples_per_second": 22.521,
"eval_steps_per_second": 3.556,
"step": 104
},
{
"epoch": 8.08,
"learning_rate": 0.0001352066086754272,
"loss": 1.7365,
"step": 105
},
{
"epoch": 8.46,
"learning_rate": 7.68688162655152e-05,
"loss": 1.6506,
"step": 110
},
{
"epoch": 8.85,
"learning_rate": 7.85771664018977e-06,
"loss": 1.5713,
"step": 115
},
{
"epoch": 9.0,
"eval_loss": 1.7347867488861084,
"eval_runtime": 0.851,
"eval_samples_per_second": 22.327,
"eval_steps_per_second": 3.525,
"step": 117
},
{
"epoch": 10.0,
"learning_rate": 0.0001372,
"loss": 1.6342,
"step": 120
},
{
"epoch": 10.0,
"eval_loss": 1.6437487602233887,
"eval_runtime": 0.9748,
"eval_samples_per_second": 22.569,
"eval_steps_per_second": 3.078,
"step": 120
},
{
"epoch": 10.42,
"learning_rate": 8.635498649403298e-05,
"loss": 1.7365,
"step": 125
},
{
"epoch": 10.83,
"learning_rate": 9.190657300387535e-06,
"loss": 1.6443,
"step": 130
},
{
"epoch": 11.0,
"eval_loss": 1.621703028678894,
"eval_runtime": 1.0058,
"eval_samples_per_second": 21.873,
"eval_steps_per_second": 2.983,
"step": 132
}
],
"max_steps": 1308,
"num_train_epochs": 109,
"total_flos": 130384723968000.0,
"trial_name": null,
"trial_params": null
}