flan-t5-small_en-no / trainer_state.json
navjordj's picture
End of training
f6e00e4
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"global_step": 4860,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.06,
"learning_rate": 3.971193415637861e-05,
"loss": 2.8803,
"step": 500
},
{
"epoch": 4.12,
"learning_rate": 2.9423868312757202e-05,
"loss": 2.3089,
"step": 1000
},
{
"epoch": 6.17,
"learning_rate": 1.91358024691358e-05,
"loss": 2.116,
"step": 1500
},
{
"epoch": 8.23,
"learning_rate": 8.847736625514404e-06,
"loss": 2.0249,
"step": 2000
},
{
"epoch": 8.64,
"learning_rate": 2.839506172839506e-05,
"loss": 1.984,
"step": 2100
},
{
"epoch": 9.05,
"learning_rate": 2.736625514403292e-05,
"loss": 1.9696,
"step": 2200
},
{
"epoch": 9.47,
"learning_rate": 2.6337448559670787e-05,
"loss": 1.9453,
"step": 2300
},
{
"epoch": 9.88,
"learning_rate": 2.5308641975308646e-05,
"loss": 1.9285,
"step": 2400
},
{
"epoch": 10.29,
"learning_rate": 2.4279835390946505e-05,
"loss": 1.9107,
"step": 2500
},
{
"epoch": 10.7,
"learning_rate": 2.3251028806584364e-05,
"loss": 1.8961,
"step": 2600
},
{
"epoch": 11.11,
"learning_rate": 2.2222222222222223e-05,
"loss": 1.8783,
"step": 2700
},
{
"epoch": 11.52,
"learning_rate": 2.1193415637860082e-05,
"loss": 1.8653,
"step": 2800
},
{
"epoch": 11.93,
"learning_rate": 2.016460905349794e-05,
"loss": 1.8566,
"step": 2900
},
{
"epoch": 12.35,
"learning_rate": 1.91358024691358e-05,
"loss": 1.8443,
"step": 3000
},
{
"epoch": 12.76,
"learning_rate": 1.8106995884773663e-05,
"loss": 1.833,
"step": 3100
},
{
"epoch": 13.17,
"learning_rate": 1.7078189300411522e-05,
"loss": 1.8222,
"step": 3200
},
{
"epoch": 13.58,
"learning_rate": 1.604938271604938e-05,
"loss": 1.8154,
"step": 3300
},
{
"epoch": 13.99,
"learning_rate": 1.5020576131687244e-05,
"loss": 1.8063,
"step": 3400
},
{
"epoch": 14.4,
"learning_rate": 1.3991769547325103e-05,
"loss": 1.7957,
"step": 3500
},
{
"epoch": 14.81,
"learning_rate": 1.2962962962962962e-05,
"loss": 1.7933,
"step": 3600
},
{
"epoch": 15.23,
"learning_rate": 1.1934156378600823e-05,
"loss": 1.7856,
"step": 3700
},
{
"epoch": 15.64,
"learning_rate": 1.0905349794238684e-05,
"loss": 1.7801,
"step": 3800
},
{
"epoch": 16.05,
"learning_rate": 9.876543209876543e-06,
"loss": 1.7724,
"step": 3900
},
{
"epoch": 16.46,
"learning_rate": 8.847736625514404e-06,
"loss": 1.7685,
"step": 4000
},
{
"epoch": 16.87,
"learning_rate": 7.818930041152265e-06,
"loss": 1.7627,
"step": 4100
},
{
"epoch": 17.28,
"learning_rate": 6.790123456790123e-06,
"loss": 1.7664,
"step": 4200
},
{
"epoch": 17.7,
"learning_rate": 5.761316872427984e-06,
"loss": 1.7572,
"step": 4300
},
{
"epoch": 18.11,
"learning_rate": 4.732510288065844e-06,
"loss": 1.7504,
"step": 4400
},
{
"epoch": 18.52,
"learning_rate": 3.7037037037037037e-06,
"loss": 1.7536,
"step": 4500
},
{
"epoch": 18.93,
"learning_rate": 2.6748971193415637e-06,
"loss": 1.7503,
"step": 4600
},
{
"epoch": 19.34,
"learning_rate": 1.6460905349794242e-06,
"loss": 1.7459,
"step": 4700
},
{
"epoch": 19.75,
"learning_rate": 6.17283950617284e-07,
"loss": 1.7519,
"step": 4800
},
{
"epoch": 20.0,
"step": 4860,
"total_flos": 4.53936534237696e+16,
"train_loss": 1.07285234270763,
"train_runtime": 3358.559,
"train_samples_per_second": 369.843,
"train_steps_per_second": 1.447
}
],
"max_steps": 4860,
"num_train_epochs": 20,
"total_flos": 4.53936534237696e+16,
"trial_name": null,
"trial_params": null
}