uk-mt5-small-gec-tokenized / trainer_state.json
SashkoMolodec
initial commit
41d25f6
{
"best_metric": 0.3236161470413208,
"best_model_checkpoint": "uk-mt5-small-gec/checkpoint-2550",
"epoch": 3.0141843971631204,
"global_step": 2550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18,
"learning_rate": 0.0009822695035460994,
"loss": 1.7105,
"step": 150
},
{
"epoch": 0.18,
"eval_google_bleu": 0.4163395132458002,
"eval_loss": 0.5110855102539062,
"eval_runtime": 63.8854,
"eval_samples_per_second": 47.037,
"eval_steps_per_second": 1.471,
"step": 150
},
{
"epoch": 0.35,
"learning_rate": 0.0009645390070921985,
"loss": 0.5262,
"step": 300
},
{
"epoch": 0.35,
"eval_google_bleu": 0.4177246109040804,
"eval_loss": 0.4555143713951111,
"eval_runtime": 64.7255,
"eval_samples_per_second": 46.427,
"eval_steps_per_second": 1.452,
"step": 300
},
{
"epoch": 0.53,
"learning_rate": 0.0009468085106382979,
"loss": 0.4747,
"step": 450
},
{
"epoch": 0.53,
"eval_google_bleu": 0.4190545982950951,
"eval_loss": 0.39340439438819885,
"eval_runtime": 65.0885,
"eval_samples_per_second": 46.168,
"eval_steps_per_second": 1.444,
"step": 450
},
{
"epoch": 0.71,
"learning_rate": 0.0009290780141843972,
"loss": 0.4397,
"step": 600
},
{
"epoch": 0.71,
"eval_google_bleu": 0.4194098007404113,
"eval_loss": 0.38720330595970154,
"eval_runtime": 64.9943,
"eval_samples_per_second": 46.235,
"eval_steps_per_second": 1.446,
"step": 600
},
{
"epoch": 0.89,
"learning_rate": 0.0009113475177304966,
"loss": 0.4011,
"step": 750
},
{
"epoch": 0.89,
"eval_google_bleu": 0.4206793462964932,
"eval_loss": 0.3757050335407257,
"eval_runtime": 64.9697,
"eval_samples_per_second": 46.252,
"eval_steps_per_second": 1.447,
"step": 750
},
{
"epoch": 1.06,
"learning_rate": 0.0008936170212765957,
"loss": 0.3596,
"step": 900
},
{
"epoch": 1.06,
"eval_google_bleu": 0.41963648653723384,
"eval_loss": 0.3498598635196686,
"eval_runtime": 65.0647,
"eval_samples_per_second": 46.185,
"eval_steps_per_second": 1.445,
"step": 900
},
{
"epoch": 1.24,
"learning_rate": 0.000875886524822695,
"loss": 0.3304,
"step": 1050
},
{
"epoch": 1.24,
"eval_google_bleu": 0.41878056569628735,
"eval_loss": 0.35025209188461304,
"eval_runtime": 65.045,
"eval_samples_per_second": 46.199,
"eval_steps_per_second": 1.445,
"step": 1050
},
{
"epoch": 1.42,
"learning_rate": 0.0008581560283687944,
"loss": 0.3459,
"step": 1200
},
{
"epoch": 1.42,
"eval_google_bleu": 0.4209402244713899,
"eval_loss": 0.3510892987251282,
"eval_runtime": 65.018,
"eval_samples_per_second": 46.218,
"eval_steps_per_second": 1.446,
"step": 1200
},
{
"epoch": 1.6,
"learning_rate": 0.0008404255319148936,
"loss": 0.3481,
"step": 1350
},
{
"epoch": 1.6,
"eval_google_bleu": 0.421413227953932,
"eval_loss": 0.3524581491947174,
"eval_runtime": 64.891,
"eval_samples_per_second": 46.308,
"eval_steps_per_second": 1.449,
"step": 1350
},
{
"epoch": 1.77,
"learning_rate": 0.0008226950354609929,
"loss": 0.3474,
"step": 1500
},
{
"epoch": 1.77,
"eval_google_bleu": 0.4201196404522534,
"eval_loss": 0.34515607357025146,
"eval_runtime": 65.0925,
"eval_samples_per_second": 46.165,
"eval_steps_per_second": 1.444,
"step": 1500
},
{
"epoch": 1.95,
"learning_rate": 0.0008049645390070922,
"loss": 0.3337,
"step": 1650
},
{
"epoch": 1.95,
"eval_google_bleu": 0.4206501794224663,
"eval_loss": 0.349977970123291,
"eval_runtime": 64.8413,
"eval_samples_per_second": 46.344,
"eval_steps_per_second": 1.45,
"step": 1650
},
{
"epoch": 2.13,
"learning_rate": 0.0007872340425531915,
"loss": 0.2976,
"step": 1800
},
{
"epoch": 2.13,
"eval_google_bleu": 0.4203146331993136,
"eval_loss": 0.34735825657844543,
"eval_runtime": 65.2646,
"eval_samples_per_second": 46.043,
"eval_steps_per_second": 1.44,
"step": 1800
},
{
"epoch": 2.3,
"learning_rate": 0.0007695035460992907,
"loss": 0.2791,
"step": 1950
},
{
"epoch": 2.3,
"eval_google_bleu": 0.41895903795565576,
"eval_loss": 0.35174980759620667,
"eval_runtime": 65.0609,
"eval_samples_per_second": 46.187,
"eval_steps_per_second": 1.445,
"step": 1950
},
{
"epoch": 2.48,
"learning_rate": 0.0007517730496453901,
"loss": 0.2882,
"step": 2100
},
{
"epoch": 2.48,
"eval_google_bleu": 0.41924575092997335,
"eval_loss": 0.3675171136856079,
"eval_runtime": 65.2859,
"eval_samples_per_second": 46.028,
"eval_steps_per_second": 1.44,
"step": 2100
},
{
"epoch": 2.66,
"learning_rate": 0.0007340425531914894,
"loss": 0.2835,
"step": 2250
},
{
"epoch": 2.66,
"eval_google_bleu": 0.4194459403609403,
"eval_loss": 0.33840152621269226,
"eval_runtime": 65.1731,
"eval_samples_per_second": 46.108,
"eval_steps_per_second": 1.442,
"step": 2250
},
{
"epoch": 2.84,
"learning_rate": 0.0007163120567375887,
"loss": 0.2791,
"step": 2400
},
{
"epoch": 2.84,
"eval_google_bleu": 0.4186248912097476,
"eval_loss": 0.3389674425125122,
"eval_runtime": 65.3042,
"eval_samples_per_second": 46.015,
"eval_steps_per_second": 1.439,
"step": 2400
},
{
"epoch": 3.01,
"learning_rate": 0.0006985815602836879,
"loss": 0.286,
"step": 2550
},
{
"epoch": 3.01,
"eval_google_bleu": 0.4192825533140128,
"eval_loss": 0.3236161470413208,
"eval_runtime": 65.0214,
"eval_samples_per_second": 46.216,
"eval_steps_per_second": 1.446,
"step": 2550
}
],
"max_steps": 8460,
"num_train_epochs": 10,
"total_flos": 2801591305076736.0,
"trial_name": null,
"trial_params": null
}