uk-mt5-base-gec-tokenized / trainer_state.json
SashkoMolodec
initial commit
381c5aa
raw
history blame contribute delete
No virus
4.47 kB
{
"best_metric": 0.30846577882766724,
"best_model_checkpoint": "uk-mt5-small-gec/checkpoint-750",
"epoch": 1.950354609929078,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18,
"learning_rate": 0.0009822695035460994,
"loss": 1.1542,
"step": 150
},
{
"epoch": 0.18,
"eval_google_bleu": 0.41727698590897433,
"eval_loss": 0.40988656878471375,
"eval_runtime": 182.204,
"eval_samples_per_second": 16.493,
"eval_steps_per_second": 0.516,
"step": 150
},
{
"epoch": 0.35,
"learning_rate": 0.0009645390070921985,
"loss": 0.4079,
"step": 300
},
{
"epoch": 0.35,
"eval_google_bleu": 0.417700589351588,
"eval_loss": 0.3693748116493225,
"eval_runtime": 184.6312,
"eval_samples_per_second": 16.276,
"eval_steps_per_second": 0.509,
"step": 300
},
{
"epoch": 0.53,
"learning_rate": 0.0009468085106382979,
"loss": 0.3947,
"step": 450
},
{
"epoch": 0.53,
"eval_google_bleu": 0.42065639483903294,
"eval_loss": 0.3382872939109802,
"eval_runtime": 182.6943,
"eval_samples_per_second": 16.448,
"eval_steps_per_second": 0.515,
"step": 450
},
{
"epoch": 0.71,
"learning_rate": 0.0009290780141843972,
"loss": 0.3699,
"step": 600
},
{
"epoch": 0.71,
"eval_google_bleu": 0.4213219269019765,
"eval_loss": 0.329349160194397,
"eval_runtime": 182.8557,
"eval_samples_per_second": 16.434,
"eval_steps_per_second": 0.514,
"step": 600
},
{
"epoch": 0.89,
"learning_rate": 0.0009113475177304966,
"loss": 0.384,
"step": 750
},
{
"epoch": 0.89,
"eval_google_bleu": 0.41960295446754137,
"eval_loss": 0.30846577882766724,
"eval_runtime": 182.9587,
"eval_samples_per_second": 16.424,
"eval_steps_per_second": 0.514,
"step": 750
},
{
"epoch": 1.06,
"learning_rate": 0.0008936170212765957,
"loss": 0.301,
"step": 900
},
{
"epoch": 1.06,
"eval_google_bleu": 0.41823031122631055,
"eval_loss": 0.30877065658569336,
"eval_runtime": 182.6834,
"eval_samples_per_second": 16.449,
"eval_steps_per_second": 0.515,
"step": 900
},
{
"epoch": 1.24,
"learning_rate": 0.000875886524822695,
"loss": 0.2613,
"step": 1050
},
{
"epoch": 1.24,
"eval_google_bleu": 0.41964051019514087,
"eval_loss": 0.31575024127960205,
"eval_runtime": 182.4274,
"eval_samples_per_second": 16.472,
"eval_steps_per_second": 0.515,
"step": 1050
},
{
"epoch": 1.42,
"learning_rate": 0.0008581560283687944,
"loss": 0.2715,
"step": 1200
},
{
"epoch": 1.42,
"eval_google_bleu": 0.41992548295187704,
"eval_loss": 0.3233252167701721,
"eval_runtime": 182.3422,
"eval_samples_per_second": 16.48,
"eval_steps_per_second": 0.516,
"step": 1200
},
{
"epoch": 1.6,
"learning_rate": 0.0008404255319148936,
"loss": 0.2861,
"step": 1350
},
{
"epoch": 1.6,
"eval_google_bleu": 0.41982505463129355,
"eval_loss": 0.31152355670928955,
"eval_runtime": 182.3799,
"eval_samples_per_second": 16.477,
"eval_steps_per_second": 0.515,
"step": 1350
},
{
"epoch": 1.77,
"learning_rate": 0.0008226950354609929,
"loss": 0.2903,
"step": 1500
},
{
"epoch": 1.77,
"eval_google_bleu": 0.41824821175792654,
"eval_loss": 0.33445030450820923,
"eval_runtime": 182.1203,
"eval_samples_per_second": 16.5,
"eval_steps_per_second": 0.516,
"step": 1500
},
{
"epoch": 1.95,
"learning_rate": 0.0008049645390070922,
"loss": 0.2811,
"step": 1650
},
{
"epoch": 1.95,
"eval_google_bleu": 0.42025291085487376,
"eval_loss": 0.3121837377548218,
"eval_runtime": 182.2332,
"eval_samples_per_second": 16.49,
"eval_steps_per_second": 0.516,
"step": 1650
}
],
"max_steps": 8460,
"num_train_epochs": 10,
"total_flos": 6751858532244480.0,
"trial_name": null,
"trial_params": null
}