{
  "best_metric": 0.3236161470413208,
  "best_model_checkpoint": "uk-mt5-small-gec/checkpoint-2550",
  "epoch": 3.0141843971631204,
  "global_step": 2550,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18,
      "learning_rate": 0.0009822695035460994,
      "loss": 1.7105,
      "step": 150
    },
    {
      "epoch": 0.18,
      "eval_google_bleu": 0.4163395132458002,
      "eval_loss": 0.5110855102539062,
      "eval_runtime": 63.8854,
      "eval_samples_per_second": 47.037,
      "eval_steps_per_second": 1.471,
      "step": 150
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0009645390070921985,
      "loss": 0.5262,
      "step": 300
    },
    {
      "epoch": 0.35,
      "eval_google_bleu": 0.4177246109040804,
      "eval_loss": 0.4555143713951111,
      "eval_runtime": 64.7255,
      "eval_samples_per_second": 46.427,
      "eval_steps_per_second": 1.452,
      "step": 300
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0009468085106382979,
      "loss": 0.4747,
      "step": 450
    },
    {
      "epoch": 0.53,
      "eval_google_bleu": 0.4190545982950951,
      "eval_loss": 0.39340439438819885,
      "eval_runtime": 65.0885,
      "eval_samples_per_second": 46.168,
      "eval_steps_per_second": 1.444,
      "step": 450
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0009290780141843972,
      "loss": 0.4397,
      "step": 600
    },
    {
      "epoch": 0.71,
      "eval_google_bleu": 0.4194098007404113,
      "eval_loss": 0.38720330595970154,
      "eval_runtime": 64.9943,
      "eval_samples_per_second": 46.235,
      "eval_steps_per_second": 1.446,
      "step": 600
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.0009113475177304966,
      "loss": 0.4011,
      "step": 750
    },
    {
      "epoch": 0.89,
      "eval_google_bleu": 0.4206793462964932,
      "eval_loss": 0.3757050335407257,
      "eval_runtime": 64.9697,
      "eval_samples_per_second": 46.252,
      "eval_steps_per_second": 1.447,
      "step": 750
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.0008936170212765957,
      "loss": 0.3596,
      "step": 900
    },
    {
      "epoch": 1.06,
      "eval_google_bleu": 0.41963648653723384,
      "eval_loss": 0.3498598635196686,
      "eval_runtime": 65.0647,
      "eval_samples_per_second": 46.185,
      "eval_steps_per_second": 1.445,
      "step": 900
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.000875886524822695,
      "loss": 0.3304,
      "step": 1050
    },
    {
      "epoch": 1.24,
      "eval_google_bleu": 0.41878056569628735,
      "eval_loss": 0.35025209188461304,
      "eval_runtime": 65.045,
      "eval_samples_per_second": 46.199,
      "eval_steps_per_second": 1.445,
      "step": 1050
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.0008581560283687944,
      "loss": 0.3459,
      "step": 1200
    },
    {
      "epoch": 1.42,
      "eval_google_bleu": 0.4209402244713899,
      "eval_loss": 0.3510892987251282,
      "eval_runtime": 65.018,
      "eval_samples_per_second": 46.218,
      "eval_steps_per_second": 1.446,
      "step": 1200
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0008404255319148936,
      "loss": 0.3481,
      "step": 1350
    },
    {
      "epoch": 1.6,
      "eval_google_bleu": 0.421413227953932,
      "eval_loss": 0.3524581491947174,
      "eval_runtime": 64.891,
      "eval_samples_per_second": 46.308,
      "eval_steps_per_second": 1.449,
      "step": 1350
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.0008226950354609929,
      "loss": 0.3474,
      "step": 1500
    },
    {
      "epoch": 1.77,
      "eval_google_bleu": 0.4201196404522534,
      "eval_loss": 0.34515607357025146,
      "eval_runtime": 65.0925,
      "eval_samples_per_second": 46.165,
      "eval_steps_per_second": 1.444,
      "step": 1500
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.0008049645390070922,
      "loss": 0.3337,
      "step": 1650
    },
    {
      "epoch": 1.95,
      "eval_google_bleu": 0.4206501794224663,
      "eval_loss": 0.349977970123291,
      "eval_runtime": 64.8413,
      "eval_samples_per_second": 46.344,
      "eval_steps_per_second": 1.45,
      "step": 1650
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.0007872340425531915,
      "loss": 0.2976,
      "step": 1800
    },
    {
      "epoch": 2.13,
      "eval_google_bleu": 0.4203146331993136,
      "eval_loss": 0.34735825657844543,
      "eval_runtime": 65.2646,
      "eval_samples_per_second": 46.043,
      "eval_steps_per_second": 1.44,
      "step": 1800
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.0007695035460992907,
      "loss": 0.2791,
      "step": 1950
    },
    {
      "epoch": 2.3,
      "eval_google_bleu": 0.41895903795565576,
      "eval_loss": 0.35174980759620667,
      "eval_runtime": 65.0609,
      "eval_samples_per_second": 46.187,
      "eval_steps_per_second": 1.445,
      "step": 1950
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.0007517730496453901,
      "loss": 0.2882,
      "step": 2100
    },
    {
      "epoch": 2.48,
      "eval_google_bleu": 0.41924575092997335,
      "eval_loss": 0.3675171136856079,
      "eval_runtime": 65.2859,
      "eval_samples_per_second": 46.028,
      "eval_steps_per_second": 1.44,
      "step": 2100
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.0007340425531914894,
      "loss": 0.2835,
      "step": 2250
    },
    {
      "epoch": 2.66,
      "eval_google_bleu": 0.4194459403609403,
      "eval_loss": 0.33840152621269226,
      "eval_runtime": 65.1731,
      "eval_samples_per_second": 46.108,
      "eval_steps_per_second": 1.442,
      "step": 2250
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.0007163120567375887,
      "loss": 0.2791,
      "step": 2400
    },
    {
      "epoch": 2.84,
      "eval_google_bleu": 0.4186248912097476,
      "eval_loss": 0.3389674425125122,
      "eval_runtime": 65.3042,
      "eval_samples_per_second": 46.015,
      "eval_steps_per_second": 1.439,
      "step": 2400
    },
    {
      "epoch": 3.01,
      "learning_rate": 0.0006985815602836879,
      "loss": 0.286,
      "step": 2550
    },
    {
      "epoch": 3.01,
      "eval_google_bleu": 0.4192825533140128,
      "eval_loss": 0.3236161470413208,
      "eval_runtime": 65.0214,
      "eval_samples_per_second": 46.216,
      "eval_steps_per_second": 1.446,
      "step": 2550
    }
  ],
  "max_steps": 8460,
  "num_train_epochs": 10,
  "total_flos": 2801591305076736.0,
  "trial_name": null,
  "trial_params": null
}