{
  "best_metric": 0.30846577882766724,
  "best_model_checkpoint": "uk-mt5-small-gec/checkpoint-750",
  "epoch": 1.950354609929078,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18,
      "learning_rate": 0.0009822695035460994,
      "loss": 1.1542,
      "step": 150
    },
    {
      "epoch": 0.18,
      "eval_google_bleu": 0.41727698590897433,
      "eval_loss": 0.40988656878471375,
      "eval_runtime": 182.204,
      "eval_samples_per_second": 16.493,
      "eval_steps_per_second": 0.516,
      "step": 150
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0009645390070921985,
      "loss": 0.4079,
      "step": 300
    },
    {
      "epoch": 0.35,
      "eval_google_bleu": 0.417700589351588,
      "eval_loss": 0.3693748116493225,
      "eval_runtime": 184.6312,
      "eval_samples_per_second": 16.276,
      "eval_steps_per_second": 0.509,
      "step": 300
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0009468085106382979,
      "loss": 0.3947,
      "step": 450
    },
    {
      "epoch": 0.53,
      "eval_google_bleu": 0.42065639483903294,
      "eval_loss": 0.3382872939109802,
      "eval_runtime": 182.6943,
      "eval_samples_per_second": 16.448,
      "eval_steps_per_second": 0.515,
      "step": 450
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0009290780141843972,
      "loss": 0.3699,
      "step": 600
    },
    {
      "epoch": 0.71,
      "eval_google_bleu": 0.4213219269019765,
      "eval_loss": 0.329349160194397,
      "eval_runtime": 182.8557,
      "eval_samples_per_second": 16.434,
      "eval_steps_per_second": 0.514,
      "step": 600
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.0009113475177304966,
      "loss": 0.384,
      "step": 750
    },
    {
      "epoch": 0.89,
      "eval_google_bleu": 0.41960295446754137,
      "eval_loss": 0.30846577882766724,
      "eval_runtime": 182.9587,
      "eval_samples_per_second": 16.424,
      "eval_steps_per_second": 0.514,
      "step": 750
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.0008936170212765957,
      "loss": 0.301,
      "step": 900
    },
    {
      "epoch": 1.06,
      "eval_google_bleu": 0.41823031122631055,
      "eval_loss": 0.30877065658569336,
      "eval_runtime": 182.6834,
      "eval_samples_per_second": 16.449,
      "eval_steps_per_second": 0.515,
      "step": 900
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.000875886524822695,
      "loss": 0.2613,
      "step": 1050
    },
    {
      "epoch": 1.24,
      "eval_google_bleu": 0.41964051019514087,
      "eval_loss": 0.31575024127960205,
      "eval_runtime": 182.4274,
      "eval_samples_per_second": 16.472,
      "eval_steps_per_second": 0.515,
      "step": 1050
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.0008581560283687944,
      "loss": 0.2715,
      "step": 1200
    },
    {
      "epoch": 1.42,
      "eval_google_bleu": 0.41992548295187704,
      "eval_loss": 0.3233252167701721,
      "eval_runtime": 182.3422,
      "eval_samples_per_second": 16.48,
      "eval_steps_per_second": 0.516,
      "step": 1200
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0008404255319148936,
      "loss": 0.2861,
      "step": 1350
    },
    {
      "epoch": 1.6,
      "eval_google_bleu": 0.41982505463129355,
      "eval_loss": 0.31152355670928955,
      "eval_runtime": 182.3799,
      "eval_samples_per_second": 16.477,
      "eval_steps_per_second": 0.515,
      "step": 1350
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.0008226950354609929,
      "loss": 0.2903,
      "step": 1500
    },
    {
      "epoch": 1.77,
      "eval_google_bleu": 0.41824821175792654,
      "eval_loss": 0.33445030450820923,
      "eval_runtime": 182.1203,
      "eval_samples_per_second": 16.5,
      "eval_steps_per_second": 0.516,
      "step": 1500
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.0008049645390070922,
      "loss": 0.2811,
      "step": 1650
    },
    {
      "epoch": 1.95,
      "eval_google_bleu": 0.42025291085487376,
      "eval_loss": 0.3121837377548218,
      "eval_runtime": 182.2332,
      "eval_samples_per_second": 16.49,
      "eval_steps_per_second": 0.516,
      "step": 1650
    }
  ],
  "max_steps": 8460,
  "num_train_epochs": 10,
  "total_flos": 6751858532244480.0,
  "trial_name": null,
  "trial_params": null
}