{ "best_metric": 0.30846577882766724, "best_model_checkpoint": "uk-mt5-small-gec/checkpoint-750", "epoch": 1.950354609929078, "global_step": 1650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 0.0009822695035460994, "loss": 1.1542, "step": 150 }, { "epoch": 0.18, "eval_google_bleu": 0.41727698590897433, "eval_loss": 0.40988656878471375, "eval_runtime": 182.204, "eval_samples_per_second": 16.493, "eval_steps_per_second": 0.516, "step": 150 }, { "epoch": 0.35, "learning_rate": 0.0009645390070921985, "loss": 0.4079, "step": 300 }, { "epoch": 0.35, "eval_google_bleu": 0.417700589351588, "eval_loss": 0.3693748116493225, "eval_runtime": 184.6312, "eval_samples_per_second": 16.276, "eval_steps_per_second": 0.509, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0009468085106382979, "loss": 0.3947, "step": 450 }, { "epoch": 0.53, "eval_google_bleu": 0.42065639483903294, "eval_loss": 0.3382872939109802, "eval_runtime": 182.6943, "eval_samples_per_second": 16.448, "eval_steps_per_second": 0.515, "step": 450 }, { "epoch": 0.71, "learning_rate": 0.0009290780141843972, "loss": 0.3699, "step": 600 }, { "epoch": 0.71, "eval_google_bleu": 0.4213219269019765, "eval_loss": 0.329349160194397, "eval_runtime": 182.8557, "eval_samples_per_second": 16.434, "eval_steps_per_second": 0.514, "step": 600 }, { "epoch": 0.89, "learning_rate": 0.0009113475177304966, "loss": 0.384, "step": 750 }, { "epoch": 0.89, "eval_google_bleu": 0.41960295446754137, "eval_loss": 0.30846577882766724, "eval_runtime": 182.9587, "eval_samples_per_second": 16.424, "eval_steps_per_second": 0.514, "step": 750 }, { "epoch": 1.06, "learning_rate": 0.0008936170212765957, "loss": 0.301, "step": 900 }, { "epoch": 1.06, "eval_google_bleu": 0.41823031122631055, "eval_loss": 0.30877065658569336, "eval_runtime": 182.6834, "eval_samples_per_second": 16.449, "eval_steps_per_second": 0.515, "step": 900 }, { "epoch": 1.24, "learning_rate": 0.000875886524822695, "loss": 0.2613, "step": 1050 }, { "epoch": 1.24, "eval_google_bleu": 0.41964051019514087, "eval_loss": 0.31575024127960205, "eval_runtime": 182.4274, "eval_samples_per_second": 16.472, "eval_steps_per_second": 0.515, "step": 1050 }, { "epoch": 1.42, "learning_rate": 0.0008581560283687944, "loss": 0.2715, "step": 1200 }, { "epoch": 1.42, "eval_google_bleu": 0.41992548295187704, "eval_loss": 0.3233252167701721, "eval_runtime": 182.3422, "eval_samples_per_second": 16.48, "eval_steps_per_second": 0.516, "step": 1200 }, { "epoch": 1.6, "learning_rate": 0.0008404255319148936, "loss": 0.2861, "step": 1350 }, { "epoch": 1.6, "eval_google_bleu": 0.41982505463129355, "eval_loss": 0.31152355670928955, "eval_runtime": 182.3799, "eval_samples_per_second": 16.477, "eval_steps_per_second": 0.515, "step": 1350 }, { "epoch": 1.77, "learning_rate": 0.0008226950354609929, "loss": 0.2903, "step": 1500 }, { "epoch": 1.77, "eval_google_bleu": 0.41824821175792654, "eval_loss": 0.33445030450820923, "eval_runtime": 182.1203, "eval_samples_per_second": 16.5, "eval_steps_per_second": 0.516, "step": 1500 }, { "epoch": 1.95, "learning_rate": 0.0008049645390070922, "loss": 0.2811, "step": 1650 }, { "epoch": 1.95, "eval_google_bleu": 0.42025291085487376, "eval_loss": 0.3121837377548218, "eval_runtime": 182.2332, "eval_samples_per_second": 16.49, "eval_steps_per_second": 0.516, "step": 1650 } ], "max_steps": 8460, "num_train_epochs": 10, "total_flos": 6751858532244480.0, "trial_name": null, "trial_params": null }