{ "best_metric": 0.3236161470413208, "best_model_checkpoint": "uk-mt5-small-gec/checkpoint-2550", "epoch": 3.0141843971631204, "global_step": 2550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 0.0009822695035460994, "loss": 1.7105, "step": 150 }, { "epoch": 0.18, "eval_google_bleu": 0.4163395132458002, "eval_loss": 0.5110855102539062, "eval_runtime": 63.8854, "eval_samples_per_second": 47.037, "eval_steps_per_second": 1.471, "step": 150 }, { "epoch": 0.35, "learning_rate": 0.0009645390070921985, "loss": 0.5262, "step": 300 }, { "epoch": 0.35, "eval_google_bleu": 0.4177246109040804, "eval_loss": 0.4555143713951111, "eval_runtime": 64.7255, "eval_samples_per_second": 46.427, "eval_steps_per_second": 1.452, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0009468085106382979, "loss": 0.4747, "step": 450 }, { "epoch": 0.53, "eval_google_bleu": 0.4190545982950951, "eval_loss": 0.39340439438819885, "eval_runtime": 65.0885, "eval_samples_per_second": 46.168, "eval_steps_per_second": 1.444, "step": 450 }, { "epoch": 0.71, "learning_rate": 0.0009290780141843972, "loss": 0.4397, "step": 600 }, { "epoch": 0.71, "eval_google_bleu": 0.4194098007404113, "eval_loss": 0.38720330595970154, "eval_runtime": 64.9943, "eval_samples_per_second": 46.235, "eval_steps_per_second": 1.446, "step": 600 }, { "epoch": 0.89, "learning_rate": 0.0009113475177304966, "loss": 0.4011, "step": 750 }, { "epoch": 0.89, "eval_google_bleu": 0.4206793462964932, "eval_loss": 0.3757050335407257, "eval_runtime": 64.9697, "eval_samples_per_second": 46.252, "eval_steps_per_second": 1.447, "step": 750 }, { "epoch": 1.06, "learning_rate": 0.0008936170212765957, "loss": 0.3596, "step": 900 }, { "epoch": 1.06, "eval_google_bleu": 0.41963648653723384, "eval_loss": 0.3498598635196686, "eval_runtime": 65.0647, "eval_samples_per_second": 46.185, "eval_steps_per_second": 1.445, "step": 900 }, { "epoch": 1.24, "learning_rate": 0.000875886524822695, "loss": 0.3304, "step": 1050 }, { "epoch": 1.24, "eval_google_bleu": 0.41878056569628735, "eval_loss": 0.35025209188461304, "eval_runtime": 65.045, "eval_samples_per_second": 46.199, "eval_steps_per_second": 1.445, "step": 1050 }, { "epoch": 1.42, "learning_rate": 0.0008581560283687944, "loss": 0.3459, "step": 1200 }, { "epoch": 1.42, "eval_google_bleu": 0.4209402244713899, "eval_loss": 0.3510892987251282, "eval_runtime": 65.018, "eval_samples_per_second": 46.218, "eval_steps_per_second": 1.446, "step": 1200 }, { "epoch": 1.6, "learning_rate": 0.0008404255319148936, "loss": 0.3481, "step": 1350 }, { "epoch": 1.6, "eval_google_bleu": 0.421413227953932, "eval_loss": 0.3524581491947174, "eval_runtime": 64.891, "eval_samples_per_second": 46.308, "eval_steps_per_second": 1.449, "step": 1350 }, { "epoch": 1.77, "learning_rate": 0.0008226950354609929, "loss": 0.3474, "step": 1500 }, { "epoch": 1.77, "eval_google_bleu": 0.4201196404522534, "eval_loss": 0.34515607357025146, "eval_runtime": 65.0925, "eval_samples_per_second": 46.165, "eval_steps_per_second": 1.444, "step": 1500 }, { "epoch": 1.95, "learning_rate": 0.0008049645390070922, "loss": 0.3337, "step": 1650 }, { "epoch": 1.95, "eval_google_bleu": 0.4206501794224663, "eval_loss": 0.349977970123291, "eval_runtime": 64.8413, "eval_samples_per_second": 46.344, "eval_steps_per_second": 1.45, "step": 1650 }, { "epoch": 2.13, "learning_rate": 0.0007872340425531915, "loss": 0.2976, "step": 1800 }, { "epoch": 2.13, "eval_google_bleu": 0.4203146331993136, "eval_loss": 0.34735825657844543, "eval_runtime": 65.2646, "eval_samples_per_second": 46.043, "eval_steps_per_second": 1.44, "step": 1800 }, { "epoch": 2.3, "learning_rate": 0.0007695035460992907, "loss": 0.2791, "step": 1950 }, { "epoch": 2.3, "eval_google_bleu": 0.41895903795565576, "eval_loss": 0.35174980759620667, "eval_runtime": 65.0609, "eval_samples_per_second": 46.187, "eval_steps_per_second": 1.445, "step": 1950 }, { "epoch": 2.48, "learning_rate": 0.0007517730496453901, "loss": 0.2882, "step": 2100 }, { "epoch": 2.48, "eval_google_bleu": 0.41924575092997335, "eval_loss": 0.3675171136856079, "eval_runtime": 65.2859, "eval_samples_per_second": 46.028, "eval_steps_per_second": 1.44, "step": 2100 }, { "epoch": 2.66, "learning_rate": 0.0007340425531914894, "loss": 0.2835, "step": 2250 }, { "epoch": 2.66, "eval_google_bleu": 0.4194459403609403, "eval_loss": 0.33840152621269226, "eval_runtime": 65.1731, "eval_samples_per_second": 46.108, "eval_steps_per_second": 1.442, "step": 2250 }, { "epoch": 2.84, "learning_rate": 0.0007163120567375887, "loss": 0.2791, "step": 2400 }, { "epoch": 2.84, "eval_google_bleu": 0.4186248912097476, "eval_loss": 0.3389674425125122, "eval_runtime": 65.3042, "eval_samples_per_second": 46.015, "eval_steps_per_second": 1.439, "step": 2400 }, { "epoch": 3.01, "learning_rate": 0.0006985815602836879, "loss": 0.286, "step": 2550 }, { "epoch": 3.01, "eval_google_bleu": 0.4192825533140128, "eval_loss": 0.3236161470413208, "eval_runtime": 65.0214, "eval_samples_per_second": 46.216, "eval_steps_per_second": 1.446, "step": 2550 } ], "max_steps": 8460, "num_train_epochs": 10, "total_flos": 2801591305076736.0, "trial_name": null, "trial_params": null }