{ "best_metric": 0.08161530643701553, "best_model_checkpoint": "/data/wheld3/mt5-small-pointer-cstop_artificial/checkpoint-800", "epoch": 428.5019607843137, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 28.5, "learning_rate": 0.0009333333333333333, "loss": 2.08, "step": 200 }, { "epoch": 28.5, "eval_exact_match": 0.03756708407871199, "eval_loss": 0.331952303647995, "eval_runtime": 10.6308, "eval_samples_per_second": 52.583, "eval_steps_per_second": 3.292, "step": 200 }, { "epoch": 57.13, "learning_rate": 0.0008666666666666667, "loss": 0.272, "step": 400 }, { "epoch": 57.13, "eval_exact_match": 0.2629695885509839, "eval_loss": 0.10836012661457062, "eval_runtime": 10.9563, "eval_samples_per_second": 51.021, "eval_steps_per_second": 3.194, "step": 400 }, { "epoch": 85.63, "learning_rate": 0.0008, "loss": 0.0789, "step": 600 }, { "epoch": 85.63, "eval_exact_match": 0.3184257602862254, "eval_loss": 0.08299708366394043, "eval_runtime": 11.1629, "eval_samples_per_second": 50.077, "eval_steps_per_second": 3.135, "step": 600 }, { "epoch": 114.25, "learning_rate": 0.0007333333333333333, "loss": 0.0355, "step": 800 }, { "epoch": 114.25, "eval_exact_match": 0.3363148479427549, "eval_loss": 0.08161530643701553, "eval_runtime": 11.0139, "eval_samples_per_second": 50.754, "eval_steps_per_second": 3.178, "step": 800 }, { "epoch": 142.75, "learning_rate": 0.0006666666666666666, "loss": 0.0207, "step": 1000 }, { "epoch": 142.75, "eval_exact_match": 0.3291592128801431, "eval_loss": 0.08680771291255951, "eval_runtime": 11.1063, "eval_samples_per_second": 50.332, "eval_steps_per_second": 3.151, "step": 1000 }, { "epoch": 171.38, "learning_rate": 0.0006, "loss": 0.014, "step": 1200 }, { "epoch": 171.38, "eval_exact_match": 0.33989266547406083, "eval_loss": 0.09518682211637497, "eval_runtime": 10.9222, "eval_samples_per_second": 51.18, "eval_steps_per_second": 3.204, "step": 1200 }, { "epoch": 199.88, "learning_rate": 0.0005333333333333334, "loss": 0.0099, "step": 1400 }, { "epoch": 199.88, "eval_exact_match": 0.33810375670840787, "eval_loss": 0.1089245155453682, "eval_runtime": 10.85, "eval_samples_per_second": 51.521, "eval_steps_per_second": 3.226, "step": 1400 }, { "epoch": 228.5, "learning_rate": 0.00046666666666666666, "loss": 0.0076, "step": 1600 }, { "epoch": 228.5, "eval_exact_match": 0.33810375670840787, "eval_loss": 0.11035683006048203, "eval_runtime": 11.2414, "eval_samples_per_second": 49.727, "eval_steps_per_second": 3.113, "step": 1600 }, { "epoch": 257.13, "learning_rate": 0.0004, "loss": 0.0057, "step": 1800 }, { "epoch": 257.13, "eval_exact_match": 0.3291592128801431, "eval_loss": 0.11526591330766678, "eval_runtime": 10.9866, "eval_samples_per_second": 50.88, "eval_steps_per_second": 3.186, "step": 1800 }, { "epoch": 285.63, "learning_rate": 0.0003333333333333333, "loss": 0.0048, "step": 2000 }, { "epoch": 285.63, "eval_exact_match": 0.33273703041144903, "eval_loss": 0.11532965302467346, "eval_runtime": 11.0349, "eval_samples_per_second": 50.658, "eval_steps_per_second": 3.172, "step": 2000 }, { "epoch": 314.25, "learning_rate": 0.0002666666666666667, "loss": 0.004, "step": 2200 }, { "epoch": 314.25, "eval_exact_match": 0.3363148479427549, "eval_loss": 0.1206129789352417, "eval_runtime": 10.904, "eval_samples_per_second": 51.266, "eval_steps_per_second": 3.21, "step": 2200 }, { "epoch": 342.75, "learning_rate": 0.0002, "loss": 0.0032, "step": 2400 }, { "epoch": 342.75, "eval_exact_match": 0.3363148479427549, "eval_loss": 0.1229204535484314, "eval_runtime": 10.9966, "eval_samples_per_second": 50.834, "eval_steps_per_second": 3.183, "step": 2400 }, { "epoch": 371.38, "learning_rate": 0.00013333333333333334, "loss": 0.0028, "step": 2600 }, { "epoch": 371.38, "eval_exact_match": 0.33810375670840787, "eval_loss": 0.12684644758701324, "eval_runtime": 10.9769, "eval_samples_per_second": 50.925, "eval_steps_per_second": 3.189, "step": 2600 }, { "epoch": 399.88, "learning_rate": 6.666666666666667e-05, "loss": 0.0023, "step": 2800 }, { "epoch": 399.88, "eval_exact_match": 0.33989266547406083, "eval_loss": 0.12882176041603088, "eval_runtime": 10.9618, "eval_samples_per_second": 50.995, "eval_steps_per_second": 3.193, "step": 2800 }, { "epoch": 428.5, "learning_rate": 0.0, "loss": 0.002, "step": 3000 }, { "epoch": 428.5, "eval_exact_match": 0.33989266547406083, "eval_loss": 0.12921586632728577, "eval_runtime": 11.4552, "eval_samples_per_second": 48.799, "eval_steps_per_second": 3.055, "step": 3000 }, { "epoch": 428.5, "step": 3000, "total_flos": 3.4731986474409984e+16, "train_loss": 0.16957045908768972, "train_runtime": 14253.2821, "train_samples_per_second": 107.765, "train_steps_per_second": 0.21 } ], "max_steps": 3000, "num_train_epochs": 429, "total_flos": 3.4731986474409984e+16, "trial_name": null, "trial_params": null }