{ "best_metric": 0.09972350299358368, "best_model_checkpoint": "/data/wheld3/mt5-base-adv-cstop_artificial/checkpoint-400", "epoch": 187.49805447470817, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 12.5, "learning_rate": 0.0009333333333333333, "loss": 1.8954, "step": 200 }, { "epoch": 12.5, "eval_exact_match": 0.49016100178890876, "eval_loss": 0.10028170049190521, "eval_runtime": 33.6526, "eval_samples_per_second": 16.611, "eval_steps_per_second": 2.08, "step": 200 }, { "epoch": 25.0, "learning_rate": 0.0008666666666666667, "loss": 0.3392, "step": 400 }, { "epoch": 25.0, "eval_exact_match": 0.5670840787119857, "eval_loss": 0.09972350299358368, "eval_runtime": 27.4321, "eval_samples_per_second": 20.378, "eval_steps_per_second": 2.552, "step": 400 }, { "epoch": 37.5, "learning_rate": 0.0008, "loss": 0.3092, "step": 600 }, { "epoch": 37.5, "eval_exact_match": 0.5652951699463328, "eval_loss": 0.10668598115444183, "eval_runtime": 29.8338, "eval_samples_per_second": 18.737, "eval_steps_per_second": 2.346, "step": 600 }, { "epoch": 50.0, "learning_rate": 0.0007333333333333333, "loss": 0.3062, "step": 800 }, { "epoch": 50.0, "eval_exact_match": 0.5688729874776386, "eval_loss": 0.1245071217417717, "eval_runtime": 34.3916, "eval_samples_per_second": 16.254, "eval_steps_per_second": 2.035, "step": 800 }, { "epoch": 62.5, "learning_rate": 0.0006666666666666666, "loss": 0.5401, "step": 1000 }, { "epoch": 62.5, "eval_exact_match": 0.5581395348837209, "eval_loss": 0.10960090905427933, "eval_runtime": 27.6178, "eval_samples_per_second": 20.241, "eval_steps_per_second": 2.535, "step": 1000 }, { "epoch": 75.0, "learning_rate": 0.0006, "loss": 0.3075, "step": 1200 }, { "epoch": 75.0, "eval_exact_match": 0.5581395348837209, "eval_loss": 0.1196974366903305, "eval_runtime": 27.702, "eval_samples_per_second": 20.179, "eval_steps_per_second": 2.527, "step": 1200 }, { "epoch": 87.5, "learning_rate": 0.0005333333333333334, "loss": 0.3039, "step": 1400 }, { "epoch": 87.5, "eval_exact_match": 0.5688729874776386, "eval_loss": 0.13387714326381683, "eval_runtime": 27.7679, "eval_samples_per_second": 20.131, "eval_steps_per_second": 2.521, "step": 1400 }, { "epoch": 100.0, "learning_rate": 0.00046666666666666666, "loss": 0.3041, "step": 1600 }, { "epoch": 100.0, "eval_exact_match": 0.5635062611806798, "eval_loss": 0.14850349724292755, "eval_runtime": 30.2352, "eval_samples_per_second": 18.488, "eval_steps_per_second": 2.315, "step": 1600 }, { "epoch": 112.5, "learning_rate": 0.0004, "loss": 0.3036, "step": 1800 }, { "epoch": 112.5, "eval_exact_match": 0.5581395348837209, "eval_loss": 0.1497897505760193, "eval_runtime": 27.7022, "eval_samples_per_second": 20.179, "eval_steps_per_second": 2.527, "step": 1800 }, { "epoch": 125.0, "learning_rate": 0.0003333333333333333, "loss": 0.304, "step": 2000 }, { "epoch": 125.0, "eval_exact_match": 0.5617173524150268, "eval_loss": 0.14541126787662506, "eval_runtime": 27.6179, "eval_samples_per_second": 20.24, "eval_steps_per_second": 2.535, "step": 2000 }, { "epoch": 137.5, "learning_rate": 0.0002666666666666667, "loss": 0.3022, "step": 2200 }, { "epoch": 137.5, "eval_exact_match": 0.5688729874776386, "eval_loss": 0.15164676308631897, "eval_runtime": 27.6474, "eval_samples_per_second": 20.219, "eval_steps_per_second": 2.532, "step": 2200 }, { "epoch": 150.0, "learning_rate": 0.0002, "loss": 0.3032, "step": 2400 }, { "epoch": 150.0, "eval_exact_match": 0.5635062611806798, "eval_loss": 0.13605555891990662, "eval_runtime": 27.6026, "eval_samples_per_second": 20.252, "eval_steps_per_second": 2.536, "step": 2400 }, { "epoch": 162.5, "learning_rate": 0.00013333333333333334, "loss": 0.3035, "step": 2600 }, { "epoch": 162.5, "eval_exact_match": 0.5635062611806798, "eval_loss": 0.14268916845321655, "eval_runtime": 27.6725, "eval_samples_per_second": 20.201, "eval_steps_per_second": 2.53, "step": 2600 }, { "epoch": 175.0, "learning_rate": 6.666666666666667e-05, "loss": 0.3001, "step": 2800 }, { "epoch": 175.0, "eval_exact_match": 0.5635062611806798, "eval_loss": 0.14656579494476318, "eval_runtime": 28.9718, "eval_samples_per_second": 19.295, "eval_steps_per_second": 2.416, "step": 2800 }, { "epoch": 187.5, "learning_rate": 0.0, "loss": 0.3048, "step": 3000 }, { "epoch": 187.5, "eval_exact_match": 0.5635062611806798, "eval_loss": 0.1470813751220703, "eval_runtime": 27.5607, "eval_samples_per_second": 20.282, "eval_steps_per_second": 2.54, "step": 3000 }, { "epoch": 187.5, "step": 3000, "total_flos": 6.444875739755491e+16, "train_loss": 0.4284551607767741, "train_runtime": 33652.2435, "train_samples_per_second": 45.643, "train_steps_per_second": 0.089 } ], "max_steps": 3000, "num_train_epochs": 188, "total_flos": 6.444875739755491e+16, "trial_name": null, "trial_params": null }