{ "best_metric": 0.0727909728884697, "best_model_checkpoint": "/data/wheld3/mt5-base-pointer-adv-cstop_artificial/checkpoint-400", "epoch": 187.49805447470817, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 12.5, "learning_rate": 0.0009333333333333333, "loss": 1.7423, "step": 200 }, { "epoch": 12.5, "eval_exact_match": 0.23971377459749552, "eval_loss": 0.11730749905109406, "eval_runtime": 27.8131, "eval_samples_per_second": 20.098, "eval_steps_per_second": 2.517, "step": 200 }, { "epoch": 25.0, "learning_rate": 0.0008666666666666667, "loss": 0.3678, "step": 400 }, { "epoch": 25.0, "eval_exact_match": 0.3363148479427549, "eval_loss": 0.0727909728884697, "eval_runtime": 29.0466, "eval_samples_per_second": 19.245, "eval_steps_per_second": 2.41, "step": 400 }, { "epoch": 37.5, "learning_rate": 0.0008, "loss": 0.3202, "step": 600 }, { "epoch": 37.5, "eval_exact_match": 0.33810375670840787, "eval_loss": 0.08794570714235306, "eval_runtime": 29.1974, "eval_samples_per_second": 19.146, "eval_steps_per_second": 2.397, "step": 600 }, { "epoch": 50.0, "learning_rate": 0.0007333333333333333, "loss": 0.3452, "step": 800 }, { "epoch": 50.0, "eval_exact_match": 0.3363148479427549, "eval_loss": 0.09075574576854706, "eval_runtime": 28.6647, "eval_samples_per_second": 19.501, "eval_steps_per_second": 2.442, "step": 800 }, { "epoch": 62.5, "learning_rate": 0.0006666666666666666, "loss": 0.3099, "step": 1000 }, { "epoch": 62.5, "eval_exact_match": 0.3434704830053667, "eval_loss": 0.10556001961231232, "eval_runtime": 28.9715, "eval_samples_per_second": 19.295, "eval_steps_per_second": 2.416, "step": 1000 }, { "epoch": 75.0, "learning_rate": 0.0006, "loss": 0.3057, "step": 1200 }, { "epoch": 75.0, "eval_exact_match": 0.3470483005366726, "eval_loss": 0.11086518317461014, "eval_runtime": 29.2047, "eval_samples_per_second": 19.141, "eval_steps_per_second": 2.397, "step": 1200 }, { "epoch": 87.5, "learning_rate": 0.0005333333333333334, "loss": 0.3045, "step": 1400 }, { "epoch": 87.5, "eval_exact_match": 0.34525939177101966, "eval_loss": 0.1273432970046997, "eval_runtime": 29.0031, "eval_samples_per_second": 19.274, "eval_steps_per_second": 2.414, "step": 1400 }, { "epoch": 100.0, "learning_rate": 0.00046666666666666666, "loss": 0.3052, "step": 1600 }, { "epoch": 100.0, "eval_exact_match": 0.3416815742397138, "eval_loss": 0.10654404759407043, "eval_runtime": 28.7998, "eval_samples_per_second": 19.41, "eval_steps_per_second": 2.431, "step": 1600 }, { "epoch": 112.5, "learning_rate": 0.0004, "loss": 0.3037, "step": 1800 }, { "epoch": 112.5, "eval_exact_match": 0.33810375670840787, "eval_loss": 0.13873372972011566, "eval_runtime": 29.2765, "eval_samples_per_second": 19.094, "eval_steps_per_second": 2.391, "step": 1800 }, { "epoch": 125.0, "learning_rate": 0.0003333333333333333, "loss": 0.3036, "step": 2000 }, { "epoch": 125.0, "eval_exact_match": 0.34525939177101966, "eval_loss": 0.1421414017677307, "eval_runtime": 28.8117, "eval_samples_per_second": 19.402, "eval_steps_per_second": 2.43, "step": 2000 }, { "epoch": 137.5, "learning_rate": 0.0002666666666666667, "loss": 0.3023, "step": 2200 }, { "epoch": 137.5, "eval_exact_match": 0.33989266547406083, "eval_loss": 0.16489343345165253, "eval_runtime": 30.9662, "eval_samples_per_second": 18.052, "eval_steps_per_second": 2.261, "step": 2200 }, { "epoch": 150.0, "learning_rate": 0.0002, "loss": 0.3028, "step": 2400 }, { "epoch": 150.0, "eval_exact_match": 0.33989266547406083, "eval_loss": 0.1573849618434906, "eval_runtime": 29.0042, "eval_samples_per_second": 19.273, "eval_steps_per_second": 2.413, "step": 2400 }, { "epoch": 162.5, "learning_rate": 0.00013333333333333334, "loss": 0.3025, "step": 2600 }, { "epoch": 162.5, "eval_exact_match": 0.33989266547406083, "eval_loss": 0.15625949203968048, "eval_runtime": 28.8424, "eval_samples_per_second": 19.381, "eval_steps_per_second": 2.427, "step": 2600 }, { "epoch": 175.0, "learning_rate": 6.666666666666667e-05, "loss": 0.3017, "step": 2800 }, { "epoch": 175.0, "eval_exact_match": 0.33989266547406083, "eval_loss": 0.1589040458202362, "eval_runtime": 28.7706, "eval_samples_per_second": 19.43, "eval_steps_per_second": 2.433, "step": 2800 }, { "epoch": 187.5, "learning_rate": 0.0, "loss": 0.302, "step": 3000 }, { "epoch": 187.5, "eval_exact_match": 0.3416815742397138, "eval_loss": 0.15874968469142914, "eval_runtime": 33.2641, "eval_samples_per_second": 16.805, "eval_steps_per_second": 2.104, "step": 3000 }, { "epoch": 187.5, "step": 3000, "total_flos": 6.474719775139762e+16, "train_loss": 0.4079610900878906, "train_runtime": 34544.7098, "train_samples_per_second": 44.464, "train_steps_per_second": 0.087 } ], "max_steps": 3000, "num_train_epochs": 188, "total_flos": 6.474719775139762e+16, "trial_name": null, "trial_params": null }