{ "best_metric": 0.13407668471336365, "best_model_checkpoint": "/data/wheld3/mt5-small-pointer-adv-mtop/checkpoint-3000", "epoch": 16.304012214776485, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.09, "learning_rate": 0.0009333333333333333, "loss": 2.1628, "step": 200 }, { "epoch": 1.09, "eval_exact_match": 0.0022371364653243847, "eval_loss": 0.7204959988594055, "eval_runtime": 75.5591, "eval_samples_per_second": 29.58, "eval_steps_per_second": 3.706, "step": 200 }, { "epoch": 2.17, "learning_rate": 0.0008666666666666667, "loss": 1.1208, "step": 400 }, { "epoch": 2.17, "eval_exact_match": 0.0013422818791946308, "eval_loss": 0.63932865858078, "eval_runtime": 78.5979, "eval_samples_per_second": 28.436, "eval_steps_per_second": 3.562, "step": 400 }, { "epoch": 3.26, "learning_rate": 0.0008, "loss": 0.8675, "step": 600 }, { "epoch": 3.26, "eval_exact_match": 0.0026845637583892616, "eval_loss": 0.5905107259750366, "eval_runtime": 71.821, "eval_samples_per_second": 31.119, "eval_steps_per_second": 3.899, "step": 600 }, { "epoch": 4.35, "learning_rate": 0.0007333333333333333, "loss": 1.8729, "step": 800 }, { "epoch": 4.35, "eval_exact_match": 0.003131991051454139, "eval_loss": 0.5726307034492493, "eval_runtime": 78.7696, "eval_samples_per_second": 28.374, "eval_steps_per_second": 3.555, "step": 800 }, { "epoch": 5.43, "learning_rate": 0.0006666666666666666, "loss": 3.5417, "step": 1000 }, { "epoch": 5.43, "eval_exact_match": 0.006711409395973154, "eval_loss": 0.5370941758155823, "eval_runtime": 81.2185, "eval_samples_per_second": 27.518, "eval_steps_per_second": 3.447, "step": 1000 }, { "epoch": 6.52, "learning_rate": 0.0006, "loss": 0.9087, "step": 1200 }, { "epoch": 6.52, "eval_exact_match": 0.11185682326621924, "eval_loss": 0.3511998653411865, "eval_runtime": 82.0065, "eval_samples_per_second": 27.254, "eval_steps_per_second": 3.414, "step": 1200 }, { "epoch": 7.61, "learning_rate": 0.0005333333333333334, "loss": 1.2224, "step": 1400 }, { "epoch": 7.61, "eval_exact_match": 0.19105145413870245, "eval_loss": 0.27385014295578003, "eval_runtime": 82.7127, "eval_samples_per_second": 27.021, "eval_steps_per_second": 3.385, "step": 1400 }, { "epoch": 8.69, "learning_rate": 0.00046666666666666666, "loss": 0.7597, "step": 1600 }, { "epoch": 8.69, "eval_exact_match": 0.30156599552572705, "eval_loss": 0.21514081954956055, "eval_runtime": 82.6161, "eval_samples_per_second": 27.053, "eval_steps_per_second": 3.389, "step": 1600 }, { "epoch": 9.78, "learning_rate": 0.0004, "loss": 0.6981, "step": 1800 }, { "epoch": 9.78, "eval_exact_match": 0.3749440715883669, "eval_loss": 0.17362748086452484, "eval_runtime": 82.0736, "eval_samples_per_second": 27.232, "eval_steps_per_second": 3.412, "step": 1800 }, { "epoch": 10.87, "learning_rate": 0.0003333333333333333, "loss": 0.4779, "step": 2000 }, { "epoch": 10.87, "eval_exact_match": 0.41655480984340043, "eval_loss": 0.15482261776924133, "eval_runtime": 82.6077, "eval_samples_per_second": 27.056, "eval_steps_per_second": 3.39, "step": 2000 }, { "epoch": 11.96, "learning_rate": 0.0002666666666666667, "loss": 0.4397, "step": 2200 }, { "epoch": 11.96, "eval_exact_match": 0.45100671140939597, "eval_loss": 0.13771148025989532, "eval_runtime": 79.9201, "eval_samples_per_second": 27.965, "eval_steps_per_second": 3.503, "step": 2200 }, { "epoch": 13.04, "learning_rate": 0.0002, "loss": 0.4101, "step": 2400 }, { "epoch": 13.04, "eval_exact_match": 0.4196868008948546, "eval_loss": 0.14801675081253052, "eval_runtime": 81.9056, "eval_samples_per_second": 27.288, "eval_steps_per_second": 3.419, "step": 2400 }, { "epoch": 14.13, "learning_rate": 0.00013333333333333334, "loss": 0.3323, "step": 2600 }, { "epoch": 14.13, "eval_exact_match": 0.43982102908277404, "eval_loss": 0.13956378400325775, "eval_runtime": 80.3363, "eval_samples_per_second": 27.821, "eval_steps_per_second": 3.485, "step": 2600 }, { "epoch": 15.22, "learning_rate": 6.666666666666667e-05, "loss": 0.2565, "step": 2800 }, { "epoch": 15.22, "eval_exact_match": 0.45234899328859063, "eval_loss": 0.13505251705646515, "eval_runtime": 80.6724, "eval_samples_per_second": 27.705, "eval_steps_per_second": 3.471, "step": 2800 }, { "epoch": 16.3, "learning_rate": 0.0, "loss": 0.2108, "step": 3000 }, { "epoch": 16.3, "eval_exact_match": 0.4541387024608501, "eval_loss": 0.13407668471336365, "eval_runtime": 80.1776, "eval_samples_per_second": 27.876, "eval_steps_per_second": 3.492, "step": 3000 }, { "epoch": 16.3, "step": 3000, "total_flos": 3.3508283441823776e+16, "train_loss": 1.0188030764261882, "train_runtime": 23687.0417, "train_samples_per_second": 64.846, "train_steps_per_second": 0.127 } ], "max_steps": 3000, "num_train_epochs": 17, "total_flos": 3.3508283441823776e+16, "trial_name": null, "trial_params": null }