{ "best_metric": 0.02550993673503399, "best_model_checkpoint": "/data/wheld3/mt5-base-pointer-adv-top_v2/checkpoint-3000", "epoch": 6.160056430151656, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.41, "learning_rate": 0.0009333333333333333, "loss": 2.2938, "step": 200 }, { "epoch": 0.41, "eval_exact_match": 0.0011655011655011655, "eval_loss": 0.5532176494598389, "eval_runtime": 914.7564, "eval_samples_per_second": 18.759, "eval_steps_per_second": 2.345, "step": 200 }, { "epoch": 0.82, "learning_rate": 0.0008666666666666667, "loss": 0.671, "step": 400 }, { "epoch": 0.82, "eval_exact_match": 0.16095571095571096, "eval_loss": 0.16243064403533936, "eval_runtime": 902.0916, "eval_samples_per_second": 19.022, "eval_steps_per_second": 2.378, "step": 400 }, { "epoch": 1.23, "learning_rate": 0.0008, "loss": 0.5276, "step": 600 }, { "epoch": 1.23, "eval_exact_match": 0.21567599067599066, "eval_loss": 0.06916385143995285, "eval_runtime": 882.9446, "eval_samples_per_second": 19.435, "eval_steps_per_second": 2.429, "step": 600 }, { "epoch": 1.64, "learning_rate": 0.0007333333333333333, "loss": 0.4196, "step": 800 }, { "epoch": 1.64, "eval_exact_match": 0.22587412587412586, "eval_loss": 0.04908030480146408, "eval_runtime": 882.5419, "eval_samples_per_second": 19.444, "eval_steps_per_second": 2.43, "step": 800 }, { "epoch": 2.05, "learning_rate": 0.0006666666666666666, "loss": 0.3593, "step": 1000 }, { "epoch": 2.05, "eval_exact_match": 0.22913752913752913, "eval_loss": 0.03997402638196945, "eval_runtime": 888.3883, "eval_samples_per_second": 19.316, "eval_steps_per_second": 2.414, "step": 1000 }, { "epoch": 2.46, "learning_rate": 0.0006, "loss": 0.3471, "step": 1200 }, { "epoch": 2.46, "eval_exact_match": 0.22966200466200465, "eval_loss": 0.03349088877439499, "eval_runtime": 876.9206, "eval_samples_per_second": 19.568, "eval_steps_per_second": 2.446, "step": 1200 }, { "epoch": 2.87, "learning_rate": 0.0005333333333333334, "loss": 0.3416, "step": 1400 }, { "epoch": 2.87, "eval_exact_match": 0.23175990675990676, "eval_loss": 0.03065803460776806, "eval_runtime": 880.241, "eval_samples_per_second": 19.495, "eval_steps_per_second": 2.437, "step": 1400 }, { "epoch": 3.29, "learning_rate": 0.00046666666666666666, "loss": 0.3351, "step": 1600 }, { "epoch": 3.29, "eval_exact_match": 0.23344988344988346, "eval_loss": 0.030744880437850952, "eval_runtime": 884.2271, "eval_samples_per_second": 19.407, "eval_steps_per_second": 2.426, "step": 1600 }, { "epoch": 3.7, "learning_rate": 0.0004, "loss": 0.3316, "step": 1800 }, { "epoch": 3.7, "eval_exact_match": 0.23432400932400932, "eval_loss": 0.029669322073459625, "eval_runtime": 876.7173, "eval_samples_per_second": 19.573, "eval_steps_per_second": 2.447, "step": 1800 }, { "epoch": 4.11, "learning_rate": 0.0003333333333333333, "loss": 0.3312, "step": 2000 }, { "epoch": 4.11, "eval_exact_match": 0.23444055944055944, "eval_loss": 0.02815121039748192, "eval_runtime": 885.0693, "eval_samples_per_second": 19.388, "eval_steps_per_second": 2.424, "step": 2000 }, { "epoch": 4.52, "learning_rate": 0.0002666666666666667, "loss": 0.3271, "step": 2200 }, { "epoch": 4.52, "eval_exact_match": 0.23648018648018648, "eval_loss": 0.02621879242360592, "eval_runtime": 890.2789, "eval_samples_per_second": 19.275, "eval_steps_per_second": 2.409, "step": 2200 }, { "epoch": 4.93, "learning_rate": 0.0002, "loss": 0.3241, "step": 2400 }, { "epoch": 4.93, "eval_exact_match": 0.23653846153846153, "eval_loss": 0.02629098668694496, "eval_runtime": 876.3961, "eval_samples_per_second": 19.58, "eval_steps_per_second": 2.448, "step": 2400 }, { "epoch": 5.34, "learning_rate": 0.00013333333333333334, "loss": 0.3227, "step": 2600 }, { "epoch": 5.34, "eval_exact_match": 0.23677156177156178, "eval_loss": 0.02586781419813633, "eval_runtime": 890.1947, "eval_samples_per_second": 19.277, "eval_steps_per_second": 2.41, "step": 2600 }, { "epoch": 5.75, "learning_rate": 6.666666666666667e-05, "loss": 0.3201, "step": 2800 }, { "epoch": 5.75, "eval_exact_match": 0.23653846153846153, "eval_loss": 0.025654641911387444, "eval_runtime": 876.5182, "eval_samples_per_second": 19.577, "eval_steps_per_second": 2.447, "step": 2800 }, { "epoch": 6.16, "learning_rate": 0.0, "loss": 0.3227, "step": 3000 }, { "epoch": 6.16, "eval_exact_match": 0.23653846153846153, "eval_loss": 0.02550993673503399, "eval_runtime": 881.7574, "eval_samples_per_second": 19.461, "eval_steps_per_second": 2.433, "step": 3000 }, { "epoch": 6.16, "step": 3000, "total_flos": 8.2913691900475e+16, "train_loss": 0.5049722137451171, "train_runtime": 47265.062, "train_samples_per_second": 32.498, "train_steps_per_second": 0.063 } ], "max_steps": 3000, "num_train_epochs": 7, "total_flos": 8.2913691900475e+16, "trial_name": null, "trial_params": null }