{ "best_metric": 0.030559765174984932, "best_model_checkpoint": "/data/wheld3/mt5-small-pointer-top_v2/checkpoint-3000", "epoch": 12.345168539325842, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.82, "learning_rate": 0.0009333333333333333, "loss": 1.9316, "step": 200 }, { "epoch": 0.82, "eval_exact_match": 0.008391608391608392, "eval_loss": 0.45658260583877563, "eval_runtime": 661.3564, "eval_samples_per_second": 25.947, "eval_steps_per_second": 3.243, "step": 200 }, { "epoch": 1.65, "learning_rate": 0.0008666666666666667, "loss": 0.3713, "step": 400 }, { "epoch": 1.65, "eval_exact_match": 0.12301864801864802, "eval_loss": 0.14726108312606812, "eval_runtime": 655.0457, "eval_samples_per_second": 26.197, "eval_steps_per_second": 3.275, "step": 400 }, { "epoch": 2.47, "learning_rate": 0.0008, "loss": 0.1747, "step": 600 }, { "epoch": 2.47, "eval_exact_match": 0.19842657342657344, "eval_loss": 0.07877045124769211, "eval_runtime": 646.2259, "eval_samples_per_second": 26.554, "eval_steps_per_second": 3.319, "step": 600 }, { "epoch": 3.29, "learning_rate": 0.0007333333333333333, "loss": 0.1104, "step": 800 }, { "epoch": 3.29, "eval_exact_match": 0.21486013986013985, "eval_loss": 0.056847672909498215, "eval_runtime": 648.473, "eval_samples_per_second": 26.462, "eval_steps_per_second": 3.308, "step": 800 }, { "epoch": 4.12, "learning_rate": 0.0006666666666666666, "loss": 0.0842, "step": 1000 }, { "epoch": 4.12, "eval_exact_match": 0.22173659673659674, "eval_loss": 0.04728136211633682, "eval_runtime": 656.2209, "eval_samples_per_second": 26.15, "eval_steps_per_second": 3.269, "step": 1000 }, { "epoch": 4.94, "learning_rate": 0.0006, "loss": 0.0694, "step": 1200 }, { "epoch": 4.94, "eval_exact_match": 0.225990675990676, "eval_loss": 0.0425742082297802, "eval_runtime": 650.7447, "eval_samples_per_second": 26.37, "eval_steps_per_second": 3.296, "step": 1200 }, { "epoch": 5.76, "learning_rate": 0.0005333333333333334, "loss": 0.0603, "step": 1400 }, { "epoch": 5.76, "eval_exact_match": 0.2279137529137529, "eval_loss": 0.03834143653512001, "eval_runtime": 652.2239, "eval_samples_per_second": 26.31, "eval_steps_per_second": 3.289, "step": 1400 }, { "epoch": 6.58, "learning_rate": 0.00046666666666666666, "loss": 0.0534, "step": 1600 }, { "epoch": 6.58, "eval_exact_match": 0.22808857808857808, "eval_loss": 0.03673423081636429, "eval_runtime": 652.7536, "eval_samples_per_second": 26.289, "eval_steps_per_second": 3.286, "step": 1600 }, { "epoch": 7.41, "learning_rate": 0.0004, "loss": 0.0477, "step": 1800 }, { "epoch": 7.41, "eval_exact_match": 0.23006993006993007, "eval_loss": 0.03471648693084717, "eval_runtime": 664.2219, "eval_samples_per_second": 25.835, "eval_steps_per_second": 3.229, "step": 1800 }, { "epoch": 8.23, "learning_rate": 0.0003333333333333333, "loss": 0.0441, "step": 2000 }, { "epoch": 8.23, "eval_exact_match": 0.23135198135198135, "eval_loss": 0.03336101025342941, "eval_runtime": 672.8476, "eval_samples_per_second": 25.504, "eval_steps_per_second": 3.188, "step": 2000 }, { "epoch": 9.05, "learning_rate": 0.0002666666666666667, "loss": 0.0413, "step": 2200 }, { "epoch": 9.05, "eval_exact_match": 0.23146853146853147, "eval_loss": 0.03233984857797623, "eval_runtime": 660.8292, "eval_samples_per_second": 25.967, "eval_steps_per_second": 3.246, "step": 2200 }, { "epoch": 9.88, "learning_rate": 0.0002, "loss": 0.0387, "step": 2400 }, { "epoch": 9.88, "eval_exact_match": 0.2315850815850816, "eval_loss": 0.03159063309431076, "eval_runtime": 652.4785, "eval_samples_per_second": 26.3, "eval_steps_per_second": 3.287, "step": 2400 }, { "epoch": 10.7, "learning_rate": 0.00013333333333333334, "loss": 0.0366, "step": 2600 }, { "epoch": 10.7, "eval_exact_match": 0.2324009324009324, "eval_loss": 0.03113115206360817, "eval_runtime": 657.1693, "eval_samples_per_second": 26.112, "eval_steps_per_second": 3.264, "step": 2600 }, { "epoch": 11.52, "learning_rate": 6.666666666666667e-05, "loss": 0.0358, "step": 2800 }, { "epoch": 11.52, "eval_exact_match": 0.2324009324009324, "eval_loss": 0.03069169819355011, "eval_runtime": 652.5597, "eval_samples_per_second": 26.296, "eval_steps_per_second": 3.287, "step": 2800 }, { "epoch": 12.35, "learning_rate": 0.0, "loss": 0.0343, "step": 3000 }, { "epoch": 12.35, "eval_exact_match": 0.2326923076923077, "eval_loss": 0.030559765174984932, "eval_runtime": 648.4066, "eval_samples_per_second": 26.465, "eval_steps_per_second": 3.308, "step": 3000 }, { "epoch": 12.35, "step": 3000, "total_flos": 3.654446752585728e+16, "train_loss": 0.2089100898106893, "train_runtime": 30994.3996, "train_samples_per_second": 49.557, "train_steps_per_second": 0.097 } ], "max_steps": 3000, "num_train_epochs": 13, "total_flos": 3.654446752585728e+16, "trial_name": null, "trial_params": null }