{ "best_metric": 0.12814190983772278, "best_model_checkpoint": "/data/wheld3/mt5-base-pointer-adv-mtop/checkpoint-1000", "epoch": 16.304012214776485, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.09, "learning_rate": 0.0009333333333333333, "loss": 1.7704, "step": 200 }, { "epoch": 1.09, "eval_exact_match": 0.13154362416107382, "eval_loss": 0.3664160668849945, "eval_runtime": 115.2843, "eval_samples_per_second": 19.387, "eval_steps_per_second": 2.429, "step": 200 }, { "epoch": 2.17, "learning_rate": 0.0008666666666666667, "loss": 1.9751, "step": 400 }, { "epoch": 2.17, "eval_exact_match": 0.3400447427293065, "eval_loss": 0.20914442837238312, "eval_runtime": 116.9108, "eval_samples_per_second": 19.117, "eval_steps_per_second": 2.395, "step": 400 }, { "epoch": 3.26, "learning_rate": 0.0008, "loss": 1.0019, "step": 600 }, { "epoch": 3.26, "eval_exact_match": 0.45861297539149887, "eval_loss": 0.14529764652252197, "eval_runtime": 115.0721, "eval_samples_per_second": 19.423, "eval_steps_per_second": 2.433, "step": 600 }, { "epoch": 4.35, "learning_rate": 0.0007333333333333333, "loss": 1.313, "step": 800 }, { "epoch": 4.35, "eval_exact_match": 0.5064876957494407, "eval_loss": 0.13125699758529663, "eval_runtime": 116.3825, "eval_samples_per_second": 19.204, "eval_steps_per_second": 2.406, "step": 800 }, { "epoch": 5.43, "learning_rate": 0.0006666666666666666, "loss": 0.6593, "step": 1000 }, { "epoch": 5.43, "eval_exact_match": 0.5266219239373602, "eval_loss": 0.12814190983772278, "eval_runtime": 116.3912, "eval_samples_per_second": 19.202, "eval_steps_per_second": 2.406, "step": 1000 }, { "epoch": 6.52, "learning_rate": 0.0006, "loss": 0.3216, "step": 1200 }, { "epoch": 6.52, "eval_exact_match": 0.5252796420581656, "eval_loss": 0.13165239989757538, "eval_runtime": 116.4297, "eval_samples_per_second": 19.196, "eval_steps_per_second": 2.405, "step": 1200 }, { "epoch": 7.61, "learning_rate": 0.0005333333333333334, "loss": 0.4614, "step": 1400 }, { "epoch": 7.61, "eval_exact_match": 0.5261744966442953, "eval_loss": 0.1507694572210312, "eval_runtime": 116.8619, "eval_samples_per_second": 19.125, "eval_steps_per_second": 2.396, "step": 1400 }, { "epoch": 8.69, "learning_rate": 0.00046666666666666666, "loss": 0.3577, "step": 1600 }, { "epoch": 8.69, "eval_exact_match": 0.5360178970917227, "eval_loss": 0.1421622931957245, "eval_runtime": 121.8344, "eval_samples_per_second": 18.345, "eval_steps_per_second": 2.298, "step": 1600 }, { "epoch": 9.78, "learning_rate": 0.0004, "loss": 0.3748, "step": 1800 }, { "epoch": 9.78, "eval_exact_match": 0.5458612975391499, "eval_loss": 0.14189742505550385, "eval_runtime": 119.5737, "eval_samples_per_second": 18.691, "eval_steps_per_second": 2.342, "step": 1800 }, { "epoch": 10.87, "learning_rate": 0.0003333333333333333, "loss": 0.2422, "step": 2000 }, { "epoch": 10.87, "eval_exact_match": 0.5355704697986577, "eval_loss": 0.16032171249389648, "eval_runtime": 125.8745, "eval_samples_per_second": 17.756, "eval_steps_per_second": 2.224, "step": 2000 }, { "epoch": 11.96, "learning_rate": 0.0002666666666666667, "loss": 0.4443, "step": 2200 }, { "epoch": 11.96, "eval_exact_match": 0.5472035794183445, "eval_loss": 0.15260477364063263, "eval_runtime": 118.267, "eval_samples_per_second": 18.898, "eval_steps_per_second": 2.368, "step": 2200 }, { "epoch": 13.04, "learning_rate": 0.0002, "loss": 0.2671, "step": 2400 }, { "epoch": 13.04, "eval_exact_match": 0.5480984340044742, "eval_loss": 0.16060054302215576, "eval_runtime": 118.1286, "eval_samples_per_second": 18.92, "eval_steps_per_second": 2.37, "step": 2400 }, { "epoch": 14.13, "learning_rate": 0.00013333333333333334, "loss": 0.227, "step": 2600 }, { "epoch": 14.13, "eval_exact_match": 0.5440715883668904, "eval_loss": 0.1774316281080246, "eval_runtime": 119.688, "eval_samples_per_second": 18.674, "eval_steps_per_second": 2.339, "step": 2600 }, { "epoch": 15.22, "learning_rate": 6.666666666666667e-05, "loss": 0.2053, "step": 2800 }, { "epoch": 15.22, "eval_exact_match": 0.5440715883668904, "eval_loss": 0.1752384901046753, "eval_runtime": 119.0856, "eval_samples_per_second": 18.768, "eval_steps_per_second": 2.351, "step": 2800 }, { "epoch": 16.3, "learning_rate": 0.0, "loss": 0.1517, "step": 3000 }, { "epoch": 16.3, "eval_exact_match": 0.5480984340044742, "eval_loss": 0.1770186424255371, "eval_runtime": 119.4908, "eval_samples_per_second": 18.704, "eval_steps_per_second": 2.343, "step": 3000 }, { "epoch": 16.3, "step": 3000, "total_flos": 7.59685289063286e+16, "train_loss": 0.6515167045593262, "train_runtime": 35508.9264, "train_samples_per_second": 43.257, "train_steps_per_second": 0.084 } ], "max_steps": 3000, "num_train_epochs": 17, "total_flos": 7.59685289063286e+16, "trial_name": null, "trial_params": null }