{ "best_metric": 0.8855701147554326, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-Japanese-GSD/checkpoint-2000", "epoch": 20.361990950226243, "global_step": 4500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.45, "learning_rate": 4.9500000000000004e-05, "loss": 1.3362, "step": 100 }, { "epoch": 0.9, "learning_rate": 4.966778523489933e-05, "loss": 0.5269, "step": 200 }, { "epoch": 1.36, "learning_rate": 4.933221476510068e-05, "loss": 0.432, "step": 300 }, { "epoch": 1.81, "learning_rate": 4.8996644295302016e-05, "loss": 0.4029, "step": 400 }, { "epoch": 2.26, "learning_rate": 4.8661073825503355e-05, "loss": 0.3774, "step": 500 }, { "epoch": 2.26, "eval_accuracy": 0.8758850817937658, "eval_loss": 0.37171030044555664, "eval_runtime": 2.6026, "eval_samples_per_second": 194.803, "eval_steps_per_second": 24.591, "step": 500 }, { "epoch": 2.71, "learning_rate": 4.83255033557047e-05, "loss": 0.3581, "step": 600 }, { "epoch": 3.17, "learning_rate": 4.798993288590604e-05, "loss": 0.3378, "step": 700 }, { "epoch": 3.62, "learning_rate": 4.765436241610739e-05, "loss": 0.3239, "step": 800 }, { "epoch": 4.07, "learning_rate": 4.7318791946308726e-05, "loss": 0.3132, "step": 900 }, { "epoch": 4.52, "learning_rate": 4.698322147651007e-05, "loss": 0.2882, "step": 1000 }, { "epoch": 4.52, "eval_accuracy": 0.8841865386180516, "eval_loss": 0.3594276010990143, "eval_runtime": 2.6266, "eval_samples_per_second": 193.024, "eval_steps_per_second": 24.366, "step": 1000 }, { "epoch": 4.98, "learning_rate": 4.664765100671141e-05, "loss": 0.2882, "step": 1100 }, { "epoch": 5.43, "learning_rate": 4.631208053691276e-05, "loss": 0.2499, "step": 1200 }, { "epoch": 5.88, "learning_rate": 4.5976510067114097e-05, "loss": 0.2633, "step": 1300 }, { "epoch": 6.33, "learning_rate": 4.564093959731544e-05, "loss": 0.2364, "step": 1400 }, { "epoch": 6.79, "learning_rate": 4.5305369127516775e-05, "loss": 0.2304, "step": 1500 }, { "epoch": 6.79, "eval_accuracy": 0.8854887279238219, "eval_loss": 0.3736213743686676, "eval_runtime": 2.6247, "eval_samples_per_second": 193.165, "eval_steps_per_second": 24.384, "step": 1500 }, { "epoch": 7.24, "learning_rate": 4.496979865771812e-05, "loss": 0.2144, "step": 1600 }, { "epoch": 7.69, "learning_rate": 4.463422818791946e-05, "loss": 0.2067, "step": 1700 }, { "epoch": 8.14, "learning_rate": 4.4298657718120806e-05, "loss": 0.1925, "step": 1800 }, { "epoch": 8.6, "learning_rate": 4.3963087248322146e-05, "loss": 0.1764, "step": 1900 }, { "epoch": 9.05, "learning_rate": 4.362751677852349e-05, "loss": 0.1778, "step": 2000 }, { "epoch": 9.05, "eval_accuracy": 0.8855701147554326, "eval_loss": 0.43164435029029846, "eval_runtime": 2.6105, "eval_samples_per_second": 194.219, "eval_steps_per_second": 24.517, "step": 2000 }, { "epoch": 9.5, "learning_rate": 4.329194630872484e-05, "loss": 0.1529, "step": 2100 }, { "epoch": 9.95, "learning_rate": 4.295637583892618e-05, "loss": 0.1583, "step": 2200 }, { "epoch": 10.41, "learning_rate": 4.262080536912752e-05, "loss": 0.1356, "step": 2300 }, { "epoch": 10.86, "learning_rate": 4.228523489932886e-05, "loss": 0.1374, "step": 2400 }, { "epoch": 11.31, "learning_rate": 4.194966442953021e-05, "loss": 0.1217, "step": 2500 }, { "epoch": 11.31, "eval_accuracy": 0.8805241311955726, "eval_loss": 0.493513286113739, "eval_runtime": 2.6161, "eval_samples_per_second": 193.798, "eval_steps_per_second": 24.464, "step": 2500 }, { "epoch": 11.76, "learning_rate": 4.161409395973155e-05, "loss": 0.1193, "step": 2600 }, { "epoch": 12.22, "learning_rate": 4.127852348993289e-05, "loss": 0.1098, "step": 2700 }, { "epoch": 12.67, "learning_rate": 4.0942953020134226e-05, "loss": 0.103, "step": 2800 }, { "epoch": 13.12, "learning_rate": 4.060738255033557e-05, "loss": 0.1012, "step": 2900 }, { "epoch": 13.57, "learning_rate": 4.027181208053691e-05, "loss": 0.0896, "step": 3000 }, { "epoch": 13.57, "eval_accuracy": 0.8823146414910068, "eval_loss": 0.5315696001052856, "eval_runtime": 2.6096, "eval_samples_per_second": 194.28, "eval_steps_per_second": 24.525, "step": 3000 }, { "epoch": 14.03, "learning_rate": 3.993624161073826e-05, "loss": 0.0869, "step": 3100 }, { "epoch": 14.48, "learning_rate": 3.96006711409396e-05, "loss": 0.0784, "step": 3200 }, { "epoch": 14.93, "learning_rate": 3.926510067114094e-05, "loss": 0.0783, "step": 3300 }, { "epoch": 15.38, "learning_rate": 3.892953020134228e-05, "loss": 0.0691, "step": 3400 }, { "epoch": 15.84, "learning_rate": 3.859395973154363e-05, "loss": 0.0685, "step": 3500 }, { "epoch": 15.84, "eval_accuracy": 0.8823960283226174, "eval_loss": 0.5921140313148499, "eval_runtime": 2.6221, "eval_samples_per_second": 193.357, "eval_steps_per_second": 24.408, "step": 3500 }, { "epoch": 16.29, "learning_rate": 3.825838926174497e-05, "loss": 0.0646, "step": 3600 }, { "epoch": 16.74, "learning_rate": 3.7922818791946313e-05, "loss": 0.0592, "step": 3700 }, { "epoch": 17.19, "learning_rate": 3.758724832214765e-05, "loss": 0.0593, "step": 3800 }, { "epoch": 17.65, "learning_rate": 3.725167785234899e-05, "loss": 0.0546, "step": 3900 }, { "epoch": 18.1, "learning_rate": 3.691610738255034e-05, "loss": 0.0546, "step": 4000 }, { "epoch": 18.1, "eval_accuracy": 0.88272157564906, "eval_loss": 0.6464724540710449, "eval_runtime": 2.62, "eval_samples_per_second": 193.511, "eval_steps_per_second": 24.427, "step": 4000 }, { "epoch": 18.55, "learning_rate": 3.658053691275168e-05, "loss": 0.0494, "step": 4100 }, { "epoch": 19.0, "learning_rate": 3.624496644295302e-05, "loss": 0.0519, "step": 4200 }, { "epoch": 19.46, "learning_rate": 3.590939597315436e-05, "loss": 0.0422, "step": 4300 }, { "epoch": 19.91, "learning_rate": 3.557382550335571e-05, "loss": 0.0445, "step": 4400 }, { "epoch": 20.36, "learning_rate": 3.523825503355705e-05, "loss": 0.0395, "step": 4500 }, { "epoch": 20.36, "eval_accuracy": 0.8812566126800684, "eval_loss": 0.6766866445541382, "eval_runtime": 2.6147, "eval_samples_per_second": 193.903, "eval_steps_per_second": 24.477, "step": 4500 }, { "epoch": 20.36, "step": 4500, "total_flos": 1.875842476013568e+16, "train_loss": 0.20138724523120458, "train_runtime": 1165.6408, "train_samples_per_second": 411.791, "train_steps_per_second": 12.868 } ], "max_steps": 15000, "num_train_epochs": 68, "total_flos": 1.875842476013568e+16, "trial_name": null, "trial_params": null }