{ "best_metric": 70.80451562424408, "best_model_checkpoint": "/root/turkic_qa/en_kaz_models/en_kaz_xlm_roberta_base_squad_model/checkpoint-3260", "epoch": 10.0, "eval_steps": 500, "global_step": 6520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 652, "train_exact_match": 54.645354645354644, "train_f1": 71.43920046551005, "train_runtime": 13.6364, "train_samples_per_second": 89.98, "train_steps_per_second": 3.227 }, { "epoch": 1.0, "grad_norm": 42.09185028076172, "learning_rate": 5e-06, "loss": 1.6894, "step": 652 }, { "epoch": 1.0, "eval_exact_match": 51.0, "eval_f1": 66.75535596211667, "eval_runtime": 42.9083, "eval_samples_per_second": 89.913, "eval_steps_per_second": 3.216, "step": 652 }, { "epoch": 2.0, "step": 1304, "train_exact_match": 60.03996003996004, "train_f1": 74.97586392957278, "train_runtime": 13.9152, "train_samples_per_second": 89.183, "train_steps_per_second": 3.234 }, { "epoch": 2.0, "grad_norm": 25.847925186157227, "learning_rate": 1e-05, "loss": 1.3571, "step": 1304 }, { "epoch": 2.0, "eval_exact_match": 54.0625, "eval_f1": 69.25389311582437, "eval_runtime": 43.0882, "eval_samples_per_second": 89.537, "eval_steps_per_second": 3.203, "step": 1304 }, { "epoch": 3.0, "step": 1956, "train_exact_match": 68.23176823176823, "train_f1": 81.76253341059837, "train_runtime": 13.5561, "train_samples_per_second": 88.964, "train_steps_per_second": 3.246 }, { "epoch": 3.0, "grad_norm": 34.729644775390625, "learning_rate": 8.750000000000001e-06, "loss": 1.1643, "step": 1956 }, { "epoch": 3.0, "eval_exact_match": 54.9375, "eval_f1": 69.89098890413919, "eval_runtime": 43.0049, "eval_samples_per_second": 89.711, "eval_steps_per_second": 3.209, "step": 1956 }, { "epoch": 4.0, "step": 2608, "train_exact_match": 71.82817182817183, "train_f1": 85.225535613071, "train_runtime": 14.2911, "train_samples_per_second": 88.167, "train_steps_per_second": 3.149 }, { "epoch": 4.0, "grad_norm": 33.257789611816406, "learning_rate": 7.500000000000001e-06, "loss": 0.9703, "step": 2608 }, { "epoch": 4.0, "eval_exact_match": 56.1875, "eval_f1": 70.69884423902388, "eval_runtime": 43.7869, "eval_samples_per_second": 88.109, "eval_steps_per_second": 3.152, "step": 2608 }, { "epoch": 5.0, "step": 3260, "train_exact_match": 76.42357642357642, "train_f1": 88.7083439838053, "train_runtime": 13.5599, "train_samples_per_second": 89.234, "train_steps_per_second": 3.245 }, { "epoch": 5.0, "grad_norm": 29.723169326782227, "learning_rate": 6.25e-06, "loss": 0.8246, "step": 3260 }, { "epoch": 5.0, "eval_exact_match": 56.53125, "eval_f1": 70.80451562424408, "eval_runtime": 43.1752, "eval_samples_per_second": 89.357, "eval_steps_per_second": 3.196, "step": 3260 }, { "epoch": 6.0, "step": 3912, "train_exact_match": 77.32267732267732, "train_f1": 89.41850450829368, "train_runtime": 13.3805, "train_samples_per_second": 87.964, "train_steps_per_second": 3.214 }, { "epoch": 6.0, "grad_norm": 43.614688873291016, "learning_rate": 5e-06, "loss": 0.7106, "step": 3912 }, { "epoch": 6.0, "eval_exact_match": 56.28125, "eval_f1": 70.69128089372614, "eval_runtime": 43.3271, "eval_samples_per_second": 89.044, "eval_steps_per_second": 3.185, "step": 3912 }, { "epoch": 7.0, "step": 4564, "train_exact_match": 80.31968031968032, "train_f1": 90.9932970975269, "train_runtime": 13.7154, "train_samples_per_second": 88.222, "train_steps_per_second": 3.208 }, { "epoch": 7.0, "grad_norm": 16.68464469909668, "learning_rate": 3.7500000000000005e-06, "loss": 0.6206, "step": 4564 }, { "epoch": 7.0, "eval_exact_match": 56.28125, "eval_f1": 70.50206404433744, "eval_runtime": 43.2324, "eval_samples_per_second": 89.239, "eval_steps_per_second": 3.192, "step": 4564 }, { "epoch": 8.0, "step": 5216, "train_exact_match": 82.41758241758242, "train_f1": 92.16600808934162, "train_runtime": 13.6769, "train_samples_per_second": 88.105, "train_steps_per_second": 3.217 }, { "epoch": 8.0, "grad_norm": 30.79519271850586, "learning_rate": 2.5e-06, "loss": 0.5603, "step": 5216 }, { "epoch": 8.0, "eval_exact_match": 56.40625, "eval_f1": 70.75408379302462, "eval_runtime": 43.2887, "eval_samples_per_second": 89.123, "eval_steps_per_second": 3.188, "step": 5216 }, { "epoch": 9.0, "step": 5868, "train_exact_match": 83.61638361638362, "train_f1": 93.40487188804495, "train_runtime": 13.7502, "train_samples_per_second": 88.508, "train_steps_per_second": 3.2 }, { "epoch": 9.0, "grad_norm": 45.799346923828125, "learning_rate": 1.25e-06, "loss": 0.5138, "step": 5868 }, { "epoch": 9.0, "eval_exact_match": 56.46875, "eval_f1": 70.54295738591223, "eval_runtime": 43.2261, "eval_samples_per_second": 89.252, "eval_steps_per_second": 3.193, "step": 5868 }, { "epoch": 10.0, "step": 6520, "train_exact_match": 84.31568431568432, "train_f1": 93.73109223264476, "train_runtime": 13.3013, "train_samples_per_second": 88.563, "train_steps_per_second": 3.233 }, { "epoch": 10.0, "grad_norm": 22.400699615478516, "learning_rate": 0.0, "loss": 0.4785, "step": 6520 }, { "epoch": 10.0, "eval_exact_match": 56.34375, "eval_f1": 70.56177522205788, "eval_runtime": 43.1408, "eval_samples_per_second": 89.428, "eval_steps_per_second": 3.199, "step": 6520 }, { "epoch": 10.0, "step": 6520, "total_flos": 3.575911440121344e+16, "train_loss": 0.8889602286684001, "train_runtime": 4193.6079, "train_samples_per_second": 43.511, "train_steps_per_second": 1.555 } ], "logging_steps": 500, "max_steps": 6520, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.575911440121344e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }