{ "best_metric": 80.9666820924435, "best_model_checkpoint": "/root/turkic_qa/tr_kaz_models/tr_kaz_xlm_roberta_base_squad_model/checkpoint-5243", "epoch": 10.0, "eval_steps": 500, "global_step": 7490, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 749, "train_exact_match": 58.74125874125874, "train_f1": 76.91933391150722, "train_runtime": 17.3698, "train_samples_per_second": 87.508, "train_steps_per_second": 3.166 }, { "epoch": 1.0, "grad_norm": 24.767520904541016, "learning_rate": 5e-06, "loss": 1.366, "step": 749 }, { "epoch": 1.0, "eval_exact_match": 56.03125, "eval_f1": 75.5726701661797, "eval_runtime": 52.3069, "eval_samples_per_second": 88.287, "eval_steps_per_second": 3.154, "step": 749 }, { "epoch": 2.0, "step": 1498, "train_exact_match": 65.33466533466533, "train_f1": 81.16405259356212, "train_runtime": 16.8317, "train_samples_per_second": 88.048, "train_steps_per_second": 3.149 }, { "epoch": 2.0, "grad_norm": 22.722991943359375, "learning_rate": 1e-05, "loss": 1.0296, "step": 1498 }, { "epoch": 2.0, "eval_exact_match": 60.6875, "eval_f1": 77.7357851907795, "eval_runtime": 52.4104, "eval_samples_per_second": 88.112, "eval_steps_per_second": 3.148, "step": 1498 }, { "epoch": 3.0, "step": 2247, "train_exact_match": 74.52547452547452, "train_f1": 87.61463327451462, "train_runtime": 16.7088, "train_samples_per_second": 89.593, "train_steps_per_second": 3.232 }, { "epoch": 3.0, "grad_norm": 18.895648956298828, "learning_rate": 8.750000000000001e-06, "loss": 0.8407, "step": 2247 }, { "epoch": 3.0, "eval_exact_match": 63.0625, "eval_f1": 79.25734099418638, "eval_runtime": 51.9473, "eval_samples_per_second": 88.898, "eval_steps_per_second": 3.176, "step": 2247 }, { "epoch": 4.0, "step": 2996, "train_exact_match": 75.02497502497502, "train_f1": 88.1924695005796, "train_runtime": 16.7212, "train_samples_per_second": 90.065, "train_steps_per_second": 3.229 }, { "epoch": 4.0, "grad_norm": 21.92975425720215, "learning_rate": 7.500000000000001e-06, "loss": 0.6761, "step": 2996 }, { "epoch": 4.0, "eval_exact_match": 64.3125, "eval_f1": 80.41214611674394, "eval_runtime": 51.1453, "eval_samples_per_second": 90.292, "eval_steps_per_second": 3.226, "step": 2996 }, { "epoch": 5.0, "step": 3745, "train_exact_match": 80.71928071928072, "train_f1": 91.80017345637452, "train_runtime": 16.6722, "train_samples_per_second": 89.73, "train_steps_per_second": 3.239 }, { "epoch": 5.0, "grad_norm": 10.589529991149902, "learning_rate": 6.25e-06, "loss": 0.5642, "step": 3745 }, { "epoch": 5.0, "eval_exact_match": 65.0625, "eval_f1": 80.50496259716563, "eval_runtime": 51.2576, "eval_samples_per_second": 90.094, "eval_steps_per_second": 3.219, "step": 3745 }, { "epoch": 6.0, "step": 4494, "train_exact_match": 82.71728271728271, "train_f1": 92.8088019961768, "train_runtime": 16.7647, "train_samples_per_second": 88.997, "train_steps_per_second": 3.221 }, { "epoch": 6.0, "grad_norm": 17.399965286254883, "learning_rate": 5e-06, "loss": 0.4769, "step": 4494 }, { "epoch": 6.0, "eval_exact_match": 65.1875, "eval_f1": 80.88709112615079, "eval_runtime": 51.1815, "eval_samples_per_second": 90.228, "eval_steps_per_second": 3.224, "step": 4494 }, { "epoch": 7.0, "step": 5243, "train_exact_match": 84.41558441558442, "train_f1": 93.70818045593467, "train_runtime": 16.3897, "train_samples_per_second": 90.118, "train_steps_per_second": 3.234 }, { "epoch": 7.0, "grad_norm": 15.382094383239746, "learning_rate": 3.7500000000000005e-06, "loss": 0.4089, "step": 5243 }, { "epoch": 7.0, "eval_exact_match": 65.21875, "eval_f1": 80.9666820924435, "eval_runtime": 51.2038, "eval_samples_per_second": 90.189, "eval_steps_per_second": 3.222, "step": 5243 }, { "epoch": 8.0, "step": 5992, "train_exact_match": 84.41558441558442, "train_f1": 94.00170296456992, "train_runtime": 17.5397, "train_samples_per_second": 87.573, "train_steps_per_second": 3.136 }, { "epoch": 8.0, "grad_norm": 48.553558349609375, "learning_rate": 2.5e-06, "loss": 0.3646, "step": 5992 }, { "epoch": 8.0, "eval_exact_match": 64.71875, "eval_f1": 80.35415857951902, "eval_runtime": 52.7202, "eval_samples_per_second": 87.594, "eval_steps_per_second": 3.13, "step": 5992 }, { "epoch": 9.0, "step": 6741, "train_exact_match": 87.61238761238761, "train_f1": 95.10743821560438, "train_runtime": 16.7145, "train_samples_per_second": 88.008, "train_steps_per_second": 3.171 }, { "epoch": 9.0, "grad_norm": 54.76875305175781, "learning_rate": 1.25e-06, "loss": 0.3226, "step": 6741 }, { "epoch": 9.0, "eval_exact_match": 65.40625, "eval_f1": 80.62880989809192, "eval_runtime": 52.4199, "eval_samples_per_second": 88.096, "eval_steps_per_second": 3.148, "step": 6741 }, { "epoch": 10.0, "step": 7490, "train_exact_match": 88.1118881118881, "train_f1": 95.35905521584445, "train_runtime": 16.8298, "train_samples_per_second": 88.771, "train_steps_per_second": 3.209 }, { "epoch": 10.0, "grad_norm": 20.260765075683594, "learning_rate": 0.0, "loss": 0.2978, "step": 7490 }, { "epoch": 10.0, "eval_exact_match": 65.59375, "eval_f1": 80.93066187820529, "eval_runtime": 51.2112, "eval_samples_per_second": 90.176, "eval_steps_per_second": 3.222, "step": 7490 }, { "epoch": 10.0, "step": 7490, "total_flos": 4.108760851295232e+16, "train_loss": 0.6347312815198911, "train_runtime": 4825.6442, "train_samples_per_second": 43.447, "train_steps_per_second": 1.552 } ], "logging_steps": 500, "max_steps": 7490, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 4.108760851295232e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }