{ "best_metric": 81.63811371088175, "best_model_checkpoint": "/root/turkic_qa/ru_kaz_models/ru_kaz_xlm_roberta_large_squad_model/checkpoint-2208", "epoch": 5.0, "eval_steps": 500, "global_step": 2760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 552, "train_exact_match": 67.83216783216783, "train_f1": 84.61477737180347, "train_runtime": 23.2961, "train_samples_per_second": 43.612, "train_steps_per_second": 1.588 }, { "epoch": 1.0, "grad_norm": 209.68630981445312, "learning_rate": 1e-05, "loss": 1.187, "step": 552 }, { "epoch": 1.0, "eval_exact_match": 62.8125, "eval_f1": 80.31583567739357, "eval_runtime": 74.7802, "eval_samples_per_second": 43.875, "eval_steps_per_second": 1.578, "step": 552 }, { "epoch": 2.0, "step": 1104, "train_exact_match": 77.12287712287713, "train_f1": 90.22014020687811, "train_runtime": 23.5003, "train_samples_per_second": 43.744, "train_steps_per_second": 1.574 }, { "epoch": 2.0, "grad_norm": 55.167510986328125, "learning_rate": 7.500000000000001e-06, "loss": 0.8237, "step": 1104 }, { "epoch": 2.0, "eval_exact_match": 64.59375, "eval_f1": 81.19927962759496, "eval_runtime": 74.7421, "eval_samples_per_second": 43.898, "eval_steps_per_second": 1.579, "step": 1104 }, { "epoch": 3.0, "step": 1656, "train_exact_match": 80.01998001998003, "train_f1": 92.75806800650366, "train_runtime": 23.2732, "train_samples_per_second": 43.698, "train_steps_per_second": 1.59 }, { "epoch": 3.0, "grad_norm": 60.38401794433594, "learning_rate": 5e-06, "loss": 0.5947, "step": 1656 }, { "epoch": 3.0, "eval_exact_match": 65.34375, "eval_f1": 81.60673478065382, "eval_runtime": 75.1587, "eval_samples_per_second": 43.654, "eval_steps_per_second": 1.57, "step": 1656 }, { "epoch": 4.0, "step": 2208, "train_exact_match": 85.11488511488511, "train_f1": 95.27935906423745, "train_runtime": 23.5078, "train_samples_per_second": 43.135, "train_steps_per_second": 1.574 }, { "epoch": 4.0, "grad_norm": 106.58255004882812, "learning_rate": 2.5e-06, "loss": 0.4463, "step": 2208 }, { "epoch": 4.0, "eval_exact_match": 65.3125, "eval_f1": 81.63811371088175, "eval_runtime": 75.6398, "eval_samples_per_second": 43.377, "eval_steps_per_second": 1.56, "step": 2208 }, { "epoch": 5.0, "step": 2760, "train_exact_match": 88.21178821178822, "train_f1": 96.20658088193036, "train_runtime": 23.4676, "train_samples_per_second": 43.549, "train_steps_per_second": 1.577 }, { "epoch": 5.0, "grad_norm": 25.936321258544922, "learning_rate": 0.0, "loss": 0.3537, "step": 2760 }, { "epoch": 5.0, "eval_exact_match": 65.25, "eval_f1": 81.53757682259878, "eval_runtime": 75.1221, "eval_samples_per_second": 43.676, "eval_steps_per_second": 1.571, "step": 2760 }, { "epoch": 5.0, "step": 2760, "total_flos": 5.376168689054976e+16, "train_loss": 0.6810590108235677, "train_runtime": 4970.3143, "train_samples_per_second": 15.529, "train_steps_per_second": 0.555 } ], "logging_steps": 500, "max_steps": 2760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 5.376168689054976e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }