{ "best_metric": 80.63612438300382, "best_model_checkpoint": "/root/turkic_qa/tr_uzn_models/tr_uzn_xlm_roberta_base_squad_model/checkpoint-8120", "epoch": 10.0, "eval_steps": 500, "global_step": 8120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 812, "train_exact_match": 56.743256743256744, "train_f1": 75.16234224627293, "train_runtime": 18.93, "train_samples_per_second": 87.692, "train_steps_per_second": 3.17 }, { "epoch": 1.0, "grad_norm": 33.5258674621582, "learning_rate": 5e-06, "loss": 1.3302, "step": 812 }, { "epoch": 1.0, "eval_exact_match": 56.78125, "eval_f1": 74.92855180045365, "eval_runtime": 58.0842, "eval_samples_per_second": 87.993, "eval_steps_per_second": 3.151, "step": 812 }, { "epoch": 2.0, "step": 1624, "train_exact_match": 66.53346653346654, "train_f1": 81.01468766554503, "train_runtime": 19.1594, "train_samples_per_second": 84.919, "train_steps_per_second": 3.079 }, { "epoch": 2.0, "grad_norm": 39.375850677490234, "learning_rate": 1e-05, "loss": 0.9867, "step": 1624 }, { "epoch": 2.0, "eval_exact_match": 60.71875, "eval_f1": 77.48042477059983, "eval_runtime": 59.4097, "eval_samples_per_second": 86.03, "eval_steps_per_second": 3.08, "step": 1624 }, { "epoch": 3.0, "step": 2436, "train_exact_match": 73.32667332667333, "train_f1": 86.12936978278546, "train_runtime": 18.141, "train_samples_per_second": 87.04, "train_steps_per_second": 3.142 }, { "epoch": 3.0, "grad_norm": 43.98362350463867, "learning_rate": 8.750000000000001e-06, "loss": 0.7954, "step": 2436 }, { "epoch": 3.0, "eval_exact_match": 63.53125, "eval_f1": 79.36767777349903, "eval_runtime": 58.582, "eval_samples_per_second": 87.245, "eval_steps_per_second": 3.124, "step": 2436 }, { "epoch": 4.0, "step": 3248, "train_exact_match": 74.72527472527473, "train_f1": 88.40189950318806, "train_runtime": 18.0666, "train_samples_per_second": 88.174, "train_steps_per_second": 3.155 }, { "epoch": 4.0, "grad_norm": 28.4106502532959, "learning_rate": 7.500000000000001e-06, "loss": 0.6427, "step": 3248 }, { "epoch": 4.0, "eval_exact_match": 65.21875, "eval_f1": 80.19939907948, "eval_runtime": 57.9356, "eval_samples_per_second": 88.219, "eval_steps_per_second": 3.159, "step": 3248 }, { "epoch": 5.0, "step": 4060, "train_exact_match": 79.72027972027972, "train_f1": 90.39293523910672, "train_runtime": 18.0742, "train_samples_per_second": 88.303, "train_steps_per_second": 3.154 }, { "epoch": 5.0, "grad_norm": 58.56404495239258, "learning_rate": 6.25e-06, "loss": 0.5287, "step": 4060 }, { "epoch": 5.0, "eval_exact_match": 64.53125, "eval_f1": 79.76982128782555, "eval_runtime": 57.84, "eval_samples_per_second": 88.365, "eval_steps_per_second": 3.164, "step": 4060 }, { "epoch": 6.0, "step": 4872, "train_exact_match": 79.42057942057941, "train_f1": 91.11407011087398, "train_runtime": 18.4811, "train_samples_per_second": 88.252, "train_steps_per_second": 3.192 }, { "epoch": 6.0, "grad_norm": 45.58940124511719, "learning_rate": 5e-06, "loss": 0.4506, "step": 4872 }, { "epoch": 6.0, "eval_exact_match": 65.46875, "eval_f1": 80.18984733785271, "eval_runtime": 58.0085, "eval_samples_per_second": 88.108, "eval_steps_per_second": 3.155, "step": 4872 }, { "epoch": 7.0, "step": 5684, "train_exact_match": 80.41958041958041, "train_f1": 91.05656126706158, "train_runtime": 17.8755, "train_samples_per_second": 89.172, "train_steps_per_second": 3.189 }, { "epoch": 7.0, "grad_norm": 26.53660774230957, "learning_rate": 3.7500000000000005e-06, "loss": 0.3805, "step": 5684 }, { "epoch": 7.0, "eval_exact_match": 65.125, "eval_f1": 80.04186683186089, "eval_runtime": 57.4933, "eval_samples_per_second": 88.897, "eval_steps_per_second": 3.183, "step": 5684 }, { "epoch": 8.0, "step": 6496, "train_exact_match": 86.31368631368632, "train_f1": 93.75812824286764, "train_runtime": 17.4124, "train_samples_per_second": 88.328, "train_steps_per_second": 3.159 }, { "epoch": 8.0, "grad_norm": 34.22230911254883, "learning_rate": 2.5e-06, "loss": 0.3381, "step": 6496 }, { "epoch": 8.0, "eval_exact_match": 65.90625, "eval_f1": 80.14333737032665, "eval_runtime": 57.8402, "eval_samples_per_second": 88.364, "eval_steps_per_second": 3.164, "step": 6496 }, { "epoch": 9.0, "step": 7308, "train_exact_match": 86.01398601398601, "train_f1": 94.05045602201064, "train_runtime": 18.8091, "train_samples_per_second": 86.767, "train_steps_per_second": 3.137 }, { "epoch": 9.0, "grad_norm": 87.17084503173828, "learning_rate": 1.25e-06, "loss": 0.3032, "step": 7308 }, { "epoch": 9.0, "eval_exact_match": 66.21875, "eval_f1": 80.52702562432783, "eval_runtime": 58.1783, "eval_samples_per_second": 87.851, "eval_steps_per_second": 3.146, "step": 7308 }, { "epoch": 10.0, "step": 8120, "train_exact_match": 86.41358641358642, "train_f1": 94.52312811018126, "train_runtime": 17.2785, "train_samples_per_second": 87.276, "train_steps_per_second": 3.125 }, { "epoch": 10.0, "grad_norm": 31.117448806762695, "learning_rate": 0.0, "loss": 0.2815, "step": 8120 }, { "epoch": 10.0, "eval_exact_match": 66.4375, "eval_f1": 80.63612438300382, "eval_runtime": 58.3935, "eval_samples_per_second": 87.527, "eval_steps_per_second": 3.134, "step": 8120 }, { "epoch": 10.0, "step": 8120, "total_flos": 4.451320899376128e+16, "train_loss": 0.6037781062384544, "train_runtime": 5220.6695, "train_samples_per_second": 43.508, "train_steps_per_second": 1.555 } ], "logging_steps": 500, "max_steps": 8120, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 4.451320899376128e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }