{ "best_metric": 78.54224063649897, "best_model_checkpoint": "/root/turkic_qa/en_uzn_models/en_uzn_mdeberta_base_squad_model/checkpoint-2229", "epoch": 10.0, "eval_steps": 500, "global_step": 7430, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 743, "train_exact_match": 60.739260739260736, "train_f1": 76.79740346661637, "train_runtime": 18.4418, "train_samples_per_second": 73.529, "train_steps_per_second": 2.657 }, { "epoch": 1.0, "grad_norm": 12.275191307067871, "learning_rate": 5e-06, "loss": 1.1938, "step": 743 }, { "epoch": 1.0, "eval_exact_match": 59.40625, "eval_f1": 74.4654162201273, "eval_runtime": 58.6195, "eval_samples_per_second": 74.583, "eval_steps_per_second": 2.678, "step": 743 }, { "epoch": 2.0, "step": 1486, "train_exact_match": 71.32867132867133, "train_f1": 82.88126787145742, "train_runtime": 18.7819, "train_samples_per_second": 73.901, "train_steps_per_second": 2.662 }, { "epoch": 2.0, "grad_norm": 17.339393615722656, "learning_rate": 1e-05, "loss": 0.9538, "step": 1486 }, { "epoch": 2.0, "eval_exact_match": 63.21875, "eval_f1": 77.06909842105833, "eval_runtime": 59.7376, "eval_samples_per_second": 73.187, "eval_steps_per_second": 2.628, "step": 1486 }, { "epoch": 3.0, "step": 2229, "train_exact_match": 72.02797202797203, "train_f1": 85.06033245209376, "train_runtime": 19.1902, "train_samples_per_second": 72.954, "train_steps_per_second": 2.605 }, { "epoch": 3.0, "grad_norm": 31.182722091674805, "learning_rate": 8.750000000000001e-06, "loss": 0.7754, "step": 2229 }, { "epoch": 3.0, "eval_exact_match": 65.03125, "eval_f1": 78.54224063649897, "eval_runtime": 60.2459, "eval_samples_per_second": 72.569, "eval_steps_per_second": 2.606, "step": 2229 }, { "epoch": 4.0, "step": 2972, "train_exact_match": 77.42257742257742, "train_f1": 89.5672832756712, "train_runtime": 18.9023, "train_samples_per_second": 73.483, "train_steps_per_second": 2.645 }, { "epoch": 4.0, "grad_norm": 31.596288681030273, "learning_rate": 7.500000000000001e-06, "loss": 0.6219, "step": 2972 }, { "epoch": 4.0, "eval_exact_match": 65.0, "eval_f1": 78.32854487938292, "eval_runtime": 59.9399, "eval_samples_per_second": 72.94, "eval_steps_per_second": 2.619, "step": 2972 }, { "epoch": 5.0, "step": 3715, "train_exact_match": 78.92107892107892, "train_f1": 90.10872697449445, "train_runtime": 18.4815, "train_samples_per_second": 71.964, "train_steps_per_second": 2.597 }, { "epoch": 5.0, "grad_norm": 18.16082000732422, "learning_rate": 6.25e-06, "loss": 0.5195, "step": 3715 }, { "epoch": 5.0, "eval_exact_match": 64.40625, "eval_f1": 78.09634918973954, "eval_runtime": 59.7181, "eval_samples_per_second": 73.211, "eval_steps_per_second": 2.629, "step": 3715 }, { "epoch": 6.0, "step": 4458, "train_exact_match": 82.61738261738262, "train_f1": 91.68526538584675, "train_runtime": 18.7765, "train_samples_per_second": 72.005, "train_steps_per_second": 2.61 }, { "epoch": 6.0, "grad_norm": 23.594913482666016, "learning_rate": 5e-06, "loss": 0.4385, "step": 4458 }, { "epoch": 6.0, "eval_exact_match": 64.59375, "eval_f1": 77.90198302625599, "eval_runtime": 60.4426, "eval_samples_per_second": 72.333, "eval_steps_per_second": 2.598, "step": 4458 }, { "epoch": 7.0, "step": 5201, "train_exact_match": 84.81518481518482, "train_f1": 93.31685648855274, "train_runtime": 18.5303, "train_samples_per_second": 72.206, "train_steps_per_second": 2.59 }, { "epoch": 7.0, "grad_norm": 28.661422729492188, "learning_rate": 3.7500000000000005e-06, "loss": 0.3756, "step": 5201 }, { "epoch": 7.0, "eval_exact_match": 64.34375, "eval_f1": 77.75398362289235, "eval_runtime": 60.0044, "eval_samples_per_second": 72.861, "eval_steps_per_second": 2.616, "step": 5201 }, { "epoch": 8.0, "step": 5944, "train_exact_match": 84.01598401598402, "train_f1": 92.96191804754487, "train_runtime": 19.1375, "train_samples_per_second": 71.953, "train_steps_per_second": 2.613 }, { "epoch": 8.0, "grad_norm": 36.12919998168945, "learning_rate": 2.5e-06, "loss": 0.3313, "step": 5944 }, { "epoch": 8.0, "eval_exact_match": 64.40625, "eval_f1": 77.95679362066461, "eval_runtime": 59.8689, "eval_samples_per_second": 73.026, "eval_steps_per_second": 2.622, "step": 5944 }, { "epoch": 9.0, "step": 6687, "train_exact_match": 87.01298701298701, "train_f1": 94.75304040815107, "train_runtime": 19.6099, "train_samples_per_second": 71.443, "train_steps_per_second": 2.601 }, { "epoch": 9.0, "grad_norm": 7.826128959655762, "learning_rate": 1.25e-06, "loss": 0.2966, "step": 6687 }, { "epoch": 9.0, "eval_exact_match": 64.40625, "eval_f1": 77.80886757487892, "eval_runtime": 59.9613, "eval_samples_per_second": 72.914, "eval_steps_per_second": 2.618, "step": 6687 }, { "epoch": 10.0, "step": 7430, "train_exact_match": 87.81218781218782, "train_f1": 95.03010006914994, "train_runtime": 19.1654, "train_samples_per_second": 71.274, "train_steps_per_second": 2.557 }, { "epoch": 10.0, "grad_norm": 16.050716400146484, "learning_rate": 0.0, "loss": 0.2775, "step": 7430 }, { "epoch": 10.0, "eval_exact_match": 64.21875, "eval_f1": 77.80834688072528, "eval_runtime": 60.7473, "eval_samples_per_second": 71.97, "eval_steps_per_second": 2.584, "step": 7430 }, { "epoch": 10.0, "step": 7430, "total_flos": 4.073559349625856e+16, "train_loss": 0.5783894863616409, "train_runtime": 6220.8095, "train_samples_per_second": 33.414, "train_steps_per_second": 1.194 } ], "logging_steps": 500, "max_steps": 7430, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 4.073559349625856e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }