{ "best_metric": 74.36086976945779, "best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/ru_uzn_xlm_roberta_base_model/checkpoint-3906", "epoch": 10.0, "eval_steps": 500, "global_step": 5580, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 558, "train_exact_match": 23.176823176823177, "train_f1": 40.91790695060761, "train_runtime": 11.8072, "train_samples_per_second": 89.437, "train_steps_per_second": 3.218 }, { "epoch": 1.0, "grad_norm": 69.84556579589844, "learning_rate": 5e-06, "loss": 4.7931, "step": 558 }, { "epoch": 1.0, "eval_exact_match": 23.375, "eval_f1": 40.588302053792845, "eval_runtime": 37.5548, "eval_samples_per_second": 89.443, "eval_steps_per_second": 3.195, "step": 558 }, { "epoch": 2.0, "step": 1116, "train_exact_match": 48.15184815184815, "train_f1": 67.62163572060659, "train_runtime": 11.6905, "train_samples_per_second": 89.047, "train_steps_per_second": 3.251 }, { "epoch": 2.0, "grad_norm": 141.2274932861328, "learning_rate": 1e-05, "loss": 2.3399, "step": 1116 }, { "epoch": 2.0, "eval_exact_match": 46.0, "eval_f1": 64.31534088055481, "eval_runtime": 37.2801, "eval_samples_per_second": 90.102, "eval_steps_per_second": 3.219, "step": 1116 }, { "epoch": 3.0, "step": 1674, "train_exact_match": 59.74025974025974, "train_f1": 77.9890564127976, "train_runtime": 12.2542, "train_samples_per_second": 85.848, "train_steps_per_second": 3.101 }, { "epoch": 3.0, "grad_norm": 176.3468475341797, "learning_rate": 8.750000000000001e-06, "loss": 1.5669, "step": 1674 }, { "epoch": 3.0, "eval_exact_match": 53.71875, "eval_f1": 71.6046462048705, "eval_runtime": 38.2025, "eval_samples_per_second": 87.926, "eval_steps_per_second": 3.141, "step": 1674 }, { "epoch": 4.0, "step": 2232, "train_exact_match": 60.93906093906094, "train_f1": 78.733011923745, "train_runtime": 11.7993, "train_samples_per_second": 88.649, "train_steps_per_second": 3.221 }, { "epoch": 4.0, "grad_norm": 37.7187385559082, "learning_rate": 7.500000000000001e-06, "loss": 1.2585, "step": 2232 }, { "epoch": 4.0, "eval_exact_match": 54.65625, "eval_f1": 72.55858557669187, "eval_runtime": 37.5502, "eval_samples_per_second": 89.454, "eval_steps_per_second": 3.196, "step": 2232 }, { "epoch": 5.0, "step": 2790, "train_exact_match": 67.03296703296704, "train_f1": 83.4692833669754, "train_runtime": 11.75, "train_samples_per_second": 88.681, "train_steps_per_second": 3.234 }, { "epoch": 5.0, "grad_norm": 65.70742797851562, "learning_rate": 6.25e-06, "loss": 1.0746, "step": 2790 }, { "epoch": 5.0, "eval_exact_match": 56.3125, "eval_f1": 73.3893893291572, "eval_runtime": 37.5867, "eval_samples_per_second": 89.367, "eval_steps_per_second": 3.193, "step": 2790 }, { "epoch": 6.0, "step": 3348, "train_exact_match": 71.42857142857143, "train_f1": 86.60157934407324, "train_runtime": 11.7396, "train_samples_per_second": 88.93, "train_steps_per_second": 3.237 }, { "epoch": 6.0, "grad_norm": 64.60488891601562, "learning_rate": 5e-06, "loss": 0.9341, "step": 3348 }, { "epoch": 6.0, "eval_exact_match": 57.09375, "eval_f1": 73.83274981499065, "eval_runtime": 37.5525, "eval_samples_per_second": 89.448, "eval_steps_per_second": 3.196, "step": 3348 }, { "epoch": 7.0, "step": 3906, "train_exact_match": 74.22577422577423, "train_f1": 88.79932331084886, "train_runtime": 11.745, "train_samples_per_second": 88.548, "train_steps_per_second": 3.235 }, { "epoch": 7.0, "grad_norm": 63.03971862792969, "learning_rate": 3.7500000000000005e-06, "loss": 0.8362, "step": 3906 }, { "epoch": 7.0, "eval_exact_match": 56.9375, "eval_f1": 74.36086976945779, "eval_runtime": 37.3638, "eval_samples_per_second": 89.9, "eval_steps_per_second": 3.212, "step": 3906 }, { "epoch": 8.0, "step": 4464, "train_exact_match": 75.22477522477523, "train_f1": 89.24938452782732, "train_runtime": 11.7145, "train_samples_per_second": 89.206, "train_steps_per_second": 3.244 }, { "epoch": 8.0, "grad_norm": 115.92487335205078, "learning_rate": 2.5e-06, "loss": 0.7519, "step": 4464 }, { "epoch": 8.0, "eval_exact_match": 56.875, "eval_f1": 73.74170941323764, "eval_runtime": 37.4071, "eval_samples_per_second": 89.796, "eval_steps_per_second": 3.208, "step": 4464 }, { "epoch": 9.0, "step": 5022, "train_exact_match": 77.92207792207792, "train_f1": 90.77887866843285, "train_runtime": 11.7653, "train_samples_per_second": 88.735, "train_steps_per_second": 3.23 }, { "epoch": 9.0, "grad_norm": 67.61051177978516, "learning_rate": 1.25e-06, "loss": 0.7002, "step": 5022 }, { "epoch": 9.0, "eval_exact_match": 57.09375, "eval_f1": 73.99562834029655, "eval_runtime": 37.3049, "eval_samples_per_second": 90.042, "eval_steps_per_second": 3.217, "step": 5022 }, { "epoch": 10.0, "step": 5580, "train_exact_match": 76.82317682317682, "train_f1": 90.5111904232365, "train_runtime": 11.6949, "train_samples_per_second": 89.526, "train_steps_per_second": 3.249 }, { "epoch": 10.0, "grad_norm": 77.0350341796875, "learning_rate": 0.0, "loss": 0.6664, "step": 5580 }, { "epoch": 10.0, "eval_exact_match": 57.09375, "eval_f1": 74.00492648190163, "eval_runtime": 37.3623, "eval_samples_per_second": 89.904, "eval_steps_per_second": 3.212, "step": 5580 }, { "epoch": 10.0, "step": 5580, "total_flos": 3.056976081243648e+16, "train_loss": 1.4921705567281307, "train_runtime": 3588.4893, "train_samples_per_second": 43.47, "train_steps_per_second": 1.555 } ], "logging_steps": 500, "max_steps": 5580, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.056976081243648e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }