{ "best_metric": 80.33013168664009, "best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/ru_uzn_xlm_roberta_large_squad_model/checkpoint-2790", "epoch": 5.0, "eval_steps": 500, "global_step": 2790, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 558, "train_exact_match": 64.83516483516483, "train_f1": 83.08204401574274, "train_runtime": 24.0955, "train_samples_per_second": 43.826, "train_steps_per_second": 1.577 }, { "epoch": 1.0, "grad_norm": 80.57772827148438, "learning_rate": 1e-05, "loss": 1.2698, "step": 558 }, { "epoch": 1.0, "eval_exact_match": 61.25, "eval_f1": 79.38877529103148, "eval_runtime": 76.3721, "eval_samples_per_second": 43.982, "eval_steps_per_second": 1.571, "step": 558 }, { "epoch": 2.0, "step": 1116, "train_exact_match": 74.82517482517483, "train_f1": 89.2404305639639, "train_runtime": 24.2914, "train_samples_per_second": 42.855, "train_steps_per_second": 1.564 }, { "epoch": 2.0, "grad_norm": 80.02055358886719, "learning_rate": 7.500000000000001e-06, "loss": 0.8886, "step": 1116 }, { "epoch": 2.0, "eval_exact_match": 62.3125, "eval_f1": 80.2126480616015, "eval_runtime": 77.9968, "eval_samples_per_second": 43.066, "eval_steps_per_second": 1.539, "step": 1116 }, { "epoch": 3.0, "step": 1674, "train_exact_match": 78.52147852147853, "train_f1": 92.40489073606565, "train_runtime": 25.6503, "train_samples_per_second": 41.013, "train_steps_per_second": 1.481 }, { "epoch": 3.0, "grad_norm": 28.389789581298828, "learning_rate": 5e-06, "loss": 0.6286, "step": 1674 }, { "epoch": 3.0, "eval_exact_match": 61.9375, "eval_f1": 80.06485926873198, "eval_runtime": 83.2186, "eval_samples_per_second": 40.364, "eval_steps_per_second": 1.442, "step": 1674 }, { "epoch": 4.0, "step": 2232, "train_exact_match": 82.31768231768231, "train_f1": 93.82612819903538, "train_runtime": 23.9529, "train_samples_per_second": 43.669, "train_steps_per_second": 1.586 }, { "epoch": 4.0, "grad_norm": 21.804250717163086, "learning_rate": 2.5e-06, "loss": 0.4725, "step": 2232 }, { "epoch": 4.0, "eval_exact_match": 62.625, "eval_f1": 80.25175708657538, "eval_runtime": 77.0148, "eval_samples_per_second": 43.615, "eval_steps_per_second": 1.558, "step": 2232 }, { "epoch": 5.0, "step": 2790, "train_exact_match": 85.21478521478521, "train_f1": 94.39980772824222, "train_runtime": 24.3057, "train_samples_per_second": 42.871, "train_steps_per_second": 1.563 }, { "epoch": 5.0, "grad_norm": 48.04233932495117, "learning_rate": 0.0, "loss": 0.378, "step": 2790 }, { "epoch": 5.0, "eval_exact_match": 62.46875, "eval_f1": 80.33013168664009, "eval_runtime": 78.9765, "eval_samples_per_second": 42.532, "eval_steps_per_second": 1.519, "step": 2790 }, { "epoch": 5.0, "step": 2790, "total_flos": 5.432587638826752e+16, "train_loss": 0.7275064119728663, "train_runtime": 5090.5681, "train_samples_per_second": 15.321, "train_steps_per_second": 0.548 } ], "logging_steps": 500, "max_steps": 2790, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 5.432587638826752e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }