{ "best_metric": 79.20930220037307, "best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/ru_uzn_xlm_roberta_large_model/checkpoint-1674", "epoch": 5.0, "eval_steps": 500, "global_step": 2790, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 558, "train_exact_match": 58.64135864135864, "train_f1": 78.03999717537812, "train_runtime": 24.1085, "train_samples_per_second": 43.802, "train_steps_per_second": 1.576 }, { "epoch": 1.0, "grad_norm": 71.476318359375, "learning_rate": 1e-05, "loss": 3.094, "step": 558 }, { "epoch": 1.0, "eval_exact_match": 55.96875, "eval_f1": 75.1917557548502, "eval_runtime": 76.2534, "eval_samples_per_second": 44.05, "eval_steps_per_second": 1.574, "step": 558 }, { "epoch": 2.0, "step": 1116, "train_exact_match": 69.43056943056943, "train_f1": 86.54359079483112, "train_runtime": 23.8497, "train_samples_per_second": 43.648, "train_steps_per_second": 1.593 }, { "epoch": 2.0, "grad_norm": 64.74529266357422, "learning_rate": 7.500000000000001e-06, "loss": 1.1571, "step": 1116 }, { "epoch": 2.0, "eval_exact_match": 60.25, "eval_f1": 78.34951496921676, "eval_runtime": 76.3189, "eval_samples_per_second": 44.013, "eval_steps_per_second": 1.572, "step": 1116 }, { "epoch": 3.0, "step": 1674, "train_exact_match": 77.32267732267732, "train_f1": 90.90190658126197, "train_runtime": 24.1109, "train_samples_per_second": 43.632, "train_steps_per_second": 1.576 }, { "epoch": 3.0, "grad_norm": 59.769203186035156, "learning_rate": 5e-06, "loss": 0.8046, "step": 1674 }, { "epoch": 3.0, "eval_exact_match": 60.71875, "eval_f1": 79.20930220037307, "eval_runtime": 76.4543, "eval_samples_per_second": 43.935, "eval_steps_per_second": 1.57, "step": 1674 }, { "epoch": 4.0, "step": 2232, "train_exact_match": 79.12087912087912, "train_f1": 91.91790747073001, "train_runtime": 24.69, "train_samples_per_second": 42.365, "train_steps_per_second": 1.539 }, { "epoch": 4.0, "grad_norm": 10.912280082702637, "learning_rate": 2.5e-06, "loss": 0.612, "step": 2232 }, { "epoch": 4.0, "eval_exact_match": 61.15625, "eval_f1": 79.20198937913115, "eval_runtime": 76.6624, "eval_samples_per_second": 43.816, "eval_steps_per_second": 1.565, "step": 2232 }, { "epoch": 5.0, "step": 2790, "train_exact_match": 82.71728271728271, "train_f1": 92.99953055921905, "train_runtime": 23.9309, "train_samples_per_second": 43.542, "train_steps_per_second": 1.588 }, { "epoch": 5.0, "grad_norm": 39.581809997558594, "learning_rate": 0.0, "loss": 0.4905, "step": 2790 }, { "epoch": 5.0, "eval_exact_match": 61.03125, "eval_f1": 78.98703726982994, "eval_runtime": 76.5756, "eval_samples_per_second": 43.865, "eval_steps_per_second": 1.567, "step": 2790 }, { "epoch": 5.0, "step": 2790, "total_flos": 5.432587638826752e+16, "train_loss": 1.231632082265765, "train_runtime": 5057.1596, "train_samples_per_second": 15.423, "train_steps_per_second": 0.552 } ], "logging_steps": 500, "max_steps": 2790, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 5.432587638826752e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }