{ "best_metric": 82.66116908888208, "best_model_checkpoint": "/root/turkic_qa/tr_uzn_models/tr_uzn_xlm_roberta_large_model/checkpoint-3248", "epoch": 5.0, "eval_steps": 500, "global_step": 4060, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 812, "train_exact_match": 61.63836163836164, "train_f1": 79.31791325882773, "train_runtime": 38.4548, "train_samples_per_second": 43.168, "train_steps_per_second": 1.56 }, { "epoch": 1.0, "grad_norm": 30.129222869873047, "learning_rate": 1e-05, "loss": 2.7411, "step": 812 }, { "epoch": 1.0, "eval_exact_match": 59.46875, "eval_f1": 77.35390101597599, "eval_runtime": 118.1334, "eval_samples_per_second": 43.265, "eval_steps_per_second": 1.549, "step": 812 }, { "epoch": 2.0, "step": 1624, "train_exact_match": 75.62437562437563, "train_f1": 87.86513543878338, "train_runtime": 37.8364, "train_samples_per_second": 43.001, "train_steps_per_second": 1.559 }, { "epoch": 2.0, "grad_norm": 113.6387710571289, "learning_rate": 7.500000000000001e-06, "loss": 0.8103, "step": 1624 }, { "epoch": 2.0, "eval_exact_match": 66.59375, "eval_f1": 81.70014057851566, "eval_runtime": 118.1729, "eval_samples_per_second": 43.25, "eval_steps_per_second": 1.549, "step": 1624 }, { "epoch": 3.0, "step": 2436, "train_exact_match": 81.31868131868131, "train_f1": 91.60071635098949, "train_runtime": 36.5906, "train_samples_per_second": 43.153, "train_steps_per_second": 1.558 }, { "epoch": 3.0, "grad_norm": 58.30486297607422, "learning_rate": 5e-06, "loss": 0.5305, "step": 2436 }, { "epoch": 3.0, "eval_exact_match": 66.9375, "eval_f1": 82.51782698770623, "eval_runtime": 118.3438, "eval_samples_per_second": 43.188, "eval_steps_per_second": 1.546, "step": 2436 }, { "epoch": 4.0, "step": 3248, "train_exact_match": 83.61638361638362, "train_f1": 93.04689171723578, "train_runtime": 36.8939, "train_samples_per_second": 43.178, "train_steps_per_second": 1.545 }, { "epoch": 4.0, "grad_norm": 11.345389366149902, "learning_rate": 2.5e-06, "loss": 0.3795, "step": 3248 }, { "epoch": 4.0, "eval_exact_match": 67.8125, "eval_f1": 82.66116908888208, "eval_runtime": 118.298, "eval_samples_per_second": 43.204, "eval_steps_per_second": 1.547, "step": 3248 }, { "epoch": 5.0, "step": 4060, "train_exact_match": 86.91308691308691, "train_f1": 93.86544501027228, "train_runtime": 36.964, "train_samples_per_second": 43.177, "train_steps_per_second": 1.542 }, { "epoch": 5.0, "grad_norm": 37.001564025878906, "learning_rate": 0.0, "loss": 0.2878, "step": 4060 }, { "epoch": 5.0, "eval_exact_match": 67.5625, "eval_f1": 82.48067936529503, "eval_runtime": 118.5494, "eval_samples_per_second": 43.113, "eval_steps_per_second": 1.544, "step": 4060 }, { "epoch": 5.0, "step": 4060, "total_flos": 7.910493982198272e+16, "train_loss": 0.9498688984387027, "train_runtime": 7336.9641, "train_samples_per_second": 15.479, "train_steps_per_second": 0.553 } ], "logging_steps": 500, "max_steps": 4060, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 7.910493982198272e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }