{ "best_metric": 78.92909907885071, "best_model_checkpoint": "/root/turkic_qa/tr_uzn_models/tr_uzn_xlm_roberta_base_model/checkpoint-7308", "epoch": 10.0, "eval_steps": 500, "global_step": 8120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 812, "train_exact_match": 28.97102897102897, "train_f1": 47.43836868671638, "train_runtime": 18.4368, "train_samples_per_second": 90.037, "train_steps_per_second": 3.254 }, { "epoch": 1.0, "grad_norm": 62.82646942138672, "learning_rate": 5e-06, "loss": 4.1982, "step": 812 }, { "epoch": 1.0, "eval_exact_match": 27.4375, "eval_f1": 46.68812257417232, "eval_runtime": 56.5084, "eval_samples_per_second": 90.447, "eval_steps_per_second": 3.238, "step": 812 }, { "epoch": 2.0, "step": 1624, "train_exact_match": 59.54045954045954, "train_f1": 75.73675798511488, "train_runtime": 18.1425, "train_samples_per_second": 89.679, "train_steps_per_second": 3.252 }, { "epoch": 2.0, "grad_norm": 65.56416320800781, "learning_rate": 1e-05, "loss": 1.581, "step": 1624 }, { "epoch": 2.0, "eval_exact_match": 52.5625, "eval_f1": 71.03718374348476, "eval_runtime": 56.6314, "eval_samples_per_second": 90.25, "eval_steps_per_second": 3.231, "step": 1624 }, { "epoch": 3.0, "step": 2436, "train_exact_match": 66.03396603396604, "train_f1": 80.90860196742766, "train_runtime": 17.5378, "train_samples_per_second": 90.034, "train_steps_per_second": 3.25 }, { "epoch": 3.0, "grad_norm": 56.48591613769531, "learning_rate": 8.750000000000001e-06, "loss": 1.0352, "step": 2436 }, { "epoch": 3.0, "eval_exact_match": 59.1875, "eval_f1": 75.75121931796592, "eval_runtime": 56.5131, "eval_samples_per_second": 90.439, "eval_steps_per_second": 3.238, "step": 2436 }, { "epoch": 4.0, "step": 3248, "train_exact_match": 70.92907092907093, "train_f1": 85.47419759277868, "train_runtime": 18.0502, "train_samples_per_second": 88.254, "train_steps_per_second": 3.158 }, { "epoch": 4.0, "grad_norm": 23.379392623901367, "learning_rate": 7.500000000000001e-06, "loss": 0.8114, "step": 3248 }, { "epoch": 4.0, "eval_exact_match": 61.375, "eval_f1": 77.2668497754134, "eval_runtime": 57.6273, "eval_samples_per_second": 88.691, "eval_steps_per_second": 3.176, "step": 3248 }, { "epoch": 5.0, "step": 4060, "train_exact_match": 76.42357642357642, "train_f1": 87.93119243163716, "train_runtime": 18.0592, "train_samples_per_second": 88.376, "train_steps_per_second": 3.156 }, { "epoch": 5.0, "grad_norm": 36.056884765625, "learning_rate": 6.25e-06, "loss": 0.6739, "step": 4060 }, { "epoch": 5.0, "eval_exact_match": 62.0, "eval_f1": 77.40449528954417, "eval_runtime": 58.1956, "eval_samples_per_second": 87.824, "eval_steps_per_second": 3.145, "step": 4060 }, { "epoch": 6.0, "step": 4872, "train_exact_match": 76.72327672327673, "train_f1": 89.14222582781746, "train_runtime": 18.7035, "train_samples_per_second": 87.203, "train_steps_per_second": 3.154 }, { "epoch": 6.0, "grad_norm": 14.642334938049316, "learning_rate": 5e-06, "loss": 0.567, "step": 4872 }, { "epoch": 6.0, "eval_exact_match": 62.71875, "eval_f1": 78.11173049836967, "eval_runtime": 57.9474, "eval_samples_per_second": 88.201, "eval_steps_per_second": 3.158, "step": 4872 }, { "epoch": 7.0, "step": 5684, "train_exact_match": 78.52147852147853, "train_f1": 89.51285838266682, "train_runtime": 18.2594, "train_samples_per_second": 87.297, "train_steps_per_second": 3.122 }, { "epoch": 7.0, "grad_norm": 17.107421875, "learning_rate": 3.7500000000000005e-06, "loss": 0.4875, "step": 5684 }, { "epoch": 7.0, "eval_exact_match": 62.875, "eval_f1": 78.08839073173891, "eval_runtime": 58.6231, "eval_samples_per_second": 87.184, "eval_steps_per_second": 3.122, "step": 5684 }, { "epoch": 8.0, "step": 6496, "train_exact_match": 82.31768231768231, "train_f1": 91.06806991799819, "train_runtime": 17.4317, "train_samples_per_second": 88.23, "train_steps_per_second": 3.155 }, { "epoch": 8.0, "grad_norm": 37.56465148925781, "learning_rate": 2.5e-06, "loss": 0.4315, "step": 6496 }, { "epoch": 8.0, "eval_exact_match": 63.4375, "eval_f1": 78.70384115394582, "eval_runtime": 58.0199, "eval_samples_per_second": 88.09, "eval_steps_per_second": 3.154, "step": 6496 }, { "epoch": 9.0, "step": 7308, "train_exact_match": 83.31668331668331, "train_f1": 92.69987725307878, "train_runtime": 18.674, "train_samples_per_second": 87.394, "train_steps_per_second": 3.159 }, { "epoch": 9.0, "grad_norm": 52.06901168823242, "learning_rate": 1.25e-06, "loss": 0.3884, "step": 7308 }, { "epoch": 9.0, "eval_exact_match": 63.78125, "eval_f1": 78.92909907885071, "eval_runtime": 58.0352, "eval_samples_per_second": 88.067, "eval_steps_per_second": 3.153, "step": 7308 }, { "epoch": 10.0, "step": 8120, "train_exact_match": 82.61738261738262, "train_f1": 91.90869875798059, "train_runtime": 17.1824, "train_samples_per_second": 87.764, "train_steps_per_second": 3.143 }, { "epoch": 10.0, "grad_norm": 24.732524871826172, "learning_rate": 0.0, "loss": 0.3607, "step": 8120 }, { "epoch": 10.0, "eval_exact_match": 63.53125, "eval_f1": 78.82926814591693, "eval_runtime": 58.3589, "eval_samples_per_second": 87.579, "eval_steps_per_second": 3.136, "step": 8120 }, { "epoch": 10.0, "step": 8120, "total_flos": 4.451320899376128e+16, "train_loss": 1.0534881366297528, "train_runtime": 5226.7321, "train_samples_per_second": 43.457, "train_steps_per_second": 1.554 } ], "logging_steps": 500, "max_steps": 8120, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 4.451320899376128e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }