{ "best_metric": 73.01711252807637, "best_model_checkpoint": "/root/turkic_qa/en_uzn_models/en_uzn_xlm_roberta_base_squad_model/checkpoint-4140", "epoch": 10.0, "eval_steps": 500, "global_step": 6900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 690, "train_exact_match": 53.84615384615385, "train_f1": 69.79095478605717, "train_runtime": 14.0504, "train_samples_per_second": 89.82, "train_steps_per_second": 3.274 }, { "epoch": 1.0, "grad_norm": 19.47218894958496, "learning_rate": 5e-06, "loss": 1.6813, "step": 690 }, { "epoch": 1.0, "eval_exact_match": 52.0625, "eval_f1": 67.50170309345776, "eval_runtime": 44.891, "eval_samples_per_second": 90.263, "eval_steps_per_second": 3.23, "step": 690 }, { "epoch": 2.0, "step": 1380, "train_exact_match": 63.93606393606394, "train_f1": 76.38693770706172, "train_runtime": 14.2008, "train_samples_per_second": 89.643, "train_steps_per_second": 3.239 }, { "epoch": 2.0, "grad_norm": 45.075042724609375, "learning_rate": 1e-05, "loss": 1.3266, "step": 1380 }, { "epoch": 2.0, "eval_exact_match": 56.15625, "eval_f1": 70.83430989837035, "eval_runtime": 45.0891, "eval_samples_per_second": 89.867, "eval_steps_per_second": 3.216, "step": 1380 }, { "epoch": 3.0, "step": 2070, "train_exact_match": 67.43256743256744, "train_f1": 80.45847460323496, "train_runtime": 14.4622, "train_samples_per_second": 89.544, "train_steps_per_second": 3.25 }, { "epoch": 3.0, "grad_norm": 17.318368911743164, "learning_rate": 8.750000000000001e-06, "loss": 1.1036, "step": 2070 }, { "epoch": 3.0, "eval_exact_match": 57.46875, "eval_f1": 71.70302249821802, "eval_runtime": 44.9476, "eval_samples_per_second": 90.149, "eval_steps_per_second": 3.226, "step": 2070 }, { "epoch": 4.0, "step": 2760, "train_exact_match": 71.72827172827172, "train_f1": 84.90184305868037, "train_runtime": 14.4996, "train_samples_per_second": 89.313, "train_steps_per_second": 3.241 }, { "epoch": 4.0, "grad_norm": 217.5539093017578, "learning_rate": 7.500000000000001e-06, "loss": 0.9255, "step": 2760 }, { "epoch": 4.0, "eval_exact_match": 58.34375, "eval_f1": 72.47808689784853, "eval_runtime": 44.8378, "eval_samples_per_second": 90.37, "eval_steps_per_second": 3.234, "step": 2760 }, { "epoch": 5.0, "step": 3450, "train_exact_match": 76.32367632367632, "train_f1": 88.37491424418958, "train_runtime": 14.4084, "train_samples_per_second": 85.992, "train_steps_per_second": 3.123 }, { "epoch": 5.0, "grad_norm": 394.8098449707031, "learning_rate": 6.25e-06, "loss": 0.7806, "step": 3450 }, { "epoch": 5.0, "eval_exact_match": 58.21875, "eval_f1": 72.48290373070347, "eval_runtime": 45.4975, "eval_samples_per_second": 89.06, "eval_steps_per_second": 3.187, "step": 3450 }, { "epoch": 6.0, "step": 4140, "train_exact_match": 78.72127872127872, "train_f1": 88.97987588000849, "train_runtime": 14.2096, "train_samples_per_second": 88.813, "train_steps_per_second": 3.237 }, { "epoch": 6.0, "grad_norm": 17.177759170532227, "learning_rate": 5e-06, "loss": 0.6664, "step": 4140 }, { "epoch": 6.0, "eval_exact_match": 59.0, "eval_f1": 73.01711252807637, "eval_runtime": 45.351, "eval_samples_per_second": 89.348, "eval_steps_per_second": 3.197, "step": 4140 }, { "epoch": 7.0, "step": 4830, "train_exact_match": 82.51748251748252, "train_f1": 91.72028497776134, "train_runtime": 13.942, "train_samples_per_second": 89.37, "train_steps_per_second": 3.228 }, { "epoch": 7.0, "grad_norm": 354.42584228515625, "learning_rate": 3.7500000000000005e-06, "loss": 0.5911, "step": 4830 }, { "epoch": 7.0, "eval_exact_match": 58.53125, "eval_f1": 72.55506746037935, "eval_runtime": 45.1047, "eval_samples_per_second": 89.835, "eval_steps_per_second": 3.215, "step": 4830 }, { "epoch": 8.0, "step": 5520, "train_exact_match": 81.91808191808192, "train_f1": 91.29001859546244, "train_runtime": 14.3482, "train_samples_per_second": 89.14, "train_steps_per_second": 3.206 }, { "epoch": 8.0, "grad_norm": 184.72279357910156, "learning_rate": 2.5e-06, "loss": 0.5247, "step": 5520 }, { "epoch": 8.0, "eval_exact_match": 58.25, "eval_f1": 72.07621980566323, "eval_runtime": 45.0133, "eval_samples_per_second": 90.018, "eval_steps_per_second": 3.221, "step": 5520 }, { "epoch": 9.0, "step": 6210, "train_exact_match": 83.21678321678321, "train_f1": 92.6376023057958, "train_runtime": 14.5039, "train_samples_per_second": 89.355, "train_steps_per_second": 3.24 }, { "epoch": 9.0, "grad_norm": 3.3138372898101807, "learning_rate": 1.25e-06, "loss": 0.4735, "step": 6210 }, { "epoch": 9.0, "eval_exact_match": 58.25, "eval_f1": 72.13528491061855, "eval_runtime": 45.1402, "eval_samples_per_second": 89.765, "eval_steps_per_second": 3.212, "step": 6210 }, { "epoch": 10.0, "step": 6900, "train_exact_match": 86.81318681318682, "train_f1": 94.26051176344673, "train_runtime": 14.1643, "train_samples_per_second": 89.309, "train_steps_per_second": 3.248 }, { "epoch": 10.0, "grad_norm": 12.08403491973877, "learning_rate": 0.0, "loss": 0.4451, "step": 6900 }, { "epoch": 10.0, "eval_exact_match": 58.25, "eval_f1": 72.1039896331836, "eval_runtime": 45.0152, "eval_samples_per_second": 90.014, "eval_steps_per_second": 3.221, "step": 6900 }, { "epoch": 10.0, "step": 6900, "total_flos": 3.780898745780736e+16, "train_loss": 0.8518442347429801, "train_runtime": 4369.5998, "train_samples_per_second": 44.153, "train_steps_per_second": 1.579 } ], "logging_steps": 500, "max_steps": 6900, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.780898745780736e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }