{ "best_metric": 48.71262499235238, "best_model_checkpoint": "/root/turkic_qa/tr_kaz_models/orig_kaz_roberta_base_model/checkpoint-5823", "epoch": 10.0, "eval_steps": 500, "global_step": 6470, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 647, "train_exact_match": 3.5964035964035963, "train_f1": 11.56795258837141, "train_runtime": 11.3677, "train_samples_per_second": 115.503, "train_steps_per_second": 4.135 }, { "epoch": 1.0, "grad_norm": 11.74316120147705, "learning_rate": 5e-06, "loss": 4.6868, "step": 647 }, { "epoch": 1.0, "eval_exact_match": 4.4375, "eval_f1": 11.321307166156533, "eval_runtime": 35.34, "eval_samples_per_second": 113.384, "eval_steps_per_second": 4.075, "step": 647 }, { "epoch": 2.0, "step": 1294, "train_exact_match": 11.688311688311689, "train_f1": 19.152344840271617, "train_runtime": 11.2191, "train_samples_per_second": 112.844, "train_steps_per_second": 4.1 }, { "epoch": 2.0, "grad_norm": 15.642627716064453, "learning_rate": 1e-05, "loss": 3.5751, "step": 1294 }, { "epoch": 2.0, "eval_exact_match": 9.6875, "eval_f1": 17.179159683522684, "eval_runtime": 35.4377, "eval_samples_per_second": 113.072, "eval_steps_per_second": 4.063, "step": 1294 }, { "epoch": 3.0, "step": 1941, "train_exact_match": 27.47252747252747, "train_f1": 39.09864539474671, "train_runtime": 11.4784, "train_samples_per_second": 112.646, "train_steps_per_second": 4.095 }, { "epoch": 3.0, "grad_norm": 15.969060897827148, "learning_rate": 8.750000000000001e-06, "loss": 3.0602, "step": 1941 }, { "epoch": 3.0, "eval_exact_match": 20.625, "eval_f1": 31.5365409883315, "eval_runtime": 35.4806, "eval_samples_per_second": 112.935, "eval_steps_per_second": 4.059, "step": 1941 }, { "epoch": 4.0, "step": 2588, "train_exact_match": 37.46253746253746, "train_f1": 50.75619914926189, "train_runtime": 11.2214, "train_samples_per_second": 114.513, "train_steps_per_second": 4.099 }, { "epoch": 4.0, "grad_norm": 23.334590911865234, "learning_rate": 7.500000000000001e-06, "loss": 2.5025, "step": 2588 }, { "epoch": 4.0, "eval_exact_match": 28.5, "eval_f1": 42.64759962559622, "eval_runtime": 35.4891, "eval_samples_per_second": 112.908, "eval_steps_per_second": 4.058, "step": 2588 }, { "epoch": 5.0, "step": 3235, "train_exact_match": 47.55244755244755, "train_f1": 60.596926593269394, "train_runtime": 11.4376, "train_samples_per_second": 113.922, "train_steps_per_second": 4.109 }, { "epoch": 5.0, "grad_norm": 26.927326202392578, "learning_rate": 6.25e-06, "loss": 2.1403, "step": 3235 }, { "epoch": 5.0, "eval_exact_match": 30.9375, "eval_f1": 45.23519557953087, "eval_runtime": 35.5162, "eval_samples_per_second": 112.822, "eval_steps_per_second": 4.054, "step": 3235 }, { "epoch": 6.0, "step": 3882, "train_exact_match": 53.04695304695305, "train_f1": 65.4389894343176, "train_runtime": 11.2606, "train_samples_per_second": 114.381, "train_steps_per_second": 4.085 }, { "epoch": 6.0, "grad_norm": 19.14756965637207, "learning_rate": 5e-06, "loss": 1.9037, "step": 3882 }, { "epoch": 6.0, "eval_exact_match": 32.46875, "eval_f1": 47.29669704036703, "eval_runtime": 35.4202, "eval_samples_per_second": 113.127, "eval_steps_per_second": 4.065, "step": 3882 }, { "epoch": 7.0, "step": 4529, "train_exact_match": 57.642357642357645, "train_f1": 69.53035836992328, "train_runtime": 11.2232, "train_samples_per_second": 112.802, "train_steps_per_second": 4.099 }, { "epoch": 7.0, "grad_norm": 24.628639221191406, "learning_rate": 3.7500000000000005e-06, "loss": 1.7438, "step": 4529 }, { "epoch": 7.0, "eval_exact_match": 33.15625, "eval_f1": 47.60121866597253, "eval_runtime": 35.4127, "eval_samples_per_second": 113.152, "eval_steps_per_second": 4.066, "step": 4529 }, { "epoch": 8.0, "step": 5176, "train_exact_match": 56.54345654345654, "train_f1": 68.42451603920425, "train_runtime": 11.6061, "train_samples_per_second": 113.044, "train_steps_per_second": 4.05 }, { "epoch": 8.0, "grad_norm": 25.127649307250977, "learning_rate": 2.5e-06, "loss": 1.6205, "step": 5176 }, { "epoch": 8.0, "eval_exact_match": 33.625, "eval_f1": 48.59237042611232, "eval_runtime": 35.4006, "eval_samples_per_second": 113.19, "eval_steps_per_second": 4.068, "step": 5176 }, { "epoch": 9.0, "step": 5823, "train_exact_match": 60.53946053946054, "train_f1": 71.41358726367281, "train_runtime": 11.2378, "train_samples_per_second": 113.545, "train_steps_per_second": 4.093 }, { "epoch": 9.0, "grad_norm": 22.670251846313477, "learning_rate": 1.25e-06, "loss": 1.5369, "step": 5823 }, { "epoch": 9.0, "eval_exact_match": 33.8125, "eval_f1": 48.71262499235238, "eval_runtime": 35.5251, "eval_samples_per_second": 112.793, "eval_steps_per_second": 4.053, "step": 5823 }, { "epoch": 10.0, "step": 6470, "train_exact_match": 63.73626373626374, "train_f1": 74.25641909537842, "train_runtime": 11.4055, "train_samples_per_second": 113.892, "train_steps_per_second": 4.121 }, { "epoch": 10.0, "grad_norm": 26.434606552124023, "learning_rate": 0.0, "loss": 1.4813, "step": 6470 }, { "epoch": 10.0, "eval_exact_match": 33.78125, "eval_f1": 48.574128923315, "eval_runtime": 35.5406, "eval_samples_per_second": 112.744, "eval_steps_per_second": 4.052, "step": 6470 }, { "epoch": 10.0, "step": 6470, "total_flos": 1.773811812584448e+16, "train_loss": 2.425102486234544, "train_runtime": 2362.6071, "train_samples_per_second": 76.619, "train_steps_per_second": 2.739 } ], "logging_steps": 500, "max_steps": 6470, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.773811812584448e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }