{ "best_metric": 44.04121981988171, "best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/orig_uzn_roberta_base_model/checkpoint-5430", "epoch": 10.0, "eval_steps": 500, "global_step": 5430, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 543, "train_exact_match": 6.993006993006993, "train_f1": 14.72956145700345, "train_runtime": 8.9919, "train_samples_per_second": 114.102, "train_steps_per_second": 4.115 }, { "epoch": 1.0, "grad_norm": 22.987104415893555, "learning_rate": 5e-06, "loss": 4.7716, "step": 543 }, { "epoch": 1.0, "eval_exact_match": 6.65625, "eval_f1": 13.460548189578436, "eval_runtime": 28.875, "eval_samples_per_second": 112.935, "eval_steps_per_second": 4.052, "step": 543 }, { "epoch": 2.0, "step": 1086, "train_exact_match": 12.987012987012987, "train_f1": 21.035135926338754, "train_runtime": 9.0115, "train_samples_per_second": 112.523, "train_steps_per_second": 4.106 }, { "epoch": 2.0, "grad_norm": 27.96239471435547, "learning_rate": 1e-05, "loss": 3.7695, "step": 1086 }, { "epoch": 2.0, "eval_exact_match": 10.71875, "eval_f1": 17.87426936207211, "eval_runtime": 28.7945, "eval_samples_per_second": 113.251, "eval_steps_per_second": 4.063, "step": 1086 }, { "epoch": 3.0, "step": 1629, "train_exact_match": 25.174825174825173, "train_f1": 34.8258899586999, "train_runtime": 9.0183, "train_samples_per_second": 113.214, "train_steps_per_second": 4.103 }, { "epoch": 3.0, "grad_norm": 38.839942932128906, "learning_rate": 8.750000000000001e-06, "loss": 3.4391, "step": 1629 }, { "epoch": 3.0, "eval_exact_match": 15.84375, "eval_f1": 25.41741224618572, "eval_runtime": 29.0024, "eval_samples_per_second": 112.439, "eval_steps_per_second": 4.034, "step": 1629 }, { "epoch": 4.0, "step": 2172, "train_exact_match": 29.97002997002997, "train_f1": 41.494806937045226, "train_runtime": 9.0706, "train_samples_per_second": 112.671, "train_steps_per_second": 4.079 }, { "epoch": 4.0, "grad_norm": 35.55900955200195, "learning_rate": 7.500000000000001e-06, "loss": 2.9975, "step": 2172 }, { "epoch": 4.0, "eval_exact_match": 20.4375, "eval_f1": 31.800313776639523, "eval_runtime": 28.9344, "eval_samples_per_second": 112.703, "eval_steps_per_second": 4.044, "step": 2172 }, { "epoch": 5.0, "step": 2715, "train_exact_match": 38.86113886113886, "train_f1": 51.45493720149751, "train_runtime": 9.0591, "train_samples_per_second": 112.483, "train_steps_per_second": 4.084 }, { "epoch": 5.0, "grad_norm": 60.02823257446289, "learning_rate": 6.25e-06, "loss": 2.6357, "step": 2715 }, { "epoch": 5.0, "eval_exact_match": 23.65625, "eval_f1": 36.81284091086545, "eval_runtime": 28.8941, "eval_samples_per_second": 112.861, "eval_steps_per_second": 4.049, "step": 2715 }, { "epoch": 6.0, "step": 3258, "train_exact_match": 47.752247752247754, "train_f1": 60.68401873806664, "train_runtime": 8.9792, "train_samples_per_second": 113.373, "train_steps_per_second": 4.121 }, { "epoch": 6.0, "grad_norm": 37.850128173828125, "learning_rate": 5e-06, "loss": 2.3155, "step": 3258 }, { "epoch": 6.0, "eval_exact_match": 26.125, "eval_f1": 40.40349142174044, "eval_runtime": 29.0794, "eval_samples_per_second": 112.141, "eval_steps_per_second": 4.023, "step": 3258 }, { "epoch": 7.0, "step": 3801, "train_exact_match": 50.44955044955045, "train_f1": 64.76949286642217, "train_runtime": 8.8774, "train_samples_per_second": 113.547, "train_steps_per_second": 4.055 }, { "epoch": 7.0, "grad_norm": 51.20785140991211, "learning_rate": 3.7500000000000005e-06, "loss": 2.0695, "step": 3801 }, { "epoch": 7.0, "eval_exact_match": 27.8125, "eval_f1": 42.61264490274304, "eval_runtime": 29.0203, "eval_samples_per_second": 112.37, "eval_steps_per_second": 4.032, "step": 3801 }, { "epoch": 8.0, "step": 4344, "train_exact_match": 51.34865134865135, "train_f1": 65.11930431582645, "train_runtime": 8.9923, "train_samples_per_second": 113.208, "train_steps_per_second": 4.115 }, { "epoch": 8.0, "grad_norm": 56.128509521484375, "learning_rate": 2.5e-06, "loss": 1.8975, "step": 4344 }, { "epoch": 8.0, "eval_exact_match": 29.125, "eval_f1": 43.50290837664036, "eval_runtime": 28.9786, "eval_samples_per_second": 112.531, "eval_steps_per_second": 4.037, "step": 4344 }, { "epoch": 9.0, "step": 4887, "train_exact_match": 57.24275724275724, "train_f1": 69.40786106942967, "train_runtime": 9.0219, "train_samples_per_second": 112.615, "train_steps_per_second": 4.101 }, { "epoch": 9.0, "grad_norm": 60.7649040222168, "learning_rate": 1.25e-06, "loss": 1.7833, "step": 4887 }, { "epoch": 9.0, "eval_exact_match": 29.1875, "eval_f1": 43.93033202135494, "eval_runtime": 29.1076, "eval_samples_per_second": 112.033, "eval_steps_per_second": 4.02, "step": 4887 }, { "epoch": 10.0, "step": 5430, "train_exact_match": 58.04195804195804, "train_f1": 70.9609187667352, "train_runtime": 9.0761, "train_samples_per_second": 112.383, "train_steps_per_second": 4.077 }, { "epoch": 10.0, "grad_norm": 42.582576751708984, "learning_rate": 0.0, "loss": 1.7125, "step": 5430 }, { "epoch": 10.0, "eval_exact_match": 29.1875, "eval_f1": 44.04121981988171, "eval_runtime": 28.9802, "eval_samples_per_second": 112.525, "eval_steps_per_second": 4.037, "step": 5430 }, { "epoch": 10.0, "step": 5430, "total_flos": 1.487387554039296e+16, "train_loss": 2.7391692272207355, "train_runtime": 1969.257, "train_samples_per_second": 77.08, "train_steps_per_second": 2.757 } ], "logging_steps": 500, "max_steps": 5430, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.487387554039296e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }