{ "best_metric": 60.62761059424247, "best_model_checkpoint": "/data/jcanete/all_results/mlqa/distillbeto/epochs_3_bs_16_lr_5e-5/checkpoint-12300", "epoch": 3.0, "global_step": 15381, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "eval_exact_match": 17.4, "eval_f1": 30.32932709945737, "step": 300 }, { "epoch": 0.1, "learning_rate": 4.838437032702685e-05, "loss": 3.719, "step": 500 }, { "epoch": 0.12, "eval_exact_match": 20.6, "eval_f1": 37.076640043048776, "step": 600 }, { "epoch": 0.18, "eval_exact_match": 24.8, "eval_f1": 41.84428151388094, "step": 900 }, { "epoch": 0.2, "learning_rate": 4.6758988362265134e-05, "loss": 2.9745, "step": 1000 }, { "epoch": 0.23, "eval_exact_match": 24.4, "eval_f1": 42.64360101358078, "step": 1200 }, { "epoch": 0.29, "learning_rate": 4.5133606397503415e-05, "loss": 2.7579, "step": 1500 }, { "epoch": 0.29, "eval_exact_match": 26.8, "eval_f1": 48.268680757504505, "step": 1500 }, { "epoch": 0.35, "eval_exact_match": 28.2, "eval_f1": 49.756554071203084, "step": 1800 }, { "epoch": 0.39, "learning_rate": 4.3508224432741696e-05, "loss": 2.5993, "step": 2000 }, { "epoch": 0.41, "eval_exact_match": 29.0, "eval_f1": 48.999017405606445, "step": 2100 }, { "epoch": 0.47, "eval_exact_match": 28.2, "eval_f1": 50.21409850637284, "step": 2400 }, { "epoch": 0.49, "learning_rate": 4.188284246797998e-05, "loss": 2.5658, "step": 2500 }, { "epoch": 0.53, "eval_exact_match": 30.8, "eval_f1": 51.69176784487257, "step": 2700 }, { "epoch": 0.59, "learning_rate": 4.0260711267147784e-05, "loss": 2.4763, "step": 3000 }, { "epoch": 0.59, "eval_exact_match": 32.6, "eval_f1": 53.82386360217891, "step": 3000 }, { "epoch": 0.64, "eval_exact_match": 31.6, "eval_f1": 53.311489367004775, "step": 3300 }, { "epoch": 0.68, "learning_rate": 3.8635329302386065e-05, "loss": 2.3893, "step": 3500 }, { "epoch": 0.7, "eval_exact_match": 32.0, "eval_f1": 53.23265973484611, "step": 3600 }, { "epoch": 0.76, "eval_exact_match": 31.8, "eval_f1": 54.46584036020615, "step": 3900 }, { "epoch": 0.78, "learning_rate": 3.7009947337624346e-05, "loss": 2.3713, "step": 4000 }, { "epoch": 0.82, "eval_exact_match": 30.4, "eval_f1": 53.87761850361369, "step": 4200 }, { "epoch": 0.88, "learning_rate": 3.5387816136792146e-05, "loss": 2.3229, "step": 4500 }, { "epoch": 0.88, "eval_exact_match": 33.8, "eval_f1": 56.159600258090194, "step": 4500 }, { "epoch": 0.94, "eval_exact_match": 34.8, "eval_f1": 57.45088180281838, "step": 4800 }, { "epoch": 0.98, "learning_rate": 3.376243417203043e-05, "loss": 2.2892, "step": 5000 }, { "epoch": 0.99, "eval_exact_match": 33.4, "eval_f1": 57.09233458055284, "step": 5100 }, { "epoch": 1.05, "eval_exact_match": 33.6, "eval_f1": 57.65540686906378, "step": 5400 }, { "epoch": 1.07, "learning_rate": 3.213705220726871e-05, "loss": 2.0818, "step": 5500 }, { "epoch": 1.11, "eval_exact_match": 34.0, "eval_f1": 56.63309020916699, "step": 5700 }, { "epoch": 1.17, "learning_rate": 3.051167024250699e-05, "loss": 1.9928, "step": 6000 }, { "epoch": 1.17, "eval_exact_match": 34.8, "eval_f1": 57.55733406275571, "step": 6000 }, { "epoch": 1.23, "eval_exact_match": 35.4, "eval_f1": 56.60799657495974, "step": 6300 }, { "epoch": 1.27, "learning_rate": 2.888628827774527e-05, "loss": 1.9304, "step": 6500 }, { "epoch": 1.29, "eval_exact_match": 33.8, "eval_f1": 56.337531192657984, "step": 6600 }, { "epoch": 1.35, "eval_exact_match": 35.8, "eval_f1": 58.39477883856857, "step": 6900 }, { "epoch": 1.37, "learning_rate": 2.7260906312983555e-05, "loss": 1.984, "step": 7000 }, { "epoch": 1.4, "eval_exact_match": 35.2, "eval_f1": 57.36112838371099, "step": 7200 }, { "epoch": 1.46, "learning_rate": 2.5635524348221836e-05, "loss": 1.9726, "step": 7500 }, { "epoch": 1.46, "eval_exact_match": 35.6, "eval_f1": 57.98892993007854, "step": 7500 }, { "epoch": 1.52, "eval_exact_match": 36.8, "eval_f1": 57.73167903988771, "step": 7800 }, { "epoch": 1.56, "learning_rate": 2.4010142383460113e-05, "loss": 1.9706, "step": 8000 }, { "epoch": 1.58, "eval_exact_match": 34.2, "eval_f1": 57.1473434081718, "step": 8100 }, { "epoch": 1.64, "eval_exact_match": 36.6, "eval_f1": 58.059032558136906, "step": 8400 }, { "epoch": 1.66, "learning_rate": 2.2384760418698394e-05, "loss": 1.9159, "step": 8500 }, { "epoch": 1.7, "eval_exact_match": 36.8, "eval_f1": 57.88380136273412, "step": 8700 }, { "epoch": 1.76, "learning_rate": 2.07626292178662e-05, "loss": 1.9019, "step": 9000 }, { "epoch": 1.76, "eval_exact_match": 36.6, "eval_f1": 58.80808564736181, "step": 9000 }, { "epoch": 1.81, "eval_exact_match": 36.2, "eval_f1": 58.473738166006726, "step": 9300 }, { "epoch": 1.85, "learning_rate": 1.9137247253104482e-05, "loss": 1.9478, "step": 9500 }, { "epoch": 1.87, "eval_exact_match": 35.6, "eval_f1": 58.20909169330069, "step": 9600 }, { "epoch": 1.93, "eval_exact_match": 35.0, "eval_f1": 58.29322380617596, "step": 9900 }, { "epoch": 1.95, "learning_rate": 1.751186528834276e-05, "loss": 1.8909, "step": 10000 }, { "epoch": 1.99, "eval_exact_match": 35.8, "eval_f1": 59.20379968263694, "step": 10200 }, { "epoch": 2.05, "learning_rate": 1.5889734087510566e-05, "loss": 1.7441, "step": 10500 }, { "epoch": 2.05, "eval_exact_match": 36.6, "eval_f1": 59.70561445422752, "step": 10500 }, { "epoch": 2.11, "eval_exact_match": 36.0, "eval_f1": 59.06336874221, "step": 10800 }, { "epoch": 2.15, "learning_rate": 1.4264352122748847e-05, "loss": 1.6342, "step": 11000 }, { "epoch": 2.17, "eval_exact_match": 34.8, "eval_f1": 58.16179505760981, "step": 11100 }, { "epoch": 2.22, "eval_exact_match": 35.0, "eval_f1": 58.02777082755699, "step": 11400 }, { "epoch": 2.24, "learning_rate": 1.2638970157987128e-05, "loss": 1.6129, "step": 11500 }, { "epoch": 2.28, "eval_exact_match": 34.6, "eval_f1": 58.29141560339857, "step": 11700 }, { "epoch": 2.34, "learning_rate": 1.101358819322541e-05, "loss": 1.5814, "step": 12000 }, { "epoch": 2.34, "eval_exact_match": 36.6, "eval_f1": 58.77641537872069, "step": 12000 }, { "epoch": 2.4, "eval_exact_match": 37.2, "eval_f1": 60.62761059424247, "step": 12300 }, { "epoch": 2.44, "learning_rate": 9.388206228463689e-06, "loss": 1.6041, "step": 12500 }, { "epoch": 2.46, "eval_exact_match": 36.0, "eval_f1": 59.05191316875973, "step": 12600 }, { "epoch": 2.52, "eval_exact_match": 34.0, "eval_f1": 58.3915453062455, "step": 12900 }, { "epoch": 2.54, "learning_rate": 7.76282426370197e-06, "loss": 1.6054, "step": 13000 }, { "epoch": 2.57, "eval_exact_match": 35.8, "eval_f1": 59.222805863165625, "step": 13200 }, { "epoch": 2.63, "learning_rate": 6.137442298940251e-06, "loss": 1.5882, "step": 13500 }, { "epoch": 2.63, "eval_exact_match": 34.6, "eval_f1": 58.24542684372941, "step": 13500 }, { "epoch": 2.69, "eval_exact_match": 35.4, "eval_f1": 58.46770769703503, "step": 13800 }, { "epoch": 2.73, "learning_rate": 4.512060334178532e-06, "loss": 1.5841, "step": 14000 }, { "epoch": 2.75, "eval_exact_match": 35.8, "eval_f1": 58.797998172208, "step": 14100 }, { "epoch": 2.81, "eval_exact_match": 35.4, "eval_f1": 59.277714258266194, "step": 14400 }, { "epoch": 2.83, "learning_rate": 2.8899291333463367e-06, "loss": 1.5605, "step": 14500 }, { "epoch": 2.87, "eval_exact_match": 35.8, "eval_f1": 59.45930485720972, "step": 14700 }, { "epoch": 2.93, "learning_rate": 1.2645471685846174e-06, "loss": 1.5615, "step": 15000 }, { "epoch": 2.93, "eval_exact_match": 35.2, "eval_f1": 58.96715598234879, "step": 15000 }, { "epoch": 2.98, "eval_exact_match": 35.0, "eval_f1": 58.67411209812842, "step": 15300 }, { "epoch": 3.0, "step": 15381, "total_flos": 2.2092851276461056e+16, "train_loss": 2.057734964200962, "train_runtime": 14184.383, "train_samples_per_second": 17.348, "train_steps_per_second": 1.084 } ], "max_steps": 15381, "num_train_epochs": 3, "total_flos": 2.2092851276461056e+16, "trial_name": null, "trial_params": null }