{ "best_metric": 70.29279085912718, "best_model_checkpoint": "/data/jcanete/all_results/mlqa/albeto_base/epochs_4_bs_64_lr_5e-5/checkpoint-4200", "epoch": 3.9996101364522416, "global_step": 5128, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23, "eval_exact_match": 36.4, "eval_f1": 61.84101912660733, "step": 300 }, { "epoch": 0.39, "learning_rate": 4.517355694227769e-05, "loss": 2.3283, "step": 500 }, { "epoch": 0.47, "eval_exact_match": 38.6, "eval_f1": 64.05444583625557, "step": 600 }, { "epoch": 0.7, "eval_exact_match": 39.8, "eval_f1": 64.38318427781859, "step": 900 }, { "epoch": 0.78, "learning_rate": 4.030811232449298e-05, "loss": 1.8958, "step": 1000 }, { "epoch": 0.94, "eval_exact_match": 42.6, "eval_f1": 67.53463785062704, "step": 1200 }, { "epoch": 1.17, "learning_rate": 3.5432917316692674e-05, "loss": 1.7304, "step": 1500 }, { "epoch": 1.17, "eval_exact_match": 41.6, "eval_f1": 67.26857117036066, "step": 1500 }, { "epoch": 1.4, "eval_exact_match": 40.8, "eval_f1": 68.19818328108937, "step": 1800 }, { "epoch": 1.56, "learning_rate": 3.055772230889236e-05, "loss": 1.606, "step": 2000 }, { "epoch": 1.64, "eval_exact_match": 42.6, "eval_f1": 68.13227861379248, "step": 2100 }, { "epoch": 1.87, "eval_exact_match": 45.2, "eval_f1": 69.37715961143962, "step": 2400 }, { "epoch": 1.95, "learning_rate": 2.5682527301092048e-05, "loss": 1.5968, "step": 2500 }, { "epoch": 2.11, "eval_exact_match": 42.4, "eval_f1": 68.23125120786263, "step": 2700 }, { "epoch": 2.34, "learning_rate": 2.080733229329173e-05, "loss": 1.4139, "step": 3000 }, { "epoch": 2.34, "eval_exact_match": 43.2, "eval_f1": 69.28256897389873, "step": 3000 }, { "epoch": 2.57, "eval_exact_match": 43.8, "eval_f1": 69.8210897722378, "step": 3300 }, { "epoch": 2.73, "learning_rate": 1.593213728549142e-05, "loss": 1.3802, "step": 3500 }, { "epoch": 2.81, "eval_exact_match": 44.0, "eval_f1": 69.9704820053898, "step": 3600 }, { "epoch": 3.04, "eval_exact_match": 43.6, "eval_f1": 70.21665964298872, "step": 3900 }, { "epoch": 3.12, "learning_rate": 1.1056942277691108e-05, "loss": 1.3413, "step": 4000 }, { "epoch": 3.28, "eval_exact_match": 44.0, "eval_f1": 70.29279085912718, "step": 4200 }, { "epoch": 3.51, "learning_rate": 6.1817472698907965e-06, "loss": 1.2214, "step": 4500 }, { "epoch": 3.51, "eval_exact_match": 43.2, "eval_f1": 69.84257510948594, "step": 4500 }, { "epoch": 3.74, "eval_exact_match": 44.2, "eval_f1": 69.60819358399327, "step": 4800 }, { "epoch": 3.9, "learning_rate": 1.3065522620904836e-06, "loss": 1.2215, "step": 5000 }, { "epoch": 3.98, "eval_exact_match": 43.4, "eval_f1": 69.19469528834327, "step": 5100 }, { "epoch": 4.0, "step": 5128, "total_flos": 5575305579646752.0, "train_loss": 1.5647673971381462, "train_runtime": 3554.3046, "train_samples_per_second": 92.34, "train_steps_per_second": 1.443 } ], "max_steps": 5128, "num_train_epochs": 4, "total_flos": 5575305579646752.0, "trial_name": null, "trial_params": null }