{ "best_metric": 78.98580203731828, "best_model_checkpoint": "/home/fbravo/data/all_results/tar/albeto_xxlarge/epochs_2_bs_16_lr_5e-6/checkpoint-7800", "epoch": 2.0, "global_step": 10970, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_exact_match": 50.73793755912961, "eval_f1": 69.4730775689932, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.774384685505926e-06, "loss": 2.2004, "step": 500 }, { "epoch": 0.11, "eval_exact_match": 54.370860927152314, "eval_f1": 73.05853038044215, "step": 600 }, { "epoch": 0.16, "eval_exact_match": 54.89120151371807, "eval_f1": 73.65027892880356, "step": 900 }, { "epoch": 0.18, "learning_rate": 4.546490428441203e-06, "loss": 1.6573, "step": 1000 }, { "epoch": 0.22, "eval_exact_match": 56.9914853358562, "eval_f1": 75.0513540652836, "step": 1200 }, { "epoch": 0.27, "learning_rate": 4.319051959890611e-06, "loss": 1.5668, "step": 1500 }, { "epoch": 0.27, "eval_exact_match": 57.8240302743614, "eval_f1": 75.63669001183044, "step": 1500 }, { "epoch": 0.33, "eval_exact_match": 58.81740775780511, "eval_f1": 76.36054723510281, "step": 1800 }, { "epoch": 0.36, "learning_rate": 4.091157702825889e-06, "loss": 1.5322, "step": 2000 }, { "epoch": 0.38, "eval_exact_match": 59.14853358561968, "eval_f1": 76.483161808874, "step": 2100 }, { "epoch": 0.44, "eval_exact_match": 59.25260170293283, "eval_f1": 76.67791535670501, "step": 2400 }, { "epoch": 0.46, "learning_rate": 3.863263445761167e-06, "loss": 1.5015, "step": 2500 }, { "epoch": 0.49, "eval_exact_match": 59.290444654683064, "eval_f1": 76.94674808406094, "step": 2700 }, { "epoch": 0.55, "learning_rate": 3.6353691886964453e-06, "loss": 1.4561, "step": 3000 }, { "epoch": 0.55, "eval_exact_match": 60.13245033112583, "eval_f1": 77.30126888604488, "step": 3000 }, { "epoch": 0.6, "eval_exact_match": 59.93377483443709, "eval_f1": 77.31924407449924, "step": 3300 }, { "epoch": 0.64, "learning_rate": 3.407474931631723e-06, "loss": 1.4414, "step": 3500 }, { "epoch": 0.66, "eval_exact_match": 60.44465468306528, "eval_f1": 77.63204103944324, "step": 3600 }, { "epoch": 0.71, "eval_exact_match": 61.31504257332072, "eval_f1": 78.13561765566448, "step": 3900 }, { "epoch": 0.73, "learning_rate": 3.179580674567001e-06, "loss": 1.4206, "step": 4000 }, { "epoch": 0.77, "eval_exact_match": 60.52034058656575, "eval_f1": 77.94067744854881, "step": 4200 }, { "epoch": 0.82, "learning_rate": 2.9516864175022793e-06, "loss": 1.3835, "step": 4500 }, { "epoch": 0.82, "eval_exact_match": 60.98391674550615, "eval_f1": 78.30309425310817, "step": 4500 }, { "epoch": 0.88, "eval_exact_match": 61.21097445600757, "eval_f1": 78.3458557935229, "step": 4800 }, { "epoch": 0.91, "learning_rate": 2.7237921604375573e-06, "loss": 1.3656, "step": 5000 }, { "epoch": 0.93, "eval_exact_match": 61.30558183538316, "eval_f1": 78.35466990254258, "step": 5100 }, { "epoch": 0.98, "eval_exact_match": 61.485335856196784, "eval_f1": 78.61331349695031, "step": 5400 }, { "epoch": 1.0, "learning_rate": 2.4958979033728353e-06, "loss": 1.3461, "step": 5500 }, { "epoch": 1.04, "eval_exact_match": 61.258278145695364, "eval_f1": 78.44203108653505, "step": 5700 }, { "epoch": 1.09, "learning_rate": 2.268459434822243e-06, "loss": 1.0985, "step": 6000 }, { "epoch": 1.09, "eval_exact_match": 61.22989593188269, "eval_f1": 78.6338704184745, "step": 6000 }, { "epoch": 1.15, "eval_exact_match": 61.21097445600757, "eval_f1": 78.69664962146094, "step": 6300 }, { "epoch": 1.19, "learning_rate": 2.0405651777575204e-06, "loss": 1.0548, "step": 6500 }, { "epoch": 1.2, "eval_exact_match": 61.343424787133394, "eval_f1": 78.77498098426705, "step": 6600 }, { "epoch": 1.26, "eval_exact_match": 61.65562913907285, "eval_f1": 78.67261809065106, "step": 6900 }, { "epoch": 1.28, "learning_rate": 1.8126709206927986e-06, "loss": 1.0615, "step": 7000 }, { "epoch": 1.31, "eval_exact_match": 61.53263954588458, "eval_f1": 78.5005776920404, "step": 7200 }, { "epoch": 1.37, "learning_rate": 1.5847766636280768e-06, "loss": 1.0828, "step": 7500 }, { "epoch": 1.37, "eval_exact_match": 61.551561021759696, "eval_f1": 78.55843313017921, "step": 7500 }, { "epoch": 1.42, "eval_exact_match": 61.84484389782403, "eval_f1": 78.98580203731828, "step": 7800 }, { "epoch": 1.46, "learning_rate": 1.3573381950774841e-06, "loss": 1.0492, "step": 8000 }, { "epoch": 1.48, "eval_exact_match": 61.73131504257332, "eval_f1": 78.56521589229312, "step": 8100 }, { "epoch": 1.53, "eval_exact_match": 61.81646168401135, "eval_f1": 78.62740589986592, "step": 8400 }, { "epoch": 1.55, "learning_rate": 1.1294439380127621e-06, "loss": 1.0551, "step": 8500 }, { "epoch": 1.59, "eval_exact_match": 61.57994323557237, "eval_f1": 78.64237467540597, "step": 8700 }, { "epoch": 1.64, "learning_rate": 9.015496809480402e-07, "loss": 1.0353, "step": 9000 }, { "epoch": 1.64, "eval_exact_match": 61.30558183538316, "eval_f1": 78.50591438549111, "step": 9000 }, { "epoch": 1.7, "eval_exact_match": 61.63670766319773, "eval_f1": 78.6172319874888, "step": 9300 }, { "epoch": 1.73, "learning_rate": 6.736554238833181e-07, "loss": 1.0373, "step": 9500 }, { "epoch": 1.75, "eval_exact_match": 61.66508987701041, "eval_f1": 78.81109427991775, "step": 9600 }, { "epoch": 1.8, "eval_exact_match": 61.59886471144749, "eval_f1": 78.57587678017954, "step": 9900 }, { "epoch": 1.82, "learning_rate": 4.457611668185962e-07, "loss": 1.0469, "step": 10000 }, { "epoch": 1.86, "eval_exact_match": 61.81646168401135, "eval_f1": 78.77340428037925, "step": 10200 }, { "epoch": 1.91, "learning_rate": 2.1832269826800366e-07, "loss": 1.0179, "step": 10500 }, { "epoch": 1.91, "eval_exact_match": 61.84484389782403, "eval_f1": 78.74115858178497, "step": 10500 }, { "epoch": 1.97, "eval_exact_match": 61.74077578051088, "eval_f1": 78.74584305622463, "step": 10800 }, { "epoch": 2.0, "step": 10970, "total_flos": 6.330675040878592e+16, "train_loss": 1.2922884611618335, "train_runtime": 6069.4707, "train_samples_per_second": 28.914, "train_steps_per_second": 1.807 } ], "max_steps": 10970, "num_train_epochs": 2, "total_flos": 6.330675040878592e+16, "trial_name": null, "trial_params": null }