|
{ |
|
"best_metric": 78.98580203731828, |
|
"best_model_checkpoint": "/home/fbravo/data/all_results/tar/albeto_xxlarge/epochs_2_bs_16_lr_5e-6/checkpoint-7800", |
|
"epoch": 2.0, |
|
"global_step": 10970, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_exact_match": 50.73793755912961, |
|
"eval_f1": 69.4730775689932, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.774384685505926e-06, |
|
"loss": 2.2004, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_exact_match": 54.370860927152314, |
|
"eval_f1": 73.05853038044215, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_exact_match": 54.89120151371807, |
|
"eval_f1": 73.65027892880356, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.546490428441203e-06, |
|
"loss": 1.6573, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_exact_match": 56.9914853358562, |
|
"eval_f1": 75.0513540652836, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.319051959890611e-06, |
|
"loss": 1.5668, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_exact_match": 57.8240302743614, |
|
"eval_f1": 75.63669001183044, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_exact_match": 58.81740775780511, |
|
"eval_f1": 76.36054723510281, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.091157702825889e-06, |
|
"loss": 1.5322, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_exact_match": 59.14853358561968, |
|
"eval_f1": 76.483161808874, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_exact_match": 59.25260170293283, |
|
"eval_f1": 76.67791535670501, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.863263445761167e-06, |
|
"loss": 1.5015, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_exact_match": 59.290444654683064, |
|
"eval_f1": 76.94674808406094, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.6353691886964453e-06, |
|
"loss": 1.4561, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_exact_match": 60.13245033112583, |
|
"eval_f1": 77.30126888604488, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_exact_match": 59.93377483443709, |
|
"eval_f1": 77.31924407449924, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.407474931631723e-06, |
|
"loss": 1.4414, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_exact_match": 60.44465468306528, |
|
"eval_f1": 77.63204103944324, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_exact_match": 61.31504257332072, |
|
"eval_f1": 78.13561765566448, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.179580674567001e-06, |
|
"loss": 1.4206, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_exact_match": 60.52034058656575, |
|
"eval_f1": 77.94067744854881, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.9516864175022793e-06, |
|
"loss": 1.3835, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_exact_match": 60.98391674550615, |
|
"eval_f1": 78.30309425310817, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_exact_match": 61.21097445600757, |
|
"eval_f1": 78.3458557935229, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.7237921604375573e-06, |
|
"loss": 1.3656, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_exact_match": 61.30558183538316, |
|
"eval_f1": 78.35466990254258, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_exact_match": 61.485335856196784, |
|
"eval_f1": 78.61331349695031, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.4958979033728353e-06, |
|
"loss": 1.3461, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_exact_match": 61.258278145695364, |
|
"eval_f1": 78.44203108653505, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.268459434822243e-06, |
|
"loss": 1.0985, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_exact_match": 61.22989593188269, |
|
"eval_f1": 78.6338704184745, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_exact_match": 61.21097445600757, |
|
"eval_f1": 78.69664962146094, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0405651777575204e-06, |
|
"loss": 1.0548, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_exact_match": 61.343424787133394, |
|
"eval_f1": 78.77498098426705, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_exact_match": 61.65562913907285, |
|
"eval_f1": 78.67261809065106, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.8126709206927986e-06, |
|
"loss": 1.0615, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_exact_match": 61.53263954588458, |
|
"eval_f1": 78.5005776920404, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5847766636280768e-06, |
|
"loss": 1.0828, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_exact_match": 61.551561021759696, |
|
"eval_f1": 78.55843313017921, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_exact_match": 61.84484389782403, |
|
"eval_f1": 78.98580203731828, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.3573381950774841e-06, |
|
"loss": 1.0492, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_exact_match": 61.73131504257332, |
|
"eval_f1": 78.56521589229312, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_exact_match": 61.81646168401135, |
|
"eval_f1": 78.62740589986592, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.1294439380127621e-06, |
|
"loss": 1.0551, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_exact_match": 61.57994323557237, |
|
"eval_f1": 78.64237467540597, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.015496809480402e-07, |
|
"loss": 1.0353, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_exact_match": 61.30558183538316, |
|
"eval_f1": 78.50591438549111, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_exact_match": 61.63670766319773, |
|
"eval_f1": 78.6172319874888, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 6.736554238833181e-07, |
|
"loss": 1.0373, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_exact_match": 61.66508987701041, |
|
"eval_f1": 78.81109427991775, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_exact_match": 61.59886471144749, |
|
"eval_f1": 78.57587678017954, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.457611668185962e-07, |
|
"loss": 1.0469, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_exact_match": 61.81646168401135, |
|
"eval_f1": 78.77340428037925, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.1832269826800366e-07, |
|
"loss": 1.0179, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_exact_match": 61.84484389782403, |
|
"eval_f1": 78.74115858178497, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_exact_match": 61.74077578051088, |
|
"eval_f1": 78.74584305622463, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 10970, |
|
"total_flos": 6.330675040878592e+16, |
|
"train_loss": 1.2922884611618335, |
|
"train_runtime": 6069.4707, |
|
"train_samples_per_second": 28.914, |
|
"train_steps_per_second": 1.807 |
|
} |
|
], |
|
"max_steps": 10970, |
|
"num_train_epochs": 2, |
|
"total_flos": 6.330675040878592e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|