|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"global_step": 5538, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.945828819068256e-05, |
|
"loss": 0.6014, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8916576381365114e-05, |
|
"loss": 0.0885, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.837486457204767e-05, |
|
"loss": 0.1166, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.783315276273023e-05, |
|
"loss": 0.1077, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7291440953412788e-05, |
|
"loss": 0.1371, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_exact_match": 3.91674550614948, |
|
"eval_f1": 7.935587309728481, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.674972914409534e-05, |
|
"loss": 0.1071, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6208017334777897e-05, |
|
"loss": 0.0913, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5666305525460455e-05, |
|
"loss": 0.1045, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5124593716143014e-05, |
|
"loss": 0.0945, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4582881906825568e-05, |
|
"loss": 0.1195, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_exact_match": 4.541154210028382, |
|
"eval_f1": 11.066264443560108, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.4041170097508126e-05, |
|
"loss": 0.137, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.3499458288190684e-05, |
|
"loss": 0.0922, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2957746478873243e-05, |
|
"loss": 0.1068, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.2416034669555797e-05, |
|
"loss": 0.0938, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.1874322860238355e-05, |
|
"loss": 0.129, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_exact_match": 5.922421948912016, |
|
"eval_f1": 11.530882619495905, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.133261105092091e-05, |
|
"loss": 0.1281, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0790899241603465e-05, |
|
"loss": 0.0706, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.0249187432286023e-05, |
|
"loss": 0.0867, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.970747562296858e-05, |
|
"loss": 0.0995, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.916576381365114e-05, |
|
"loss": 0.0784, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_exact_match": 7.634815515610217, |
|
"eval_f1": 13.322384534549878, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.8624052004333694e-05, |
|
"loss": 0.0722, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.8082340195016252e-05, |
|
"loss": 0.0866, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.754062838569881e-05, |
|
"loss": 0.07, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6998916576381368e-05, |
|
"loss": 0.1021, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6457204767063923e-05, |
|
"loss": 0.0987, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_exact_match": 9.810785241248817, |
|
"eval_f1": 17.14007217511169, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.5915492957746478e-05, |
|
"loss": 0.1044, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.5373781148429036e-05, |
|
"loss": 0.096, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.4832069339111594e-05, |
|
"loss": 0.085, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.4290357529794148e-05, |
|
"loss": 0.0775, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.3748645720476707e-05, |
|
"loss": 0.0734, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_exact_match": 17.37937559129612, |
|
"eval_f1": 26.829174771390555, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.3206933911159263e-05, |
|
"loss": 0.0583, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2665222101841821e-05, |
|
"loss": 0.1033, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2123510292524377e-05, |
|
"loss": 0.0787, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1581798483206934e-05, |
|
"loss": 0.0747, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.104008667388949e-05, |
|
"loss": 0.0821, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_exact_match": 18.68495742667928, |
|
"eval_f1": 28.805039874191937, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.0498374864572048e-05, |
|
"loss": 0.0689, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.956663055254605e-06, |
|
"loss": 0.091, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.414951245937163e-06, |
|
"loss": 0.077, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.873239436619718e-06, |
|
"loss": 0.0579, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.331527627302276e-06, |
|
"loss": 0.0844, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_exact_match": 19.858088930936614, |
|
"eval_f1": 30.209894117774223, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.789815817984832e-06, |
|
"loss": 0.0881, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 7.248104008667389e-06, |
|
"loss": 0.0804, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 6.706392199349946e-06, |
|
"loss": 0.0528, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 6.164680390032503e-06, |
|
"loss": 0.0695, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5.62296858071506e-06, |
|
"loss": 0.0637, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_exact_match": 20.085146641438033, |
|
"eval_f1": 30.456921284874575, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5.081256771397617e-06, |
|
"loss": 0.0884, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.539544962080174e-06, |
|
"loss": 0.0629, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.99783315276273e-06, |
|
"loss": 0.0637, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.456121343445287e-06, |
|
"loss": 0.0809, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.914409534127844e-06, |
|
"loss": 0.0505, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_exact_match": 21.61778618732261, |
|
"eval_f1": 32.5124924843064, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.3726977248104013e-06, |
|
"loss": 0.0746, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8309859154929577e-06, |
|
"loss": 0.0744, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.2892741061755147e-06, |
|
"loss": 0.0423, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 7.475622968580716e-07, |
|
"loss": 0.0624, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.0585048754062841e-07, |
|
"loss": 0.0713, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_exact_match": 21.788079470198674, |
|
"eval_f1": 32.61873772930997, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 5538, |
|
"total_flos": 3.4718499267936256e+16, |
|
"train_loss": 0.09521910717186836, |
|
"train_runtime": 2769.712, |
|
"train_samples_per_second": 63.963, |
|
"train_steps_per_second": 1.999 |
|
} |
|
], |
|
"max_steps": 5538, |
|
"num_train_epochs": 2, |
|
"total_flos": 3.4718499267936256e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|