{
  "best_metric": 0.6465878115264797,
  "best_model_checkpoint": "/xdisk/msurdeanu/enoriega/kw_pubmed_experiments/kw_pubmed_vanilla_sentence_10000_0.0003/checkpoint-100",
  "epoch": 1.3559903108724085,
  "global_step": 350,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 0.00029998499999999995,
      "loss": 2.5567,
      "step": 5
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00029997,
      "loss": 1.9715,
      "step": 10
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00029995499999999997,
      "loss": 1.8692,
      "step": 15
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00029994,
      "loss": 1.8491,
      "step": 20
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.000299925,
      "loss": 1.8861,
      "step": 25
    },
    {
      "epoch": 0.1,
      "eval_accuracy": 0.6368282710280374,
      "eval_loss": 1.842035174369812,
      "eval_runtime": 60.5739,
      "eval_samples_per_second": 165.088,
      "eval_steps_per_second": 10.318,
      "step": 25
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00029991299999999996,
      "loss": 1.8289,
      "step": 30
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.000299898,
      "loss": 1.813,
      "step": 35
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.000299883,
      "loss": 1.8096,
      "step": 40
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00029986799999999996,
      "loss": 1.8019,
      "step": 45
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.000299853,
      "loss": 1.7913,
      "step": 50
    },
    {
      "epoch": 0.19,
      "eval_accuracy": 0.6451718892607241,
      "eval_loss": 1.7883105278015137,
      "eval_runtime": 60.6012,
      "eval_samples_per_second": 165.013,
      "eval_steps_per_second": 10.313,
      "step": 50
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00029983799999999997,
      "loss": 1.7907,
      "step": 55
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.000299823,
      "loss": 1.7853,
      "step": 60
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.000299808,
      "loss": 1.7915,
      "step": 65
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00029979299999999997,
      "loss": 1.7815,
      "step": 70
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.000299778,
      "loss": 1.7876,
      "step": 75
    },
    {
      "epoch": 0.29,
      "eval_accuracy": 0.6446729540614542,
      "eval_loss": 1.7794435024261475,
      "eval_runtime": 61.0108,
      "eval_samples_per_second": 163.905,
      "eval_steps_per_second": 10.244,
      "step": 75
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.000299763,
      "loss": 1.7707,
      "step": 80
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00029974799999999996,
      "loss": 1.7773,
      "step": 85
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.000299733,
      "loss": 1.7759,
      "step": 90
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.000299718,
      "loss": 1.7662,
      "step": 95
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00029970299999999996,
      "loss": 1.7729,
      "step": 100
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.6465878115264797,
      "eval_loss": 1.7647390365600586,
      "eval_runtime": 60.7957,
      "eval_samples_per_second": 164.485,
      "eval_steps_per_second": 10.28,
      "step": 100
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00029968799999999994,
      "loss": 1.7748,
      "step": 105
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.000299673,
      "loss": 1.7706,
      "step": 110
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00029965799999999996,
      "loss": 1.785,
      "step": 115
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.000299643,
      "loss": 1.778,
      "step": 120
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00029962799999999997,
      "loss": 1.7919,
      "step": 125
    },
    {
      "epoch": 0.48,
      "eval_accuracy": 0.6427293636595962,
      "eval_loss": 1.7991459369659424,
      "eval_runtime": 60.6682,
      "eval_samples_per_second": 164.831,
      "eval_steps_per_second": 10.302,
      "step": 125
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00029961299999999995,
      "loss": 1.8059,
      "step": 130
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.000299598,
      "loss": 1.7882,
      "step": 135
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00029958299999999997,
      "loss": 2.0033,
      "step": 140
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00029956799999999995,
      "loss": 1.9014,
      "step": 145
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.000299553,
      "loss": 1.858,
      "step": 150
    },
    {
      "epoch": 0.58,
      "eval_accuracy": 0.6386691654294546,
      "eval_loss": 1.8227039575576782,
      "eval_runtime": 60.7206,
      "eval_samples_per_second": 164.689,
      "eval_steps_per_second": 10.293,
      "step": 150
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00029953799999999996,
      "loss": 1.8575,
      "step": 155
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.000299523,
      "loss": 1.8191,
      "step": 160
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00029951099999999997,
      "loss": 1.8317,
      "step": 165
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00029949599999999995,
      "loss": 1.8389,
      "step": 170
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.000299481,
      "loss": 1.8151,
      "step": 175
    },
    {
      "epoch": 0.68,
      "eval_accuracy": 0.632689569566646,
      "eval_loss": 1.8751027584075928,
      "eval_runtime": 62.2872,
      "eval_samples_per_second": 160.547,
      "eval_steps_per_second": 10.034,
      "step": 175
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00029946599999999997,
      "loss": 1.8842,
      "step": 180
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00029945099999999995,
      "loss": 1.8344,
      "step": 185
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.000299436,
      "loss": 1.8594,
      "step": 190
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00029942099999999996,
      "loss": 1.9278,
      "step": 195
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.000299406,
      "loss": 1.8539,
      "step": 200
    },
    {
      "epoch": 0.77,
      "eval_accuracy": 0.6372984484332218,
      "eval_loss": 1.8315496444702148,
      "eval_runtime": 61.0171,
      "eval_samples_per_second": 163.888,
      "eval_steps_per_second": 10.243,
      "step": 200
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.000299391,
      "loss": 1.8632,
      "step": 205
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00029937599999999996,
      "loss": 1.907,
      "step": 210
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.000299361,
      "loss": 1.8705,
      "step": 215
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.000299346,
      "loss": 1.8825,
      "step": 220
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.000299331,
      "loss": 1.8623,
      "step": 225
    },
    {
      "epoch": 0.87,
      "eval_accuracy": 0.6348281107392759,
      "eval_loss": 1.846932053565979,
      "eval_runtime": 60.9038,
      "eval_samples_per_second": 164.193,
      "eval_steps_per_second": 10.262,
      "step": 225
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.000299316,
      "loss": 1.8923,
      "step": 230
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.00029930099999999997,
      "loss": 1.852,
      "step": 235
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.00029928599999999995,
      "loss": 1.8754,
      "step": 240
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.00029927099999999993,
      "loss": 1.9014,
      "step": 245
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.00029925599999999997,
      "loss": 1.8992,
      "step": 250
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.6353137321715426,
      "eval_loss": 1.846245288848877,
      "eval_runtime": 60.8105,
      "eval_samples_per_second": 164.445,
      "eval_steps_per_second": 10.278,
      "step": 250
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.00029924099999999995,
      "loss": 1.8759,
      "step": 255
    },
    {
      "epoch": 1.01,
      "learning_rate": 0.000299226,
      "loss": 2.0852,
      "step": 260
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.00029921099999999996,
      "loss": 1.8663,
      "step": 265
    },
    {
      "epoch": 1.05,
      "learning_rate": 0.00029919599999999995,
      "loss": 1.8555,
      "step": 270
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.000299181,
      "loss": 1.8717,
      "step": 275
    },
    {
      "epoch": 1.07,
      "eval_accuracy": 0.6298189252336449,
      "eval_loss": 1.8705145120620728,
      "eval_runtime": 60.9334,
      "eval_samples_per_second": 164.114,
      "eval_steps_per_second": 10.257,
      "step": 275
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.00029916599999999996,
      "loss": 1.9226,
      "step": 280
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.000299151,
      "loss": 2.0094,
      "step": 285
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.000299136,
      "loss": 1.9307,
      "step": 290
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.00029912099999999996,
      "loss": 1.9055,
      "step": 295
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.000299106,
      "loss": 1.9057,
      "step": 300
    },
    {
      "epoch": 1.16,
      "eval_accuracy": 0.6247444509345794,
      "eval_loss": 1.913194179534912,
      "eval_runtime": 60.8541,
      "eval_samples_per_second": 164.327,
      "eval_steps_per_second": 10.27,
      "step": 300
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.00029909099999999997,
      "loss": 1.8821,
      "step": 305
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.000299076,
      "loss": 1.8635,
      "step": 310
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.000299061,
      "loss": 1.8655,
      "step": 315
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.00029904599999999997,
      "loss": 1.8642,
      "step": 320
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.000299031,
      "loss": 1.8637,
      "step": 325
    },
    {
      "epoch": 1.26,
      "eval_accuracy": 0.6270322235202492,
      "eval_loss": 1.900992751121521,
      "eval_runtime": 60.9918,
      "eval_samples_per_second": 163.957,
      "eval_steps_per_second": 10.247,
      "step": 325
    },
    {
      "epoch": 1.28,
      "learning_rate": 0.000299016,
      "loss": 1.9827,
      "step": 330
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.00029900099999999997,
      "loss": 1.95,
      "step": 335
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.00029898599999999995,
      "loss": 1.9358,
      "step": 340
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.000298971,
      "loss": 1.9403,
      "step": 345
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.00029895599999999996,
      "loss": 1.947,
      "step": 350
    },
    {
      "epoch": 1.36,
      "eval_accuracy": 0.6236979166666666,
      "eval_loss": 1.903308629989624,
      "eval_runtime": 60.9439,
      "eval_samples_per_second": 164.085,
      "eval_steps_per_second": 10.255,
      "step": 350
    },
    {
      "epoch": 1.36,
      "step": 350,
      "total_flos": 1.899141968469924e+17,
      "train_loss": 1.8697899763924735,
      "train_runtime": 46350.9183,
      "train_samples_per_second": 17259.636,
      "train_steps_per_second": 2.157
    },
    {
      "epoch": 1.36,
      "eval_accuracy": 0.6470223174085522,
      "eval_loss": 1.767507791519165,
      "eval_runtime": 61.0991,
      "eval_samples_per_second": 163.669,
      "eval_steps_per_second": 10.229,
      "step": 350
    }
  ],
  "max_steps": 100000,
  "num_train_epochs": 388,
  "total_flos": 1.899141968469924e+17,
  "trial_name": null,
  "trial_params": null
}