{ "best_metric": 0.6465878115264797, "best_model_checkpoint": "/xdisk/msurdeanu/enoriega/kw_pubmed_experiments/kw_pubmed_vanilla_sentence_10000_0.0003/checkpoint-100", "epoch": 1.3559903108724085, "global_step": 350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00029998499999999995, "loss": 2.5567, "step": 5 }, { "epoch": 0.04, "learning_rate": 0.00029997, "loss": 1.9715, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.00029995499999999997, "loss": 1.8692, "step": 15 }, { "epoch": 0.08, "learning_rate": 0.00029994, "loss": 1.8491, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.000299925, "loss": 1.8861, "step": 25 }, { "epoch": 0.1, "eval_accuracy": 0.6368282710280374, "eval_loss": 1.842035174369812, "eval_runtime": 60.5739, "eval_samples_per_second": 165.088, "eval_steps_per_second": 10.318, "step": 25 }, { "epoch": 0.12, "learning_rate": 0.00029991299999999996, "loss": 1.8289, "step": 30 }, { "epoch": 0.14, "learning_rate": 0.000299898, "loss": 1.813, "step": 35 }, { "epoch": 0.15, "learning_rate": 0.000299883, "loss": 1.8096, "step": 40 }, { "epoch": 0.17, "learning_rate": 0.00029986799999999996, "loss": 1.8019, "step": 45 }, { "epoch": 0.19, "learning_rate": 0.000299853, "loss": 1.7913, "step": 50 }, { "epoch": 0.19, "eval_accuracy": 0.6451718892607241, "eval_loss": 1.7883105278015137, "eval_runtime": 60.6012, "eval_samples_per_second": 165.013, "eval_steps_per_second": 10.313, "step": 50 }, { "epoch": 0.21, "learning_rate": 0.00029983799999999997, "loss": 1.7907, "step": 55 }, { "epoch": 0.23, "learning_rate": 0.000299823, "loss": 1.7853, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.000299808, "loss": 1.7915, "step": 65 }, { "epoch": 0.27, "learning_rate": 0.00029979299999999997, "loss": 1.7815, "step": 70 }, { "epoch": 0.29, "learning_rate": 0.000299778, "loss": 1.7876, "step": 75 }, { "epoch": 0.29, "eval_accuracy": 0.6446729540614542, "eval_loss": 1.7794435024261475, "eval_runtime": 61.0108, "eval_samples_per_second": 163.905, "eval_steps_per_second": 10.244, "step": 75 }, { "epoch": 0.31, "learning_rate": 0.000299763, "loss": 1.7707, "step": 80 }, { "epoch": 0.33, "learning_rate": 0.00029974799999999996, "loss": 1.7773, "step": 85 }, { "epoch": 0.35, "learning_rate": 0.000299733, "loss": 1.7759, "step": 90 }, { "epoch": 0.37, "learning_rate": 0.000299718, "loss": 1.7662, "step": 95 }, { "epoch": 0.39, "learning_rate": 0.00029970299999999996, "loss": 1.7729, "step": 100 }, { "epoch": 0.39, "eval_accuracy": 0.6465878115264797, "eval_loss": 1.7647390365600586, "eval_runtime": 60.7957, "eval_samples_per_second": 164.485, "eval_steps_per_second": 10.28, "step": 100 }, { "epoch": 0.41, "learning_rate": 0.00029968799999999994, "loss": 1.7748, "step": 105 }, { "epoch": 0.43, "learning_rate": 0.000299673, "loss": 1.7706, "step": 110 }, { "epoch": 0.44, "learning_rate": 0.00029965799999999996, "loss": 1.785, "step": 115 }, { "epoch": 0.46, "learning_rate": 0.000299643, "loss": 1.778, "step": 120 }, { "epoch": 0.48, "learning_rate": 0.00029962799999999997, "loss": 1.7919, "step": 125 }, { "epoch": 0.48, "eval_accuracy": 0.6427293636595962, "eval_loss": 1.7991459369659424, "eval_runtime": 60.6682, "eval_samples_per_second": 164.831, "eval_steps_per_second": 10.302, "step": 125 }, { "epoch": 0.5, "learning_rate": 0.00029961299999999995, "loss": 1.8059, "step": 130 }, { "epoch": 0.52, "learning_rate": 0.000299598, "loss": 1.7882, "step": 135 }, { "epoch": 0.54, "learning_rate": 0.00029958299999999997, "loss": 2.0033, "step": 140 }, { "epoch": 0.56, "learning_rate": 0.00029956799999999995, "loss": 1.9014, "step": 145 }, { "epoch": 0.58, "learning_rate": 0.000299553, "loss": 1.858, "step": 150 }, { "epoch": 0.58, "eval_accuracy": 0.6386691654294546, "eval_loss": 1.8227039575576782, "eval_runtime": 60.7206, "eval_samples_per_second": 164.689, "eval_steps_per_second": 10.293, "step": 150 }, { "epoch": 0.6, "learning_rate": 0.00029953799999999996, "loss": 1.8575, "step": 155 }, { "epoch": 0.62, "learning_rate": 0.000299523, "loss": 1.8191, "step": 160 }, { "epoch": 0.64, "learning_rate": 0.00029951099999999997, "loss": 1.8317, "step": 165 }, { "epoch": 0.66, "learning_rate": 0.00029949599999999995, "loss": 1.8389, "step": 170 }, { "epoch": 0.68, "learning_rate": 0.000299481, "loss": 1.8151, "step": 175 }, { "epoch": 0.68, "eval_accuracy": 0.632689569566646, "eval_loss": 1.8751027584075928, "eval_runtime": 62.2872, "eval_samples_per_second": 160.547, "eval_steps_per_second": 10.034, "step": 175 }, { "epoch": 0.7, "learning_rate": 0.00029946599999999997, "loss": 1.8842, "step": 180 }, { "epoch": 0.72, "learning_rate": 0.00029945099999999995, "loss": 1.8344, "step": 185 }, { "epoch": 0.74, "learning_rate": 0.000299436, "loss": 1.8594, "step": 190 }, { "epoch": 0.75, "learning_rate": 0.00029942099999999996, "loss": 1.9278, "step": 195 }, { "epoch": 0.77, "learning_rate": 0.000299406, "loss": 1.8539, "step": 200 }, { "epoch": 0.77, "eval_accuracy": 0.6372984484332218, "eval_loss": 1.8315496444702148, "eval_runtime": 61.0171, "eval_samples_per_second": 163.888, "eval_steps_per_second": 10.243, "step": 200 }, { "epoch": 0.79, "learning_rate": 0.000299391, "loss": 1.8632, "step": 205 }, { "epoch": 0.81, "learning_rate": 0.00029937599999999996, "loss": 1.907, "step": 210 }, { "epoch": 0.83, "learning_rate": 0.000299361, "loss": 1.8705, "step": 215 }, { "epoch": 0.85, "learning_rate": 0.000299346, "loss": 1.8825, "step": 220 }, { "epoch": 0.87, "learning_rate": 0.000299331, "loss": 1.8623, "step": 225 }, { "epoch": 0.87, "eval_accuracy": 0.6348281107392759, "eval_loss": 1.846932053565979, "eval_runtime": 60.9038, "eval_samples_per_second": 164.193, "eval_steps_per_second": 10.262, "step": 225 }, { "epoch": 0.89, "learning_rate": 0.000299316, "loss": 1.8923, "step": 230 }, { "epoch": 0.91, "learning_rate": 0.00029930099999999997, "loss": 1.852, "step": 235 }, { "epoch": 0.93, "learning_rate": 0.00029928599999999995, "loss": 1.8754, "step": 240 }, { "epoch": 0.95, "learning_rate": 0.00029927099999999993, "loss": 1.9014, "step": 245 }, { "epoch": 0.97, "learning_rate": 0.00029925599999999997, "loss": 1.8992, "step": 250 }, { "epoch": 0.97, "eval_accuracy": 0.6353137321715426, "eval_loss": 1.846245288848877, "eval_runtime": 60.8105, "eval_samples_per_second": 164.445, "eval_steps_per_second": 10.278, "step": 250 }, { "epoch": 0.99, "learning_rate": 0.00029924099999999995, "loss": 1.8759, "step": 255 }, { "epoch": 1.01, "learning_rate": 0.000299226, "loss": 2.0852, "step": 260 }, { "epoch": 1.03, "learning_rate": 0.00029921099999999996, "loss": 1.8663, "step": 265 }, { "epoch": 1.05, "learning_rate": 0.00029919599999999995, "loss": 1.8555, "step": 270 }, { "epoch": 1.07, "learning_rate": 0.000299181, "loss": 1.8717, "step": 275 }, { "epoch": 1.07, "eval_accuracy": 0.6298189252336449, "eval_loss": 1.8705145120620728, "eval_runtime": 60.9334, "eval_samples_per_second": 164.114, "eval_steps_per_second": 10.257, "step": 275 }, { "epoch": 1.09, "learning_rate": 0.00029916599999999996, "loss": 1.9226, "step": 280 }, { "epoch": 1.1, "learning_rate": 0.000299151, "loss": 2.0094, "step": 285 }, { "epoch": 1.12, "learning_rate": 0.000299136, "loss": 1.9307, "step": 290 }, { "epoch": 1.14, "learning_rate": 0.00029912099999999996, "loss": 1.9055, "step": 295 }, { "epoch": 1.16, "learning_rate": 0.000299106, "loss": 1.9057, "step": 300 }, { "epoch": 1.16, "eval_accuracy": 0.6247444509345794, "eval_loss": 1.913194179534912, "eval_runtime": 60.8541, "eval_samples_per_second": 164.327, "eval_steps_per_second": 10.27, "step": 300 }, { "epoch": 1.18, "learning_rate": 0.00029909099999999997, "loss": 1.8821, "step": 305 }, { "epoch": 1.2, "learning_rate": 0.000299076, "loss": 1.8635, "step": 310 }, { "epoch": 1.22, "learning_rate": 0.000299061, "loss": 1.8655, "step": 315 }, { "epoch": 1.24, "learning_rate": 0.00029904599999999997, "loss": 1.8642, "step": 320 }, { "epoch": 1.26, "learning_rate": 0.000299031, "loss": 1.8637, "step": 325 }, { "epoch": 1.26, "eval_accuracy": 0.6270322235202492, "eval_loss": 1.900992751121521, "eval_runtime": 60.9918, "eval_samples_per_second": 163.957, "eval_steps_per_second": 10.247, "step": 325 }, { "epoch": 1.28, "learning_rate": 0.000299016, "loss": 1.9827, "step": 330 }, { "epoch": 1.3, "learning_rate": 0.00029900099999999997, "loss": 1.95, "step": 335 }, { "epoch": 1.32, "learning_rate": 0.00029898599999999995, "loss": 1.9358, "step": 340 }, { "epoch": 1.34, "learning_rate": 0.000298971, "loss": 1.9403, "step": 345 }, { "epoch": 1.36, "learning_rate": 0.00029895599999999996, "loss": 1.947, "step": 350 }, { "epoch": 1.36, "eval_accuracy": 0.6236979166666666, "eval_loss": 1.903308629989624, "eval_runtime": 60.9439, "eval_samples_per_second": 164.085, "eval_steps_per_second": 10.255, "step": 350 }, { "epoch": 1.36, "step": 350, "total_flos": 1.899141968469924e+17, "train_loss": 1.8697899763924735, "train_runtime": 46350.9183, "train_samples_per_second": 17259.636, "train_steps_per_second": 2.157 }, { "epoch": 1.36, "eval_accuracy": 0.6470223174085522, "eval_loss": 1.767507791519165, "eval_runtime": 61.0991, "eval_samples_per_second": 163.669, "eval_steps_per_second": 10.229, "step": 350 } ], "max_steps": 100000, "num_train_epochs": 388, "total_flos": 1.899141968469924e+17, "trial_name": null, "trial_params": null }