{ "best_metric": 0.735632183908046, "best_model_checkpoint": "/xdisk/msurdeanu/enoriega/kw_pubmed_experiments/kw_pubmed_keyword_sentence_10000_0.0003/checkpoint-25", "epoch": 1.06578081831338, "global_step": 275, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.000299991, "loss": 2.0241, "step": 5 }, { "epoch": 0.04, "learning_rate": 0.000299976, "loss": 1.3033, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.000299961, "loss": 1.5029, "step": 15 }, { "epoch": 0.08, "learning_rate": 0.000299946, "loss": 1.2483, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.00029993099999999997, "loss": 1.4697, "step": 25 }, { "epoch": 0.1, "eval_accuracy": 0.735632183908046, "eval_loss": NaN, "eval_runtime": 59.6178, "eval_samples_per_second": 167.735, "eval_steps_per_second": 10.483, "step": 25 }, { "epoch": 0.12, "learning_rate": 0.000299916, "loss": 1.2378, "step": 30 }, { "epoch": 0.14, "learning_rate": 0.00029990099999999993, "loss": 1.7161, "step": 35 }, { "epoch": 0.15, "learning_rate": 0.00029988599999999997, "loss": 2.0569, "step": 40 }, { "epoch": 0.17, "learning_rate": 0.00029987099999999995, "loss": 2.1263, "step": 45 }, { "epoch": 0.19, "learning_rate": 0.000299856, "loss": 2.5526, "step": 50 }, { "epoch": 0.19, "eval_accuracy": 0.723092998955068, "eval_loss": NaN, "eval_runtime": 59.378, "eval_samples_per_second": 168.412, "eval_steps_per_second": 10.526, "step": 50 }, { "epoch": 0.21, "learning_rate": 0.00029984099999999996, "loss": 1.2865, "step": 55 }, { "epoch": 0.23, "learning_rate": 0.000299829, "loss": 2.0071, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.00029981399999999997, "loss": 2.3439, "step": 65 }, { "epoch": 0.27, "learning_rate": 0.000299799, "loss": 1.5348, "step": 70 }, { "epoch": 0.29, "learning_rate": 0.00029978399999999993, "loss": 2.3377, "step": 75 }, { "epoch": 0.29, "eval_accuracy": 0.7082027168234065, "eval_loss": NaN, "eval_runtime": 59.6718, "eval_samples_per_second": 167.583, "eval_steps_per_second": 10.474, "step": 75 }, { "epoch": 0.31, "learning_rate": 0.00029976899999999997, "loss": 1.7853, "step": 80 }, { "epoch": 0.33, "learning_rate": 0.00029975399999999995, "loss": 1.9305, "step": 85 }, { "epoch": 0.35, "learning_rate": 0.000299739, "loss": 1.9862, "step": 90 }, { "epoch": 0.37, "learning_rate": 0.00029972399999999996, "loss": 3.8541, "step": 95 }, { "epoch": 0.39, "learning_rate": 0.00029970899999999995, "loss": 2.9479, "step": 100 }, { "epoch": 0.39, "eval_accuracy": 0.6987983281086729, "eval_loss": NaN, "eval_runtime": 59.6999, "eval_samples_per_second": 167.505, "eval_steps_per_second": 10.469, "step": 100 }, { "epoch": 0.41, "learning_rate": 0.000299694, "loss": 2.1467, "step": 105 }, { "epoch": 0.43, "learning_rate": 0.00029967899999999996, "loss": 2.8996, "step": 110 }, { "epoch": 0.44, "learning_rate": 0.000299664, "loss": 3.0211, "step": 115 }, { "epoch": 0.46, "learning_rate": 0.000299649, "loss": 2.2422, "step": 120 }, { "epoch": 0.48, "learning_rate": 0.00029963399999999996, "loss": 1.4212, "step": 125 }, { "epoch": 0.48, "eval_accuracy": 0.705067920585162, "eval_loss": NaN, "eval_runtime": 59.6589, "eval_samples_per_second": 167.62, "eval_steps_per_second": 10.476, "step": 125 }, { "epoch": 0.5, "learning_rate": 0.000299619, "loss": 1.8562, "step": 130 }, { "epoch": 0.52, "learning_rate": 0.00029960399999999997, "loss": 2.0218, "step": 135 }, { "epoch": 0.54, "learning_rate": 0.00029958899999999995, "loss": 1.331, "step": 140 }, { "epoch": 0.56, "learning_rate": 0.000299574, "loss": 1.9291, "step": 145 }, { "epoch": 0.58, "learning_rate": 0.00029955899999999997, "loss": 1.8373, "step": 150 }, { "epoch": 0.58, "eval_accuracy": 0.7189132706374086, "eval_loss": NaN, "eval_runtime": 59.6305, "eval_samples_per_second": 167.699, "eval_steps_per_second": 10.481, "step": 150 }, { "epoch": 0.6, "learning_rate": 0.000299544, "loss": 1.9669, "step": 155 }, { "epoch": 0.62, "learning_rate": 0.000299529, "loss": 1.8719, "step": 160 }, { "epoch": 0.64, "learning_rate": 0.00029951399999999997, "loss": 2.4312, "step": 165 }, { "epoch": 0.66, "learning_rate": 0.000299499, "loss": 3.1182, "step": 170 }, { "epoch": 0.68, "learning_rate": 0.000299484, "loss": 1.5001, "step": 175 }, { "epoch": 0.68, "eval_accuracy": 0.6812957157784744, "eval_loss": NaN, "eval_runtime": 59.7742, "eval_samples_per_second": 167.296, "eval_steps_per_second": 10.456, "step": 175 }, { "epoch": 0.7, "learning_rate": 0.00029946899999999996, "loss": 1.493, "step": 180 }, { "epoch": 0.72, "learning_rate": 0.00029945399999999994, "loss": 1.5281, "step": 185 }, { "epoch": 0.74, "learning_rate": 0.000299439, "loss": 1.6081, "step": 190 }, { "epoch": 0.75, "learning_rate": 0.00029942399999999996, "loss": 1.4183, "step": 195 }, { "epoch": 0.77, "learning_rate": 0.000299409, "loss": 1.9615, "step": 200 }, { "epoch": 0.77, "eval_accuracy": 0.7055903866248694, "eval_loss": NaN, "eval_runtime": 59.6776, "eval_samples_per_second": 167.567, "eval_steps_per_second": 10.473, "step": 200 }, { "epoch": 0.79, "learning_rate": 0.000299394, "loss": 1.7213, "step": 205 }, { "epoch": 0.81, "learning_rate": 0.00029937899999999995, "loss": 1.5657, "step": 210 }, { "epoch": 0.83, "learning_rate": 0.000299364, "loss": 1.6782, "step": 215 }, { "epoch": 0.85, "learning_rate": 0.00029934899999999997, "loss": 2.0081, "step": 220 }, { "epoch": 0.87, "learning_rate": 0.00029933399999999995, "loss": 1.4458, "step": 225 }, { "epoch": 0.87, "eval_accuracy": 0.6974921630094044, "eval_loss": NaN, "eval_runtime": 59.7906, "eval_samples_per_second": 167.25, "eval_steps_per_second": 10.453, "step": 225 }, { "epoch": 0.89, "learning_rate": 0.000299322, "loss": 1.7357, "step": 230 }, { "epoch": 0.91, "learning_rate": 0.00029930699999999996, "loss": 4.7272, "step": 235 }, { "epoch": 0.93, "learning_rate": 0.00029929199999999994, "loss": 1.6994, "step": 240 }, { "epoch": 0.95, "learning_rate": 0.000299277, "loss": 1.6879, "step": 245 }, { "epoch": 0.97, "learning_rate": 0.00029926199999999995, "loss": 1.4237, "step": 250 }, { "epoch": 0.97, "eval_accuracy": 0.7087251828631139, "eval_loss": NaN, "eval_runtime": 59.7381, "eval_samples_per_second": 167.397, "eval_steps_per_second": 10.462, "step": 250 }, { "epoch": 0.99, "learning_rate": 0.000299247, "loss": 1.5802, "step": 255 }, { "epoch": 1.01, "learning_rate": 0.00029923199999999997, "loss": 2.2637, "step": 260 }, { "epoch": 1.03, "learning_rate": 0.00029921699999999995, "loss": 1.4615, "step": 265 }, { "epoch": 1.05, "learning_rate": 0.000299202, "loss": 1.6544, "step": 270 }, { "epoch": 1.07, "learning_rate": 0.00029918699999999997, "loss": 1.6758, "step": 275 }, { "epoch": 1.07, "eval_accuracy": 0.6922675026123302, "eval_loss": NaN, "eval_runtime": 59.6857, "eval_samples_per_second": 167.544, "eval_steps_per_second": 10.472, "step": 275 }, { "epoch": 1.07, "step": 275, "total_flos": 1.493606932357716e+17, "train_loss": 1.9488013319535689, "train_runtime": 36132.536, "train_samples_per_second": 22140.71, "train_steps_per_second": 2.768 }, { "epoch": 1.07, "eval_accuracy": 0.7343260188087775, "eval_loss": NaN, "eval_runtime": 59.7956, "eval_samples_per_second": 167.236, "eval_steps_per_second": 10.452, "step": 275 } ], "max_steps": 100000, "num_train_epochs": 388, "total_flos": 1.493606932357716e+17, "trial_name": null, "trial_params": null }