{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 22160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 2.9323104693140794e-05, "loss": 1.365, "step": 500 }, { "epoch": 0.09, "learning_rate": 2.864620938628159e-05, "loss": 0.9493, "step": 1000 }, { "epoch": 0.14, "learning_rate": 2.7969314079422385e-05, "loss": 0.8795, "step": 1500 }, { "epoch": 0.18, "learning_rate": 2.729241877256318e-05, "loss": 0.8473, "step": 2000 }, { "epoch": 0.23, "learning_rate": 2.6615523465703972e-05, "loss": 0.8401, "step": 2500 }, { "epoch": 0.27, "learning_rate": 2.5938628158844765e-05, "loss": 0.8402, "step": 3000 }, { "epoch": 0.32, "learning_rate": 2.526173285198556e-05, "loss": 0.8296, "step": 3500 }, { "epoch": 0.36, "learning_rate": 2.4584837545126353e-05, "loss": 0.7466, "step": 4000 }, { "epoch": 0.41, "learning_rate": 2.390794223826715e-05, "loss": 0.7833, "step": 4500 }, { "epoch": 0.45, "learning_rate": 2.3231046931407943e-05, "loss": 0.8053, "step": 5000 }, { "epoch": 0.5, "learning_rate": 2.2554151624548737e-05, "loss": 0.7537, "step": 5500 }, { "epoch": 0.54, "learning_rate": 2.1877256317689534e-05, "loss": 0.7497, "step": 6000 }, { "epoch": 0.59, "learning_rate": 2.1200361010830327e-05, "loss": 0.7674, "step": 6500 }, { "epoch": 0.63, "learning_rate": 2.0523465703971117e-05, "loss": 0.7417, "step": 7000 }, { "epoch": 0.68, "learning_rate": 1.9846570397111914e-05, "loss": 0.7584, "step": 7500 }, { "epoch": 0.72, "learning_rate": 1.9169675090252708e-05, "loss": 0.7545, "step": 8000 }, { "epoch": 0.77, "learning_rate": 1.84927797833935e-05, "loss": 0.766, "step": 8500 }, { "epoch": 0.81, "learning_rate": 1.7815884476534298e-05, "loss": 0.7339, "step": 9000 }, { "epoch": 0.86, "learning_rate": 1.7138989169675092e-05, "loss": 0.714, "step": 9500 }, { "epoch": 0.9, "learning_rate": 1.6462093862815885e-05, "loss": 0.7173, "step": 10000 }, { "epoch": 0.95, "learning_rate": 1.578519855595668e-05, "loss": 0.7207, "step": 10500 }, { "epoch": 0.99, "learning_rate": 1.5108303249097474e-05, "loss": 0.6918, "step": 11000 }, { "epoch": 1.0, "eval_exact_match": 88.30652790917692, "eval_f1": 94.12994072622395, "step": 11080 }, { "epoch": 1.04, "learning_rate": 1.4431407942238268e-05, "loss": 0.5424, "step": 11500 }, { "epoch": 1.08, "learning_rate": 1.3754512635379063e-05, "loss": 0.4775, "step": 12000 }, { "epoch": 1.13, "learning_rate": 1.3077617328519855e-05, "loss": 0.483, "step": 12500 }, { "epoch": 1.17, "learning_rate": 1.240072202166065e-05, "loss": 0.4584, "step": 13000 }, { "epoch": 1.22, "learning_rate": 1.1723826714801445e-05, "loss": 0.4604, "step": 13500 }, { "epoch": 1.26, "learning_rate": 1.1046931407942239e-05, "loss": 0.4531, "step": 14000 }, { "epoch": 1.31, "learning_rate": 1.0370036101083034e-05, "loss": 0.4533, "step": 14500 }, { "epoch": 1.35, "learning_rate": 9.693140794223826e-06, "loss": 0.4583, "step": 15000 }, { "epoch": 1.4, "learning_rate": 9.016245487364621e-06, "loss": 0.4623, "step": 15500 }, { "epoch": 1.44, "learning_rate": 8.339350180505416e-06, "loss": 0.4756, "step": 16000 }, { "epoch": 1.49, "learning_rate": 7.66245487364621e-06, "loss": 0.4339, "step": 16500 }, { "epoch": 1.53, "learning_rate": 6.985559566787004e-06, "loss": 0.4502, "step": 17000 }, { "epoch": 1.58, "learning_rate": 6.308664259927798e-06, "loss": 0.44, "step": 17500 }, { "epoch": 1.62, "learning_rate": 5.631768953068592e-06, "loss": 0.4057, "step": 18000 }, { "epoch": 1.67, "learning_rate": 4.954873646209386e-06, "loss": 0.4528, "step": 18500 }, { "epoch": 1.71, "learning_rate": 4.277978339350181e-06, "loss": 0.4447, "step": 19000 }, { "epoch": 1.76, "learning_rate": 3.6010830324909746e-06, "loss": 0.4338, "step": 19500 }, { "epoch": 1.81, "learning_rate": 2.924187725631769e-06, "loss": 0.4449, "step": 20000 }, { "epoch": 1.85, "learning_rate": 2.247292418772563e-06, "loss": 0.4263, "step": 20500 }, { "epoch": 1.9, "learning_rate": 1.5703971119133576e-06, "loss": 0.4382, "step": 21000 }, { "epoch": 1.94, "learning_rate": 8.935018050541516e-07, "loss": 0.397, "step": 21500 }, { "epoch": 1.99, "learning_rate": 2.1660649819494586e-07, "loss": 0.435, "step": 22000 }, { "epoch": 2.0, "eval_exact_match": 88.31598864711448, "eval_f1": 94.44474801701165, "step": 22160 }, { "epoch": 2.0, "step": 22160, "total_flos": 8.247163627136614e+16, "train_loss": 0.6275850592107118, "train_runtime": 29795.4441, "train_samples_per_second": 5.95, "train_steps_per_second": 0.744 } ], "max_steps": 22160, "num_train_epochs": 2, "total_flos": 8.247163627136614e+16, "trial_name": null, "trial_params": null }