{ "best_metric": 0.8369131635471003, "best_model_checkpoint": "/hdd1/mujeen/retrieval_prf/output/labeler_nq.train.v7.0_nq.dev.v7.0_rlnq_title/checkpoint-76000", "epoch": 0.9999910574558462, "global_step": 83868, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 1.904755091334001e-05, "loss": 0.5261, "step": 4000 }, { "epoch": 0.05, "eval_accuracy": 0.7910401963792575, "eval_loss": 0.4569447934627533, "eval_runtime": 61.7181, "eval_samples_per_second": 211.219, "eval_steps_per_second": 26.41, "step": 4000 }, { "epoch": 0.1, "learning_rate": 1.8093671006820245e-05, "loss": 0.4674, "step": 8000 }, { "epoch": 0.1, "eval_accuracy": 0.7984811291807303, "eval_loss": 0.42789292335510254, "eval_runtime": 62.0897, "eval_samples_per_second": 209.954, "eval_steps_per_second": 26.252, "step": 8000 }, { "epoch": 0.14, "learning_rate": 1.7140744980206995e-05, "loss": 0.4453, "step": 12000 }, { "epoch": 0.14, "eval_accuracy": 0.804311138386008, "eval_loss": 0.4111127257347107, "eval_runtime": 73.971, "eval_samples_per_second": 176.231, "eval_steps_per_second": 22.036, "step": 12000 }, { "epoch": 0.19, "learning_rate": 1.6186865073687225e-05, "loss": 0.4247, "step": 16000 }, { "epoch": 0.19, "eval_accuracy": 0.8105247008284749, "eval_loss": 0.4133109748363495, "eval_runtime": 67.9092, "eval_samples_per_second": 191.962, "eval_steps_per_second": 24.003, "step": 16000 }, { "epoch": 0.24, "learning_rate": 1.5233462107120716e-05, "loss": 0.4097, "step": 20000 }, { "epoch": 0.24, "eval_accuracy": 0.8149739183798711, "eval_loss": 0.411438524723053, "eval_runtime": 76.7711, "eval_samples_per_second": 169.803, "eval_steps_per_second": 21.232, "step": 20000 }, { "epoch": 0.29, "learning_rate": 1.4280059140554206e-05, "loss": 0.3915, "step": 24000 }, { "epoch": 0.29, "eval_accuracy": 0.8158944461491255, "eval_loss": 0.4367925524711609, "eval_runtime": 73.8165, "eval_samples_per_second": 176.6, "eval_steps_per_second": 22.082, "step": 24000 }, { "epoch": 0.33, "learning_rate": 1.3326179234034435e-05, "loss": 0.3783, "step": 28000 }, { "epoch": 0.33, "eval_accuracy": 0.8168916845658177, "eval_loss": 0.43372228741645813, "eval_runtime": 74.7748, "eval_samples_per_second": 174.337, "eval_steps_per_second": 21.799, "step": 28000 }, { "epoch": 0.38, "learning_rate": 1.2373014737444558e-05, "loss": 0.365, "step": 32000 }, { "epoch": 0.38, "eval_accuracy": 0.8242559067198527, "eval_loss": 0.40461644530296326, "eval_runtime": 77.9179, "eval_samples_per_second": 167.304, "eval_steps_per_second": 20.919, "step": 32000 }, { "epoch": 0.43, "learning_rate": 1.1419373300901418e-05, "loss": 0.3477, "step": 36000 }, { "epoch": 0.43, "eval_accuracy": 0.828628413623811, "eval_loss": 0.4262824058532715, "eval_runtime": 74.7614, "eval_samples_per_second": 174.368, "eval_steps_per_second": 21.803, "step": 36000 }, { "epoch": 0.48, "learning_rate": 1.0465731864358278e-05, "loss": 0.3341, "step": 40000 }, { "epoch": 0.48, "eval_accuracy": 0.8258668303160479, "eval_loss": 0.40725135803222656, "eval_runtime": 77.1145, "eval_samples_per_second": 169.047, "eval_steps_per_second": 21.137, "step": 40000 }, { "epoch": 0.52, "learning_rate": 9.51209042781514e-06, "loss": 0.3214, "step": 44000 }, { "epoch": 0.52, "eval_accuracy": 0.8284749923289353, "eval_loss": 0.4358045756816864, "eval_runtime": 79.2128, "eval_samples_per_second": 164.569, "eval_steps_per_second": 20.577, "step": 44000 }, { "epoch": 0.57, "learning_rate": 8.55868746124863e-06, "loss": 0.3112, "step": 48000 }, { "epoch": 0.57, "eval_accuracy": 0.8285517029763732, "eval_loss": 0.4215088188648224, "eval_runtime": 76.3765, "eval_samples_per_second": 170.681, "eval_steps_per_second": 21.342, "step": 48000 }, { "epoch": 0.62, "learning_rate": 7.6050460247054905e-06, "loss": 0.2996, "step": 52000 }, { "epoch": 0.62, "eval_accuracy": 0.8277845965019944, "eval_loss": 0.4198075234889984, "eval_runtime": 75.7616, "eval_samples_per_second": 172.066, "eval_steps_per_second": 21.515, "step": 52000 }, { "epoch": 0.67, "learning_rate": 6.651643058138981e-06, "loss": 0.2863, "step": 56000 }, { "epoch": 0.67, "eval_accuracy": 0.8310831543418227, "eval_loss": 0.4417212903499603, "eval_runtime": 80.0618, "eval_samples_per_second": 162.824, "eval_steps_per_second": 20.359, "step": 56000 }, { "epoch": 0.72, "learning_rate": 5.698240091572471e-06, "loss": 0.2739, "step": 60000 }, { "epoch": 0.72, "eval_accuracy": 0.8222614298864682, "eval_loss": 0.5145458579063416, "eval_runtime": 78.9172, "eval_samples_per_second": 165.186, "eval_steps_per_second": 20.655, "step": 60000 }, { "epoch": 0.76, "learning_rate": 4.744598655029332e-06, "loss": 0.2656, "step": 64000 }, { "epoch": 0.76, "eval_accuracy": 0.8265572261429887, "eval_loss": 0.5052666068077087, "eval_runtime": 77.2292, "eval_samples_per_second": 168.796, "eval_steps_per_second": 21.106, "step": 64000 }, { "epoch": 0.81, "learning_rate": 3.7911956884628224e-06, "loss": 0.2558, "step": 68000 }, { "epoch": 0.81, "eval_accuracy": 0.8353022399509051, "eval_loss": 0.47119611501693726, "eval_runtime": 79.1339, "eval_samples_per_second": 164.733, "eval_steps_per_second": 20.598, "step": 68000 }, { "epoch": 0.86, "learning_rate": 2.8377927218963137e-06, "loss": 0.2467, "step": 72000 }, { "epoch": 0.86, "eval_accuracy": 0.8312365756366984, "eval_loss": 0.5034319162368774, "eval_runtime": 82.6755, "eval_samples_per_second": 157.677, "eval_steps_per_second": 19.716, "step": 72000 }, { "epoch": 0.91, "learning_rate": 1.8839128153765443e-06, "loss": 0.2342, "step": 76000 }, { "epoch": 0.91, "eval_accuracy": 0.8369131635471003, "eval_loss": 0.4584212303161621, "eval_runtime": 81.5904, "eval_samples_per_second": 159.774, "eval_steps_per_second": 19.978, "step": 76000 }, { "epoch": 0.95, "learning_rate": 9.305098488100349e-07, "loss": 0.23, "step": 80000 }, { "epoch": 0.95, "eval_accuracy": 0.8341515802393372, "eval_loss": 0.49471235275268555, "eval_runtime": 87.9548, "eval_samples_per_second": 148.213, "eval_steps_per_second": 18.532, "step": 80000 }, { "epoch": 1.0, "step": 83868, "total_flos": 7.653345366712497e+17, "train_loss": 0.3355050985067308, "train_runtime": 56077.3175, "train_samples_per_second": 47.859, "train_steps_per_second": 1.496 } ], "max_steps": 83868, "num_train_epochs": 1, "total_flos": 7.653345366712497e+17, "trial_name": null, "trial_params": null }