{ "best_metric": 0.8122448979591836, "best_model_checkpoint": "/hdd1/mujeen/retrieval_prf/output/labeler_multi.train.v4.1_nq.dev.v4.1_rlmulti_title_wq/checkpoint-300", "epoch": 1.0, "global_step": 3080, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 1.903896103896104e-05, "loss": 0.2844, "step": 150 }, { "epoch": 0.05, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.5584465861320496, "eval_runtime": 3.5617, "eval_samples_per_second": 137.576, "eval_steps_per_second": 17.408, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.8064935064935067e-05, "loss": 0.2673, "step": 300 }, { "epoch": 0.1, "eval_accuracy": 0.8122448979591836, "eval_loss": 0.5982859134674072, "eval_runtime": 3.1189, "eval_samples_per_second": 157.106, "eval_steps_per_second": 19.879, "step": 300 }, { "epoch": 0.15, "learning_rate": 1.7090909090909092e-05, "loss": 0.2653, "step": 450 }, { "epoch": 0.15, "eval_accuracy": 0.8040816326530612, "eval_loss": 0.5716066956520081, "eval_runtime": 3.0903, "eval_samples_per_second": 158.56, "eval_steps_per_second": 20.063, "step": 450 }, { "epoch": 0.19, "learning_rate": 1.6116883116883118e-05, "loss": 0.2453, "step": 600 }, { "epoch": 0.19, "eval_accuracy": 0.7836734693877551, "eval_loss": 0.6890708208084106, "eval_runtime": 2.9422, "eval_samples_per_second": 166.541, "eval_steps_per_second": 21.073, "step": 600 }, { "epoch": 0.24, "learning_rate": 1.514935064935065e-05, "loss": 0.2234, "step": 750 }, { "epoch": 0.24, "eval_accuracy": 0.8, "eval_loss": 0.6545684337615967, "eval_runtime": 3.0903, "eval_samples_per_second": 158.559, "eval_steps_per_second": 20.063, "step": 750 }, { "epoch": 0.29, "learning_rate": 1.4175324675324675e-05, "loss": 0.2117, "step": 900 }, { "epoch": 0.29, "eval_accuracy": 0.7938775510204081, "eval_loss": 0.6074094772338867, "eval_runtime": 3.4718, "eval_samples_per_second": 141.137, "eval_steps_per_second": 17.858, "step": 900 }, { "epoch": 0.34, "learning_rate": 1.3201298701298702e-05, "loss": 0.1867, "step": 1050 }, { "epoch": 0.34, "eval_accuracy": 0.7938775510204081, "eval_loss": 0.7075100541114807, "eval_runtime": 3.2481, "eval_samples_per_second": 150.857, "eval_steps_per_second": 19.088, "step": 1050 }, { "epoch": 0.39, "learning_rate": 1.2227272727272728e-05, "loss": 0.179, "step": 1200 }, { "epoch": 0.39, "eval_accuracy": 0.8040816326530612, "eval_loss": 0.6317723989486694, "eval_runtime": 3.0265, "eval_samples_per_second": 161.903, "eval_steps_per_second": 20.486, "step": 1200 }, { "epoch": 0.44, "learning_rate": 1.1253246753246754e-05, "loss": 0.1718, "step": 1350 }, { "epoch": 0.44, "eval_accuracy": 0.7836734693877551, "eval_loss": 0.7868985533714294, "eval_runtime": 3.0224, "eval_samples_per_second": 162.125, "eval_steps_per_second": 20.514, "step": 1350 }, { "epoch": 0.49, "learning_rate": 1.027922077922078e-05, "loss": 0.1713, "step": 1500 }, { "epoch": 0.49, "eval_accuracy": 0.7775510204081633, "eval_loss": 0.6507557034492493, "eval_runtime": 3.6832, "eval_samples_per_second": 133.036, "eval_steps_per_second": 16.833, "step": 1500 }, { "epoch": 0.54, "learning_rate": 9.318181818181819e-06, "loss": 0.1502, "step": 1650 }, { "epoch": 0.54, "eval_accuracy": 0.7836734693877551, "eval_loss": 0.738645613193512, "eval_runtime": 3.2984, "eval_samples_per_second": 148.555, "eval_steps_per_second": 18.797, "step": 1650 }, { "epoch": 0.58, "learning_rate": 8.344155844155845e-06, "loss": 0.1541, "step": 1800 }, { "epoch": 0.58, "eval_accuracy": 0.7877551020408163, "eval_loss": 0.8032358884811401, "eval_runtime": 3.6265, "eval_samples_per_second": 135.118, "eval_steps_per_second": 17.097, "step": 1800 }, { "epoch": 0.63, "learning_rate": 7.370129870129871e-06, "loss": 0.1358, "step": 1950 }, { "epoch": 0.63, "eval_accuracy": 0.7959183673469388, "eval_loss": 0.7314993739128113, "eval_runtime": 3.7106, "eval_samples_per_second": 132.054, "eval_steps_per_second": 16.709, "step": 1950 }, { "epoch": 0.68, "learning_rate": 6.3961038961038964e-06, "loss": 0.1251, "step": 2100 }, { "epoch": 0.68, "eval_accuracy": 0.7816326530612245, "eval_loss": 0.9869228601455688, "eval_runtime": 3.3927, "eval_samples_per_second": 144.427, "eval_steps_per_second": 18.274, "step": 2100 }, { "epoch": 0.73, "learning_rate": 5.422077922077923e-06, "loss": 0.1244, "step": 2250 }, { "epoch": 0.73, "eval_accuracy": 0.7836734693877551, "eval_loss": 0.8370808362960815, "eval_runtime": 3.5148, "eval_samples_per_second": 139.41, "eval_steps_per_second": 17.64, "step": 2250 }, { "epoch": 0.78, "learning_rate": 4.448051948051948e-06, "loss": 0.1223, "step": 2400 }, { "epoch": 0.78, "eval_accuracy": 0.7816326530612245, "eval_loss": 0.833113968372345, "eval_runtime": 3.1172, "eval_samples_per_second": 157.192, "eval_steps_per_second": 19.89, "step": 2400 }, { "epoch": 0.83, "learning_rate": 3.474025974025974e-06, "loss": 0.096, "step": 2550 }, { "epoch": 0.83, "eval_accuracy": 0.7979591836734694, "eval_loss": 0.8976467847824097, "eval_runtime": 3.0168, "eval_samples_per_second": 162.426, "eval_steps_per_second": 20.552, "step": 2550 }, { "epoch": 0.88, "learning_rate": 2.5e-06, "loss": 0.1113, "step": 2700 }, { "epoch": 0.88, "eval_accuracy": 0.7755102040816326, "eval_loss": 0.96781325340271, "eval_runtime": 3.3837, "eval_samples_per_second": 144.814, "eval_steps_per_second": 18.323, "step": 2700 }, { "epoch": 0.93, "learning_rate": 1.525974025974026e-06, "loss": 0.094, "step": 2850 }, { "epoch": 0.93, "eval_accuracy": 0.7816326530612245, "eval_loss": 1.0002238750457764, "eval_runtime": 2.9984, "eval_samples_per_second": 163.421, "eval_steps_per_second": 20.678, "step": 2850 }, { "epoch": 0.97, "learning_rate": 5.51948051948052e-07, "loss": 0.1084, "step": 3000 }, { "epoch": 0.97, "eval_accuracy": 0.7836734693877551, "eval_loss": 1.0199074745178223, "eval_runtime": 3.3069, "eval_samples_per_second": 148.174, "eval_steps_per_second": 18.748, "step": 3000 }, { "epoch": 1.0, "step": 3080, "total_flos": 2.867459611823923e+16, "train_loss": 0.16962762838834292, "train_runtime": 2524.7018, "train_samples_per_second": 39.038, "train_steps_per_second": 1.22 } ], "max_steps": 3080, "num_train_epochs": 1, "total_flos": 2.867459611823923e+16, "trial_name": null, "trial_params": null }