{ "best_metric": 0.6963669061660767, "best_model_checkpoint": "hBERTv2_sst2/checkpoint-2376", "epoch": 14.0, "global_step": 3696, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9003787878787885e-05, "loss": 0.6916, "step": 264 }, { "epoch": 1.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6998637914657593, "eval_runtime": 1.0282, "eval_samples_per_second": 848.118, "eval_steps_per_second": 3.89, "step": 264 }, { "epoch": 2.0, "learning_rate": 4.800378787878788e-05, "loss": 0.6885, "step": 528 }, { "epoch": 2.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6977964639663696, "eval_runtime": 1.0265, "eval_samples_per_second": 849.502, "eval_steps_per_second": 3.897, "step": 528 }, { "epoch": 3.0, "learning_rate": 4.7003787878787884e-05, "loss": 0.6871, "step": 792 }, { "epoch": 3.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.698425829410553, "eval_runtime": 1.0241, "eval_samples_per_second": 851.447, "eval_steps_per_second": 3.906, "step": 792 }, { "epoch": 4.0, "learning_rate": 4.600378787878788e-05, "loss": 0.6869, "step": 1056 }, { "epoch": 4.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.69902503490448, "eval_runtime": 1.0269, "eval_samples_per_second": 849.176, "eval_steps_per_second": 3.895, "step": 1056 }, { "epoch": 5.0, "learning_rate": 4.5003787878787876e-05, "loss": 0.6868, "step": 1320 }, { "epoch": 5.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6974067091941833, "eval_runtime": 1.0288, "eval_samples_per_second": 847.597, "eval_steps_per_second": 3.888, "step": 1320 }, { "epoch": 6.0, "learning_rate": 4.4003787878787885e-05, "loss": 0.6869, "step": 1584 }, { "epoch": 6.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6980025172233582, "eval_runtime": 1.0196, "eval_samples_per_second": 855.224, "eval_steps_per_second": 3.923, "step": 1584 }, { "epoch": 7.0, "learning_rate": 4.300378787878788e-05, "loss": 0.6867, "step": 1848 }, { "epoch": 7.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6984168887138367, "eval_runtime": 1.0285, "eval_samples_per_second": 847.819, "eval_steps_per_second": 3.889, "step": 1848 }, { "epoch": 8.0, "learning_rate": 4.2003787878787884e-05, "loss": 0.6868, "step": 2112 }, { "epoch": 8.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6974700093269348, "eval_runtime": 1.0196, "eval_samples_per_second": 855.241, "eval_steps_per_second": 3.923, "step": 2112 }, { "epoch": 9.0, "learning_rate": 4.100378787878788e-05, "loss": 0.6868, "step": 2376 }, { "epoch": 9.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6963669061660767, "eval_runtime": 1.0241, "eval_samples_per_second": 851.456, "eval_steps_per_second": 3.906, "step": 2376 }, { "epoch": 10.0, "learning_rate": 4.0003787878787876e-05, "loss": 0.6865, "step": 2640 }, { "epoch": 10.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6977964639663696, "eval_runtime": 1.022, "eval_samples_per_second": 853.257, "eval_steps_per_second": 3.914, "step": 2640 }, { "epoch": 11.0, "learning_rate": 3.900378787878788e-05, "loss": 0.6868, "step": 2904 }, { "epoch": 11.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6980271935462952, "eval_runtime": 1.027, "eval_samples_per_second": 849.06, "eval_steps_per_second": 3.895, "step": 2904 }, { "epoch": 12.0, "learning_rate": 3.800378787878788e-05, "loss": 0.6865, "step": 3168 }, { "epoch": 12.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.700083315372467, "eval_runtime": 1.0275, "eval_samples_per_second": 848.679, "eval_steps_per_second": 3.893, "step": 3168 }, { "epoch": 13.0, "learning_rate": 3.7003787878787885e-05, "loss": 0.6867, "step": 3432 }, { "epoch": 13.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6965734958648682, "eval_runtime": 1.0183, "eval_samples_per_second": 856.305, "eval_steps_per_second": 3.928, "step": 3432 }, { "epoch": 14.0, "learning_rate": 3.600378787878788e-05, "loss": 0.6867, "step": 3696 }, { "epoch": 14.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6980003118515015, "eval_runtime": 1.0301, "eval_samples_per_second": 846.51, "eval_steps_per_second": 3.883, "step": 3696 }, { "epoch": 14.0, "step": 3696, "total_flos": 1.1743968521879552e+17, "train_loss": 0.6872374216715494, "train_runtime": 3525.1846, "train_samples_per_second": 955.255, "train_steps_per_second": 3.744 } ], "max_steps": 13200, "num_train_epochs": 50, "total_flos": 1.1743968521879552e+17, "trial_name": null, "trial_params": null }