{ "best_metric": 1.098163366317749, "best_model_checkpoint": "hBERTv1_mnli/checkpoint-19942", "epoch": 18.0, "global_step": 27612, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 1.1001, "step": 1534 }, { "epoch": 1.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0993714332580566, "eval_runtime": 12.6071, "eval_samples_per_second": 778.528, "eval_steps_per_second": 3.093, "step": 1534 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 1.0988, "step": 3068 }, { "epoch": 2.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0989975929260254, "eval_runtime": 12.5819, "eval_samples_per_second": 780.088, "eval_steps_per_second": 3.1, "step": 3068 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 1.0987, "step": 4602 }, { "epoch": 3.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.099189043045044, "eval_runtime": 12.6822, "eval_samples_per_second": 773.921, "eval_steps_per_second": 3.075, "step": 4602 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 1.0987, "step": 6136 }, { "epoch": 4.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0986238718032837, "eval_runtime": 12.5133, "eval_samples_per_second": 784.368, "eval_steps_per_second": 3.117, "step": 6136 }, { "epoch": 5.0, "learning_rate": 4.50006518904824e-05, "loss": 1.0987, "step": 7670 }, { "epoch": 5.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0984646081924438, "eval_runtime": 12.6008, "eval_samples_per_second": 778.92, "eval_steps_per_second": 3.095, "step": 7670 }, { "epoch": 6.0, "learning_rate": 4.40013037809648e-05, "loss": 1.0986, "step": 9204 }, { "epoch": 6.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0987476110458374, "eval_runtime": 12.4519, "eval_samples_per_second": 788.232, "eval_steps_per_second": 3.132, "step": 9204 }, { "epoch": 7.0, "learning_rate": 4.3003259452411996e-05, "loss": 1.105, "step": 10738 }, { "epoch": 7.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0986319780349731, "eval_runtime": 12.6436, "eval_samples_per_second": 776.285, "eval_steps_per_second": 3.085, "step": 10738 }, { "epoch": 8.0, "learning_rate": 4.2003259452412e-05, "loss": 1.1045, "step": 12272 }, { "epoch": 8.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0985896587371826, "eval_runtime": 12.6272, "eval_samples_per_second": 777.291, "eval_steps_per_second": 3.089, "step": 12272 }, { "epoch": 9.0, "learning_rate": 4.1003259452411995e-05, "loss": 1.0988, "step": 13806 }, { "epoch": 9.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0983130931854248, "eval_runtime": 12.4879, "eval_samples_per_second": 785.962, "eval_steps_per_second": 3.123, "step": 13806 }, { "epoch": 10.0, "learning_rate": 4.0003259452412e-05, "loss": 1.0987, "step": 15340 }, { "epoch": 10.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.098677158355713, "eval_runtime": 12.4301, "eval_samples_per_second": 789.616, "eval_steps_per_second": 3.138, "step": 15340 }, { "epoch": 11.0, "learning_rate": 3.9003259452411994e-05, "loss": 1.0987, "step": 16874 }, { "epoch": 11.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0991103649139404, "eval_runtime": 12.5586, "eval_samples_per_second": 781.533, "eval_steps_per_second": 3.105, "step": 16874 }, { "epoch": 12.0, "learning_rate": 3.8003911342894394e-05, "loss": 1.0986, "step": 18408 }, { "epoch": 12.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0986063480377197, "eval_runtime": 12.5872, "eval_samples_per_second": 779.762, "eval_steps_per_second": 3.098, "step": 18408 }, { "epoch": 13.0, "learning_rate": 3.700391134289439e-05, "loss": 1.0986, "step": 19942 }, { "epoch": 13.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.098163366317749, "eval_runtime": 12.4723, "eval_samples_per_second": 786.944, "eval_steps_per_second": 3.127, "step": 19942 }, { "epoch": 14.0, "learning_rate": 3.600456323337679e-05, "loss": 1.0986, "step": 21476 }, { "epoch": 14.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0988693237304688, "eval_runtime": 12.5684, "eval_samples_per_second": 780.924, "eval_steps_per_second": 3.103, "step": 21476 }, { "epoch": 15.0, "learning_rate": 3.500521512385919e-05, "loss": 1.0986, "step": 23010 }, { "epoch": 15.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.098739504814148, "eval_runtime": 12.6125, "eval_samples_per_second": 778.196, "eval_steps_per_second": 3.092, "step": 23010 }, { "epoch": 16.0, "learning_rate": 3.4005215123859194e-05, "loss": 1.0986, "step": 24544 }, { "epoch": 16.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0985974073410034, "eval_runtime": 12.4698, "eval_samples_per_second": 787.103, "eval_steps_per_second": 3.128, "step": 24544 }, { "epoch": 17.0, "learning_rate": 3.300586701434159e-05, "loss": 1.0986, "step": 26078 }, { "epoch": 17.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.098615288734436, "eval_runtime": 12.5264, "eval_samples_per_second": 783.545, "eval_steps_per_second": 3.113, "step": 26078 }, { "epoch": 18.0, "learning_rate": 3.200586701434159e-05, "loss": 1.0986, "step": 27612 }, { "epoch": 18.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0982955694198608, "eval_runtime": 12.8565, "eval_samples_per_second": 763.425, "eval_steps_per_second": 3.033, "step": 27612 }, { "epoch": 18.0, "step": 27612, "total_flos": 8.928550500811407e+17, "train_loss": 1.0994189529164742, "train_runtime": 25959.7521, "train_samples_per_second": 756.367, "train_steps_per_second": 2.955 } ], "max_steps": 76700, "num_train_epochs": 50, "total_flos": 8.928550500811407e+17, "trial_name": null, "trial_params": null }