{ "best_metric": 1.0982866287231445, "best_model_checkpoint": "hBERTv2_mnli/checkpoint-15340", "epoch": 15.0, "global_step": 23010, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.90013037809648e-05, "loss": 1.0992, "step": 1534 }, { "epoch": 1.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0996192693710327, "eval_runtime": 11.4609, "eval_samples_per_second": 856.391, "eval_steps_per_second": 3.403, "step": 1534 }, { "epoch": 2.0, "learning_rate": 4.80013037809648e-05, "loss": 1.0988, "step": 3068 }, { "epoch": 2.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.098758578300476, "eval_runtime": 11.4241, "eval_samples_per_second": 859.146, "eval_steps_per_second": 3.414, "step": 3068 }, { "epoch": 3.0, "learning_rate": 4.70013037809648e-05, "loss": 1.0987, "step": 4602 }, { "epoch": 3.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.098650336265564, "eval_runtime": 11.3879, "eval_samples_per_second": 861.881, "eval_steps_per_second": 3.425, "step": 4602 }, { "epoch": 4.0, "learning_rate": 4.60013037809648e-05, "loss": 1.0986, "step": 6136 }, { "epoch": 4.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0986592769622803, "eval_runtime": 11.4873, "eval_samples_per_second": 854.425, "eval_steps_per_second": 3.395, "step": 6136 }, { "epoch": 5.0, "learning_rate": 4.50013037809648e-05, "loss": 1.0987, "step": 7670 }, { "epoch": 5.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0984472036361694, "eval_runtime": 11.4054, "eval_samples_per_second": 860.558, "eval_steps_per_second": 3.419, "step": 7670 }, { "epoch": 6.0, "learning_rate": 4.40013037809648e-05, "loss": 1.0987, "step": 9204 }, { "epoch": 6.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0986328125, "eval_runtime": 11.4628, "eval_samples_per_second": 856.251, "eval_steps_per_second": 3.402, "step": 9204 }, { "epoch": 7.0, "learning_rate": 4.3001303780964804e-05, "loss": 1.0986, "step": 10738 }, { "epoch": 7.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0986063480377197, "eval_runtime": 11.4244, "eval_samples_per_second": 859.13, "eval_steps_per_second": 3.414, "step": 10738 }, { "epoch": 8.0, "learning_rate": 4.20039113428944e-05, "loss": 1.0987, "step": 12272 }, { "epoch": 8.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0986063480377197, "eval_runtime": 11.4277, "eval_samples_per_second": 858.877, "eval_steps_per_second": 3.413, "step": 12272 }, { "epoch": 9.0, "learning_rate": 4.10045632333768e-05, "loss": 1.0986, "step": 13806 }, { "epoch": 9.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.098358392715454, "eval_runtime": 11.4149, "eval_samples_per_second": 859.839, "eval_steps_per_second": 3.417, "step": 13806 }, { "epoch": 10.0, "learning_rate": 4.000456323337679e-05, "loss": 1.0986, "step": 15340 }, { "epoch": 10.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0982866287231445, "eval_runtime": 11.4072, "eval_samples_per_second": 860.424, "eval_steps_per_second": 3.419, "step": 15340 }, { "epoch": 11.0, "learning_rate": 3.9004563233376796e-05, "loss": 1.0987, "step": 16874 }, { "epoch": 11.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0986417531967163, "eval_runtime": 11.4708, "eval_samples_per_second": 855.65, "eval_steps_per_second": 3.4, "step": 16874 }, { "epoch": 12.0, "learning_rate": 3.800456323337679e-05, "loss": 1.0987, "step": 18408 }, { "epoch": 12.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0983929634094238, "eval_runtime": 11.4592, "eval_samples_per_second": 856.517, "eval_steps_per_second": 3.403, "step": 18408 }, { "epoch": 13.0, "learning_rate": 3.7004563233376795e-05, "loss": 1.0986, "step": 19942 }, { "epoch": 13.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0983316898345947, "eval_runtime": 11.4401, "eval_samples_per_second": 857.948, "eval_steps_per_second": 3.409, "step": 19942 }, { "epoch": 14.0, "learning_rate": 3.600456323337679e-05, "loss": 1.0986, "step": 21476 }, { "epoch": 14.0, "eval_accuracy": 0.31818644931227713, "eval_loss": 1.0983843803405762, "eval_runtime": 11.4436, "eval_samples_per_second": 857.683, "eval_steps_per_second": 3.408, "step": 21476 }, { "epoch": 15.0, "learning_rate": 3.50045632333768e-05, "loss": 1.0986, "step": 23010 }, { "epoch": 15.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.0986328125, "eval_runtime": 11.4531, "eval_samples_per_second": 856.97, "eval_steps_per_second": 3.405, "step": 23010 }, { "epoch": 15.0, "step": 23010, "total_flos": 7.33692663127081e+17, "train_loss": 1.0986910359541369, "train_runtime": 20364.3039, "train_samples_per_second": 964.192, "train_steps_per_second": 3.766 } ], "max_steps": 76700, "num_train_epochs": 50, "total_flos": 7.33692663127081e+17, "trial_name": null, "trial_params": null }