{ "best_metric": 0.6855193376541138, "best_model_checkpoint": "hBERTv1_new_pretrain_w_init__wnli/checkpoint-45", "epoch": 14.0, "global_step": 70, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.00049, "loss": 12.3688, "step": 5 }, { "epoch": 1.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 6.223592281341553, "eval_runtime": 0.1345, "eval_samples_per_second": 527.809, "eval_steps_per_second": 7.434, "step": 5 }, { "epoch": 2.0, "learning_rate": 0.00048, "loss": 3.5093, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.7491196393966675, "eval_runtime": 0.1348, "eval_samples_per_second": 526.695, "eval_steps_per_second": 7.418, "step": 10 }, { "epoch": 3.0, "learning_rate": 0.00047, "loss": 1.9112, "step": 15 }, { "epoch": 3.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 2.5145533084869385, "eval_runtime": 0.1344, "eval_samples_per_second": 528.182, "eval_steps_per_second": 7.439, "step": 15 }, { "epoch": 4.0, "learning_rate": 0.00046, "loss": 1.4995, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 1.8103646039962769, "eval_runtime": 0.1343, "eval_samples_per_second": 528.656, "eval_steps_per_second": 7.446, "step": 20 }, { "epoch": 5.0, "learning_rate": 0.00045000000000000004, "loss": 1.3047, "step": 25 }, { "epoch": 5.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6936343908309937, "eval_runtime": 0.1346, "eval_samples_per_second": 527.436, "eval_steps_per_second": 7.429, "step": 25 }, { "epoch": 6.0, "learning_rate": 0.00044, "loss": 1.4685, "step": 30 }, { "epoch": 6.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.7440030574798584, "eval_runtime": 0.1342, "eval_samples_per_second": 528.98, "eval_steps_per_second": 7.45, "step": 30 }, { "epoch": 7.0, "learning_rate": 0.00043, "loss": 0.924, "step": 35 }, { "epoch": 7.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 1.1065967082977295, "eval_runtime": 0.1344, "eval_samples_per_second": 528.272, "eval_steps_per_second": 7.44, "step": 35 }, { "epoch": 8.0, "learning_rate": 0.00042, "loss": 0.8423, "step": 40 }, { "epoch": 8.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.8221006989479065, "eval_runtime": 0.1346, "eval_samples_per_second": 527.644, "eval_steps_per_second": 7.432, "step": 40 }, { "epoch": 9.0, "learning_rate": 0.00041, "loss": 0.8166, "step": 45 }, { "epoch": 9.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6855193376541138, "eval_runtime": 0.1345, "eval_samples_per_second": 528.041, "eval_steps_per_second": 7.437, "step": 45 }, { "epoch": 10.0, "learning_rate": 0.0004, "loss": 0.7552, "step": 50 }, { "epoch": 10.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.718089759349823, "eval_runtime": 0.1345, "eval_samples_per_second": 527.974, "eval_steps_per_second": 7.436, "step": 50 }, { "epoch": 11.0, "learning_rate": 0.00039000000000000005, "loss": 0.7515, "step": 55 }, { "epoch": 11.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6951475739479065, "eval_runtime": 0.1352, "eval_samples_per_second": 525.152, "eval_steps_per_second": 7.397, "step": 55 }, { "epoch": 12.0, "learning_rate": 0.00038, "loss": 0.7127, "step": 60 }, { "epoch": 12.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.7139633893966675, "eval_runtime": 0.1349, "eval_samples_per_second": 526.347, "eval_steps_per_second": 7.413, "step": 60 }, { "epoch": 13.0, "learning_rate": 0.00037, "loss": 0.7112, "step": 65 }, { "epoch": 13.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6901406645774841, "eval_runtime": 0.1345, "eval_samples_per_second": 527.704, "eval_steps_per_second": 7.432, "step": 65 }, { "epoch": 14.0, "learning_rate": 0.00035999999999999997, "loss": 0.6976, "step": 70 }, { "epoch": 14.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.700924277305603, "eval_runtime": 0.1352, "eval_samples_per_second": 525.153, "eval_steps_per_second": 7.397, "step": 70 }, { "epoch": 14.0, "step": 70, "total_flos": 1316580365434880.0, "train_loss": 2.0194963932037355, "train_runtime": 90.6818, "train_samples_per_second": 350.126, "train_steps_per_second": 2.757 } ], "max_steps": 250, "num_train_epochs": 50, "total_flos": 1316580365434880.0, "trial_name": null, "trial_params": null }