{ "best_metric": 0.8231664299964905, "best_model_checkpoint": "hBERTv1_data_aug_wnli/checkpoint-218", "epoch": 6.0, "global_step": 1308, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.900917431192661e-05, "loss": 0.6916, "step": 218 }, { "epoch": 1.0, "eval_accuracy": 0.323943661971831, "eval_loss": 0.8231664299964905, "eval_runtime": 0.1005, "eval_samples_per_second": 706.229, "eval_steps_per_second": 9.947, "step": 218 }, { "epoch": 2.0, "learning_rate": 4.800917431192661e-05, "loss": 0.5909, "step": 436 }, { "epoch": 2.0, "eval_accuracy": 0.07042253521126761, "eval_loss": 2.906492233276367, "eval_runtime": 0.101, "eval_samples_per_second": 703.289, "eval_steps_per_second": 9.905, "step": 436 }, { "epoch": 3.0, "learning_rate": 4.7009174311926604e-05, "loss": 0.3754, "step": 654 }, { "epoch": 3.0, "eval_accuracy": 0.08450704225352113, "eval_loss": 4.767050266265869, "eval_runtime": 0.1009, "eval_samples_per_second": 703.829, "eval_steps_per_second": 9.913, "step": 654 }, { "epoch": 4.0, "learning_rate": 4.600917431192661e-05, "loss": 0.2639, "step": 872 }, { "epoch": 4.0, "eval_accuracy": 0.11267605633802817, "eval_loss": 5.692210674285889, "eval_runtime": 0.1019, "eval_samples_per_second": 696.536, "eval_steps_per_second": 9.81, "step": 872 }, { "epoch": 5.0, "learning_rate": 4.50091743119266e-05, "loss": 0.1921, "step": 1090 }, { "epoch": 5.0, "eval_accuracy": 0.08450704225352113, "eval_loss": 5.994777202606201, "eval_runtime": 0.1009, "eval_samples_per_second": 703.865, "eval_steps_per_second": 9.914, "step": 1090 }, { "epoch": 6.0, "learning_rate": 4.400917431192661e-05, "loss": 0.1317, "step": 1308 }, { "epoch": 6.0, "eval_accuracy": 0.09859154929577464, "eval_loss": 6.7443647384643555, "eval_runtime": 0.1002, "eval_samples_per_second": 708.652, "eval_steps_per_second": 9.981, "step": 1308 }, { "epoch": 6.0, "step": 1308, "total_flos": 4.216767430577357e+16, "train_loss": 0.3742816163859236, "train_runtime": 1346.1035, "train_samples_per_second": 2066.706, "train_steps_per_second": 8.097 } ], "max_steps": 10900, "num_train_epochs": 50, "total_flos": 4.216767430577357e+16, "trial_name": null, "trial_params": null }