{ "best_metric": 0.6978054046630859, "best_model_checkpoint": "add_BERT_no_pretrain_sst2/checkpoint-527", "epoch": 6.0, "global_step": 3162, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.0004900379506641366, "loss": 0.7276, "step": 527 }, { "epoch": 1.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6978054046630859, "eval_runtime": 1.3727, "eval_samples_per_second": 635.255, "eval_steps_per_second": 5.1, "step": 527 }, { "epoch": 2.0, "learning_rate": 0.00048003795066413666, "loss": 0.6872, "step": 1054 }, { "epoch": 2.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7013314366340637, "eval_runtime": 1.3699, "eval_samples_per_second": 636.538, "eval_steps_per_second": 5.11, "step": 1054 }, { "epoch": 3.0, "learning_rate": 0.00047003795066413663, "loss": 0.6873, "step": 1581 }, { "epoch": 3.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6995900273323059, "eval_runtime": 1.3659, "eval_samples_per_second": 638.425, "eval_steps_per_second": 5.125, "step": 1581 }, { "epoch": 4.0, "learning_rate": 0.0004604554079696395, "loss": 0.6868, "step": 2108 }, { "epoch": 4.0, "eval_accuracy": 0.4908256880733945, "eval_loss": NaN, "eval_runtime": 1.3643, "eval_samples_per_second": 639.154, "eval_steps_per_second": 5.131, "step": 2108 }, { "epoch": 5.0, "learning_rate": 0.000453168880455408, "loss": 0.6864, "step": 2635 }, { "epoch": 5.0, "eval_accuracy": 0.4908256880733945, "eval_loss": NaN, "eval_runtime": 1.3451, "eval_samples_per_second": 648.288, "eval_steps_per_second": 5.204, "step": 2635 }, { "epoch": 6.0, "learning_rate": 0.000443168880455408, "loss": 0.0, "step": 3162 }, { "epoch": 6.0, "eval_accuracy": 0.4908256880733945, "eval_loss": NaN, "eval_runtime": 1.3487, "eval_samples_per_second": 646.555, "eval_steps_per_second": 5.19, "step": 3162 }, { "epoch": 6.0, "step": 3162, "total_flos": 5.771431023476736e+16, "train_loss": 0.5792089338019104, "train_runtime": 1859.6741, "train_samples_per_second": 1810.774, "train_steps_per_second": 14.169 } ], "max_steps": 26350, "num_train_epochs": 50, "total_flos": 5.771431023476736e+16, "trial_name": null, "trial_params": null }