{ "best_metric": 1.0956659317016602, "best_model_checkpoint": "hBERTv2_new_pretrain_w_init__mnli/checkpoint-6136", "epoch": 7.0, "global_step": 21476, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.9200000000000004e-05, "loss": 1.1017, "step": 3068 }, { "epoch": 1.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.098468542098999, "eval_runtime": 16.8183, "eval_samples_per_second": 583.592, "eval_steps_per_second": 4.578, "step": 3068 }, { "epoch": 2.0, "learning_rate": 3.8400000000000005e-05, "loss": 1.0989, "step": 6136 }, { "epoch": 2.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0956659317016602, "eval_runtime": 16.7915, "eval_samples_per_second": 584.522, "eval_steps_per_second": 4.586, "step": 6136 }, { "epoch": 3.0, "learning_rate": 3.76e-05, "loss": 1.0987, "step": 9204 }, { "epoch": 3.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0989995002746582, "eval_runtime": 16.7925, "eval_samples_per_second": 584.486, "eval_steps_per_second": 4.585, "step": 9204 }, { "epoch": 4.0, "learning_rate": 3.680000000000001e-05, "loss": 1.0986, "step": 12272 }, { "epoch": 4.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0989333391189575, "eval_runtime": 16.8176, "eval_samples_per_second": 583.614, "eval_steps_per_second": 4.579, "step": 12272 }, { "epoch": 5.0, "learning_rate": 3.6e-05, "loss": 1.0988, "step": 15340 }, { "epoch": 5.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.110775113105774, "eval_runtime": 16.8012, "eval_samples_per_second": 584.185, "eval_steps_per_second": 4.583, "step": 15340 }, { "epoch": 6.0, "learning_rate": 3.52e-05, "loss": 1.099, "step": 18408 }, { "epoch": 6.0, "eval_accuracy": 0.3544574630667346, "eval_loss": 1.096816897392273, "eval_runtime": 16.8121, "eval_samples_per_second": 583.805, "eval_steps_per_second": 4.58, "step": 18408 }, { "epoch": 7.0, "learning_rate": 3.44e-05, "loss": 1.0988, "step": 21476 }, { "epoch": 7.0, "eval_accuracy": 0.3273560876209883, "eval_loss": 1.0975699424743652, "eval_runtime": 16.8047, "eval_samples_per_second": 584.062, "eval_steps_per_second": 4.582, "step": 21476 }, { "epoch": 7.0, "step": 21476, "total_flos": 4.022770393541509e+17, "train_loss": 1.0992188363973157, "train_runtime": 13829.8552, "train_samples_per_second": 1419.762, "train_steps_per_second": 11.092 } ], "max_steps": 153400, "num_train_epochs": 50, "total_flos": 4.022770393541509e+17, "trial_name": null, "trial_params": null }