{ "best_metric": 0.7371178269386292, "best_model_checkpoint": "hBERTv2_new_pretrain_rte/checkpoint-20", "epoch": 6.0, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.00049, "loss": 9.1657, "step": 20 }, { "epoch": 1.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7371178269386292, "eval_runtime": 0.4926, "eval_samples_per_second": 562.316, "eval_steps_per_second": 6.09, "step": 20 }, { "epoch": 2.0, "learning_rate": 0.00048, "loss": 9.3188, "step": 40 }, { "epoch": 2.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.7859036326408386, "eval_runtime": 0.4896, "eval_samples_per_second": 565.793, "eval_steps_per_second": 6.128, "step": 40 }, { "epoch": 3.0, "learning_rate": 0.00047, "loss": 9.3339, "step": 60 }, { "epoch": 3.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.9236378073692322, "eval_runtime": 0.4893, "eval_samples_per_second": 566.073, "eval_steps_per_second": 6.131, "step": 60 }, { "epoch": 4.0, "learning_rate": 0.00046, "loss": 9.7488, "step": 80 }, { "epoch": 4.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.914161205291748, "eval_runtime": 0.4899, "eval_samples_per_second": 565.365, "eval_steps_per_second": 6.123, "step": 80 }, { "epoch": 5.0, "learning_rate": 0.00045000000000000004, "loss": 9.1542, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.8327292799949646, "eval_runtime": 0.4923, "eval_samples_per_second": 562.691, "eval_steps_per_second": 6.094, "step": 100 }, { "epoch": 6.0, "learning_rate": 0.00044, "loss": 9.1755, "step": 120 }, { "epoch": 6.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.8221527934074402, "eval_runtime": 0.4828, "eval_samples_per_second": 573.69, "eval_steps_per_second": 6.213, "step": 120 }, { "epoch": 6.0, "step": 120, "total_flos": 2186307132456960.0, "train_loss": 9.316140111287435, "train_runtime": 100.0781, "train_samples_per_second": 1244.029, "train_steps_per_second": 9.992 } ], "max_steps": 1000, "num_train_epochs": 50, "total_flos": 2186307132456960.0, "trial_name": null, "trial_params": null }