{ "best_metric": 0.6844189763069153, "best_model_checkpoint": "hBERTv1_new_pretrain_w_init__wnli/checkpoint-10", "epoch": 7.0, "global_step": 35, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.9200000000000004e-05, "loss": 0.9261, "step": 5 }, { "epoch": 1.0, "eval_accuracy": 0.5352112676056338, "eval_loss": 0.6915163993835449, "eval_runtime": 0.1353, "eval_samples_per_second": 524.626, "eval_steps_per_second": 7.389, "step": 5 }, { "epoch": 2.0, "learning_rate": 3.8400000000000005e-05, "loss": 0.7312, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6844189763069153, "eval_runtime": 0.1352, "eval_samples_per_second": 525.264, "eval_steps_per_second": 7.398, "step": 10 }, { "epoch": 3.0, "learning_rate": 3.76e-05, "loss": 0.7289, "step": 15 }, { "epoch": 3.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.733659565448761, "eval_runtime": 0.1387, "eval_samples_per_second": 511.967, "eval_steps_per_second": 7.211, "step": 15 }, { "epoch": 4.0, "learning_rate": 3.680000000000001e-05, "loss": 0.7656, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.758527934551239, "eval_runtime": 0.1346, "eval_samples_per_second": 527.673, "eval_steps_per_second": 7.432, "step": 20 }, { "epoch": 5.0, "learning_rate": 3.6e-05, "loss": 0.7189, "step": 25 }, { "epoch": 5.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6906359791755676, "eval_runtime": 0.1389, "eval_samples_per_second": 511.121, "eval_steps_per_second": 7.199, "step": 25 }, { "epoch": 6.0, "learning_rate": 3.52e-05, "loss": 0.7167, "step": 30 }, { "epoch": 6.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6910760402679443, "eval_runtime": 0.1418, "eval_samples_per_second": 500.863, "eval_steps_per_second": 7.054, "step": 30 }, { "epoch": 7.0, "learning_rate": 3.44e-05, "loss": 0.7089, "step": 35 }, { "epoch": 7.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.7181998491287231, "eval_runtime": 0.1365, "eval_samples_per_second": 520.086, "eval_steps_per_second": 7.325, "step": 35 }, { "epoch": 7.0, "step": 35, "total_flos": 658290182717440.0, "train_loss": 0.7566231863839286, "train_runtime": 61.1061, "train_samples_per_second": 519.588, "train_steps_per_second": 4.091 } ], "max_steps": 250, "num_train_epochs": 50, "total_flos": 658290182717440.0, "trial_name": null, "trial_params": null }