{ "best_metric": 0.687747597694397, "best_model_checkpoint": "hBERTv1_wnli/checkpoint-36", "epoch": 17.0, "global_step": 51, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.7359, "step": 3 }, { "epoch": 1.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.7193758487701416, "eval_runtime": 0.1035, "eval_samples_per_second": 685.963, "eval_steps_per_second": 9.661, "step": 3 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.6989, "step": 6 }, { "epoch": 2.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.68994140625, "eval_runtime": 0.0997, "eval_samples_per_second": 712.235, "eval_steps_per_second": 10.031, "step": 6 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.7031, "step": 9 }, { "epoch": 3.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.7027672529220581, "eval_runtime": 0.0992, "eval_samples_per_second": 715.838, "eval_steps_per_second": 10.082, "step": 9 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.7012, "step": 12 }, { "epoch": 4.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6888756155967712, "eval_runtime": 0.0988, "eval_samples_per_second": 718.868, "eval_steps_per_second": 10.125, "step": 12 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.697, "step": 15 }, { "epoch": 5.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6893500089645386, "eval_runtime": 0.0984, "eval_samples_per_second": 721.2, "eval_steps_per_second": 10.158, "step": 15 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.6971, "step": 18 }, { "epoch": 6.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.701467752456665, "eval_runtime": 0.0981, "eval_samples_per_second": 723.896, "eval_steps_per_second": 10.196, "step": 18 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.7, "step": 21 }, { "epoch": 7.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6881740093231201, "eval_runtime": 0.1052, "eval_samples_per_second": 674.863, "eval_steps_per_second": 9.505, "step": 21 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.6928, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6890406012535095, "eval_runtime": 0.1, "eval_samples_per_second": 710.295, "eval_steps_per_second": 10.004, "step": 24 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.6932, "step": 27 }, { "epoch": 9.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.689693808555603, "eval_runtime": 0.0997, "eval_samples_per_second": 712.293, "eval_steps_per_second": 10.032, "step": 27 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.6954, "step": 30 }, { "epoch": 10.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.695560097694397, "eval_runtime": 0.1007, "eval_samples_per_second": 704.825, "eval_steps_per_second": 9.927, "step": 30 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.6962, "step": 33 }, { "epoch": 11.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6913100481033325, "eval_runtime": 0.0999, "eval_samples_per_second": 710.909, "eval_steps_per_second": 10.013, "step": 33 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.6956, "step": 36 }, { "epoch": 12.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.687747597694397, "eval_runtime": 0.1, "eval_samples_per_second": 710.233, "eval_steps_per_second": 10.003, "step": 36 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.6973, "step": 39 }, { "epoch": 13.0, "eval_accuracy": 0.5070422535211268, "eval_loss": 0.6925820708274841, "eval_runtime": 0.1034, "eval_samples_per_second": 686.38, "eval_steps_per_second": 9.667, "step": 39 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.6978, "step": 42 }, { "epoch": 14.0, "eval_accuracy": 0.49295774647887325, "eval_loss": 0.6933045983314514, "eval_runtime": 0.1003, "eval_samples_per_second": 707.79, "eval_steps_per_second": 9.969, "step": 42 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 0.6945, "step": 45 }, { "epoch": 15.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.688297688961029, "eval_runtime": 0.0985, "eval_samples_per_second": 720.537, "eval_steps_per_second": 10.148, "step": 45 }, { "epoch": 16.0, "learning_rate": 3.4000000000000007e-05, "loss": 0.6974, "step": 48 }, { "epoch": 16.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6881258487701416, "eval_runtime": 0.0996, "eval_samples_per_second": 712.95, "eval_steps_per_second": 10.042, "step": 48 }, { "epoch": 17.0, "learning_rate": 3.3e-05, "loss": 0.6936, "step": 51 }, { "epoch": 17.0, "eval_accuracy": 0.5211267605633803, "eval_loss": 0.6924790143966675, "eval_runtime": 0.0992, "eval_samples_per_second": 715.872, "eval_steps_per_second": 10.083, "step": 51 }, { "epoch": 17.0, "step": 51, "total_flos": 1363527629537280.0, "train_loss": 0.6992356636944939, "train_runtime": 99.2747, "train_samples_per_second": 319.82, "train_steps_per_second": 1.511 } ], "max_steps": 150, "num_train_epochs": 50, "total_flos": 1363527629537280.0, "trial_name": null, "trial_params": null }