{ "best_metric": 0.6930651664733887, "best_model_checkpoint": "hBERTv1_no_pretrain_qnli/checkpoint-9009", "epoch": 16.0, "global_step": 13104, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.00049002442002442, "loss": 0.715, "step": 819 }, { "epoch": 1.0, "eval_accuracy": 0.4946000366099213, "eval_loss": 0.6931389570236206, "eval_runtime": 9.5675, "eval_samples_per_second": 570.995, "eval_steps_per_second": 4.494, "step": 819 }, { "epoch": 2.0, "learning_rate": 0.00048002442002442004, "loss": 0.6932, "step": 1638 }, { "epoch": 2.0, "eval_accuracy": 0.4946000366099213, "eval_loss": 0.6931495070457458, "eval_runtime": 9.5694, "eval_samples_per_second": 570.88, "eval_steps_per_second": 4.493, "step": 1638 }, { "epoch": 3.0, "learning_rate": 0.00047002442002442, "loss": 0.6936, "step": 2457 }, { "epoch": 3.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.6931167840957642, "eval_runtime": 9.5644, "eval_samples_per_second": 571.18, "eval_steps_per_second": 4.496, "step": 2457 }, { "epoch": 4.0, "learning_rate": 0.00046002442002442004, "loss": 0.6932, "step": 3276 }, { "epoch": 4.0, "eval_accuracy": 0.4946000366099213, "eval_loss": 0.6931969523429871, "eval_runtime": 9.5713, "eval_samples_per_second": 570.77, "eval_steps_per_second": 4.493, "step": 3276 }, { "epoch": 5.0, "learning_rate": 0.00045002442002442, "loss": 0.6932, "step": 4095 }, { "epoch": 5.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.6933488249778748, "eval_runtime": 9.573, "eval_samples_per_second": 570.669, "eval_steps_per_second": 4.492, "step": 4095 }, { "epoch": 6.0, "learning_rate": 0.00044002442002442004, "loss": 0.6932, "step": 4914 }, { "epoch": 6.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.6930915117263794, "eval_runtime": 9.5567, "eval_samples_per_second": 571.639, "eval_steps_per_second": 4.499, "step": 4914 }, { "epoch": 7.0, "learning_rate": 0.00043002442002442007, "loss": 0.6932, "step": 5733 }, { "epoch": 7.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.6931073069572449, "eval_runtime": 9.5808, "eval_samples_per_second": 570.202, "eval_steps_per_second": 4.488, "step": 5733 }, { "epoch": 8.0, "learning_rate": 0.00042002442002442005, "loss": 0.6932, "step": 6552 }, { "epoch": 8.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.6930862069129944, "eval_runtime": 9.5904, "eval_samples_per_second": 569.63, "eval_steps_per_second": 4.484, "step": 6552 }, { "epoch": 9.0, "learning_rate": 0.00041003663003663003, "loss": 0.6935, "step": 7371 }, { "epoch": 9.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.693478524684906, "eval_runtime": 9.5344, "eval_samples_per_second": 572.981, "eval_steps_per_second": 4.51, "step": 7371 }, { "epoch": 10.0, "learning_rate": 0.00040003663003663006, "loss": 0.6932, "step": 8190 }, { "epoch": 10.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.6930915117263794, "eval_runtime": 9.5321, "eval_samples_per_second": 573.116, "eval_steps_per_second": 4.511, "step": 8190 }, { "epoch": 11.0, "learning_rate": 0.00039003663003663004, "loss": 0.6932, "step": 9009 }, { "epoch": 11.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.6930651664733887, "eval_runtime": 9.5448, "eval_samples_per_second": 572.352, "eval_steps_per_second": 4.505, "step": 9009 }, { "epoch": 12.0, "learning_rate": 0.00038004884004884, "loss": 0.6932, "step": 9828 }, { "epoch": 12.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.6931073069572449, "eval_runtime": 9.5697, "eval_samples_per_second": 570.866, "eval_steps_per_second": 4.493, "step": 9828 }, { "epoch": 13.0, "learning_rate": 0.00037004884004884005, "loss": 0.6932, "step": 10647 }, { "epoch": 13.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.6931073069572449, "eval_runtime": 9.5909, "eval_samples_per_second": 569.603, "eval_steps_per_second": 4.483, "step": 10647 }, { "epoch": 14.0, "learning_rate": 0.00036004884004884, "loss": 0.6932, "step": 11466 }, { "epoch": 14.0, "eval_accuracy": 0.4946000366099213, "eval_loss": 0.6931231617927551, "eval_runtime": 9.5446, "eval_samples_per_second": 572.363, "eval_steps_per_second": 4.505, "step": 11466 }, { "epoch": 15.0, "learning_rate": 0.00035004884004884005, "loss": 0.6932, "step": 12285 }, { "epoch": 15.0, "eval_accuracy": 0.4946000366099213, "eval_loss": 0.693396270275116, "eval_runtime": 9.5573, "eval_samples_per_second": 571.603, "eval_steps_per_second": 4.499, "step": 12285 }, { "epoch": 16.0, "learning_rate": 0.0003400488400488401, "loss": 0.6932, "step": 13104 }, { "epoch": 16.0, "eval_accuracy": 0.4946000366099213, "eval_loss": 0.6931284070014954, "eval_runtime": 9.5555, "eval_samples_per_second": 571.71, "eval_steps_per_second": 4.5, "step": 13104 }, { "epoch": 16.0, "step": 13104, "total_flos": 2.481936015514665e+17, "train_loss": 0.694585192334521, "train_runtime": 8903.1517, "train_samples_per_second": 588.236, "train_steps_per_second": 4.599 } ], "max_steps": 40950, "num_train_epochs": 50, "total_flos": 2.481936015514665e+17, "trial_name": null, "trial_params": null }