{ "best_metric": 0.4098166227340698, "best_model_checkpoint": "hBERTv1_new_pretrain_48_KD_sst2/checkpoint-527", "epoch": 6.0, "global_step": 3162, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.9200000000000004e-05, "loss": 0.3947, "step": 527 }, { "epoch": 1.0, "eval_accuracy": 0.8165137614678899, "eval_loss": 0.4098166227340698, "eval_runtime": 1.8008, "eval_samples_per_second": 484.227, "eval_steps_per_second": 3.887, "step": 527 }, { "epoch": 2.0, "learning_rate": 3.8400000000000005e-05, "loss": 0.2426, "step": 1054 }, { "epoch": 2.0, "eval_accuracy": 0.8256880733944955, "eval_loss": 0.47963330149650574, "eval_runtime": 1.8005, "eval_samples_per_second": 484.31, "eval_steps_per_second": 3.888, "step": 1054 }, { "epoch": 3.0, "learning_rate": 3.76e-05, "loss": 0.1948, "step": 1581 }, { "epoch": 3.0, "eval_accuracy": 0.8188073394495413, "eval_loss": 0.4834706783294678, "eval_runtime": 1.8087, "eval_samples_per_second": 482.113, "eval_steps_per_second": 3.87, "step": 1581 }, { "epoch": 4.0, "learning_rate": 3.680000000000001e-05, "loss": 0.1702, "step": 2108 }, { "epoch": 4.0, "eval_accuracy": 0.8027522935779816, "eval_loss": 0.5115558505058289, "eval_runtime": 1.8015, "eval_samples_per_second": 484.037, "eval_steps_per_second": 3.886, "step": 2108 }, { "epoch": 5.0, "learning_rate": 3.6e-05, "loss": 0.1484, "step": 2635 }, { "epoch": 5.0, "eval_accuracy": 0.8084862385321101, "eval_loss": 0.5547378063201904, "eval_runtime": 1.8032, "eval_samples_per_second": 483.586, "eval_steps_per_second": 3.882, "step": 2635 }, { "epoch": 6.0, "learning_rate": 3.52e-05, "loss": 0.1355, "step": 3162 }, { "epoch": 6.0, "eval_accuracy": 0.7993119266055045, "eval_loss": 0.6597509980201721, "eval_runtime": 1.8009, "eval_samples_per_second": 484.198, "eval_steps_per_second": 3.887, "step": 3162 }, { "epoch": 6.0, "step": 3162, "total_flos": 5.984501559302554e+16, "train_loss": 0.21436870558061305, "train_runtime": 2799.5615, "train_samples_per_second": 1202.849, "train_steps_per_second": 9.412 } ], "max_steps": 26350, "num_train_epochs": 50, "total_flos": 5.984501559302554e+16, "trial_name": null, "trial_params": null }