{ "best_metric": 1.2698698043823242, "best_model_checkpoint": "distilbert_sa_GLUE_Experiment_data_aug_qnli/checkpoint-16604", "epoch": 6.0, "global_step": 99624, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.900030113225729e-05, "loss": 0.3057, "step": 16604 }, { "epoch": 1.0, "eval_accuracy": 0.5996705107084019, "eval_loss": 1.2698698043823242, "eval_runtime": 3.2256, "eval_samples_per_second": 1693.631, "eval_steps_per_second": 6.82, "step": 16604 }, { "epoch": 2.0, "learning_rate": 4.800072271741749e-05, "loss": 0.0735, "step": 33208 }, { "epoch": 2.0, "eval_accuracy": 0.5952773201537617, "eval_loss": 1.778567910194397, "eval_runtime": 3.2082, "eval_samples_per_second": 1702.822, "eval_steps_per_second": 6.857, "step": 33208 }, { "epoch": 3.0, "learning_rate": 4.700108407612624e-05, "loss": 0.0313, "step": 49812 }, { "epoch": 3.0, "eval_accuracy": 0.5800842028189639, "eval_loss": 1.9603168964385986, "eval_runtime": 3.2082, "eval_samples_per_second": 1702.844, "eval_steps_per_second": 6.858, "step": 49812 }, { "epoch": 4.0, "learning_rate": 4.600150566128644e-05, "loss": 0.0188, "step": 66416 }, { "epoch": 4.0, "eval_accuracy": 0.5927146256635548, "eval_loss": 2.2528913021087646, "eval_runtime": 3.1975, "eval_samples_per_second": 1708.529, "eval_steps_per_second": 6.88, "step": 66416 }, { "epoch": 5.0, "learning_rate": 4.500180679354373e-05, "loss": 0.0134, "step": 83020 }, { "epoch": 5.0, "eval_accuracy": 0.5912502288120081, "eval_loss": 2.4498074054718018, "eval_runtime": 3.1892, "eval_samples_per_second": 1712.986, "eval_steps_per_second": 6.898, "step": 83020 }, { "epoch": 6.0, "learning_rate": 4.400228860515538e-05, "loss": 0.0106, "step": 99624 }, { "epoch": 6.0, "eval_accuracy": 0.6031484532308256, "eval_loss": 2.51812481880188, "eval_runtime": 3.4228, "eval_samples_per_second": 1596.056, "eval_steps_per_second": 6.427, "step": 99624 }, { "epoch": 6.0, "step": 99624, "total_flos": 1.6891623964441313e+18, "train_loss": 0.07554671816640922, "train_runtime": 33064.6768, "train_samples_per_second": 6427.571, "train_steps_per_second": 25.108 } ], "max_steps": 830200, "num_train_epochs": 50, "total_flos": 1.6891623964441313e+18, "trial_name": null, "trial_params": null }