{ "best_metric": 0.6648005843162537, "best_model_checkpoint": "distilbert_add_GLUE_Experiment_qnli/checkpoint-410", "epoch": 6.0, "global_step": 2460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.6886, "step": 410 }, { "epoch": 1.0, "eval_accuracy": 0.6066263957532492, "eval_loss": 0.6648005843162537, "eval_runtime": 4.3884, "eval_samples_per_second": 1244.864, "eval_steps_per_second": 5.013, "step": 410 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.6569, "step": 820 }, { "epoch": 2.0, "eval_accuracy": 0.5998535603148453, "eval_loss": 0.6677054762840271, "eval_runtime": 4.3835, "eval_samples_per_second": 1246.26, "eval_steps_per_second": 5.019, "step": 820 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.6419, "step": 1230 }, { "epoch": 3.0, "eval_accuracy": 0.5914332784184514, "eval_loss": 0.6671571731567383, "eval_runtime": 4.383, "eval_samples_per_second": 1246.413, "eval_steps_per_second": 5.019, "step": 1230 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.6293, "step": 1640 }, { "epoch": 4.0, "eval_accuracy": 0.5976569650375252, "eval_loss": 0.6676780581474304, "eval_runtime": 4.085, "eval_samples_per_second": 1337.341, "eval_steps_per_second": 5.386, "step": 1640 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.6118, "step": 2050 }, { "epoch": 5.0, "eval_accuracy": 0.600219659527732, "eval_loss": 0.6690582633018494, "eval_runtime": 4.3841, "eval_samples_per_second": 1246.106, "eval_steps_per_second": 5.018, "step": 2050 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.5857, "step": 2460 }, { "epoch": 6.0, "eval_accuracy": 0.6077246933919092, "eval_loss": 0.6853577494621277, "eval_runtime": 4.3892, "eval_samples_per_second": 1244.634, "eval_steps_per_second": 5.012, "step": 2460 }, { "epoch": 6.0, "step": 2460, "total_flos": 3.831136168339046e+16, "train_loss": 0.6356980114448362, "train_runtime": 1226.2807, "train_samples_per_second": 4270.759, "train_steps_per_second": 16.717 } ], "max_steps": 20500, "num_train_epochs": 50, "total_flos": 3.831136168339046e+16, "trial_name": null, "trial_params": null }