{ "best_metric": 0.6574940085411072, "best_model_checkpoint": "distilbert_add_GLUE_Experiment_qnli_96/checkpoint-2050", "epoch": 10.0, "global_step": 4100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.6932, "step": 410 }, { "epoch": 1.0, "eval_accuracy": 0.4946000366099213, "eval_loss": 0.6931495070457458, "eval_runtime": 2.18, "eval_samples_per_second": 2505.925, "eval_steps_per_second": 10.092, "step": 410 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.6932, "step": 820 }, { "epoch": 2.0, "eval_accuracy": 0.4946000366099213, "eval_loss": 0.6931758522987366, "eval_runtime": 2.2333, "eval_samples_per_second": 2446.178, "eval_steps_per_second": 9.851, "step": 820 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.6932, "step": 1230 }, { "epoch": 3.0, "eval_accuracy": 0.5053999633900788, "eval_loss": 0.6930550336837769, "eval_runtime": 2.2746, "eval_samples_per_second": 2401.706, "eval_steps_per_second": 9.672, "step": 1230 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.6826, "step": 1640 }, { "epoch": 4.0, "eval_accuracy": 0.5967417170053084, "eval_loss": 0.6659221053123474, "eval_runtime": 2.27, "eval_samples_per_second": 2406.63, "eval_steps_per_second": 9.692, "step": 1640 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.6539, "step": 2050 }, { "epoch": 5.0, "eval_accuracy": 0.6071755445725792, "eval_loss": 0.6574940085411072, "eval_runtime": 2.2051, "eval_samples_per_second": 2477.384, "eval_steps_per_second": 9.977, "step": 2050 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.6403, "step": 2460 }, { "epoch": 6.0, "eval_accuracy": 0.6073585941790225, "eval_loss": 0.660847008228302, "eval_runtime": 2.3863, "eval_samples_per_second": 2289.345, "eval_steps_per_second": 9.219, "step": 2460 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.6288, "step": 2870 }, { "epoch": 7.0, "eval_accuracy": 0.603880651656599, "eval_loss": 0.6702004671096802, "eval_runtime": 2.9467, "eval_samples_per_second": 1853.935, "eval_steps_per_second": 7.466, "step": 2870 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.6186, "step": 3280 }, { "epoch": 8.0, "eval_accuracy": 0.6022332051986088, "eval_loss": 0.6729844808578491, "eval_runtime": 2.2837, "eval_samples_per_second": 2392.185, "eval_steps_per_second": 9.634, "step": 3280 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.6094, "step": 3690 }, { "epoch": 9.0, "eval_accuracy": 0.6013179571663921, "eval_loss": 0.6739695072174072, "eval_runtime": 2.3362, "eval_samples_per_second": 2338.401, "eval_steps_per_second": 9.417, "step": 3690 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.5995, "step": 4100 }, { "epoch": 10.0, "eval_accuracy": 0.5919824272377814, "eval_loss": 0.6906076073646545, "eval_runtime": 2.2795, "eval_samples_per_second": 2396.535, "eval_steps_per_second": 9.651, "step": 4100 }, { "epoch": 10.0, "step": 4100, "total_flos": 6016944507453440.0, "train_loss": 0.651266948420827, "train_runtime": 839.7217, "train_samples_per_second": 6236.768, "train_steps_per_second": 24.413 } ], "max_steps": 20500, "num_train_epochs": 50, "total_flos": 6016944507453440.0, "trial_name": null, "trial_params": null }