{ "best_metric": 0.848714292049408, "best_model_checkpoint": "distilbert_sa_GLUE_Experiment_mnli_384/checkpoint-7670", "epoch": 10.0, "global_step": 15340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 1.0075, "step": 1534 }, { "epoch": 1.0, "eval_accuracy": 0.5303107488537953, "eval_loss": 0.9587203860282898, "eval_runtime": 5.6935, "eval_samples_per_second": 1723.896, "eval_steps_per_second": 6.85, "step": 1534 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.9233, "step": 3068 }, { "epoch": 2.0, "eval_accuracy": 0.5728986245542537, "eval_loss": 0.9005416631698608, "eval_runtime": 5.5438, "eval_samples_per_second": 1770.436, "eval_steps_per_second": 7.035, "step": 3068 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.8749, "step": 4602 }, { "epoch": 3.0, "eval_accuracy": 0.5887926642893531, "eval_loss": 0.8833683133125305, "eval_runtime": 5.9778, "eval_samples_per_second": 1641.912, "eval_steps_per_second": 6.524, "step": 4602 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.8389, "step": 6136 }, { "epoch": 4.0, "eval_accuracy": 0.610697911360163, "eval_loss": 0.8563822507858276, "eval_runtime": 5.6621, "eval_samples_per_second": 1733.461, "eval_steps_per_second": 6.888, "step": 6136 }, { "epoch": 5.0, "learning_rate": 4.50006518904824e-05, "loss": 0.8058, "step": 7670 }, { "epoch": 5.0, "eval_accuracy": 0.6141619969434539, "eval_loss": 0.848714292049408, "eval_runtime": 5.565, "eval_samples_per_second": 1763.688, "eval_steps_per_second": 7.008, "step": 7670 }, { "epoch": 6.0, "learning_rate": 4.40013037809648e-05, "loss": 0.776, "step": 9204 }, { "epoch": 6.0, "eval_accuracy": 0.6220071319409067, "eval_loss": 0.8578256368637085, "eval_runtime": 5.8496, "eval_samples_per_second": 1677.901, "eval_steps_per_second": 6.667, "step": 9204 }, { "epoch": 7.0, "learning_rate": 4.3001955671447194e-05, "loss": 0.7467, "step": 10738 }, { "epoch": 7.0, "eval_accuracy": 0.6187468160978095, "eval_loss": 0.8617885708808899, "eval_runtime": 5.8167, "eval_samples_per_second": 1687.379, "eval_steps_per_second": 6.705, "step": 10738 }, { "epoch": 8.0, "learning_rate": 4.20019556714472e-05, "loss": 0.7171, "step": 12272 }, { "epoch": 8.0, "eval_accuracy": 0.6206826286296485, "eval_loss": 0.8828079104423523, "eval_runtime": 5.7638, "eval_samples_per_second": 1702.865, "eval_steps_per_second": 6.766, "step": 12272 }, { "epoch": 9.0, "learning_rate": 4.10026075619296e-05, "loss": 0.6876, "step": 13806 }, { "epoch": 9.0, "eval_accuracy": 0.6292409577177789, "eval_loss": 0.890081524848938, "eval_runtime": 5.7709, "eval_samples_per_second": 1700.785, "eval_steps_per_second": 6.758, "step": 13806 }, { "epoch": 10.0, "learning_rate": 4.00026075619296e-05, "loss": 0.6589, "step": 15340 }, { "epoch": 10.0, "eval_accuracy": 0.62190524707081, "eval_loss": 0.8952736854553223, "eval_runtime": 5.7281, "eval_samples_per_second": 1713.472, "eval_steps_per_second": 6.808, "step": 15340 }, { "epoch": 10.0, "step": 15340, "total_flos": 1.0787244155600896e+17, "train_loss": 0.8036706678248757, "train_runtime": 5092.3208, "train_samples_per_second": 3855.825, "train_steps_per_second": 15.062 } ], "max_steps": 76700, "num_train_epochs": 50, "total_flos": 1.0787244155600896e+17, "trial_name": null, "trial_params": null }