{ "best_metric": 0.8776137828826904, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_mnli_128/checkpoint-27612", "epoch": 14.0, "global_step": 42952, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 1.0129, "step": 3068 }, { "epoch": 1.0, "eval_accuracy": 0.5437595517065715, "eval_loss": 0.9528754353523254, "eval_runtime": 21.6888, "eval_samples_per_second": 452.537, "eval_steps_per_second": 3.55, "step": 3068 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.9284, "step": 6136 }, { "epoch": 2.0, "eval_accuracy": 0.5593479368313805, "eval_loss": 0.9265554547309875, "eval_runtime": 21.6883, "eval_samples_per_second": 452.548, "eval_steps_per_second": 3.55, "step": 6136 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.8999, "step": 9204 }, { "epoch": 3.0, "eval_accuracy": 0.5774834437086093, "eval_loss": 0.9054868817329407, "eval_runtime": 21.6795, "eval_samples_per_second": 452.731, "eval_steps_per_second": 3.552, "step": 9204 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.8803, "step": 12272 }, { "epoch": 4.0, "eval_accuracy": 0.5854304635761589, "eval_loss": 0.8951208591461182, "eval_runtime": 21.7169, "eval_samples_per_second": 451.952, "eval_steps_per_second": 3.546, "step": 12272 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.8637, "step": 15340 }, { "epoch": 5.0, "eval_accuracy": 0.5885888945491594, "eval_loss": 0.8991015553474426, "eval_runtime": 21.7239, "eval_samples_per_second": 451.806, "eval_steps_per_second": 3.544, "step": 15340 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.8472, "step": 18408 }, { "epoch": 6.0, "eval_accuracy": 0.5913397860417728, "eval_loss": 0.8906782269477844, "eval_runtime": 21.7654, "eval_samples_per_second": 450.945, "eval_steps_per_second": 3.538, "step": 18408 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.8309, "step": 21476 }, { "epoch": 7.0, "eval_accuracy": 0.5927661742231278, "eval_loss": 0.8940486907958984, "eval_runtime": 21.665, "eval_samples_per_second": 453.036, "eval_steps_per_second": 3.554, "step": 21476 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.814, "step": 24544 }, { "epoch": 8.0, "eval_accuracy": 0.5987773815588385, "eval_loss": 0.8879620432853699, "eval_runtime": 21.7009, "eval_samples_per_second": 452.286, "eval_steps_per_second": 3.548, "step": 24544 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.7988, "step": 27612 }, { "epoch": 9.0, "eval_accuracy": 0.6022414671421293, "eval_loss": 0.8776137828826904, "eval_runtime": 21.7212, "eval_samples_per_second": 451.863, "eval_steps_per_second": 3.545, "step": 27612 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.7825, "step": 30680 }, { "epoch": 10.0, "eval_accuracy": 0.6022414671421293, "eval_loss": 0.8958155512809753, "eval_runtime": 21.7108, "eval_samples_per_second": 452.08, "eval_steps_per_second": 3.547, "step": 30680 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.7662, "step": 33748 }, { "epoch": 11.0, "eval_accuracy": 0.6061130922058074, "eval_loss": 0.8834749460220337, "eval_runtime": 21.6874, "eval_samples_per_second": 452.567, "eval_steps_per_second": 3.55, "step": 33748 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.7504, "step": 36816 }, { "epoch": 12.0, "eval_accuracy": 0.6040753948038716, "eval_loss": 0.9004417657852173, "eval_runtime": 21.7036, "eval_samples_per_second": 452.229, "eval_steps_per_second": 3.548, "step": 36816 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.7359, "step": 39884 }, { "epoch": 13.0, "eval_accuracy": 0.6, "eval_loss": 0.9251638650894165, "eval_runtime": 21.7106, "eval_samples_per_second": 452.083, "eval_steps_per_second": 3.547, "step": 39884 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.7204, "step": 42952 }, { "epoch": 14.0, "eval_accuracy": 0.6007131940906776, "eval_loss": 0.9130809903144836, "eval_runtime": 21.7023, "eval_samples_per_second": 452.257, "eval_steps_per_second": 3.548, "step": 42952 }, { "epoch": 14.0, "step": 42952, "total_flos": 1.3103958963991347e+17, "train_loss": 0.8308279570709018, "train_runtime": 35893.6313, "train_samples_per_second": 547.036, "train_steps_per_second": 4.274 } ], "max_steps": 153400, "num_train_epochs": 50, "total_flos": 1.3103958963991347e+17, "trial_name": null, "trial_params": null }