{ "best_metric": 0.7072979807853699, "best_model_checkpoint": "sa_BERT_48_mnli/checkpoint-16364", "epoch": 9.0, "global_step": 36819, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.9200000000000004e-05, "loss": 0.9145, "step": 4091 }, { "epoch": 1.0, "eval_accuracy": 0.6535914416709119, "eval_loss": 0.8005583882331848, "eval_runtime": 20.7416, "eval_samples_per_second": 473.203, "eval_steps_per_second": 4.966, "step": 4091 }, { "epoch": 2.0, "learning_rate": 3.8400000000000005e-05, "loss": 0.7442, "step": 8182 }, { "epoch": 2.0, "eval_accuracy": 0.6902699949057565, "eval_loss": 0.7245084047317505, "eval_runtime": 20.6753, "eval_samples_per_second": 474.721, "eval_steps_per_second": 4.982, "step": 8182 }, { "epoch": 3.0, "learning_rate": 3.76e-05, "loss": 0.6631, "step": 12273 }, { "epoch": 3.0, "eval_accuracy": 0.6979113601630158, "eval_loss": 0.7323198914527893, "eval_runtime": 20.6762, "eval_samples_per_second": 474.702, "eval_steps_per_second": 4.982, "step": 12273 }, { "epoch": 4.0, "learning_rate": 3.680000000000001e-05, "loss": 0.5942, "step": 16364 }, { "epoch": 4.0, "eval_accuracy": 0.7075904228222109, "eval_loss": 0.7072979807853699, "eval_runtime": 20.589, "eval_samples_per_second": 476.712, "eval_steps_per_second": 5.003, "step": 16364 }, { "epoch": 5.0, "learning_rate": 3.6e-05, "loss": 0.5241, "step": 20455 }, { "epoch": 5.0, "eval_accuracy": 0.7015792154865003, "eval_loss": 0.7474916577339172, "eval_runtime": 20.6918, "eval_samples_per_second": 474.342, "eval_steps_per_second": 4.978, "step": 20455 }, { "epoch": 6.0, "learning_rate": 3.52e-05, "loss": 0.4526, "step": 24546 }, { "epoch": 6.0, "eval_accuracy": 0.7088130412633724, "eval_loss": 0.8377429842948914, "eval_runtime": 20.7171, "eval_samples_per_second": 473.764, "eval_steps_per_second": 4.972, "step": 24546 }, { "epoch": 7.0, "learning_rate": 3.44e-05, "loss": 0.3842, "step": 28637 }, { "epoch": 7.0, "eval_accuracy": 0.6955680081507896, "eval_loss": 0.8736042380332947, "eval_runtime": 20.7409, "eval_samples_per_second": 473.219, "eval_steps_per_second": 4.966, "step": 28637 }, { "epoch": 8.0, "learning_rate": 3.3600000000000004e-05, "loss": 0.3213, "step": 32728 }, { "epoch": 8.0, "eval_accuracy": 0.6945491594498217, "eval_loss": 0.9334085583686829, "eval_runtime": 20.6303, "eval_samples_per_second": 475.756, "eval_steps_per_second": 4.993, "step": 32728 }, { "epoch": 9.0, "learning_rate": 3.28e-05, "loss": 0.2669, "step": 36819 }, { "epoch": 9.0, "eval_accuracy": 0.7026999490575649, "eval_loss": 1.0196205377578735, "eval_runtime": 20.722, "eval_samples_per_second": 473.652, "eval_steps_per_second": 4.971, "step": 36819 }, { "epoch": 9.0, "step": 36819, "total_flos": 4.6496322950057165e+17, "train_loss": 0.5405747011563323, "train_runtime": 22664.7689, "train_samples_per_second": 866.327, "train_steps_per_second": 9.025 } ], "max_steps": 204550, "num_train_epochs": 50, "total_flos": 4.6496322950057165e+17, "trial_name": null, "trial_params": null }