{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.990967741935484, "global_step": 336, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 9e-06, "loss": 0.4828, "step": 48 }, { "epoch": 0.99, "eval_acc": 86.17021276595744, "eval_loss": 0.3562283217906952, "eval_runtime": 82.5249, "eval_samples_per_second": 4.556, "step": 48 }, { "epoch": 1.99, "learning_rate": 8.000000000000001e-06, "loss": 0.1477, "step": 96 }, { "epoch": 1.99, "eval_acc": 85.63829787234043, "eval_loss": 0.3522704243659973, "eval_runtime": 82.6705, "eval_samples_per_second": 4.548, "step": 96 }, { "epoch": 2.99, "learning_rate": 7e-06, "loss": 0.0398, "step": 144 }, { "epoch": 2.99, "eval_acc": 87.76595744680851, "eval_loss": 0.7263116836547852, "eval_runtime": 82.5944, "eval_samples_per_second": 4.552, "step": 144 }, { "epoch": 3.99, "learning_rate": 6e-06, "loss": 0.0179, "step": 192 }, { "epoch": 3.99, "eval_acc": 87.5, "eval_loss": 0.8694424033164978, "eval_runtime": 82.5077, "eval_samples_per_second": 4.557, "step": 192 }, { "epoch": 4.99, "learning_rate": 5e-06, "loss": 0.0047, "step": 240 }, { "epoch": 4.99, "eval_acc": 86.70212765957447, "eval_loss": 1.1555883884429932, "eval_runtime": 82.5664, "eval_samples_per_second": 4.554, "step": 240 }, { "epoch": 5.99, "learning_rate": 4.000000000000001e-06, "loss": 0.0005, "step": 288 }, { "epoch": 5.99, "eval_acc": 89.09574468085107, "eval_loss": 1.2453709840774536, "eval_runtime": 82.6312, "eval_samples_per_second": 4.55, "step": 288 }, { "epoch": 6.99, "learning_rate": 3e-06, "loss": 0.0004, "step": 336 }, { "epoch": 6.99, "eval_acc": 89.09574468085107, "eval_loss": 1.210463523864746, "eval_runtime": 82.502, "eval_samples_per_second": 4.557, "step": 336 } ], "max_steps": 480, "num_train_epochs": 10, "total_flos": 1.4028769825494106e+17, "trial_name": null, "trial_params": null }