{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.361930294906166, "eval_steps": 500, "global_step": 4000, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6702412868632708, "grad_norm": 12.96187973022461, "learning_rate": 5.852478627669732e-06, "loss": 0.5779, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.7619845867156982, "eval_loss": 0.48268476128578186, "eval_runtime": 9.469, "eval_samples_per_second": 315.027, "eval_steps_per_second": 19.749, "step": 746 }, { "epoch": 1.3404825737265416, "grad_norm": 19.64900779724121, "learning_rate": 5.432041944647482e-06, "loss": 0.464, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.7680187821388245, "eval_loss": 0.5030698180198669, "eval_runtime": 9.5015, "eval_samples_per_second": 313.951, "eval_steps_per_second": 19.681, "step": 1492 }, { "epoch": 2.0107238605898123, "grad_norm": 9.11796760559082, "learning_rate": 5.0116052616252305e-06, "loss": 0.4025, "step": 1500 }, { "epoch": 2.680965147453083, "grad_norm": 11.002016067504883, "learning_rate": 4.59116857860298e-06, "loss": 0.2688, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.7747234106063843, "eval_loss": 0.5899690389633179, "eval_runtime": 9.4806, "eval_samples_per_second": 314.643, "eval_steps_per_second": 19.725, "step": 2238 }, { "epoch": 3.351206434316354, "grad_norm": 3.2045211791992188, "learning_rate": 4.170731895580729e-06, "loss": 0.2182, "step": 2500 }, { "epoch": 4.0, "eval_accuracy": 0.7673482894897461, "eval_loss": 0.7394132614135742, "eval_runtime": 9.6202, "eval_samples_per_second": 310.077, "eval_steps_per_second": 19.438, "step": 2984 }, { "epoch": 4.021447721179625, "grad_norm": 0.5618192553520203, "learning_rate": 3.7502952125584775e-06, "loss": 0.1644, "step": 3000 }, { "epoch": 4.6916890080428955, "grad_norm": 19.86639976501465, "learning_rate": 3.3298585295362272e-06, "loss": 0.1173, "step": 3500 }, { "epoch": 5.0, "eval_accuracy": 0.7646664381027222, "eval_loss": 0.7920505404472351, "eval_runtime": 9.4974, "eval_samples_per_second": 314.085, "eval_steps_per_second": 19.69, "step": 3730 }, { "epoch": 5.361930294906166, "grad_norm": 7.497531414031982, "learning_rate": 2.909421846513976e-06, "loss": 0.1026, "step": 4000 } ], "logging_steps": 500, "max_steps": 7460, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.1054878313466996e+16, "train_batch_size": 16, "trial_name": null, "trial_params": { "learning_rate": 6.272915310691983e-06, "per_device_train_batch_size": 16 } }