{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.997356828193833, "global_step": 2830, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7, "learning_rate": 9.307420494699646e-05, "loss": 0.5254, "step": 200 }, { "epoch": 0.7, "eval_accuracy": 0.8732394576072693, "eval_loss": 0.30324122309684753, "eval_runtime": 39.6845, "eval_samples_per_second": 14.313, "eval_steps_per_second": 7.156, "step": 200 }, { "epoch": 1.41, "learning_rate": 8.604240282685513e-05, "loss": 0.5153, "step": 400 }, { "epoch": 1.41, "eval_accuracy": 0.9049295783042908, "eval_loss": 0.28999415040016174, "eval_runtime": 40.176, "eval_samples_per_second": 14.138, "eval_steps_per_second": 7.069, "step": 400 }, { "epoch": 2.12, "learning_rate": 7.897526501766784e-05, "loss": 0.2832, "step": 600 }, { "epoch": 2.12, "eval_accuracy": 0.9419013857841492, "eval_loss": 0.24485430121421814, "eval_runtime": 43.1522, "eval_samples_per_second": 13.163, "eval_steps_per_second": 6.581, "step": 600 }, { "epoch": 2.82, "learning_rate": 7.190812720848057e-05, "loss": 0.2435, "step": 800 }, { "epoch": 2.82, "eval_accuracy": 0.9436619877815247, "eval_loss": 0.18418200314044952, "eval_runtime": 40.2247, "eval_samples_per_second": 14.121, "eval_steps_per_second": 7.06, "step": 800 }, { "epoch": 3.53, "learning_rate": 6.48409893992933e-05, "loss": 0.2056, "step": 1000 }, { "epoch": 3.53, "eval_accuracy": 0.952464759349823, "eval_loss": 0.17370758950710297, "eval_runtime": 43.6775, "eval_samples_per_second": 13.004, "eval_steps_per_second": 6.502, "step": 1000 }, { "epoch": 4.24, "learning_rate": 5.7773851590106006e-05, "loss": 0.1485, "step": 1200 }, { "epoch": 4.24, "eval_accuracy": 0.966549277305603, "eval_loss": 0.13336466252803802, "eval_runtime": 38.8727, "eval_samples_per_second": 14.612, "eval_steps_per_second": 7.306, "step": 1200 }, { "epoch": 4.94, "learning_rate": 5.0706713780918735e-05, "loss": 0.1717, "step": 1400 }, { "epoch": 4.94, "eval_accuracy": 0.9700704216957092, "eval_loss": 0.12312730401754379, "eval_runtime": 43.1241, "eval_samples_per_second": 13.171, "eval_steps_per_second": 6.586, "step": 1400 }, { "epoch": 5.65, "learning_rate": 4.367491166077738e-05, "loss": 0.1274, "step": 1600 }, { "epoch": 5.65, "eval_accuracy": 0.9700704216957092, "eval_loss": 0.12308631092309952, "eval_runtime": 39.3033, "eval_samples_per_second": 14.452, "eval_steps_per_second": 7.226, "step": 1600 }, { "epoch": 6.36, "learning_rate": 3.660777385159011e-05, "loss": 0.1268, "step": 1800 }, { "epoch": 6.36, "eval_accuracy": 0.9718309640884399, "eval_loss": 0.114546038210392, "eval_runtime": 38.8418, "eval_samples_per_second": 14.623, "eval_steps_per_second": 7.312, "step": 1800 }, { "epoch": 7.07, "learning_rate": 2.954063604240283e-05, "loss": 0.1395, "step": 2000 }, { "epoch": 7.07, "eval_accuracy": 0.9771126508712769, "eval_loss": 0.09564723074436188, "eval_runtime": 41.3738, "eval_samples_per_second": 13.728, "eval_steps_per_second": 6.864, "step": 2000 }, { "epoch": 7.77, "learning_rate": 2.247349823321555e-05, "loss": 0.0993, "step": 2200 }, { "epoch": 7.77, "eval_accuracy": 0.98591548204422, "eval_loss": 0.05968547984957695, "eval_runtime": 41.7972, "eval_samples_per_second": 13.589, "eval_steps_per_second": 6.795, "step": 2200 }, { "epoch": 8.48, "learning_rate": 1.5406360424028267e-05, "loss": 0.0785, "step": 2400 }, { "epoch": 8.48, "eval_accuracy": 0.9788732528686523, "eval_loss": 0.10253579914569855, "eval_runtime": 40.093, "eval_samples_per_second": 14.167, "eval_steps_per_second": 7.084, "step": 2400 }, { "epoch": 9.19, "learning_rate": 8.33922261484099e-06, "loss": 0.089, "step": 2600 }, { "epoch": 9.19, "eval_accuracy": 0.9788732528686523, "eval_loss": 0.10597831010818481, "eval_runtime": 37.5593, "eval_samples_per_second": 15.123, "eval_steps_per_second": 7.561, "step": 2600 }, { "epoch": 9.89, "learning_rate": 1.2720848056537103e-06, "loss": 0.0796, "step": 2800 }, { "epoch": 9.89, "eval_accuracy": 0.9806337952613831, "eval_loss": 0.09218709170818329, "eval_runtime": 40.4161, "eval_samples_per_second": 14.054, "eval_steps_per_second": 7.027, "step": 2800 }, { "epoch": 10.0, "step": 2830, "total_flos": 1.2045174996823788e+18, "train_loss": 0.20077389284915723, "train_runtime": 3673.7626, "train_samples_per_second": 6.179, "train_steps_per_second": 0.77 } ], "max_steps": 2830, "num_train_epochs": 10, "total_flos": 1.2045174996823788e+18, "trial_name": null, "trial_params": null }