{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 29, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 3e-05, "loss": 2.6543, "step": 1 }, { "epoch": 0.03, "eval_accuracy": 0.05133992343294669, "eval_loss": 2.611328125, "eval_runtime": 4.2504, "eval_samples_per_second": 20.234, "eval_steps_per_second": 1.412, "step": 1 }, { "epoch": 0.07, "learning_rate": 3e-05, "loss": 2.6077, "step": 2 }, { "epoch": 0.07, "eval_accuracy": 0.05133992343294669, "eval_loss": 2.611328125, "eval_runtime": 3.4208, "eval_samples_per_second": 25.14, "eval_steps_per_second": 1.754, "step": 2 }, { "epoch": 0.1, "learning_rate": 2.9912069357315394e-05, "loss": 2.5964, "step": 3 }, { "epoch": 0.1, "eval_accuracy": 0.05192560425118565, "eval_loss": 2.560546875, "eval_runtime": 4.3956, "eval_samples_per_second": 19.565, "eval_steps_per_second": 1.365, "step": 3 }, { "epoch": 0.14, "learning_rate": 2.96493083356513e-05, "loss": 2.7302, "step": 4 }, { "epoch": 0.14, "eval_accuracy": 0.05265413404948289, "eval_loss": 2.5234375, "eval_runtime": 4.277, "eval_samples_per_second": 20.107, "eval_steps_per_second": 1.403, "step": 4 }, { "epoch": 0.17, "learning_rate": 2.9214797567742036e-05, "loss": 2.7, "step": 5 }, { "epoch": 0.17, "eval_accuracy": 0.05283983772355865, "eval_loss": 2.5078125, "eval_runtime": 4.3806, "eval_samples_per_second": 19.632, "eval_steps_per_second": 1.37, "step": 5 }, { "epoch": 0.21, "learning_rate": 2.861363129506436e-05, "loss": 2.5674, "step": 6 }, { "epoch": 0.21, "eval_accuracy": 0.05323981486772184, "eval_loss": 2.494140625, "eval_runtime": 4.3576, "eval_samples_per_second": 19.736, "eval_steps_per_second": 1.377, "step": 6 }, { "epoch": 0.24, "learning_rate": 2.7852857642513838e-05, "loss": 2.6406, "step": 7 }, { "epoch": 0.24, "eval_accuracy": 0.0533683789497743, "eval_loss": 2.48828125, "eval_runtime": 3.6815, "eval_samples_per_second": 23.36, "eval_steps_per_second": 1.63, "step": 7 }, { "epoch": 0.28, "learning_rate": 2.6941395985584656e-05, "loss": 2.5315, "step": 8 }, { "epoch": 0.28, "eval_accuracy": 0.05362550711387921, "eval_loss": 2.48046875, "eval_runtime": 4.3687, "eval_samples_per_second": 19.686, "eval_steps_per_second": 1.373, "step": 8 }, { "epoch": 0.31, "learning_rate": 2.5889932378846963e-05, "loss": 2.7202, "step": 9 }, { "epoch": 0.31, "eval_accuracy": 0.05369693160390835, "eval_loss": 2.47265625, "eval_runtime": 4.3981, "eval_samples_per_second": 19.554, "eval_steps_per_second": 1.364, "step": 9 }, { "epoch": 0.34, "learning_rate": 2.4710794271727415e-05, "loss": 2.5144, "step": 10 }, { "epoch": 0.34, "eval_accuracy": 0.05362550711387921, "eval_loss": 2.46484375, "eval_runtime": 4.3729, "eval_samples_per_second": 19.667, "eval_steps_per_second": 1.372, "step": 10 }, { "epoch": 0.38, "learning_rate": 2.341780598043574e-05, "loss": 2.4983, "step": 11 }, { "epoch": 0.38, "eval_accuracy": 0.05366836180789669, "eval_loss": 2.451171875, "eval_runtime": 3.9723, "eval_samples_per_second": 21.65, "eval_steps_per_second": 1.51, "step": 11 }, { "epoch": 0.41, "learning_rate": 2.2026126610496852e-05, "loss": 2.7029, "step": 12 }, { "epoch": 0.41, "eval_accuracy": 0.05386835037997829, "eval_loss": 2.44140625, "eval_runtime": 3.3723, "eval_samples_per_second": 25.502, "eval_steps_per_second": 1.779, "step": 12 }, { "epoch": 0.45, "learning_rate": 2.055207233009872e-05, "loss": 2.5198, "step": 13 }, { "epoch": 0.45, "eval_accuracy": 0.054039769156048224, "eval_loss": 2.43359375, "eval_runtime": 4.2557, "eval_samples_per_second": 20.208, "eval_steps_per_second": 1.41, "step": 13 }, { "epoch": 0.48, "learning_rate": 1.9012925077938318e-05, "loss": 2.5706, "step": 14 }, { "epoch": 0.48, "eval_accuracy": 0.0544826009942289, "eval_loss": 2.42578125, "eval_runtime": 4.3775, "eval_samples_per_second": 19.646, "eval_steps_per_second": 1.371, "step": 14 }, { "epoch": 0.52, "learning_rate": 1.7426729948291474e-05, "loss": 2.5688, "step": 15 }, { "epoch": 0.52, "eval_accuracy": 0.054768298954345464, "eval_loss": 2.41796875, "eval_runtime": 3.7002, "eval_samples_per_second": 23.242, "eval_steps_per_second": 1.622, "step": 15 }, { "epoch": 0.55, "learning_rate": 1.5812083628781265e-05, "loss": 2.3793, "step": 16 }, { "epoch": 0.55, "eval_accuracy": 0.05518256099651449, "eval_loss": 2.41015625, "eval_runtime": 4.2835, "eval_samples_per_second": 20.077, "eval_steps_per_second": 1.401, "step": 16 }, { "epoch": 0.59, "learning_rate": 1.4187916371218739e-05, "loss": 2.4785, "step": 17 }, { "epoch": 0.59, "eval_accuracy": 0.055353979772584425, "eval_loss": 2.404296875, "eval_runtime": 4.3709, "eval_samples_per_second": 19.676, "eval_steps_per_second": 1.373, "step": 17 }, { "epoch": 0.62, "learning_rate": 1.2573270051708529e-05, "loss": 2.4688, "step": 18 }, { "epoch": 0.62, "eval_accuracy": 0.05529684018056111, "eval_loss": 2.3984375, "eval_runtime": 3.974, "eval_samples_per_second": 21.641, "eval_steps_per_second": 1.51, "step": 18 }, { "epoch": 0.66, "learning_rate": 1.2573270051708529e-05, "loss": 2.5674, "step": 19 }, { "epoch": 0.66, "eval_accuracy": 0.05529684018056111, "eval_loss": 2.3984375, "eval_runtime": 3.4463, "eval_samples_per_second": 24.954, "eval_steps_per_second": 1.741, "step": 19 }, { "epoch": 0.69, "learning_rate": 1.098707492206169e-05, "loss": 2.5054, "step": 20 }, { "epoch": 0.69, "eval_accuracy": 0.05536826467059025, "eval_loss": 2.39453125, "eval_runtime": 4.3854, "eval_samples_per_second": 19.61, "eval_steps_per_second": 1.368, "step": 20 }, { "epoch": 0.72, "learning_rate": 9.447927669901284e-06, "loss": 2.452, "step": 21 }, { "epoch": 0.72, "eval_accuracy": 0.055511113650648536, "eval_loss": 2.388671875, "eval_runtime": 4.3743, "eval_samples_per_second": 19.66, "eval_steps_per_second": 1.372, "step": 21 }, { "epoch": 0.76, "learning_rate": 7.97387338950315e-06, "loss": 2.5999, "step": 22 }, { "epoch": 0.76, "eval_accuracy": 0.05558253814067768, "eval_loss": 2.3828125, "eval_runtime": 4.3782, "eval_samples_per_second": 19.643, "eval_steps_per_second": 1.37, "step": 22 }, { "epoch": 0.79, "learning_rate": 6.582194019564266e-06, "loss": 2.3665, "step": 23 }, { "epoch": 0.79, "eval_accuracy": 0.055596823038683504, "eval_loss": 2.37890625, "eval_runtime": 4.2553, "eval_samples_per_second": 20.21, "eval_steps_per_second": 1.41, "step": 23 }, { "epoch": 0.83, "learning_rate": 5.289205728272587e-06, "loss": 2.6223, "step": 24 }, { "epoch": 0.83, "eval_accuracy": 0.055653962630706814, "eval_loss": 2.375, "eval_runtime": 3.3625, "eval_samples_per_second": 25.576, "eval_steps_per_second": 1.784, "step": 24 }, { "epoch": 0.86, "learning_rate": 4.110067621153041e-06, "loss": 2.3562, "step": 25 }, { "epoch": 0.86, "eval_accuracy": 0.055653962630706814, "eval_loss": 2.37109375, "eval_runtime": 4.3982, "eval_samples_per_second": 19.554, "eval_steps_per_second": 1.364, "step": 25 }, { "epoch": 0.9, "learning_rate": 3.0586040144153436e-06, "loss": 2.429, "step": 26 }, { "epoch": 0.9, "eval_accuracy": 0.0556968173247243, "eval_loss": 2.369140625, "eval_runtime": 4.3664, "eval_samples_per_second": 19.696, "eval_steps_per_second": 1.374, "step": 26 }, { "epoch": 0.93, "learning_rate": 2.1471423574861643e-06, "loss": 2.563, "step": 27 }, { "epoch": 0.93, "eval_accuracy": 0.0557968116107651, "eval_loss": 2.3671875, "eval_runtime": 3.6475, "eval_samples_per_second": 23.578, "eval_steps_per_second": 1.645, "step": 27 }, { "epoch": 0.97, "learning_rate": 1.3863687049356465e-06, "loss": 2.4573, "step": 28 }, { "epoch": 0.97, "eval_accuracy": 0.055753956916747616, "eval_loss": 2.365234375, "eval_runtime": 4.2765, "eval_samples_per_second": 20.11, "eval_steps_per_second": 1.403, "step": 28 }, { "epoch": 1.0, "learning_rate": 7.852024322579649e-07, "loss": 2.4883, "step": 29 }, { "epoch": 1.0, "eval_accuracy": 0.05582538140677676, "eval_loss": 2.365234375, "eval_runtime": 4.3983, "eval_samples_per_second": 19.553, "eval_steps_per_second": 1.364, "step": 29 }, { "epoch": 1.0, "step": 29, "total_flos": 1557448359936.0, "train_loss": 2.5467823949353448, "train_runtime": 555.4959, "train_samples_per_second": 0.826, "train_steps_per_second": 0.052 } ], "max_steps": 29, "num_train_epochs": 1, "total_flos": 1557448359936.0, "trial_name": null, "trial_params": null }