{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 3e-05, "loss": 2.17, "step": 1 }, { "epoch": 0.07, "eval_accuracy": 0.06213930632535284, "eval_loss": 2.0546875, "eval_runtime": 3.2113, "eval_samples_per_second": 26.781, "eval_steps_per_second": 0.934, "step": 1 }, { "epoch": 0.13, "learning_rate": 3e-05, "loss": 2.1814, "step": 2 }, { "epoch": 0.13, "eval_accuracy": 0.06213930632535284, "eval_loss": 2.0546875, "eval_runtime": 3.0319, "eval_samples_per_second": 28.365, "eval_steps_per_second": 0.989, "step": 2 }, { "epoch": 0.2, "learning_rate": 2.99178284305241e-05, "loss": 2.0963, "step": 3 }, { "epoch": 0.2, "eval_accuracy": 0.06251071367350437, "eval_loss": 2.0234375, "eval_runtime": 3.4167, "eval_samples_per_second": 25.17, "eval_steps_per_second": 0.878, "step": 3 }, { "epoch": 0.27, "learning_rate": 2.9672214011007087e-05, "loss": 2.1383, "step": 4 }, { "epoch": 0.27, "eval_accuracy": 0.062453574081481056, "eval_loss": 2.01953125, "eval_runtime": 3.4275, "eval_samples_per_second": 25.091, "eval_steps_per_second": 0.875, "step": 4 }, { "epoch": 0.33, "learning_rate": 2.9265847744427305e-05, "loss": 2.1625, "step": 5 }, { "epoch": 0.33, "eval_accuracy": 0.062453574081481056, "eval_loss": 2.01953125, "eval_runtime": 3.4165, "eval_samples_per_second": 25.172, "eval_steps_per_second": 0.878, "step": 5 }, { "epoch": 0.4, "learning_rate": 2.8703181864639013e-05, "loss": 2.1808, "step": 6 }, { "epoch": 0.4, "eval_accuracy": 0.06241071938746357, "eval_loss": 2.015625, "eval_runtime": 3.0274, "eval_samples_per_second": 28.408, "eval_steps_per_second": 0.991, "step": 6 }, { "epoch": 0.47, "learning_rate": 2.7990381056766583e-05, "loss": 2.1587, "step": 7 }, { "epoch": 0.47, "eval_accuracy": 0.06255356836752185, "eval_loss": 2.017578125, "eval_runtime": 3.4415, "eval_samples_per_second": 24.989, "eval_steps_per_second": 0.872, "step": 7 }, { "epoch": 0.53, "learning_rate": 2.7135254915624213e-05, "loss": 2.0847, "step": 8 }, { "epoch": 0.53, "eval_accuracy": 0.06273927204159763, "eval_loss": 2.013671875, "eval_runtime": 3.4339, "eval_samples_per_second": 25.045, "eval_steps_per_second": 0.874, "step": 8 }, { "epoch": 0.6, "learning_rate": 2.7135254915624213e-05, "loss": 2.0336, "step": 9 }, { "epoch": 0.6, "eval_accuracy": 0.06273927204159763, "eval_loss": 2.013671875, "eval_runtime": 3.4267, "eval_samples_per_second": 25.097, "eval_steps_per_second": 0.875, "step": 9 }, { "epoch": 0.67, "learning_rate": 2.6147172382160913e-05, "loss": 2.1777, "step": 10 }, { "epoch": 0.67, "eval_accuracy": 0.06286783612365007, "eval_loss": 2.005859375, "eval_runtime": 3.4314, "eval_samples_per_second": 25.062, "eval_steps_per_second": 0.874, "step": 10 }, { "epoch": 0.73, "learning_rate": 2.5036959095382875e-05, "loss": 2.2034, "step": 11 }, { "epoch": 0.73, "eval_accuracy": 0.06299640020570253, "eval_loss": 2.0, "eval_runtime": 3.0112, "eval_samples_per_second": 28.56, "eval_steps_per_second": 0.996, "step": 11 }, { "epoch": 0.8, "learning_rate": 2.3816778784387097e-05, "loss": 2.1665, "step": 12 }, { "epoch": 0.8, "eval_accuracy": 0.06283926632763842, "eval_loss": 1.994140625, "eval_runtime": 2.8048, "eval_samples_per_second": 30.661, "eval_steps_per_second": 1.07, "step": 12 }, { "epoch": 0.87, "learning_rate": 2.25e-05, "loss": 2.0352, "step": 13 }, { "epoch": 0.87, "eval_accuracy": 0.0628821210216559, "eval_loss": 1.98828125, "eval_runtime": 2.7144, "eval_samples_per_second": 31.682, "eval_steps_per_second": 1.105, "step": 13 }, { "epoch": 0.93, "learning_rate": 2.1101049646137008e-05, "loss": 2.1263, "step": 14 }, { "epoch": 0.93, "eval_accuracy": 0.06281069653162677, "eval_loss": 1.9833984375, "eval_runtime": 3.4127, "eval_samples_per_second": 25.2, "eval_steps_per_second": 0.879, "step": 14 }, { "epoch": 1.0, "learning_rate": 1.963525491562421e-05, "loss": 2.1282, "step": 15 }, { "epoch": 1.0, "eval_accuracy": 0.06315353408376664, "eval_loss": 1.978515625, "eval_runtime": 3.4158, "eval_samples_per_second": 25.177, "eval_steps_per_second": 0.878, "step": 15 }, { "epoch": 1.07, "learning_rate": 1.8118675362266388e-05, "loss": 1.7159, "step": 16 }, { "epoch": 1.07, "eval_accuracy": 0.0633392377578424, "eval_loss": 1.9765625, "eval_runtime": 3.3102, "eval_samples_per_second": 25.98, "eval_steps_per_second": 0.906, "step": 16 }, { "epoch": 1.13, "learning_rate": 1.6567926949014805e-05, "loss": 1.8346, "step": 17 }, { "epoch": 1.13, "eval_accuracy": 0.06351065653391234, "eval_loss": 1.9775390625, "eval_runtime": 2.7366, "eval_samples_per_second": 31.426, "eval_steps_per_second": 1.096, "step": 17 }, { "epoch": 1.2, "learning_rate": 1.5e-05, "loss": 1.7183, "step": 18 }, { "epoch": 1.2, "eval_accuracy": 0.06342494714587738, "eval_loss": 1.982421875, "eval_runtime": 3.4207, "eval_samples_per_second": 25.141, "eval_steps_per_second": 0.877, "step": 18 }, { "epoch": 1.27, "learning_rate": 1.3432073050985201e-05, "loss": 1.6086, "step": 19 }, { "epoch": 1.27, "eval_accuracy": 0.06346780183989487, "eval_loss": 1.98828125, "eval_runtime": 3.4214, "eval_samples_per_second": 25.136, "eval_steps_per_second": 0.877, "step": 19 }, { "epoch": 1.33, "learning_rate": 1.1881324637733613e-05, "loss": 1.6497, "step": 20 }, { "epoch": 1.33, "eval_accuracy": 0.0634392320438832, "eval_loss": 1.9892578125, "eval_runtime": 3.3147, "eval_samples_per_second": 25.945, "eval_steps_per_second": 0.905, "step": 20 }, { "epoch": 1.4, "learning_rate": 1.036474508437579e-05, "loss": 1.6267, "step": 21 }, { "epoch": 1.4, "eval_accuracy": 0.06368207530998228, "eval_loss": 1.9853515625, "eval_runtime": 2.7501, "eval_samples_per_second": 31.272, "eval_steps_per_second": 1.091, "step": 21 }, { "epoch": 1.47, "learning_rate": 8.898950353863e-06, "loss": 1.5962, "step": 22 }, { "epoch": 1.47, "eval_accuracy": 0.06368207530998228, "eval_loss": 1.9765625, "eval_runtime": 3.1007, "eval_samples_per_second": 27.736, "eval_steps_per_second": 0.968, "step": 22 }, { "epoch": 1.53, "learning_rate": 7.500000000000004e-06, "loss": 1.5168, "step": 23 }, { "epoch": 1.53, "eval_accuracy": 0.06372493000399977, "eval_loss": 1.9697265625, "eval_runtime": 3.0137, "eval_samples_per_second": 28.536, "eval_steps_per_second": 0.995, "step": 23 }, { "epoch": 1.6, "learning_rate": 6.1832212156129045e-06, "loss": 1.6213, "step": 24 }, { "epoch": 1.6, "eval_accuracy": 0.06372493000399977, "eval_loss": 1.9619140625, "eval_runtime": 3.4172, "eval_samples_per_second": 25.167, "eval_steps_per_second": 0.878, "step": 24 }, { "epoch": 1.67, "learning_rate": 4.963040904617131e-06, "loss": 1.4789, "step": 25 }, { "epoch": 1.67, "eval_accuracy": 0.06382492429004057, "eval_loss": 1.9580078125, "eval_runtime": 2.7014, "eval_samples_per_second": 31.836, "eval_steps_per_second": 1.111, "step": 25 }, { "epoch": 1.73, "learning_rate": 3.852827617839085e-06, "loss": 1.6796, "step": 26 }, { "epoch": 1.73, "eval_accuracy": 0.06378206959602309, "eval_loss": 1.955078125, "eval_runtime": 3.0318, "eval_samples_per_second": 28.366, "eval_steps_per_second": 0.99, "step": 26 }, { "epoch": 1.8, "learning_rate": 2.86474508437579e-06, "loss": 1.5964, "step": 27 }, { "epoch": 1.8, "eval_accuracy": 0.06383920918804639, "eval_loss": 1.953125, "eval_runtime": 3.4212, "eval_samples_per_second": 25.137, "eval_steps_per_second": 0.877, "step": 27 }, { "epoch": 1.87, "learning_rate": 2.0096189432334194e-06, "loss": 1.787, "step": 28 }, { "epoch": 1.87, "eval_accuracy": 0.0639392034740872, "eval_loss": 1.951171875, "eval_runtime": 3.4776, "eval_samples_per_second": 24.729, "eval_steps_per_second": 0.863, "step": 28 }, { "epoch": 1.93, "learning_rate": 1.2968181353609854e-06, "loss": 1.6536, "step": 29 }, { "epoch": 1.93, "eval_accuracy": 0.0639963430661105, "eval_loss": 1.94921875, "eval_runtime": 3.3251, "eval_samples_per_second": 25.864, "eval_steps_per_second": 0.902, "step": 29 }, { "epoch": 2.0, "learning_rate": 7.341522555726971e-07, "loss": 1.7178, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.06395348837209303, "eval_loss": 1.9482421875, "eval_runtime": 3.013, "eval_samples_per_second": 28.543, "eval_steps_per_second": 0.996, "step": 30 }, { "epoch": 2.0, "step": 30, "total_flos": 3114896719872.0, "train_loss": 1.8948323567708334, "train_runtime": 176.8477, "train_samples_per_second": 5.191, "train_steps_per_second": 0.17 } ], "max_steps": 30, "num_train_epochs": 2, "total_flos": 3114896719872.0, "trial_name": null, "trial_params": null }