{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 3e-05, "loss": 2.9219, "step": 1 }, { "epoch": 0.1, "eval_accuracy": 0.05291319857312723, "eval_loss": 2.6484375, "eval_runtime": 1.1592, "eval_samples_per_second": 25.016, "eval_steps_per_second": 1.725, "step": 1 }, { "epoch": 0.2, "learning_rate": 3e-05, "loss": 2.6938, "step": 2 }, { "epoch": 0.2, "eval_accuracy": 0.05291319857312723, "eval_loss": 2.6484375, "eval_runtime": 1.6595, "eval_samples_per_second": 17.475, "eval_steps_per_second": 1.205, "step": 2 }, { "epoch": 0.3, "learning_rate": 2.99178284305241e-05, "loss": 2.6365, "step": 3 }, { "epoch": 0.3, "eval_accuracy": 0.05601796802748051, "eval_loss": 2.55078125, "eval_runtime": 1.2012, "eval_samples_per_second": 24.142, "eval_steps_per_second": 1.665, "step": 3 }, { "epoch": 0.4, "learning_rate": 2.9672214011007087e-05, "loss": 2.5088, "step": 4 }, { "epoch": 0.4, "eval_accuracy": 0.05615008587660193, "eval_loss": 2.533203125, "eval_runtime": 0.9032, "eval_samples_per_second": 32.109, "eval_steps_per_second": 2.214, "step": 4 }, { "epoch": 0.5, "learning_rate": 2.9265847744427305e-05, "loss": 2.7307, "step": 5 }, { "epoch": 0.5, "eval_accuracy": 0.05654643942396618, "eval_loss": 2.517578125, "eval_runtime": 1.6122, "eval_samples_per_second": 17.988, "eval_steps_per_second": 1.241, "step": 5 }, { "epoch": 0.6, "learning_rate": 2.8703181864639013e-05, "loss": 2.969, "step": 6 }, { "epoch": 0.6, "eval_accuracy": 0.05714096974501255, "eval_loss": 2.494140625, "eval_runtime": 1.5031, "eval_samples_per_second": 19.294, "eval_steps_per_second": 1.331, "step": 6 }, { "epoch": 0.7, "learning_rate": 2.7990381056766583e-05, "loss": 2.7283, "step": 7 }, { "epoch": 0.7, "eval_accuracy": 0.056744616197648305, "eval_loss": 2.48828125, "eval_runtime": 0.999, "eval_samples_per_second": 29.03, "eval_steps_per_second": 2.002, "step": 7 }, { "epoch": 0.8, "learning_rate": 2.7135254915624213e-05, "loss": 2.6157, "step": 8 }, { "epoch": 0.8, "eval_accuracy": 0.05780155899061963, "eval_loss": 2.4765625, "eval_runtime": 0.9999, "eval_samples_per_second": 29.003, "eval_steps_per_second": 2.0, "step": 8 }, { "epoch": 0.9, "learning_rate": 2.6147172382160913e-05, "loss": 2.6406, "step": 9 }, { "epoch": 0.9, "eval_accuracy": 0.0583300303871053, "eval_loss": 2.458984375, "eval_runtime": 1.206, "eval_samples_per_second": 24.046, "eval_steps_per_second": 1.658, "step": 9 }, { "epoch": 1.0, "learning_rate": 2.5036959095382875e-05, "loss": 2.5701, "step": 10 }, { "epoch": 1.0, "eval_accuracy": 0.05872638393446954, "eval_loss": 2.4375, "eval_runtime": 1.0035, "eval_samples_per_second": 28.898, "eval_steps_per_second": 1.993, "step": 10 }, { "epoch": 1.1, "learning_rate": 2.3816778784387097e-05, "loss": 2.2017, "step": 11 }, { "epoch": 1.1, "eval_accuracy": 0.05866032500990884, "eval_loss": 2.423828125, "eval_runtime": 1.1973, "eval_samples_per_second": 24.222, "eval_steps_per_second": 1.67, "step": 11 }, { "epoch": 1.2, "learning_rate": 2.25e-05, "loss": 2.0039, "step": 12 }, { "epoch": 1.2, "eval_accuracy": 0.05859426608534813, "eval_loss": 2.421875, "eval_runtime": 1.009, "eval_samples_per_second": 28.742, "eval_steps_per_second": 1.982, "step": 12 }, { "epoch": 1.3, "learning_rate": 2.1101049646137008e-05, "loss": 1.8981, "step": 13 }, { "epoch": 1.3, "eval_accuracy": 0.058858501783590964, "eval_loss": 2.416015625, "eval_runtime": 1.2002, "eval_samples_per_second": 24.163, "eval_steps_per_second": 1.666, "step": 13 }, { "epoch": 1.4, "learning_rate": 1.963525491562421e-05, "loss": 1.7683, "step": 14 }, { "epoch": 1.4, "eval_accuracy": 0.059453032104637336, "eval_loss": 2.416015625, "eval_runtime": 1.0984, "eval_samples_per_second": 26.402, "eval_steps_per_second": 1.821, "step": 14 }, { "epoch": 1.5, "learning_rate": 1.8118675362266388e-05, "loss": 1.6746, "step": 15 }, { "epoch": 1.5, "eval_accuracy": 0.059981503501123, "eval_loss": 2.412109375, "eval_runtime": 0.8904, "eval_samples_per_second": 32.57, "eval_steps_per_second": 2.246, "step": 15 }, { "epoch": 1.6, "learning_rate": 1.6567926949014805e-05, "loss": 1.8051, "step": 16 }, { "epoch": 1.6, "eval_accuracy": 0.06004756242568371, "eval_loss": 2.41015625, "eval_runtime": 1.1118, "eval_samples_per_second": 26.085, "eval_steps_per_second": 1.799, "step": 16 }, { "epoch": 1.7, "learning_rate": 1.5e-05, "loss": 2.0457, "step": 17 }, { "epoch": 1.7, "eval_accuracy": 0.06017968027480513, "eval_loss": 2.404296875, "eval_runtime": 1.1017, "eval_samples_per_second": 26.322, "eval_steps_per_second": 1.815, "step": 17 }, { "epoch": 1.8, "learning_rate": 1.3432073050985201e-05, "loss": 1.8257, "step": 18 }, { "epoch": 1.8, "eval_accuracy": 0.060576033822169376, "eval_loss": 2.400390625, "eval_runtime": 1.5124, "eval_samples_per_second": 19.175, "eval_steps_per_second": 1.322, "step": 18 }, { "epoch": 1.9, "learning_rate": 1.1881324637733613e-05, "loss": 1.744, "step": 19 }, { "epoch": 1.9, "eval_accuracy": 0.06070815167129079, "eval_loss": 2.388671875, "eval_runtime": 0.9026, "eval_samples_per_second": 32.129, "eval_steps_per_second": 2.216, "step": 19 }, { "epoch": 2.0, "learning_rate": 1.1881324637733613e-05, "loss": 1.8232, "step": 20 }, { "epoch": 2.0, "eval_accuracy": 0.06070815167129079, "eval_loss": 2.388671875, "eval_runtime": 1.3479, "eval_samples_per_second": 21.515, "eval_steps_per_second": 1.484, "step": 20 }, { "epoch": 2.1, "learning_rate": 1.036474508437579e-05, "loss": 1.4741, "step": 21 }, { "epoch": 2.1, "eval_accuracy": 0.06097238736953362, "eval_loss": 2.3828125, "eval_runtime": 1.1016, "eval_samples_per_second": 26.326, "eval_steps_per_second": 1.816, "step": 21 }, { "epoch": 2.2, "learning_rate": 8.898950353863e-06, "loss": 1.651, "step": 22 }, { "epoch": 2.2, "eval_accuracy": 0.06084026952041221, "eval_loss": 2.376953125, "eval_runtime": 1.0964, "eval_samples_per_second": 26.45, "eval_steps_per_second": 1.824, "step": 22 }, { "epoch": 2.3, "learning_rate": 7.500000000000004e-06, "loss": 1.3732, "step": 23 }, { "epoch": 2.3, "eval_accuracy": 0.06097238736953362, "eval_loss": 2.373046875, "eval_runtime": 1.1007, "eval_samples_per_second": 26.348, "eval_steps_per_second": 1.817, "step": 23 }, { "epoch": 2.4, "learning_rate": 6.1832212156129045e-06, "loss": 1.3151, "step": 24 }, { "epoch": 2.4, "eval_accuracy": 0.061038446294094335, "eval_loss": 2.373046875, "eval_runtime": 1.0081, "eval_samples_per_second": 28.767, "eval_steps_per_second": 1.984, "step": 24 }, { "epoch": 2.5, "learning_rate": 4.963040904617131e-06, "loss": 1.5302, "step": 25 }, { "epoch": 2.5, "eval_accuracy": 0.061038446294094335, "eval_loss": 2.373046875, "eval_runtime": 1.2116, "eval_samples_per_second": 23.936, "eval_steps_per_second": 1.651, "step": 25 }, { "epoch": 2.6, "learning_rate": 3.852827617839085e-06, "loss": 1.2539, "step": 26 }, { "epoch": 2.6, "eval_accuracy": 0.06117056414321575, "eval_loss": 2.375, "eval_runtime": 1.4027, "eval_samples_per_second": 20.674, "eval_steps_per_second": 1.426, "step": 26 }, { "epoch": 2.7, "learning_rate": 2.86474508437579e-06, "loss": 1.6211, "step": 27 }, { "epoch": 2.7, "eval_accuracy": 0.061236623067776455, "eval_loss": 2.376953125, "eval_runtime": 0.9053, "eval_samples_per_second": 32.035, "eval_steps_per_second": 2.209, "step": 27 }, { "epoch": 2.8, "learning_rate": 2.0096189432334194e-06, "loss": 1.6047, "step": 28 }, { "epoch": 2.8, "eval_accuracy": 0.06130268199233716, "eval_loss": 2.376953125, "eval_runtime": 1.0962, "eval_samples_per_second": 26.456, "eval_steps_per_second": 1.825, "step": 28 }, { "epoch": 2.9, "learning_rate": 1.2968181353609854e-06, "loss": 1.1953, "step": 29 }, { "epoch": 2.9, "eval_accuracy": 0.06143479984145858, "eval_loss": 2.37890625, "eval_runtime": 1.52, "eval_samples_per_second": 19.079, "eval_steps_per_second": 1.316, "step": 29 }, { "epoch": 3.0, "learning_rate": 7.341522555726971e-07, "loss": 1.1621, "step": 30 }, { "epoch": 3.0, "eval_accuracy": 0.06143479984145858, "eval_loss": 2.37890625, "eval_runtime": 1.3108, "eval_samples_per_second": 22.124, "eval_steps_per_second": 1.526, "step": 30 }, { "epoch": 3.0, "step": 30, "total_flos": 1466265894912.0, "train_loss": 1.999542236328125, "train_runtime": 244.9371, "train_samples_per_second": 1.911, "train_steps_per_second": 0.122 } ], "max_steps": 30, "num_train_epochs": 3, "total_flos": 1466265894912.0, "trial_name": null, "trial_params": null }