{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 58, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 3e-05, "loss": 2.6543, "step": 1 }, { "epoch": 0.03, "eval_accuracy": 0.05133992343294669, "eval_loss": 2.611328125, "eval_runtime": 4.6158, "eval_samples_per_second": 18.632, "eval_steps_per_second": 1.3, "step": 1 }, { "epoch": 0.07, "learning_rate": 3e-05, "loss": 2.6077, "step": 2 }, { "epoch": 0.07, "eval_accuracy": 0.05133992343294669, "eval_loss": 2.611328125, "eval_runtime": 4.3958, "eval_samples_per_second": 19.564, "eval_steps_per_second": 1.365, "step": 2 }, { "epoch": 0.1, "learning_rate": 2.9978001207766858e-05, "loss": 2.5964, "step": 3 }, { "epoch": 0.1, "eval_accuracy": 0.05192560425118565, "eval_loss": 2.560546875, "eval_runtime": 4.3633, "eval_samples_per_second": 19.71, "eval_steps_per_second": 1.375, "step": 3 }, { "epoch": 0.14, "learning_rate": 2.9912069357315394e-05, "loss": 2.7302, "step": 4 }, { "epoch": 0.14, "eval_accuracy": 0.052668418947488714, "eval_loss": 2.5234375, "eval_runtime": 3.6387, "eval_samples_per_second": 23.635, "eval_steps_per_second": 1.649, "step": 4 }, { "epoch": 0.17, "learning_rate": 2.9802397838122895e-05, "loss": 2.7002, "step": 5 }, { "epoch": 0.17, "eval_accuracy": 0.05286840751957031, "eval_loss": 2.5078125, "eval_runtime": 4.3812, "eval_samples_per_second": 19.629, "eval_steps_per_second": 1.369, "step": 5 }, { "epoch": 0.21, "learning_rate": 2.96493083356513e-05, "loss": 2.5674, "step": 6 }, { "epoch": 0.21, "eval_accuracy": 0.0532683846637335, "eval_loss": 2.494140625, "eval_runtime": 4.3925, "eval_samples_per_second": 19.579, "eval_steps_per_second": 1.366, "step": 6 }, { "epoch": 0.24, "learning_rate": 2.9453249887788343e-05, "loss": 2.6399, "step": 7 }, { "epoch": 0.24, "eval_accuracy": 0.05339694874578595, "eval_loss": 2.48828125, "eval_runtime": 4.3565, "eval_samples_per_second": 19.741, "eval_steps_per_second": 1.377, "step": 7 }, { "epoch": 0.28, "learning_rate": 2.9214797567742036e-05, "loss": 2.533, "step": 8 }, { "epoch": 0.28, "eval_accuracy": 0.05362550711387921, "eval_loss": 2.48046875, "eval_runtime": 4.3451, "eval_samples_per_second": 19.792, "eval_steps_per_second": 1.381, "step": 8 }, { "epoch": 0.31, "learning_rate": 2.8934650797251875e-05, "loss": 2.7202, "step": 9 }, { "epoch": 0.31, "eval_accuracy": 0.05358265241986172, "eval_loss": 2.474609375, "eval_runtime": 4.3538, "eval_samples_per_second": 19.753, "eval_steps_per_second": 1.378, "step": 9 }, { "epoch": 0.34, "learning_rate": 2.861363129506436e-05, "loss": 2.5137, "step": 10 }, { "epoch": 0.34, "eval_accuracy": 0.0533683789497743, "eval_loss": 2.46484375, "eval_runtime": 4.2706, "eval_samples_per_second": 20.138, "eval_steps_per_second": 1.405, "step": 10 }, { "epoch": 0.38, "learning_rate": 2.8252680666690346e-05, "loss": 2.499, "step": 11 }, { "epoch": 0.38, "eval_accuracy": 0.053554082623850065, "eval_loss": 2.451171875, "eval_runtime": 3.643, "eval_samples_per_second": 23.607, "eval_steps_per_second": 1.647, "step": 11 }, { "epoch": 0.41, "learning_rate": 2.7852857642513838e-05, "loss": 2.7026, "step": 12 }, { "epoch": 0.41, "eval_accuracy": 0.05393977487000743, "eval_loss": 2.44140625, "eval_runtime": 4.3592, "eval_samples_per_second": 19.729, "eval_steps_per_second": 1.376, "step": 12 }, { "epoch": 0.45, "learning_rate": 2.741533497235336e-05, "loss": 2.5254, "step": 13 }, { "epoch": 0.45, "eval_accuracy": 0.05431118221815896, "eval_loss": 2.43359375, "eval_runtime": 4.3496, "eval_samples_per_second": 19.772, "eval_steps_per_second": 1.379, "step": 13 }, { "epoch": 0.48, "learning_rate": 2.6941395985584656e-05, "loss": 2.5667, "step": 14 }, { "epoch": 0.48, "eval_accuracy": 0.05453974058625222, "eval_loss": 2.423828125, "eval_runtime": 3.9428, "eval_samples_per_second": 21.812, "eval_steps_per_second": 1.522, "step": 14 }, { "epoch": 0.52, "learning_rate": 2.643243082691455e-05, "loss": 2.5715, "step": 15 }, { "epoch": 0.52, "eval_accuracy": 0.054768298954345464, "eval_loss": 2.416015625, "eval_runtime": 4.3566, "eval_samples_per_second": 19.74, "eval_steps_per_second": 1.377, "step": 15 }, { "epoch": 0.55, "learning_rate": 2.5889932378846963e-05, "loss": 2.3739, "step": 16 }, { "epoch": 0.55, "eval_accuracy": 0.05499685732243872, "eval_loss": 2.41015625, "eval_runtime": 3.9638, "eval_samples_per_second": 21.696, "eval_steps_per_second": 1.514, "step": 16 }, { "epoch": 0.59, "learning_rate": 2.531549188280135e-05, "loss": 2.4756, "step": 17 }, { "epoch": 0.59, "eval_accuracy": 0.054896863036397923, "eval_loss": 2.404296875, "eval_runtime": 3.6572, "eval_samples_per_second": 23.515, "eval_steps_per_second": 1.641, "step": 17 }, { "epoch": 0.62, "learning_rate": 2.4710794271727415e-05, "loss": 2.4783, "step": 18 }, { "epoch": 0.62, "eval_accuracy": 0.05496828752642706, "eval_loss": 2.3984375, "eval_runtime": 4.3549, "eval_samples_per_second": 19.748, "eval_steps_per_second": 1.378, "step": 18 }, { "epoch": 0.66, "learning_rate": 2.407761322790648e-05, "loss": 2.5665, "step": 19 }, { "epoch": 0.66, "eval_accuracy": 0.05488257813839209, "eval_loss": 2.390625, "eval_runtime": 4.3461, "eval_samples_per_second": 19.788, "eval_steps_per_second": 1.381, "step": 19 }, { "epoch": 0.69, "learning_rate": 2.407761322790648e-05, "loss": 2.4888, "step": 20 }, { "epoch": 0.69, "eval_accuracy": 0.05488257813839209, "eval_loss": 2.390625, "eval_runtime": 4.3835, "eval_samples_per_second": 19.619, "eval_steps_per_second": 1.369, "step": 20 }, { "epoch": 0.72, "learning_rate": 2.341780598043574e-05, "loss": 2.4476, "step": 21 }, { "epoch": 0.72, "eval_accuracy": 0.05499685732243872, "eval_loss": 2.3828125, "eval_runtime": 4.2591, "eval_samples_per_second": 20.192, "eval_steps_per_second": 1.409, "step": 21 }, { "epoch": 0.76, "learning_rate": 2.2733307857655327e-05, "loss": 2.604, "step": 22 }, { "epoch": 0.76, "eval_accuracy": 0.05521113079252614, "eval_loss": 2.375, "eval_runtime": 4.354, "eval_samples_per_second": 19.752, "eval_steps_per_second": 1.378, "step": 22 }, { "epoch": 0.79, "learning_rate": 2.2026126610496852e-05, "loss": 2.3416, "step": 23 }, { "epoch": 0.79, "eval_accuracy": 0.05539683446660191, "eval_loss": 2.365234375, "eval_runtime": 4.3608, "eval_samples_per_second": 19.721, "eval_steps_per_second": 1.376, "step": 23 }, { "epoch": 0.83, "learning_rate": 2.129833652340397e-05, "loss": 2.6028, "step": 24 }, { "epoch": 0.83, "eval_accuracy": 0.05546825895663105, "eval_loss": 2.35546875, "eval_runtime": 4.354, "eval_samples_per_second": 19.752, "eval_steps_per_second": 1.378, "step": 24 }, { "epoch": 0.86, "learning_rate": 2.055207233009872e-05, "loss": 2.3425, "step": 25 }, { "epoch": 0.86, "eval_accuracy": 0.055753956916747616, "eval_loss": 2.34765625, "eval_runtime": 3.9522, "eval_samples_per_second": 21.76, "eval_steps_per_second": 1.518, "step": 25 }, { "epoch": 0.9, "learning_rate": 1.9789522952039697e-05, "loss": 2.4142, "step": 26 }, { "epoch": 0.9, "eval_accuracy": 0.055811096508770926, "eval_loss": 2.33984375, "eval_runtime": 4.3577, "eval_samples_per_second": 19.735, "eval_steps_per_second": 1.377, "step": 26 }, { "epoch": 0.93, "learning_rate": 1.9012925077938318e-05, "loss": 2.5317, "step": 27 }, { "epoch": 0.93, "eval_accuracy": 0.05592537569281755, "eval_loss": 2.333984375, "eval_runtime": 3.95, "eval_samples_per_second": 21.772, "eval_steps_per_second": 1.519, "step": 27 }, { "epoch": 0.97, "learning_rate": 1.8224556603165363e-05, "loss": 2.4119, "step": 28 }, { "epoch": 0.97, "eval_accuracy": 0.05612536426489915, "eval_loss": 2.330078125, "eval_runtime": 3.959, "eval_samples_per_second": 21.723, "eval_steps_per_second": 1.516, "step": 28 }, { "epoch": 1.0, "learning_rate": 1.7426729948291474e-05, "loss": 2.4048, "step": 29 }, { "epoch": 1.0, "eval_accuracy": 0.0562539283469516, "eval_loss": 2.326171875, "eval_runtime": 4.3642, "eval_samples_per_second": 19.706, "eval_steps_per_second": 1.375, "step": 29 }, { "epoch": 1.03, "learning_rate": 1.662178527635913e-05, "loss": 1.9646, "step": 30 }, { "epoch": 1.03, "eval_accuracy": 0.0563539226329924, "eval_loss": 2.32421875, "eval_runtime": 4.3686, "eval_samples_per_second": 19.686, "eval_steps_per_second": 1.373, "step": 30 }, { "epoch": 1.07, "learning_rate": 1.5812083628781265e-05, "loss": 1.9233, "step": 31 }, { "epoch": 1.07, "eval_accuracy": 0.056296783040969085, "eval_loss": 2.3203125, "eval_runtime": 4.3634, "eval_samples_per_second": 19.709, "eval_steps_per_second": 1.375, "step": 31 }, { "epoch": 1.1, "learning_rate": 1.5e-05, "loss": 1.9276, "step": 32 }, { "epoch": 1.1, "eval_accuracy": 0.0563539226329924, "eval_loss": 2.3203125, "eval_runtime": 3.6556, "eval_samples_per_second": 23.526, "eval_steps_per_second": 1.641, "step": 32 }, { "epoch": 1.14, "learning_rate": 1.4187916371218739e-05, "loss": 1.8702, "step": 33 }, { "epoch": 1.14, "eval_accuracy": 0.05649677161305068, "eval_loss": 2.328125, "eval_runtime": 3.6562, "eval_samples_per_second": 23.522, "eval_steps_per_second": 1.641, "step": 33 }, { "epoch": 1.17, "learning_rate": 1.3378214723640877e-05, "loss": 2.0997, "step": 34 }, { "epoch": 1.17, "eval_accuracy": 0.05649677161305068, "eval_loss": 2.333984375, "eval_runtime": 3.9689, "eval_samples_per_second": 21.668, "eval_steps_per_second": 1.512, "step": 34 }, { "epoch": 1.21, "learning_rate": 1.2573270051708529e-05, "loss": 1.7943, "step": 35 }, { "epoch": 1.21, "eval_accuracy": 0.05676818467516142, "eval_loss": 2.33203125, "eval_runtime": 4.3641, "eval_samples_per_second": 19.706, "eval_steps_per_second": 1.375, "step": 35 }, { "epoch": 1.24, "learning_rate": 1.177544339683464e-05, "loss": 1.8579, "step": 36 }, { "epoch": 1.24, "eval_accuracy": 0.056668190389120625, "eval_loss": 2.32421875, "eval_runtime": 3.3714, "eval_samples_per_second": 25.509, "eval_steps_per_second": 1.78, "step": 36 }, { "epoch": 1.28, "learning_rate": 1.098707492206169e-05, "loss": 1.8844, "step": 37 }, { "epoch": 1.28, "eval_accuracy": 0.05676818467516142, "eval_loss": 2.314453125, "eval_runtime": 4.3578, "eval_samples_per_second": 19.735, "eval_steps_per_second": 1.377, "step": 37 }, { "epoch": 1.31, "learning_rate": 1.0210477047960303e-05, "loss": 1.9288, "step": 38 }, { "epoch": 1.31, "eval_accuracy": 0.05689674875721387, "eval_loss": 2.30859375, "eval_runtime": 3.3527, "eval_samples_per_second": 25.651, "eval_steps_per_second": 1.79, "step": 38 }, { "epoch": 1.34, "learning_rate": 9.447927669901284e-06, "loss": 1.6616, "step": 39 }, { "epoch": 1.34, "eval_accuracy": 0.057025312839266325, "eval_loss": 2.3046875, "eval_runtime": 4.3576, "eval_samples_per_second": 19.736, "eval_steps_per_second": 1.377, "step": 39 }, { "epoch": 1.38, "learning_rate": 8.701663476596031e-06, "loss": 1.6443, "step": 40 }, { "epoch": 1.38, "eval_accuracy": 0.05705388263527798, "eval_loss": 2.3046875, "eval_runtime": 4.3425, "eval_samples_per_second": 19.804, "eval_steps_per_second": 1.382, "step": 40 }, { "epoch": 1.41, "learning_rate": 7.97387338950315e-06, "loss": 1.7616, "step": 41 }, { "epoch": 1.41, "eval_accuracy": 0.057182446717330436, "eval_loss": 2.302734375, "eval_runtime": 4.3701, "eval_samples_per_second": 19.679, "eval_steps_per_second": 1.373, "step": 41 }, { "epoch": 1.45, "learning_rate": 7.266692142344673e-06, "loss": 1.7904, "step": 42 }, { "epoch": 1.45, "eval_accuracy": 0.0571110222273013, "eval_loss": 2.302734375, "eval_runtime": 3.9729, "eval_samples_per_second": 21.647, "eval_steps_per_second": 1.51, "step": 42 }, { "epoch": 1.48, "learning_rate": 6.582194019564266e-06, "loss": 1.8762, "step": 43 }, { "epoch": 1.48, "eval_accuracy": 0.05729672590137706, "eval_loss": 2.302734375, "eval_runtime": 4.3645, "eval_samples_per_second": 19.704, "eval_steps_per_second": 1.375, "step": 43 }, { "epoch": 1.52, "learning_rate": 5.922386772093526e-06, "loss": 1.6569, "step": 44 }, { "epoch": 1.52, "eval_accuracy": 0.05725387120735958, "eval_loss": 2.302734375, "eval_runtime": 4.4596, "eval_samples_per_second": 19.284, "eval_steps_per_second": 1.345, "step": 44 }, { "epoch": 1.55, "learning_rate": 5.289205728272587e-06, "loss": 1.647, "step": 45 }, { "epoch": 1.55, "eval_accuracy": 0.05733958059539455, "eval_loss": 2.302734375, "eval_runtime": 4.3534, "eval_samples_per_second": 19.755, "eval_steps_per_second": 1.378, "step": 45 }, { "epoch": 1.59, "learning_rate": 4.684508117198649e-06, "loss": 1.8168, "step": 46 }, { "epoch": 1.59, "eval_accuracy": 0.057425289983429516, "eval_loss": 2.302734375, "eval_runtime": 3.9668, "eval_samples_per_second": 21.68, "eval_steps_per_second": 1.513, "step": 46 }, { "epoch": 1.62, "learning_rate": 4.110067621153041e-06, "loss": 1.7194, "step": 47 }, { "epoch": 1.62, "eval_accuracy": 0.057311010799382896, "eval_loss": 2.302734375, "eval_runtime": 4.3648, "eval_samples_per_second": 19.703, "eval_steps_per_second": 1.375, "step": 47 }, { "epoch": 1.66, "learning_rate": 3.567569173085455e-06, "loss": 1.7667, "step": 48 }, { "epoch": 1.66, "eval_accuracy": 0.05722530141134792, "eval_loss": 2.302734375, "eval_runtime": 4.3629, "eval_samples_per_second": 19.712, "eval_steps_per_second": 1.375, "step": 48 }, { "epoch": 1.69, "learning_rate": 3.0586040144153436e-06, "loss": 1.7621, "step": 49 }, { "epoch": 1.69, "eval_accuracy": 0.05729672590137706, "eval_loss": 2.302734375, "eval_runtime": 4.3603, "eval_samples_per_second": 19.723, "eval_steps_per_second": 1.376, "step": 49 }, { "epoch": 1.72, "learning_rate": 2.5846650276466435e-06, "loss": 1.7269, "step": 50 }, { "epoch": 1.72, "eval_accuracy": 0.05733958059539455, "eval_loss": 2.30078125, "eval_runtime": 3.6679, "eval_samples_per_second": 23.447, "eval_steps_per_second": 1.636, "step": 50 }, { "epoch": 1.76, "learning_rate": 2.1471423574861643e-06, "loss": 1.7815, "step": 51 }, { "epoch": 1.76, "eval_accuracy": 0.057396720187417864, "eval_loss": 2.30078125, "eval_runtime": 4.3528, "eval_samples_per_second": 19.757, "eval_steps_per_second": 1.378, "step": 51 }, { "epoch": 1.79, "learning_rate": 1.7473193333096576e-06, "loss": 1.8318, "step": 52 }, { "epoch": 1.79, "eval_accuracy": 0.057396720187417864, "eval_loss": 2.298828125, "eval_runtime": 4.3561, "eval_samples_per_second": 19.743, "eval_steps_per_second": 1.377, "step": 52 }, { "epoch": 1.83, "learning_rate": 1.3863687049356465e-06, "loss": 1.9366, "step": 53 }, { "epoch": 1.83, "eval_accuracy": 0.057468144677447, "eval_loss": 2.298828125, "eval_runtime": 4.3614, "eval_samples_per_second": 19.719, "eval_steps_per_second": 1.376, "step": 53 }, { "epoch": 1.86, "learning_rate": 1.0653492027481287e-06, "loss": 1.736, "step": 54 }, { "epoch": 1.86, "eval_accuracy": 0.057553854065481976, "eval_loss": 2.296875, "eval_runtime": 4.3622, "eval_samples_per_second": 19.715, "eval_steps_per_second": 1.375, "step": 54 }, { "epoch": 1.9, "learning_rate": 7.852024322579649e-07, "loss": 1.9984, "step": 55 }, { "epoch": 1.9, "eval_accuracy": 0.05753956916747614, "eval_loss": 2.296875, "eval_runtime": 4.3417, "eval_samples_per_second": 19.808, "eval_steps_per_second": 1.382, "step": 55 }, { "epoch": 1.93, "learning_rate": 5.467501122116564e-07, "loss": 1.7203, "step": 56 }, { "epoch": 1.93, "eval_accuracy": 0.05753956916747614, "eval_loss": 2.294921875, "eval_runtime": 3.6633, "eval_samples_per_second": 23.476, "eval_steps_per_second": 1.638, "step": 56 }, { "epoch": 1.97, "learning_rate": 3.5069166434870014e-07, "loss": 1.7391, "step": 57 }, { "epoch": 1.97, "eval_accuracy": 0.0575681389634878, "eval_loss": 2.294921875, "eval_runtime": 4.3729, "eval_samples_per_second": 19.667, "eval_steps_per_second": 1.372, "step": 57 }, { "epoch": 2.0, "learning_rate": 1.9760216187710788e-07, "loss": 1.6611, "step": 58 }, { "epoch": 2.0, "eval_accuracy": 0.057553854065481976, "eval_loss": 2.294921875, "eval_runtime": 3.3519, "eval_samples_per_second": 25.657, "eval_steps_per_second": 1.79, "step": 58 }, { "epoch": 2.0, "step": 58, "total_flos": 3170519875584.0, "train_loss": 2.1741048878636855, "train_runtime": 774.6069, "train_samples_per_second": 1.185, "train_steps_per_second": 0.075 } ], "max_steps": 58, "num_train_epochs": 2, "total_flos": 3170519875584.0, "trial_name": null, "trial_params": null }