{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 21600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 9.953703703703704e-05, "loss": 1.1352, "step": 100 }, { "epoch": 0.02, "eval_accuracy": 0.7482928037643433, "eval_loss": 0.6091228127479553, "eval_runtime": 1960.8351, "eval_samples_per_second": 17.625, "eval_steps_per_second": 4.406, "step": 100 }, { "epoch": 0.05, "learning_rate": 9.907407407407407e-05, "loss": 0.5497, "step": 200 }, { "epoch": 0.05, "eval_accuracy": 0.9043981432914734, "eval_loss": 0.2794453501701355, "eval_runtime": 2070.7116, "eval_samples_per_second": 16.69, "eval_steps_per_second": 4.172, "step": 200 }, { "epoch": 0.07, "learning_rate": 9.861111111111112e-05, "loss": 0.4001, "step": 300 }, { "epoch": 0.07, "eval_accuracy": 0.9633391499519348, "eval_loss": 0.1039256900548935, "eval_runtime": 2051.9615, "eval_samples_per_second": 16.842, "eval_steps_per_second": 4.211, "step": 300 }, { "epoch": 0.09, "learning_rate": 9.814814814814815e-05, "loss": 0.2967, "step": 400 }, { "epoch": 0.09, "eval_accuracy": 0.9760127067565918, "eval_loss": 0.0726834237575531, "eval_runtime": 1976.3465, "eval_samples_per_second": 17.487, "eval_steps_per_second": 4.372, "step": 400 }, { "epoch": 0.12, "learning_rate": 9.768518518518519e-05, "loss": 0.2572, "step": 500 }, { "epoch": 0.12, "eval_accuracy": 0.9752025604248047, "eval_loss": 0.07836401462554932, "eval_runtime": 1953.8972, "eval_samples_per_second": 17.688, "eval_steps_per_second": 4.422, "step": 500 }, { "epoch": 0.14, "learning_rate": 9.722222222222223e-05, "loss": 0.1858, "step": 600 }, { "epoch": 0.14, "eval_accuracy": 0.987442135810852, "eval_loss": 0.03908771649003029, "eval_runtime": 1933.1285, "eval_samples_per_second": 17.878, "eval_steps_per_second": 4.469, "step": 600 }, { "epoch": 0.16, "learning_rate": 9.675925925925926e-05, "loss": 0.1776, "step": 700 }, { "epoch": 0.16, "eval_accuracy": 0.9870080947875977, "eval_loss": 0.046012409031391144, "eval_runtime": 2073.2574, "eval_samples_per_second": 16.669, "eval_steps_per_second": 4.167, "step": 700 }, { "epoch": 0.19, "learning_rate": 9.62962962962963e-05, "loss": 0.1253, "step": 800 }, { "epoch": 0.19, "eval_accuracy": 0.987442135810852, "eval_loss": 0.04302794486284256, "eval_runtime": 2100.3436, "eval_samples_per_second": 16.454, "eval_steps_per_second": 4.114, "step": 800 }, { "epoch": 0.21, "learning_rate": 9.583333333333334e-05, "loss": 0.1509, "step": 900 }, { "epoch": 0.21, "eval_accuracy": 0.9821469783782959, "eval_loss": 0.06500900536775589, "eval_runtime": 2096.1889, "eval_samples_per_second": 16.487, "eval_steps_per_second": 4.122, "step": 900 }, { "epoch": 0.23, "learning_rate": 9.537037037037038e-05, "loss": 0.1574, "step": 1000 }, { "epoch": 0.23, "eval_accuracy": 0.9847221970558167, "eval_loss": 0.059933874756097794, "eval_runtime": 2109.7765, "eval_samples_per_second": 16.381, "eval_steps_per_second": 4.095, "step": 1000 }, { "epoch": 0.25, "learning_rate": 9.490740740740742e-05, "loss": 0.1506, "step": 1100 }, { "epoch": 0.25, "eval_accuracy": 0.9896122813224792, "eval_loss": 0.034695032984018326, "eval_runtime": 2117.6815, "eval_samples_per_second": 16.32, "eval_steps_per_second": 4.08, "step": 1100 }, { "epoch": 0.28, "learning_rate": 9.444444444444444e-05, "loss": 0.118, "step": 1200 }, { "epoch": 0.28, "eval_accuracy": 0.9911168813705444, "eval_loss": 0.03316599503159523, "eval_runtime": 2107.0764, "eval_samples_per_second": 16.402, "eval_steps_per_second": 4.1, "step": 1200 }, { "epoch": 0.3, "learning_rate": 9.398148148148148e-05, "loss": 0.0885, "step": 1300 }, { "epoch": 0.3, "eval_accuracy": 0.9947627186775208, "eval_loss": 0.019724205136299133, "eval_runtime": 2100.2147, "eval_samples_per_second": 16.455, "eval_steps_per_second": 4.114, "step": 1300 }, { "epoch": 0.32, "learning_rate": 9.351851851851852e-05, "loss": 0.0967, "step": 1400 }, { "epoch": 0.32, "eval_accuracy": 0.9936053156852722, "eval_loss": 0.022701723501086235, "eval_runtime": 2110.6117, "eval_samples_per_second": 16.374, "eval_steps_per_second": 4.094, "step": 1400 }, { "epoch": 0.35, "learning_rate": 9.305555555555556e-05, "loss": 0.0882, "step": 1500 }, { "epoch": 0.35, "eval_accuracy": 0.992274284362793, "eval_loss": 0.02855427749454975, "eval_runtime": 2115.7419, "eval_samples_per_second": 16.335, "eval_steps_per_second": 4.084, "step": 1500 }, { "epoch": 0.37, "learning_rate": 9.25925925925926e-05, "loss": 0.1056, "step": 1600 }, { "epoch": 0.37, "eval_accuracy": 0.9962384104728699, "eval_loss": 0.015638431534171104, "eval_runtime": 2093.1202, "eval_samples_per_second": 16.511, "eval_steps_per_second": 4.128, "step": 1600 }, { "epoch": 0.39, "learning_rate": 9.212962962962963e-05, "loss": 0.1124, "step": 1700 }, { "epoch": 0.39, "eval_accuracy": 0.9942708611488342, "eval_loss": 0.023519381880760193, "eval_runtime": 2715.3988, "eval_samples_per_second": 12.727, "eval_steps_per_second": 3.182, "step": 1700 }, { "epoch": 0.42, "learning_rate": 9.166666666666667e-05, "loss": 0.0813, "step": 1800 }, { "epoch": 0.42, "eval_accuracy": 0.995341420173645, "eval_loss": 0.017750833183526993, "eval_runtime": 2099.7025, "eval_samples_per_second": 16.459, "eval_steps_per_second": 4.115, "step": 1800 }, { "epoch": 0.44, "learning_rate": 9.120370370370371e-05, "loss": 0.0609, "step": 1900 }, { "epoch": 0.44, "eval_accuracy": 0.9971932768821716, "eval_loss": 0.011351389810442924, "eval_runtime": 2142.3716, "eval_samples_per_second": 16.132, "eval_steps_per_second": 4.033, "step": 1900 }, { "epoch": 0.46, "learning_rate": 9.074074074074075e-05, "loss": 0.0891, "step": 2000 }, { "epoch": 0.46, "eval_accuracy": 0.9973379373550415, "eval_loss": 0.012310467660427094, "eval_runtime": 2095.1245, "eval_samples_per_second": 16.495, "eval_steps_per_second": 4.124, "step": 2000 }, { "epoch": 0.49, "learning_rate": 9.027777777777779e-05, "loss": 0.0424, "step": 2100 }, { "epoch": 0.49, "eval_accuracy": 0.9985821843147278, "eval_loss": 0.00660862447693944, "eval_runtime": 2101.8862, "eval_samples_per_second": 16.442, "eval_steps_per_second": 4.111, "step": 2100 }, { "epoch": 0.51, "learning_rate": 8.981481481481481e-05, "loss": 0.0546, "step": 2200 }, { "epoch": 0.51, "eval_accuracy": 0.9950520992279053, "eval_loss": 0.021980540826916695, "eval_runtime": 2121.7281, "eval_samples_per_second": 16.289, "eval_steps_per_second": 4.072, "step": 2200 }, { "epoch": 0.53, "learning_rate": 8.935185185185185e-05, "loss": 0.146, "step": 2300 }, { "epoch": 0.53, "eval_accuracy": 0.9940393567085266, "eval_loss": 0.02473669871687889, "eval_runtime": 2062.8449, "eval_samples_per_second": 16.754, "eval_steps_per_second": 4.188, "step": 2300 }, { "epoch": 0.56, "learning_rate": 8.888888888888889e-05, "loss": 0.1174, "step": 2400 }, { "epoch": 0.56, "eval_accuracy": 0.9958622455596924, "eval_loss": 0.01570066250860691, "eval_runtime": 2017.5974, "eval_samples_per_second": 17.129, "eval_steps_per_second": 4.282, "step": 2400 }, { "epoch": 0.58, "learning_rate": 8.842592592592593e-05, "loss": 0.0848, "step": 2500 }, { "epoch": 0.58, "eval_accuracy": 0.9978298544883728, "eval_loss": 0.008064490742981434, "eval_runtime": 2005.1771, "eval_samples_per_second": 17.235, "eval_steps_per_second": 4.309, "step": 2500 }, { "epoch": 0.6, "learning_rate": 8.796296296296297e-05, "loss": 0.0792, "step": 2600 }, { "epoch": 0.6, "eval_accuracy": 0.9986110925674438, "eval_loss": 0.004222337622195482, "eval_runtime": 1999.0902, "eval_samples_per_second": 17.288, "eval_steps_per_second": 4.322, "step": 2600 }, { "epoch": 0.62, "learning_rate": 8.75e-05, "loss": 0.0482, "step": 2700 }, { "epoch": 0.62, "eval_accuracy": 0.9971354007720947, "eval_loss": 0.01219157688319683, "eval_runtime": 2001.1288, "eval_samples_per_second": 17.27, "eval_steps_per_second": 4.318, "step": 2700 }, { "epoch": 0.65, "learning_rate": 8.703703703703704e-05, "loss": 0.0697, "step": 2800 }, { "epoch": 0.65, "eval_accuracy": 0.9931133985519409, "eval_loss": 0.027987554669380188, "eval_runtime": 1997.1851, "eval_samples_per_second": 17.304, "eval_steps_per_second": 4.326, "step": 2800 }, { "epoch": 0.67, "learning_rate": 8.657407407407408e-05, "loss": 0.106, "step": 2900 }, { "epoch": 0.67, "eval_accuracy": 0.9977430701255798, "eval_loss": 0.008220946416258812, "eval_runtime": 2609.175, "eval_samples_per_second": 13.246, "eval_steps_per_second": 3.311, "step": 2900 }, { "epoch": 0.69, "learning_rate": 8.611111111111112e-05, "loss": 0.052, "step": 3000 }, { "epoch": 0.69, "eval_accuracy": 0.9971932768821716, "eval_loss": 0.01051583793014288, "eval_runtime": 1971.9035, "eval_samples_per_second": 17.526, "eval_steps_per_second": 4.382, "step": 3000 }, { "epoch": 0.72, "learning_rate": 8.564814814814816e-05, "loss": 0.047, "step": 3100 }, { "epoch": 0.72, "eval_accuracy": 0.9978588223457336, "eval_loss": 0.009094738401472569, "eval_runtime": 1980.5023, "eval_samples_per_second": 17.45, "eval_steps_per_second": 4.363, "step": 3100 }, { "epoch": 0.74, "learning_rate": 8.518518518518518e-05, "loss": 0.0495, "step": 3200 }, { "epoch": 0.74, "eval_accuracy": 0.998466432094574, "eval_loss": 0.006100042257457972, "eval_runtime": 2070.7502, "eval_samples_per_second": 16.69, "eval_steps_per_second": 4.172, "step": 3200 }, { "epoch": 0.76, "learning_rate": 8.472222222222222e-05, "loss": 0.0979, "step": 3300 }, { "epoch": 0.76, "eval_accuracy": 0.9978588223457336, "eval_loss": 0.009109850972890854, "eval_runtime": 1970.0999, "eval_samples_per_second": 17.542, "eval_steps_per_second": 4.386, "step": 3300 }, { "epoch": 0.79, "learning_rate": 8.425925925925926e-05, "loss": 0.0381, "step": 3400 }, { "epoch": 0.79, "eval_accuracy": 0.9951099753379822, "eval_loss": 0.021163903176784515, "eval_runtime": 1977.6331, "eval_samples_per_second": 17.475, "eval_steps_per_second": 4.369, "step": 3400 }, { "epoch": 0.81, "learning_rate": 8.379629629629629e-05, "loss": 0.0268, "step": 3500 }, { "epoch": 0.81, "eval_accuracy": 0.9980034828186035, "eval_loss": 0.008532223291695118, "eval_runtime": 1971.1529, "eval_samples_per_second": 17.533, "eval_steps_per_second": 4.383, "step": 3500 }, { "epoch": 0.83, "learning_rate": 8.333333333333334e-05, "loss": 0.073, "step": 3600 }, { "epoch": 0.83, "eval_accuracy": 0.9961516261100769, "eval_loss": 0.017610933631658554, "eval_runtime": 1957.2329, "eval_samples_per_second": 17.658, "eval_steps_per_second": 4.414, "step": 3600 }, { "epoch": 0.86, "learning_rate": 8.287037037037037e-05, "loss": 0.0585, "step": 3700 }, { "epoch": 0.86, "eval_accuracy": 0.9971354007720947, "eval_loss": 0.011580849066376686, "eval_runtime": 1962.4064, "eval_samples_per_second": 17.611, "eval_steps_per_second": 4.403, "step": 3700 }, { "epoch": 0.88, "learning_rate": 8.240740740740741e-05, "loss": 0.0868, "step": 3800 }, { "epoch": 0.88, "eval_accuracy": 0.9994502067565918, "eval_loss": 0.00212017516605556, "eval_runtime": 1982.4259, "eval_samples_per_second": 17.433, "eval_steps_per_second": 4.358, "step": 3800 }, { "epoch": 0.9, "learning_rate": 8.194444444444445e-05, "loss": 0.0496, "step": 3900 }, { "epoch": 0.9, "eval_accuracy": 0.9978877305984497, "eval_loss": 0.008284298703074455, "eval_runtime": 1983.4898, "eval_samples_per_second": 17.424, "eval_steps_per_second": 4.356, "step": 3900 }, { "epoch": 0.93, "learning_rate": 8.148148148148148e-05, "loss": 0.0641, "step": 4000 }, { "epoch": 0.93, "eval_accuracy": 0.9967592358589172, "eval_loss": 0.013520145788788795, "eval_runtime": 1998.2946, "eval_samples_per_second": 17.295, "eval_steps_per_second": 4.324, "step": 4000 }, { "epoch": 0.95, "learning_rate": 8.101851851851853e-05, "loss": 0.0858, "step": 4100 }, { "epoch": 0.95, "eval_accuracy": 0.9989872574806213, "eval_loss": 0.003793817013502121, "eval_runtime": 2125.8264, "eval_samples_per_second": 16.257, "eval_steps_per_second": 4.064, "step": 4100 }, { "epoch": 0.97, "learning_rate": 8.055555555555556e-05, "loss": 0.0483, "step": 4200 }, { "epoch": 0.97, "eval_accuracy": 0.9978588223457336, "eval_loss": 0.009265501983463764, "eval_runtime": 2260.2096, "eval_samples_per_second": 15.291, "eval_steps_per_second": 3.823, "step": 4200 }, { "epoch": 1.0, "learning_rate": 8.00925925925926e-05, "loss": 0.1115, "step": 4300 }, { "epoch": 1.0, "eval_accuracy": 0.9990162253379822, "eval_loss": 0.003752070013433695, "eval_runtime": 1992.5237, "eval_samples_per_second": 17.345, "eval_steps_per_second": 4.336, "step": 4300 }, { "epoch": 1.02, "learning_rate": 7.962962962962964e-05, "loss": 0.0486, "step": 4400 }, { "epoch": 1.02, "eval_accuracy": 0.9991897940635681, "eval_loss": 0.0031358152627944946, "eval_runtime": 1985.6758, "eval_samples_per_second": 17.405, "eval_steps_per_second": 4.351, "step": 4400 }, { "epoch": 1.04, "learning_rate": 7.916666666666666e-05, "loss": 0.0166, "step": 4500 }, { "epoch": 1.04, "eval_accuracy": 0.9995370507240295, "eval_loss": 0.002144153229892254, "eval_runtime": 2034.5738, "eval_samples_per_second": 16.986, "eval_steps_per_second": 4.247, "step": 4500 }, { "epoch": 1.06, "learning_rate": 7.870370370370372e-05, "loss": 0.0084, "step": 4600 }, { "epoch": 1.06, "eval_accuracy": 0.9986979365348816, "eval_loss": 0.006190824322402477, "eval_runtime": 2022.3112, "eval_samples_per_second": 17.089, "eval_steps_per_second": 4.272, "step": 4600 }, { "epoch": 1.09, "learning_rate": 7.824074074074074e-05, "loss": 0.0205, "step": 4700 }, { "epoch": 1.09, "eval_accuracy": 0.999160885810852, "eval_loss": 0.0034529021941125393, "eval_runtime": 2036.3231, "eval_samples_per_second": 16.972, "eval_steps_per_second": 4.243, "step": 4700 }, { "epoch": 1.11, "learning_rate": 7.777777777777778e-05, "loss": 0.0217, "step": 4800 }, { "epoch": 1.11, "eval_accuracy": 0.9973379373550415, "eval_loss": 0.012433897703886032, "eval_runtime": 2054.4934, "eval_samples_per_second": 16.822, "eval_steps_per_second": 4.205, "step": 4800 }, { "epoch": 1.13, "learning_rate": 7.731481481481482e-05, "loss": 0.0407, "step": 4900 }, { "epoch": 1.13, "eval_accuracy": 0.9991030097007751, "eval_loss": 0.004298593383282423, "eval_runtime": 2009.8166, "eval_samples_per_second": 17.196, "eval_steps_per_second": 4.299, "step": 4900 }, { "epoch": 1.16, "learning_rate": 7.685185185185185e-05, "loss": 0.0598, "step": 5000 }, { "epoch": 1.16, "eval_accuracy": 0.9981771111488342, "eval_loss": 0.007797444239258766, "eval_runtime": 1996.948, "eval_samples_per_second": 17.306, "eval_steps_per_second": 4.327, "step": 5000 }, { "epoch": 1.18, "learning_rate": 7.638888888888889e-05, "loss": 0.058, "step": 5100 }, { "epoch": 1.18, "eval_accuracy": 0.9981192350387573, "eval_loss": 0.009161165915429592, "eval_runtime": 2007.394, "eval_samples_per_second": 17.216, "eval_steps_per_second": 4.304, "step": 5100 }, { "epoch": 1.2, "learning_rate": 7.592592592592593e-05, "loss": 0.0119, "step": 5200 }, { "epoch": 1.2, "eval_accuracy": 0.9994502067565918, "eval_loss": 0.0023240004666149616, "eval_runtime": 2021.6614, "eval_samples_per_second": 17.095, "eval_steps_per_second": 4.274, "step": 5200 }, { "epoch": 1.23, "learning_rate": 7.546296296296297e-05, "loss": 0.08, "step": 5300 }, { "epoch": 1.23, "eval_accuracy": 0.9976562261581421, "eval_loss": 0.009528687223792076, "eval_runtime": 2024.2247, "eval_samples_per_second": 17.073, "eval_steps_per_second": 4.268, "step": 5300 }, { "epoch": 1.25, "learning_rate": 7.500000000000001e-05, "loss": 0.0336, "step": 5400 }, { "epoch": 1.25, "eval_accuracy": 0.9995949268341064, "eval_loss": 0.0020153559744358063, "eval_runtime": 2005.7373, "eval_samples_per_second": 17.231, "eval_steps_per_second": 4.308, "step": 5400 }, { "epoch": 1.27, "learning_rate": 7.453703703703703e-05, "loss": 0.0508, "step": 5500 }, { "epoch": 1.27, "eval_accuracy": 0.9989004731178284, "eval_loss": 0.00367682590149343, "eval_runtime": 2024.401, "eval_samples_per_second": 17.072, "eval_steps_per_second": 4.268, "step": 5500 }, { "epoch": 1.3, "learning_rate": 7.407407407407407e-05, "loss": 0.0146, "step": 5600 }, { "epoch": 1.3, "eval_accuracy": 0.9992766380310059, "eval_loss": 0.002618621801957488, "eval_runtime": 2017.6548, "eval_samples_per_second": 17.129, "eval_steps_per_second": 4.282, "step": 5600 }, { "epoch": 1.32, "learning_rate": 7.361111111111111e-05, "loss": 0.038, "step": 5700 }, { "epoch": 1.32, "eval_accuracy": 0.9988425970077515, "eval_loss": 0.00465565687045455, "eval_runtime": 2002.1508, "eval_samples_per_second": 17.261, "eval_steps_per_second": 4.315, "step": 5700 }, { "epoch": 1.34, "learning_rate": 7.314814814814815e-05, "loss": 0.0613, "step": 5800 }, { "epoch": 1.34, "eval_accuracy": 0.998379647731781, "eval_loss": 0.005978360306471586, "eval_runtime": 2001.292, "eval_samples_per_second": 17.269, "eval_steps_per_second": 4.317, "step": 5800 }, { "epoch": 1.37, "learning_rate": 7.268518518518519e-05, "loss": 0.0364, "step": 5900 }, { "epoch": 1.37, "eval_accuracy": 0.9971064925193787, "eval_loss": 0.01282673142850399, "eval_runtime": 2012.1731, "eval_samples_per_second": 17.175, "eval_steps_per_second": 4.294, "step": 5900 }, { "epoch": 1.39, "learning_rate": 7.222222222222222e-05, "loss": 0.108, "step": 6000 }, { "epoch": 1.39, "eval_accuracy": 0.998379647731781, "eval_loss": 0.005587506573647261, "eval_runtime": 2228.721, "eval_samples_per_second": 15.507, "eval_steps_per_second": 3.877, "step": 6000 }, { "epoch": 1.41, "learning_rate": 7.175925925925926e-05, "loss": 0.0134, "step": 6100 }, { "epoch": 1.41, "eval_accuracy": 0.9985821843147278, "eval_loss": 0.0066048940643668175, "eval_runtime": 2000.8975, "eval_samples_per_second": 17.272, "eval_steps_per_second": 4.318, "step": 6100 }, { "epoch": 1.44, "learning_rate": 7.12962962962963e-05, "loss": 0.0389, "step": 6200 }, { "epoch": 1.44, "eval_accuracy": 0.9972511529922485, "eval_loss": 0.012162311002612114, "eval_runtime": 1997.5848, "eval_samples_per_second": 17.301, "eval_steps_per_second": 4.325, "step": 6200 }, { "epoch": 1.46, "learning_rate": 7.083333333333334e-05, "loss": 0.0208, "step": 6300 }, { "epoch": 1.46, "eval_accuracy": 0.9991030097007751, "eval_loss": 0.0034532626159489155, "eval_runtime": 2007.9035, "eval_samples_per_second": 17.212, "eval_steps_per_second": 4.303, "step": 6300 }, { "epoch": 1.48, "learning_rate": 7.037037037037038e-05, "loss": 0.0376, "step": 6400 }, { "epoch": 1.48, "eval_accuracy": 0.9991897940635681, "eval_loss": 0.004356299061328173, "eval_runtime": 1996.8911, "eval_samples_per_second": 17.307, "eval_steps_per_second": 4.327, "step": 6400 }, { "epoch": 1.5, "learning_rate": 6.99074074074074e-05, "loss": 0.0346, "step": 6500 }, { "epoch": 1.5, "eval_accuracy": 0.9969907402992249, "eval_loss": 0.017812130972743034, "eval_runtime": 2004.911, "eval_samples_per_second": 17.238, "eval_steps_per_second": 4.309, "step": 6500 }, { "epoch": 1.53, "learning_rate": 6.944444444444444e-05, "loss": 0.0189, "step": 6600 }, { "epoch": 1.53, "eval_accuracy": 0.9987847208976746, "eval_loss": 0.0057495711371302605, "eval_runtime": 2011.102, "eval_samples_per_second": 17.185, "eval_steps_per_second": 4.296, "step": 6600 }, { "epoch": 1.55, "learning_rate": 6.898148148148148e-05, "loss": 0.0141, "step": 6700 }, { "epoch": 1.55, "eval_accuracy": 0.9992766380310059, "eval_loss": 0.003152304096147418, "eval_runtime": 1989.9017, "eval_samples_per_second": 17.368, "eval_steps_per_second": 4.342, "step": 6700 }, { "epoch": 1.57, "learning_rate": 6.851851851851852e-05, "loss": 0.0719, "step": 6800 }, { "epoch": 1.57, "eval_accuracy": 0.9987847208976746, "eval_loss": 0.005420052912086248, "eval_runtime": 1969.8998, "eval_samples_per_second": 17.544, "eval_steps_per_second": 4.386, "step": 6800 }, { "epoch": 1.6, "learning_rate": 6.805555555555556e-05, "loss": 0.0225, "step": 6900 }, { "epoch": 1.6, "eval_accuracy": 0.9971932768821716, "eval_loss": 0.012641699984669685, "eval_runtime": 1981.1809, "eval_samples_per_second": 17.444, "eval_steps_per_second": 4.361, "step": 6900 }, { "epoch": 1.62, "learning_rate": 6.759259259259259e-05, "loss": 0.0682, "step": 7000 }, { "epoch": 1.62, "eval_accuracy": 0.9989583492279053, "eval_loss": 0.003953148610889912, "eval_runtime": 1973.9678, "eval_samples_per_second": 17.508, "eval_steps_per_second": 4.377, "step": 7000 }, { "epoch": 1.64, "learning_rate": 6.712962962962963e-05, "loss": 0.0521, "step": 7100 }, { "epoch": 1.64, "eval_accuracy": 0.998466432094574, "eval_loss": 0.005261498969048262, "eval_runtime": 1989.7692, "eval_samples_per_second": 17.369, "eval_steps_per_second": 4.342, "step": 7100 }, { "epoch": 1.67, "learning_rate": 6.666666666666667e-05, "loss": 0.0358, "step": 7200 }, { "epoch": 1.67, "eval_accuracy": 0.9993634223937988, "eval_loss": 0.002406924497336149, "eval_runtime": 1975.9496, "eval_samples_per_second": 17.49, "eval_steps_per_second": 4.373, "step": 7200 }, { "epoch": 1.69, "learning_rate": 6.620370370370371e-05, "loss": 0.0255, "step": 7300 }, { "epoch": 1.69, "eval_accuracy": 0.9984953999519348, "eval_loss": 0.007655243389308453, "eval_runtime": 1972.1073, "eval_samples_per_second": 17.524, "eval_steps_per_second": 4.381, "step": 7300 }, { "epoch": 1.71, "learning_rate": 6.574074074074075e-05, "loss": 0.0424, "step": 7400 }, { "epoch": 1.71, "eval_accuracy": 0.9996238350868225, "eval_loss": 0.0017167649930343032, "eval_runtime": 1980.6249, "eval_samples_per_second": 17.449, "eval_steps_per_second": 4.362, "step": 7400 }, { "epoch": 1.74, "learning_rate": 6.527777777777778e-05, "loss": 0.0214, "step": 7500 }, { "epoch": 1.74, "eval_accuracy": 0.9997106194496155, "eval_loss": 0.0009764753049239516, "eval_runtime": 2005.2649, "eval_samples_per_second": 17.235, "eval_steps_per_second": 4.309, "step": 7500 }, { "epoch": 1.76, "learning_rate": 6.481481481481482e-05, "loss": 0.0429, "step": 7600 }, { "epoch": 1.76, "eval_accuracy": 0.996006965637207, "eval_loss": 0.019011829048395157, "eval_runtime": 2045.2435, "eval_samples_per_second": 16.898, "eval_steps_per_second": 4.224, "step": 7600 }, { "epoch": 1.78, "learning_rate": 6.435185185185186e-05, "loss": 0.0783, "step": 7700 }, { "epoch": 1.78, "eval_accuracy": 0.9976562261581421, "eval_loss": 0.008234655484557152, "eval_runtime": 2041.1233, "eval_samples_per_second": 16.932, "eval_steps_per_second": 4.233, "step": 7700 }, { "epoch": 1.81, "learning_rate": 6.388888888888888e-05, "loss": 0.0141, "step": 7800 }, { "epoch": 1.81, "eval_accuracy": 0.9996238350868225, "eval_loss": 0.0018950661178678274, "eval_runtime": 1994.0408, "eval_samples_per_second": 17.332, "eval_steps_per_second": 4.333, "step": 7800 }, { "epoch": 1.83, "learning_rate": 6.342592592592594e-05, "loss": 0.0203, "step": 7900 }, { "epoch": 1.83, "eval_accuracy": 0.9994502067565918, "eval_loss": 0.0022274223156273365, "eval_runtime": 1978.2563, "eval_samples_per_second": 17.47, "eval_steps_per_second": 4.367, "step": 7900 }, { "epoch": 1.85, "learning_rate": 6.296296296296296e-05, "loss": 0.0439, "step": 8000 }, { "epoch": 1.85, "eval_accuracy": 0.9979166388511658, "eval_loss": 0.007150179240852594, "eval_runtime": 1990.4775, "eval_samples_per_second": 17.363, "eval_steps_per_second": 4.341, "step": 8000 }, { "epoch": 1.88, "learning_rate": 6.25e-05, "loss": 0.0228, "step": 8100 }, { "epoch": 1.88, "eval_accuracy": 0.9973090291023254, "eval_loss": 0.010999325662851334, "eval_runtime": 1995.6933, "eval_samples_per_second": 17.317, "eval_steps_per_second": 4.329, "step": 8100 }, { "epoch": 1.9, "learning_rate": 6.203703703703704e-05, "loss": 0.0386, "step": 8200 }, { "epoch": 1.9, "eval_accuracy": 0.9996817111968994, "eval_loss": 0.001689778990112245, "eval_runtime": 1983.468, "eval_samples_per_second": 17.424, "eval_steps_per_second": 4.356, "step": 8200 }, { "epoch": 1.92, "learning_rate": 6.157407407407407e-05, "loss": 0.023, "step": 8300 }, { "epoch": 1.92, "eval_accuracy": 0.9997106194496155, "eval_loss": 0.001407949603162706, "eval_runtime": 1979.2035, "eval_samples_per_second": 17.462, "eval_steps_per_second": 4.365, "step": 8300 }, { "epoch": 1.94, "learning_rate": 6.111111111111112e-05, "loss": 0.0188, "step": 8400 }, { "epoch": 1.94, "eval_accuracy": 0.9997395873069763, "eval_loss": 0.001248441985808313, "eval_runtime": 1986.8349, "eval_samples_per_second": 17.395, "eval_steps_per_second": 4.349, "step": 8400 }, { "epoch": 1.97, "learning_rate": 6.0648148148148154e-05, "loss": 0.0301, "step": 8500 }, { "epoch": 1.97, "eval_accuracy": 0.9998553395271301, "eval_loss": 0.0005934939254075289, "eval_runtime": 2006.6093, "eval_samples_per_second": 17.223, "eval_steps_per_second": 4.306, "step": 8500 }, { "epoch": 1.99, "learning_rate": 6.018518518518519e-05, "loss": 0.0077, "step": 8600 }, { "epoch": 1.99, "eval_accuracy": 0.9999421238899231, "eval_loss": 0.00026703893672674894, "eval_runtime": 2048.5713, "eval_samples_per_second": 16.87, "eval_steps_per_second": 4.218, "step": 8600 }, { "epoch": 2.01, "learning_rate": 5.972222222222223e-05, "loss": 0.0291, "step": 8700 }, { "epoch": 2.01, "eval_accuracy": 0.9989872574806213, "eval_loss": 0.004098657984286547, "eval_runtime": 2031.7661, "eval_samples_per_second": 17.01, "eval_steps_per_second": 4.252, "step": 8700 }, { "epoch": 2.04, "learning_rate": 5.925925925925926e-05, "loss": 0.0274, "step": 8800 }, { "epoch": 2.04, "eval_accuracy": 0.9995949268341064, "eval_loss": 0.001983657479286194, "eval_runtime": 2040.4005, "eval_samples_per_second": 16.938, "eval_steps_per_second": 4.234, "step": 8800 }, { "epoch": 2.06, "learning_rate": 5.879629629629629e-05, "loss": 0.0193, "step": 8900 }, { "epoch": 2.06, "eval_accuracy": 0.9998842477798462, "eval_loss": 0.0003717490180861205, "eval_runtime": 2041.1098, "eval_samples_per_second": 16.932, "eval_steps_per_second": 4.233, "step": 8900 }, { "epoch": 2.08, "learning_rate": 5.833333333333334e-05, "loss": 0.0296, "step": 9000 }, { "epoch": 2.08, "eval_accuracy": 0.9997684955596924, "eval_loss": 0.0011291600530967116, "eval_runtime": 2004.7261, "eval_samples_per_second": 17.239, "eval_steps_per_second": 4.31, "step": 9000 }, { "epoch": 2.11, "learning_rate": 5.787037037037037e-05, "loss": 0.0033, "step": 9100 }, { "epoch": 2.11, "eval_accuracy": 0.9998553395271301, "eval_loss": 0.0005978959961794317, "eval_runtime": 2001.4449, "eval_samples_per_second": 17.268, "eval_steps_per_second": 4.317, "step": 9100 }, { "epoch": 2.13, "learning_rate": 5.740740740740741e-05, "loss": 0.0218, "step": 9200 }, { "epoch": 2.13, "eval_accuracy": 0.999160885810852, "eval_loss": 0.0025338120758533478, "eval_runtime": 1990.5201, "eval_samples_per_second": 17.362, "eval_steps_per_second": 4.341, "step": 9200 }, { "epoch": 2.15, "learning_rate": 5.6944444444444445e-05, "loss": 0.0238, "step": 9300 }, { "epoch": 2.15, "eval_accuracy": 0.999218761920929, "eval_loss": 0.0033705937676131725, "eval_runtime": 2019.0619, "eval_samples_per_second": 17.117, "eval_steps_per_second": 4.279, "step": 9300 }, { "epoch": 2.18, "learning_rate": 5.648148148148148e-05, "loss": 0.0319, "step": 9400 }, { "epoch": 2.18, "eval_accuracy": 0.9994791746139526, "eval_loss": 0.0017771282000467181, "eval_runtime": 1997.4844, "eval_samples_per_second": 17.302, "eval_steps_per_second": 4.325, "step": 9400 }, { "epoch": 2.2, "learning_rate": 5.6018518518518525e-05, "loss": 0.0465, "step": 9500 }, { "epoch": 2.2, "eval_accuracy": 0.9994502067565918, "eval_loss": 0.002331700176000595, "eval_runtime": 2012.3132, "eval_samples_per_second": 17.174, "eval_steps_per_second": 4.294, "step": 9500 }, { "epoch": 2.22, "learning_rate": 5.555555555555556e-05, "loss": 0.0412, "step": 9600 }, { "epoch": 2.22, "eval_accuracy": 0.9997395873069763, "eval_loss": 0.001237583113834262, "eval_runtime": 1999.8191, "eval_samples_per_second": 17.282, "eval_steps_per_second": 4.32, "step": 9600 }, { "epoch": 2.25, "learning_rate": 5.50925925925926e-05, "loss": 0.02, "step": 9700 }, { "epoch": 2.25, "eval_accuracy": 0.9998842477798462, "eval_loss": 0.0009390079067088664, "eval_runtime": 2008.6286, "eval_samples_per_second": 17.206, "eval_steps_per_second": 4.301, "step": 9700 }, { "epoch": 2.27, "learning_rate": 5.462962962962963e-05, "loss": 0.0226, "step": 9800 }, { "epoch": 2.27, "eval_accuracy": 0.9995949268341064, "eval_loss": 0.0017483533592894673, "eval_runtime": 2005.0151, "eval_samples_per_second": 17.237, "eval_steps_per_second": 4.309, "step": 9800 }, { "epoch": 2.29, "learning_rate": 5.4166666666666664e-05, "loss": 0.0104, "step": 9900 }, { "epoch": 2.29, "eval_accuracy": 0.9997684955596924, "eval_loss": 0.0008292018319480121, "eval_runtime": 1992.6545, "eval_samples_per_second": 17.344, "eval_steps_per_second": 4.336, "step": 9900 }, { "epoch": 2.31, "learning_rate": 5.370370370370371e-05, "loss": 0.0021, "step": 10000 }, { "epoch": 2.31, "eval_accuracy": 0.999913215637207, "eval_loss": 0.000292919430648908, "eval_runtime": 1987.2006, "eval_samples_per_second": 17.391, "eval_steps_per_second": 4.348, "step": 10000 }, { "epoch": 2.34, "learning_rate": 5.3240740740740744e-05, "loss": 0.0135, "step": 10100 }, { "epoch": 2.34, "eval_accuracy": 0.9987558126449585, "eval_loss": 0.005596287082880735, "eval_runtime": 2007.3126, "eval_samples_per_second": 17.217, "eval_steps_per_second": 4.304, "step": 10100 }, { "epoch": 2.36, "learning_rate": 5.2777777777777784e-05, "loss": 0.0319, "step": 10200 }, { "epoch": 2.36, "eval_accuracy": 0.9995949268341064, "eval_loss": 0.001722234534099698, "eval_runtime": 2007.5056, "eval_samples_per_second": 17.215, "eval_steps_per_second": 4.304, "step": 10200 }, { "epoch": 2.38, "learning_rate": 5.231481481481482e-05, "loss": 0.0279, "step": 10300 }, { "epoch": 2.38, "eval_accuracy": 0.9997106194496155, "eval_loss": 0.001128367381170392, "eval_runtime": 1980.2057, "eval_samples_per_second": 17.453, "eval_steps_per_second": 4.363, "step": 10300 }, { "epoch": 2.41, "learning_rate": 5.185185185185185e-05, "loss": 0.0017, "step": 10400 }, { "epoch": 2.41, "eval_accuracy": 0.9997106194496155, "eval_loss": 0.0013848639791831374, "eval_runtime": 2007.1812, "eval_samples_per_second": 17.218, "eval_steps_per_second": 4.305, "step": 10400 }, { "epoch": 2.43, "learning_rate": 5.138888888888889e-05, "loss": 0.0296, "step": 10500 }, { "epoch": 2.43, "eval_accuracy": 0.9989583492279053, "eval_loss": 0.005161995068192482, "eval_runtime": 1986.761, "eval_samples_per_second": 17.395, "eval_steps_per_second": 4.349, "step": 10500 }, { "epoch": 2.45, "learning_rate": 5.092592592592593e-05, "loss": 0.0168, "step": 10600 }, { "epoch": 2.45, "eval_accuracy": 0.9997974634170532, "eval_loss": 0.0004770481900777668, "eval_runtime": 2003.7003, "eval_samples_per_second": 17.248, "eval_steps_per_second": 4.312, "step": 10600 }, { "epoch": 2.48, "learning_rate": 5.046296296296297e-05, "loss": 0.0194, "step": 10700 }, { "epoch": 2.48, "eval_accuracy": 0.9997684955596924, "eval_loss": 0.000735765672288835, "eval_runtime": 1994.758, "eval_samples_per_second": 17.325, "eval_steps_per_second": 4.331, "step": 10700 }, { "epoch": 2.5, "learning_rate": 5e-05, "loss": 0.0006, "step": 10800 }, { "epoch": 2.5, "eval_accuracy": 0.9998263716697693, "eval_loss": 0.0009093827102333307, "eval_runtime": 2000.8621, "eval_samples_per_second": 17.273, "eval_steps_per_second": 4.318, "step": 10800 }, { "epoch": 2.52, "learning_rate": 4.9537037037037035e-05, "loss": 0.0293, "step": 10900 }, { "epoch": 2.52, "eval_accuracy": 0.999913215637207, "eval_loss": 0.0005157970590516925, "eval_runtime": 2011.0169, "eval_samples_per_second": 17.185, "eval_steps_per_second": 4.296, "step": 10900 }, { "epoch": 2.55, "learning_rate": 4.9074074074074075e-05, "loss": 0.0016, "step": 11000 }, { "epoch": 2.55, "eval_accuracy": 0.9994791746139526, "eval_loss": 0.0025301428977400064, "eval_runtime": 2000.9133, "eval_samples_per_second": 17.272, "eval_steps_per_second": 4.318, "step": 11000 }, { "epoch": 2.57, "learning_rate": 4.8611111111111115e-05, "loss": 0.0069, "step": 11100 }, { "epoch": 2.57, "eval_accuracy": 0.9998842477798462, "eval_loss": 0.0004891157150268555, "eval_runtime": 2006.7369, "eval_samples_per_second": 17.222, "eval_steps_per_second": 4.305, "step": 11100 }, { "epoch": 2.59, "learning_rate": 4.814814814814815e-05, "loss": 0.0001, "step": 11200 }, { "epoch": 2.59, "eval_accuracy": 0.9999421238899231, "eval_loss": 0.00020419809152372181, "eval_runtime": 1993.3725, "eval_samples_per_second": 17.337, "eval_steps_per_second": 4.334, "step": 11200 }, { "epoch": 2.62, "learning_rate": 4.768518518518519e-05, "loss": 0.0108, "step": 11300 }, { "epoch": 2.62, "eval_accuracy": 0.9997974634170532, "eval_loss": 0.0010758559219539165, "eval_runtime": 2001.2763, "eval_samples_per_second": 17.269, "eval_steps_per_second": 4.317, "step": 11300 }, { "epoch": 2.64, "learning_rate": 4.722222222222222e-05, "loss": 0.0165, "step": 11400 }, { "epoch": 2.64, "eval_accuracy": 0.9998842477798462, "eval_loss": 0.0006313551566563547, "eval_runtime": 1995.5247, "eval_samples_per_second": 17.319, "eval_steps_per_second": 4.33, "step": 11400 }, { "epoch": 2.66, "learning_rate": 4.675925925925926e-05, "loss": 0.0001, "step": 11500 }, { "epoch": 2.66, "eval_accuracy": 0.9997974634170532, "eval_loss": 0.0007648964528925717, "eval_runtime": 2001.09, "eval_samples_per_second": 17.271, "eval_steps_per_second": 4.318, "step": 11500 }, { "epoch": 2.69, "learning_rate": 4.62962962962963e-05, "loss": 0.0244, "step": 11600 }, { "epoch": 2.69, "eval_accuracy": 0.9998553395271301, "eval_loss": 0.000668107473757118, "eval_runtime": 2000.7577, "eval_samples_per_second": 17.273, "eval_steps_per_second": 4.318, "step": 11600 }, { "epoch": 2.71, "learning_rate": 4.5833333333333334e-05, "loss": 0.0312, "step": 11700 }, { "epoch": 2.71, "eval_accuracy": 0.9995659589767456, "eval_loss": 0.001716578146442771, "eval_runtime": 1997.1256, "eval_samples_per_second": 17.305, "eval_steps_per_second": 4.326, "step": 11700 }, { "epoch": 2.73, "learning_rate": 4.5370370370370374e-05, "loss": 0.0191, "step": 11800 }, { "epoch": 2.73, "eval_accuracy": 0.9997395873069763, "eval_loss": 0.0007975550834089518, "eval_runtime": 1967.8746, "eval_samples_per_second": 17.562, "eval_steps_per_second": 4.391, "step": 11800 }, { "epoch": 2.75, "learning_rate": 4.490740740740741e-05, "loss": 0.0005, "step": 11900 }, { "epoch": 2.75, "eval_accuracy": 0.9998842477798462, "eval_loss": 0.0004628011374734342, "eval_runtime": 1958.7798, "eval_samples_per_second": 17.644, "eval_steps_per_second": 4.411, "step": 11900 }, { "epoch": 2.78, "learning_rate": 4.4444444444444447e-05, "loss": 0.0259, "step": 12000 }, { "epoch": 2.78, "eval_accuracy": 0.9996238350868225, "eval_loss": 0.001358355744741857, "eval_runtime": 1971.0225, "eval_samples_per_second": 17.534, "eval_steps_per_second": 4.384, "step": 12000 }, { "epoch": 2.8, "learning_rate": 4.3981481481481486e-05, "loss": 0.0226, "step": 12100 }, { "epoch": 2.8, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.00019500043708831072, "eval_runtime": 1970.5019, "eval_samples_per_second": 17.539, "eval_steps_per_second": 4.385, "step": 12100 }, { "epoch": 2.82, "learning_rate": 4.351851851851852e-05, "loss": 0.0, "step": 12200 }, { "epoch": 2.82, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.0002164940524380654, "eval_runtime": 1961.9305, "eval_samples_per_second": 17.615, "eval_steps_per_second": 4.404, "step": 12200 }, { "epoch": 2.85, "learning_rate": 4.305555555555556e-05, "loss": 0.0, "step": 12300 }, { "epoch": 2.85, "eval_accuracy": 0.9999421238899231, "eval_loss": 0.00010657820530468598, "eval_runtime": 1973.4403, "eval_samples_per_second": 17.513, "eval_steps_per_second": 4.378, "step": 12300 }, { "epoch": 2.87, "learning_rate": 4.259259259259259e-05, "loss": 0.0145, "step": 12400 }, { "epoch": 2.87, "eval_accuracy": 1.0, "eval_loss": 4.758801151183434e-05, "eval_runtime": 1977.328, "eval_samples_per_second": 17.478, "eval_steps_per_second": 4.37, "step": 12400 }, { "epoch": 2.89, "learning_rate": 4.212962962962963e-05, "loss": 0.0083, "step": 12500 }, { "epoch": 2.89, "eval_accuracy": 0.9995659589767456, "eval_loss": 0.001972577767446637, "eval_runtime": 1962.5085, "eval_samples_per_second": 17.61, "eval_steps_per_second": 4.403, "step": 12500 }, { "epoch": 2.92, "learning_rate": 4.166666666666667e-05, "loss": 0.02, "step": 12600 }, { "epoch": 2.92, "eval_accuracy": 0.9994791746139526, "eval_loss": 0.00198388216085732, "eval_runtime": 1956.5161, "eval_samples_per_second": 17.664, "eval_steps_per_second": 4.416, "step": 12600 }, { "epoch": 2.94, "learning_rate": 4.1203703703703705e-05, "loss": 0.0293, "step": 12700 }, { "epoch": 2.94, "eval_accuracy": 0.9994212985038757, "eval_loss": 0.0031591171864420176, "eval_runtime": 1997.7409, "eval_samples_per_second": 17.3, "eval_steps_per_second": 4.325, "step": 12700 }, { "epoch": 2.96, "learning_rate": 4.074074074074074e-05, "loss": 0.0164, "step": 12800 }, { "epoch": 2.96, "eval_accuracy": 0.9997395873069763, "eval_loss": 0.0012433998053893447, "eval_runtime": 2019.1263, "eval_samples_per_second": 17.116, "eval_steps_per_second": 4.279, "step": 12800 }, { "epoch": 2.99, "learning_rate": 4.027777777777778e-05, "loss": 0.0147, "step": 12900 }, { "epoch": 2.99, "eval_accuracy": 0.9997684955596924, "eval_loss": 0.001224155188538134, "eval_runtime": 2015.6572, "eval_samples_per_second": 17.146, "eval_steps_per_second": 4.286, "step": 12900 }, { "epoch": 3.01, "learning_rate": 3.981481481481482e-05, "loss": 0.0112, "step": 13000 }, { "epoch": 3.01, "eval_accuracy": 0.999913215637207, "eval_loss": 0.0008148940978571773, "eval_runtime": 1991.7022, "eval_samples_per_second": 17.352, "eval_steps_per_second": 4.338, "step": 13000 }, { "epoch": 3.03, "learning_rate": 3.935185185185186e-05, "loss": 0.002, "step": 13100 }, { "epoch": 3.03, "eval_accuracy": 0.9997395873069763, "eval_loss": 0.0012871942017227411, "eval_runtime": 2016.2834, "eval_samples_per_second": 17.14, "eval_steps_per_second": 4.285, "step": 13100 }, { "epoch": 3.06, "learning_rate": 3.888888888888889e-05, "loss": 0.017, "step": 13200 }, { "epoch": 3.06, "eval_accuracy": 0.9997106194496155, "eval_loss": 0.0010973262833431363, "eval_runtime": 2017.4262, "eval_samples_per_second": 17.131, "eval_steps_per_second": 4.283, "step": 13200 }, { "epoch": 3.08, "learning_rate": 3.8425925925925924e-05, "loss": 0.0142, "step": 13300 }, { "epoch": 3.08, "eval_accuracy": 0.9996528029441833, "eval_loss": 0.0019141812808811665, "eval_runtime": 2002.6757, "eval_samples_per_second": 17.257, "eval_steps_per_second": 4.314, "step": 13300 }, { "epoch": 3.1, "learning_rate": 3.7962962962962964e-05, "loss": 0.008, "step": 13400 }, { "epoch": 3.1, "eval_accuracy": 0.9997395873069763, "eval_loss": 0.00135290517937392, "eval_runtime": 2020.5372, "eval_samples_per_second": 17.104, "eval_steps_per_second": 4.276, "step": 13400 }, { "epoch": 3.12, "learning_rate": 3.7500000000000003e-05, "loss": 0.0411, "step": 13500 }, { "epoch": 3.12, "eval_accuracy": 0.9997974634170532, "eval_loss": 0.000736766669433564, "eval_runtime": 2162.1556, "eval_samples_per_second": 15.984, "eval_steps_per_second": 3.996, "step": 13500 }, { "epoch": 3.15, "learning_rate": 3.7037037037037037e-05, "loss": 0.0262, "step": 13600 }, { "epoch": 3.15, "eval_accuracy": 0.9998553395271301, "eval_loss": 0.000846204929985106, "eval_runtime": 2119.0303, "eval_samples_per_second": 16.309, "eval_steps_per_second": 4.077, "step": 13600 }, { "epoch": 3.17, "learning_rate": 3.6574074074074076e-05, "loss": 0.0198, "step": 13700 }, { "epoch": 3.17, "eval_accuracy": 0.9997106194496155, "eval_loss": 0.0010991438757628202, "eval_runtime": 2095.0628, "eval_samples_per_second": 16.496, "eval_steps_per_second": 4.124, "step": 13700 }, { "epoch": 3.19, "learning_rate": 3.611111111111111e-05, "loss": 0.0178, "step": 13800 }, { "epoch": 3.19, "eval_accuracy": 0.999913215637207, "eval_loss": 0.00029710811213590205, "eval_runtime": 2130.6792, "eval_samples_per_second": 16.22, "eval_steps_per_second": 4.055, "step": 13800 }, { "epoch": 3.22, "learning_rate": 3.564814814814815e-05, "loss": 0.0072, "step": 13900 }, { "epoch": 3.22, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.00018699387146625668, "eval_runtime": 2082.0917, "eval_samples_per_second": 16.599, "eval_steps_per_second": 4.15, "step": 13900 }, { "epoch": 3.24, "learning_rate": 3.518518518518519e-05, "loss": 0.0004, "step": 14000 }, { "epoch": 3.24, "eval_accuracy": 0.9998263716697693, "eval_loss": 0.0013777822023257613, "eval_runtime": 2053.664, "eval_samples_per_second": 16.828, "eval_steps_per_second": 4.207, "step": 14000 }, { "epoch": 3.26, "learning_rate": 3.472222222222222e-05, "loss": 0.0191, "step": 14100 }, { "epoch": 3.26, "eval_accuracy": 0.9999421238899231, "eval_loss": 0.0004184871504548937, "eval_runtime": 2048.7946, "eval_samples_per_second": 16.868, "eval_steps_per_second": 4.217, "step": 14100 }, { "epoch": 3.29, "learning_rate": 3.425925925925926e-05, "loss": 0.007, "step": 14200 }, { "epoch": 3.29, "eval_accuracy": 0.999913215637207, "eval_loss": 0.0004062869702465832, "eval_runtime": 2055.664, "eval_samples_per_second": 16.812, "eval_steps_per_second": 4.203, "step": 14200 }, { "epoch": 3.31, "learning_rate": 3.3796296296296295e-05, "loss": 0.0108, "step": 14300 }, { "epoch": 3.31, "eval_accuracy": 0.999913215637207, "eval_loss": 0.00011388419807190076, "eval_runtime": 2043.6545, "eval_samples_per_second": 16.911, "eval_steps_per_second": 4.228, "step": 14300 }, { "epoch": 3.33, "learning_rate": 3.3333333333333335e-05, "loss": 0.0, "step": 14400 }, { "epoch": 3.33, "eval_accuracy": 0.9999710917472839, "eval_loss": 7.532363088103011e-05, "eval_runtime": 2040.9204, "eval_samples_per_second": 16.934, "eval_steps_per_second": 4.233, "step": 14400 }, { "epoch": 3.36, "learning_rate": 3.2870370370370375e-05, "loss": 0.0006, "step": 14500 }, { "epoch": 3.36, "eval_accuracy": 0.9999421238899231, "eval_loss": 0.0003408396732993424, "eval_runtime": 2057.5647, "eval_samples_per_second": 16.797, "eval_steps_per_second": 4.199, "step": 14500 }, { "epoch": 3.38, "learning_rate": 3.240740740740741e-05, "loss": 0.0085, "step": 14600 }, { "epoch": 3.38, "eval_accuracy": 0.9992766380310059, "eval_loss": 0.0034337618853896856, "eval_runtime": 2027.249, "eval_samples_per_second": 17.048, "eval_steps_per_second": 4.262, "step": 14600 }, { "epoch": 3.4, "learning_rate": 3.194444444444444e-05, "loss": 0.0002, "step": 14700 }, { "epoch": 3.4, "eval_accuracy": 0.999913215637207, "eval_loss": 0.0006225552642717957, "eval_runtime": 2004.1478, "eval_samples_per_second": 17.244, "eval_steps_per_second": 4.311, "step": 14700 }, { "epoch": 3.43, "learning_rate": 3.148148148148148e-05, "loss": 0.0181, "step": 14800 }, { "epoch": 3.43, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.000251033779932186, "eval_runtime": 2016.4131, "eval_samples_per_second": 17.139, "eval_steps_per_second": 4.285, "step": 14800 }, { "epoch": 3.45, "learning_rate": 3.101851851851852e-05, "loss": 0.0021, "step": 14900 }, { "epoch": 3.45, "eval_accuracy": 0.999913215637207, "eval_loss": 0.00040141510544344783, "eval_runtime": 2000.3942, "eval_samples_per_second": 17.277, "eval_steps_per_second": 4.319, "step": 14900 }, { "epoch": 3.47, "learning_rate": 3.055555555555556e-05, "loss": 0.0069, "step": 15000 }, { "epoch": 3.47, "eval_accuracy": 0.9998842477798462, "eval_loss": 0.0006463331519626081, "eval_runtime": 2015.2783, "eval_samples_per_second": 17.149, "eval_steps_per_second": 4.287, "step": 15000 }, { "epoch": 3.5, "learning_rate": 3.0092592592592593e-05, "loss": 0.0156, "step": 15100 }, { "epoch": 3.5, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.0001428252726327628, "eval_runtime": 1995.7618, "eval_samples_per_second": 17.317, "eval_steps_per_second": 4.329, "step": 15100 }, { "epoch": 3.52, "learning_rate": 2.962962962962963e-05, "loss": 0.0042, "step": 15200 }, { "epoch": 3.52, "eval_accuracy": 0.9997974634170532, "eval_loss": 0.000510143639985472, "eval_runtime": 2000.972, "eval_samples_per_second": 17.272, "eval_steps_per_second": 4.318, "step": 15200 }, { "epoch": 3.54, "learning_rate": 2.916666666666667e-05, "loss": 0.0233, "step": 15300 }, { "epoch": 3.54, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.00019888828683178872, "eval_runtime": 2002.1598, "eval_samples_per_second": 17.261, "eval_steps_per_second": 4.315, "step": 15300 }, { "epoch": 3.56, "learning_rate": 2.8703703703703706e-05, "loss": 0.003, "step": 15400 }, { "epoch": 3.56, "eval_accuracy": 0.9997974634170532, "eval_loss": 0.0006905001355335116, "eval_runtime": 2000.419, "eval_samples_per_second": 17.276, "eval_steps_per_second": 4.319, "step": 15400 }, { "epoch": 3.59, "learning_rate": 2.824074074074074e-05, "loss": 0.0149, "step": 15500 }, { "epoch": 3.59, "eval_accuracy": 0.9998553395271301, "eval_loss": 0.000585312838666141, "eval_runtime": 1997.3791, "eval_samples_per_second": 17.303, "eval_steps_per_second": 4.326, "step": 15500 }, { "epoch": 3.61, "learning_rate": 2.777777777777778e-05, "loss": 0.0072, "step": 15600 }, { "epoch": 3.61, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.000229826764552854, "eval_runtime": 2001.2597, "eval_samples_per_second": 17.269, "eval_steps_per_second": 4.317, "step": 15600 }, { "epoch": 3.63, "learning_rate": 2.7314814814814816e-05, "loss": 0.0004, "step": 15700 }, { "epoch": 3.63, "eval_accuracy": 0.9999710917472839, "eval_loss": 5.024338679504581e-05, "eval_runtime": 2013.5805, "eval_samples_per_second": 17.163, "eval_steps_per_second": 4.291, "step": 15700 }, { "epoch": 3.66, "learning_rate": 2.6851851851851855e-05, "loss": 0.0001, "step": 15800 }, { "epoch": 3.66, "eval_accuracy": 0.999913215637207, "eval_loss": 0.00017916383512783796, "eval_runtime": 1994.345, "eval_samples_per_second": 17.329, "eval_steps_per_second": 4.332, "step": 15800 }, { "epoch": 3.68, "learning_rate": 2.6388888888888892e-05, "loss": 0.0186, "step": 15900 }, { "epoch": 3.68, "eval_accuracy": 1.0, "eval_loss": 9.207503353536595e-06, "eval_runtime": 2056.2161, "eval_samples_per_second": 16.808, "eval_steps_per_second": 4.202, "step": 15900 }, { "epoch": 3.7, "learning_rate": 2.5925925925925925e-05, "loss": 0.0115, "step": 16000 }, { "epoch": 3.7, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.00022165325935930014, "eval_runtime": 2044.6907, "eval_samples_per_second": 16.902, "eval_steps_per_second": 4.226, "step": 16000 }, { "epoch": 3.73, "learning_rate": 2.5462962962962965e-05, "loss": 0.0011, "step": 16100 }, { "epoch": 3.73, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.00027788631268776953, "eval_runtime": 2046.6409, "eval_samples_per_second": 16.886, "eval_steps_per_second": 4.222, "step": 16100 }, { "epoch": 3.75, "learning_rate": 2.5e-05, "loss": 0.0048, "step": 16200 }, { "epoch": 3.75, "eval_accuracy": 0.9999710917472839, "eval_loss": 5.909843821427785e-05, "eval_runtime": 2008.3137, "eval_samples_per_second": 17.208, "eval_steps_per_second": 4.302, "step": 16200 }, { "epoch": 3.77, "learning_rate": 2.4537037037037038e-05, "loss": 0.0042, "step": 16300 }, { "epoch": 3.77, "eval_accuracy": 1.0, "eval_loss": 6.828932328062365e-06, "eval_runtime": 2129.8226, "eval_samples_per_second": 16.227, "eval_steps_per_second": 4.057, "step": 16300 }, { "epoch": 3.8, "learning_rate": 2.4074074074074074e-05, "loss": 0.0024, "step": 16400 }, { "epoch": 3.8, "eval_accuracy": 1.0, "eval_loss": 8.2383139670128e-06, "eval_runtime": 2113.6583, "eval_samples_per_second": 16.351, "eval_steps_per_second": 4.088, "step": 16400 }, { "epoch": 3.82, "learning_rate": 2.361111111111111e-05, "loss": 0.0, "step": 16500 }, { "epoch": 3.82, "eval_accuracy": 1.0, "eval_loss": 5.800426606583642e-06, "eval_runtime": 2122.3997, "eval_samples_per_second": 16.283, "eval_steps_per_second": 4.071, "step": 16500 }, { "epoch": 3.84, "learning_rate": 2.314814814814815e-05, "loss": 0.0003, "step": 16600 }, { "epoch": 3.84, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.00010272156214341521, "eval_runtime": 2128.9658, "eval_samples_per_second": 16.233, "eval_steps_per_second": 4.058, "step": 16600 }, { "epoch": 3.87, "learning_rate": 2.2685185185185187e-05, "loss": 0.0, "step": 16700 }, { "epoch": 3.87, "eval_accuracy": 0.9999710917472839, "eval_loss": 7.889495464041829e-05, "eval_runtime": 2147.2327, "eval_samples_per_second": 16.095, "eval_steps_per_second": 4.024, "step": 16700 }, { "epoch": 3.89, "learning_rate": 2.2222222222222223e-05, "loss": 0.0, "step": 16800 }, { "epoch": 3.89, "eval_accuracy": 0.9999710917472839, "eval_loss": 7.938377530081198e-05, "eval_runtime": 2139.4855, "eval_samples_per_second": 16.153, "eval_steps_per_second": 4.038, "step": 16800 }, { "epoch": 3.91, "learning_rate": 2.175925925925926e-05, "loss": 0.0029, "step": 16900 }, { "epoch": 3.91, "eval_accuracy": 0.9998842477798462, "eval_loss": 0.0005274215945973992, "eval_runtime": 2142.1862, "eval_samples_per_second": 16.133, "eval_steps_per_second": 4.033, "step": 16900 }, { "epoch": 3.94, "learning_rate": 2.1296296296296296e-05, "loss": 0.0066, "step": 17000 }, { "epoch": 3.94, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.00019657429947983474, "eval_runtime": 2149.2032, "eval_samples_per_second": 16.08, "eval_steps_per_second": 4.02, "step": 17000 }, { "epoch": 3.96, "learning_rate": 2.0833333333333336e-05, "loss": 0.0079, "step": 17100 }, { "epoch": 3.96, "eval_accuracy": 0.9999710917472839, "eval_loss": 5.7856173953041434e-05, "eval_runtime": 2135.9752, "eval_samples_per_second": 16.18, "eval_steps_per_second": 4.045, "step": 17100 }, { "epoch": 3.98, "learning_rate": 2.037037037037037e-05, "loss": 0.0091, "step": 17200 }, { "epoch": 3.98, "eval_accuracy": 0.9999421238899231, "eval_loss": 0.00015575718134641647, "eval_runtime": 2158.5953, "eval_samples_per_second": 16.01, "eval_steps_per_second": 4.003, "step": 17200 }, { "epoch": 4.0, "learning_rate": 1.990740740740741e-05, "loss": 0.0951, "step": 17300 }, { "epoch": 4.0, "eval_accuracy": 0.9999710917472839, "eval_loss": 6.823511648690328e-05, "eval_runtime": 2106.2766, "eval_samples_per_second": 16.408, "eval_steps_per_second": 4.102, "step": 17300 }, { "epoch": 4.03, "learning_rate": 1.9444444444444445e-05, "loss": 0.0578, "step": 17400 }, { "epoch": 4.03, "eval_accuracy": 0.9999421238899231, "eval_loss": 0.00031872568069957197, "eval_runtime": 2091.5056, "eval_samples_per_second": 16.524, "eval_steps_per_second": 4.131, "step": 17400 }, { "epoch": 4.05, "learning_rate": 1.8981481481481482e-05, "loss": 0.0171, "step": 17500 }, { "epoch": 4.05, "eval_accuracy": 0.9999421238899231, "eval_loss": 0.0003504869237076491, "eval_runtime": 2076.2302, "eval_samples_per_second": 16.646, "eval_steps_per_second": 4.161, "step": 17500 }, { "epoch": 4.07, "learning_rate": 1.8518518518518518e-05, "loss": 0.0305, "step": 17600 }, { "epoch": 4.07, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.00012279333896003664, "eval_runtime": 2072.7643, "eval_samples_per_second": 16.673, "eval_steps_per_second": 4.168, "step": 17600 }, { "epoch": 4.1, "learning_rate": 1.8055555555555555e-05, "loss": 0.0449, "step": 17700 }, { "epoch": 4.1, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.00021972648391965777, "eval_runtime": 2090.628, "eval_samples_per_second": 16.531, "eval_steps_per_second": 4.133, "step": 17700 }, { "epoch": 4.12, "learning_rate": 1.7592592592592595e-05, "loss": 0.0161, "step": 17800 }, { "epoch": 4.12, "eval_accuracy": 1.0, "eval_loss": 2.7198611860512756e-05, "eval_runtime": 2085.7289, "eval_samples_per_second": 16.57, "eval_steps_per_second": 4.142, "step": 17800 }, { "epoch": 4.14, "learning_rate": 1.712962962962963e-05, "loss": 0.0322, "step": 17900 }, { "epoch": 4.14, "eval_accuracy": 1.0, "eval_loss": 2.2180371161084622e-05, "eval_runtime": 2061.6769, "eval_samples_per_second": 16.763, "eval_steps_per_second": 4.191, "step": 17900 }, { "epoch": 4.17, "learning_rate": 1.6666666666666667e-05, "loss": 0.0358, "step": 18000 }, { "epoch": 4.17, "eval_accuracy": 0.9999710917472839, "eval_loss": 0.00010751090303529054, "eval_runtime": 2107.1409, "eval_samples_per_second": 16.401, "eval_steps_per_second": 4.1, "step": 18000 }, { "epoch": 4.19, "learning_rate": 1.6203703703703704e-05, "loss": 0.0264, "step": 18100 }, { "epoch": 4.19, "eval_accuracy": 1.0, "eval_loss": 6.194192792463582e-06, "eval_runtime": 2091.7086, "eval_samples_per_second": 16.522, "eval_steps_per_second": 4.131, "step": 18100 }, { "epoch": 4.21, "learning_rate": 1.574074074074074e-05, "loss": 0.0199, "step": 18200 }, { "epoch": 4.21, "eval_accuracy": 1.0, "eval_loss": 6.114233656262513e-06, "eval_runtime": 2093.6259, "eval_samples_per_second": 16.507, "eval_steps_per_second": 4.127, "step": 18200 }, { "epoch": 4.24, "learning_rate": 1.527777777777778e-05, "loss": 0.0266, "step": 18300 }, { "epoch": 4.24, "eval_accuracy": 1.0, "eval_loss": 6.532317456731107e-06, "eval_runtime": 2103.3039, "eval_samples_per_second": 16.431, "eval_steps_per_second": 4.108, "step": 18300 }, { "epoch": 4.26, "learning_rate": 1.4814814814814815e-05, "loss": 0.0162, "step": 18400 }, { "epoch": 4.26, "eval_accuracy": 1.0, "eval_loss": 6.056379334040685e-06, "eval_runtime": 2141.6719, "eval_samples_per_second": 16.137, "eval_steps_per_second": 4.034, "step": 18400 }, { "epoch": 4.28, "learning_rate": 1.4351851851851853e-05, "loss": 0.0142, "step": 18500 }, { "epoch": 4.28, "eval_accuracy": 1.0, "eval_loss": 6.732083420502022e-06, "eval_runtime": 2137.4831, "eval_samples_per_second": 16.169, "eval_steps_per_second": 4.042, "step": 18500 }, { "epoch": 4.31, "learning_rate": 1.388888888888889e-05, "loss": 0.0353, "step": 18600 }, { "epoch": 4.31, "eval_accuracy": 1.0, "eval_loss": 5.884473466721829e-06, "eval_runtime": 2111.534, "eval_samples_per_second": 16.367, "eval_steps_per_second": 4.092, "step": 18600 }, { "epoch": 4.33, "learning_rate": 1.3425925925925928e-05, "loss": 0.0435, "step": 18700 }, { "epoch": 4.33, "eval_accuracy": 1.0, "eval_loss": 6.438468062697211e-06, "eval_runtime": 2127.2273, "eval_samples_per_second": 16.247, "eval_steps_per_second": 4.062, "step": 18700 }, { "epoch": 4.35, "learning_rate": 1.2962962962962962e-05, "loss": 0.0067, "step": 18800 }, { "epoch": 4.35, "eval_accuracy": 1.0, "eval_loss": 9.256172234017868e-06, "eval_runtime": 2183.0463, "eval_samples_per_second": 15.831, "eval_steps_per_second": 3.958, "step": 18800 }, { "epoch": 4.38, "learning_rate": 1.25e-05, "loss": 0.0299, "step": 18900 }, { "epoch": 4.38, "eval_accuracy": 1.0, "eval_loss": 6.904490419401554e-06, "eval_runtime": 2110.4592, "eval_samples_per_second": 16.376, "eval_steps_per_second": 4.094, "step": 18900 }, { "epoch": 4.4, "learning_rate": 1.2037037037037037e-05, "loss": 0.0063, "step": 19000 }, { "epoch": 4.4, "eval_accuracy": 1.0, "eval_loss": 6.991323061811272e-06, "eval_runtime": 2074.4391, "eval_samples_per_second": 16.66, "eval_steps_per_second": 4.165, "step": 19000 }, { "epoch": 4.42, "learning_rate": 1.1574074074074075e-05, "loss": 0.0117, "step": 19100 }, { "epoch": 4.42, "eval_accuracy": 1.0, "eval_loss": 5.223146672506118e-06, "eval_runtime": 2093.0232, "eval_samples_per_second": 16.512, "eval_steps_per_second": 4.128, "step": 19100 }, { "epoch": 4.44, "learning_rate": 1.1111111111111112e-05, "loss": 0.0107, "step": 19200 }, { "epoch": 4.44, "eval_accuracy": 1.0, "eval_loss": 7.764682777633425e-06, "eval_runtime": 2092.3489, "eval_samples_per_second": 16.517, "eval_steps_per_second": 4.129, "step": 19200 }, { "epoch": 4.47, "learning_rate": 1.0648148148148148e-05, "loss": 0.0162, "step": 19300 }, { "epoch": 4.47, "eval_accuracy": 1.0, "eval_loss": 5.700497240468394e-06, "eval_runtime": 2095.1985, "eval_samples_per_second": 16.495, "eval_steps_per_second": 4.124, "step": 19300 }, { "epoch": 4.49, "learning_rate": 1.0185185185185185e-05, "loss": 0.0138, "step": 19400 }, { "epoch": 4.49, "eval_accuracy": 1.0, "eval_loss": 5.209324172028573e-06, "eval_runtime": 2073.7497, "eval_samples_per_second": 16.665, "eval_steps_per_second": 4.166, "step": 19400 }, { "epoch": 4.51, "learning_rate": 9.722222222222223e-06, "loss": 0.0124, "step": 19500 }, { "epoch": 4.51, "eval_accuracy": 1.0, "eval_loss": 5.243016858003102e-06, "eval_runtime": 2056.3515, "eval_samples_per_second": 16.806, "eval_steps_per_second": 4.202, "step": 19500 }, { "epoch": 4.54, "learning_rate": 9.259259259259259e-06, "loss": 0.0083, "step": 19600 }, { "epoch": 4.54, "eval_accuracy": 1.0, "eval_loss": 5.0634776016522665e-06, "eval_runtime": 2077.8389, "eval_samples_per_second": 16.633, "eval_steps_per_second": 4.158, "step": 19600 }, { "epoch": 4.56, "learning_rate": 8.796296296296297e-06, "loss": 0.0066, "step": 19700 }, { "epoch": 4.56, "eval_accuracy": 1.0, "eval_loss": 4.925776011077687e-06, "eval_runtime": 2073.5316, "eval_samples_per_second": 16.667, "eval_steps_per_second": 4.167, "step": 19700 }, { "epoch": 4.58, "learning_rate": 8.333333333333334e-06, "loss": 0.0058, "step": 19800 }, { "epoch": 4.58, "eval_accuracy": 1.0, "eval_loss": 4.750945663545281e-06, "eval_runtime": 2057.702, "eval_samples_per_second": 16.795, "eval_steps_per_second": 4.199, "step": 19800 }, { "epoch": 4.61, "learning_rate": 7.87037037037037e-06, "loss": 0.0032, "step": 19900 }, { "epoch": 4.61, "eval_accuracy": 1.0, "eval_loss": 6.96109145792434e-06, "eval_runtime": 2071.5479, "eval_samples_per_second": 16.683, "eval_steps_per_second": 4.171, "step": 19900 }, { "epoch": 4.63, "learning_rate": 7.4074074074074075e-06, "loss": 0.0205, "step": 20000 }, { "epoch": 4.63, "eval_accuracy": 1.0, "eval_loss": 4.608726612786995e-06, "eval_runtime": 2066.372, "eval_samples_per_second": 16.725, "eval_steps_per_second": 4.181, "step": 20000 }, { "epoch": 4.65, "learning_rate": 6.944444444444445e-06, "loss": 0.0094, "step": 20100 }, { "epoch": 4.65, "eval_accuracy": 1.0, "eval_loss": 4.8284973672707565e-06, "eval_runtime": 2054.9166, "eval_samples_per_second": 16.818, "eval_steps_per_second": 4.205, "step": 20100 }, { "epoch": 4.68, "learning_rate": 6.481481481481481e-06, "loss": 0.003, "step": 20200 }, { "epoch": 4.68, "eval_accuracy": 1.0, "eval_loss": 4.495966550166486e-06, "eval_runtime": 2072.6571, "eval_samples_per_second": 16.674, "eval_steps_per_second": 4.169, "step": 20200 }, { "epoch": 4.7, "learning_rate": 6.0185185185185185e-06, "loss": 0.0035, "step": 20300 }, { "epoch": 4.7, "eval_accuracy": 1.0, "eval_loss": 5.835635420226026e-06, "eval_runtime": 2047.8141, "eval_samples_per_second": 16.877, "eval_steps_per_second": 4.219, "step": 20300 }, { "epoch": 4.72, "learning_rate": 5.555555555555556e-06, "loss": 0.0257, "step": 20400 }, { "epoch": 4.72, "eval_accuracy": 1.0, "eval_loss": 5.829508609167533e-06, "eval_runtime": 2091.8646, "eval_samples_per_second": 16.521, "eval_steps_per_second": 4.13, "step": 20400 }, { "epoch": 4.75, "learning_rate": 5.092592592592592e-06, "loss": 0.0019, "step": 20500 }, { "epoch": 4.75, "eval_accuracy": 1.0, "eval_loss": 6.3429124566027895e-06, "eval_runtime": 2040.9379, "eval_samples_per_second": 16.933, "eval_steps_per_second": 4.233, "step": 20500 }, { "epoch": 4.77, "learning_rate": 4.6296296296296296e-06, "loss": 0.0023, "step": 20600 }, { "epoch": 4.77, "eval_accuracy": 1.0, "eval_loss": 8.131992217386141e-06, "eval_runtime": 2048.3614, "eval_samples_per_second": 16.872, "eval_steps_per_second": 4.218, "step": 20600 }, { "epoch": 4.79, "learning_rate": 4.166666666666667e-06, "loss": 0.0062, "step": 20700 }, { "epoch": 4.79, "eval_accuracy": 1.0, "eval_loss": 8.594151950092055e-06, "eval_runtime": 2094.5382, "eval_samples_per_second": 16.5, "eval_steps_per_second": 4.125, "step": 20700 }, { "epoch": 4.81, "learning_rate": 3.7037037037037037e-06, "loss": 0.0039, "step": 20800 }, { "epoch": 4.81, "eval_accuracy": 1.0, "eval_loss": 7.4294948717579246e-06, "eval_runtime": 2104.354, "eval_samples_per_second": 16.423, "eval_steps_per_second": 4.106, "step": 20800 }, { "epoch": 4.84, "learning_rate": 3.2407407407407406e-06, "loss": 0.0144, "step": 20900 }, { "epoch": 4.84, "eval_accuracy": 1.0, "eval_loss": 6.862039299448952e-06, "eval_runtime": 2101.0817, "eval_samples_per_second": 16.449, "eval_steps_per_second": 4.112, "step": 20900 }, { "epoch": 4.86, "learning_rate": 2.777777777777778e-06, "loss": 0.0109, "step": 21000 }, { "epoch": 4.86, "eval_accuracy": 1.0, "eval_loss": 6.136932825029362e-06, "eval_runtime": 2119.5964, "eval_samples_per_second": 16.305, "eval_steps_per_second": 4.076, "step": 21000 }, { "epoch": 4.88, "learning_rate": 2.3148148148148148e-06, "loss": 0.0148, "step": 21100 }, { "epoch": 4.88, "eval_accuracy": 1.0, "eval_loss": 6.497817139461404e-06, "eval_runtime": 2115.6009, "eval_samples_per_second": 16.336, "eval_steps_per_second": 4.084, "step": 21100 }, { "epoch": 4.91, "learning_rate": 1.8518518518518519e-06, "loss": 0.0308, "step": 21200 }, { "epoch": 4.91, "eval_accuracy": 1.0, "eval_loss": 7.753816134936642e-06, "eval_runtime": 2118.9207, "eval_samples_per_second": 16.31, "eval_steps_per_second": 4.078, "step": 21200 }, { "epoch": 4.93, "learning_rate": 1.388888888888889e-06, "loss": 0.0023, "step": 21300 }, { "epoch": 4.93, "eval_accuracy": 1.0, "eval_loss": 7.5415960054669995e-06, "eval_runtime": 2120.9953, "eval_samples_per_second": 16.294, "eval_steps_per_second": 4.074, "step": 21300 }, { "epoch": 4.95, "learning_rate": 9.259259259259259e-07, "loss": 0.0243, "step": 21400 }, { "epoch": 4.95, "eval_accuracy": 1.0, "eval_loss": 7.68591053201817e-06, "eval_runtime": 2120.6941, "eval_samples_per_second": 16.297, "eval_steps_per_second": 4.074, "step": 21400 }, { "epoch": 4.98, "learning_rate": 4.6296296296296297e-07, "loss": 0.0031, "step": 21500 }, { "epoch": 4.98, "eval_accuracy": 1.0, "eval_loss": 7.5350230872572865e-06, "eval_runtime": 2105.7948, "eval_samples_per_second": 16.412, "eval_steps_per_second": 4.103, "step": 21500 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 0.0272, "step": 21600 }, { "epoch": 5.0, "eval_accuracy": 1.0, "eval_loss": 7.493398243241245e-06, "eval_runtime": 2100.1734, "eval_samples_per_second": 16.456, "eval_steps_per_second": 4.114, "step": 21600 }, { "epoch": 5.0, "step": 21600, "total_flos": 2.295560541703184e+19, "train_loss": 0.003923701412147946, "train_runtime": 97975.3145, "train_samples_per_second": 1.764, "train_steps_per_second": 0.22 } ], "max_steps": 21600, "num_train_epochs": 5, "total_flos": 2.295560541703184e+19, "trial_name": null, "trial_params": null }