{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 21600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.953703703703704e-05, |
|
"loss": 1.1352, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.7482928037643433, |
|
"eval_loss": 0.6091228127479553, |
|
"eval_runtime": 1960.8351, |
|
"eval_samples_per_second": 17.625, |
|
"eval_steps_per_second": 4.406, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.907407407407407e-05, |
|
"loss": 0.5497, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.9043981432914734, |
|
"eval_loss": 0.2794453501701355, |
|
"eval_runtime": 2070.7116, |
|
"eval_samples_per_second": 16.69, |
|
"eval_steps_per_second": 4.172, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.861111111111112e-05, |
|
"loss": 0.4001, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.9633391499519348, |
|
"eval_loss": 0.1039256900548935, |
|
"eval_runtime": 2051.9615, |
|
"eval_samples_per_second": 16.842, |
|
"eval_steps_per_second": 4.211, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.814814814814815e-05, |
|
"loss": 0.2967, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.9760127067565918, |
|
"eval_loss": 0.0726834237575531, |
|
"eval_runtime": 1976.3465, |
|
"eval_samples_per_second": 17.487, |
|
"eval_steps_per_second": 4.372, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.768518518518519e-05, |
|
"loss": 0.2572, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.9752025604248047, |
|
"eval_loss": 0.07836401462554932, |
|
"eval_runtime": 1953.8972, |
|
"eval_samples_per_second": 17.688, |
|
"eval_steps_per_second": 4.422, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.722222222222223e-05, |
|
"loss": 0.1858, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.987442135810852, |
|
"eval_loss": 0.03908771649003029, |
|
"eval_runtime": 1933.1285, |
|
"eval_samples_per_second": 17.878, |
|
"eval_steps_per_second": 4.469, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.675925925925926e-05, |
|
"loss": 0.1776, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.9870080947875977, |
|
"eval_loss": 0.046012409031391144, |
|
"eval_runtime": 2073.2574, |
|
"eval_samples_per_second": 16.669, |
|
"eval_steps_per_second": 4.167, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.62962962962963e-05, |
|
"loss": 0.1253, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.987442135810852, |
|
"eval_loss": 0.04302794486284256, |
|
"eval_runtime": 2100.3436, |
|
"eval_samples_per_second": 16.454, |
|
"eval_steps_per_second": 4.114, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.583333333333334e-05, |
|
"loss": 0.1509, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.9821469783782959, |
|
"eval_loss": 0.06500900536775589, |
|
"eval_runtime": 2096.1889, |
|
"eval_samples_per_second": 16.487, |
|
"eval_steps_per_second": 4.122, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.537037037037038e-05, |
|
"loss": 0.1574, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.9847221970558167, |
|
"eval_loss": 0.059933874756097794, |
|
"eval_runtime": 2109.7765, |
|
"eval_samples_per_second": 16.381, |
|
"eval_steps_per_second": 4.095, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.490740740740742e-05, |
|
"loss": 0.1506, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.9896122813224792, |
|
"eval_loss": 0.034695032984018326, |
|
"eval_runtime": 2117.6815, |
|
"eval_samples_per_second": 16.32, |
|
"eval_steps_per_second": 4.08, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.444444444444444e-05, |
|
"loss": 0.118, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.9911168813705444, |
|
"eval_loss": 0.03316599503159523, |
|
"eval_runtime": 2107.0764, |
|
"eval_samples_per_second": 16.402, |
|
"eval_steps_per_second": 4.1, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.398148148148148e-05, |
|
"loss": 0.0885, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.9947627186775208, |
|
"eval_loss": 0.019724205136299133, |
|
"eval_runtime": 2100.2147, |
|
"eval_samples_per_second": 16.455, |
|
"eval_steps_per_second": 4.114, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.351851851851852e-05, |
|
"loss": 0.0967, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.9936053156852722, |
|
"eval_loss": 0.022701723501086235, |
|
"eval_runtime": 2110.6117, |
|
"eval_samples_per_second": 16.374, |
|
"eval_steps_per_second": 4.094, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.305555555555556e-05, |
|
"loss": 0.0882, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.992274284362793, |
|
"eval_loss": 0.02855427749454975, |
|
"eval_runtime": 2115.7419, |
|
"eval_samples_per_second": 16.335, |
|
"eval_steps_per_second": 4.084, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.25925925925926e-05, |
|
"loss": 0.1056, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.9962384104728699, |
|
"eval_loss": 0.015638431534171104, |
|
"eval_runtime": 2093.1202, |
|
"eval_samples_per_second": 16.511, |
|
"eval_steps_per_second": 4.128, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.212962962962963e-05, |
|
"loss": 0.1124, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.9942708611488342, |
|
"eval_loss": 0.023519381880760193, |
|
"eval_runtime": 2715.3988, |
|
"eval_samples_per_second": 12.727, |
|
"eval_steps_per_second": 3.182, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.166666666666667e-05, |
|
"loss": 0.0813, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.995341420173645, |
|
"eval_loss": 0.017750833183526993, |
|
"eval_runtime": 2099.7025, |
|
"eval_samples_per_second": 16.459, |
|
"eval_steps_per_second": 4.115, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.120370370370371e-05, |
|
"loss": 0.0609, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.9971932768821716, |
|
"eval_loss": 0.011351389810442924, |
|
"eval_runtime": 2142.3716, |
|
"eval_samples_per_second": 16.132, |
|
"eval_steps_per_second": 4.033, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.074074074074075e-05, |
|
"loss": 0.0891, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.9973379373550415, |
|
"eval_loss": 0.012310467660427094, |
|
"eval_runtime": 2095.1245, |
|
"eval_samples_per_second": 16.495, |
|
"eval_steps_per_second": 4.124, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.027777777777779e-05, |
|
"loss": 0.0424, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.9985821843147278, |
|
"eval_loss": 0.00660862447693944, |
|
"eval_runtime": 2101.8862, |
|
"eval_samples_per_second": 16.442, |
|
"eval_steps_per_second": 4.111, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.981481481481481e-05, |
|
"loss": 0.0546, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.9950520992279053, |
|
"eval_loss": 0.021980540826916695, |
|
"eval_runtime": 2121.7281, |
|
"eval_samples_per_second": 16.289, |
|
"eval_steps_per_second": 4.072, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.935185185185185e-05, |
|
"loss": 0.146, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.9940393567085266, |
|
"eval_loss": 0.02473669871687889, |
|
"eval_runtime": 2062.8449, |
|
"eval_samples_per_second": 16.754, |
|
"eval_steps_per_second": 4.188, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 0.1174, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.9958622455596924, |
|
"eval_loss": 0.01570066250860691, |
|
"eval_runtime": 2017.5974, |
|
"eval_samples_per_second": 17.129, |
|
"eval_steps_per_second": 4.282, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.842592592592593e-05, |
|
"loss": 0.0848, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.9978298544883728, |
|
"eval_loss": 0.008064490742981434, |
|
"eval_runtime": 2005.1771, |
|
"eval_samples_per_second": 17.235, |
|
"eval_steps_per_second": 4.309, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.796296296296297e-05, |
|
"loss": 0.0792, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.9986110925674438, |
|
"eval_loss": 0.004222337622195482, |
|
"eval_runtime": 1999.0902, |
|
"eval_samples_per_second": 17.288, |
|
"eval_steps_per_second": 4.322, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 8.75e-05, |
|
"loss": 0.0482, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.9971354007720947, |
|
"eval_loss": 0.01219157688319683, |
|
"eval_runtime": 2001.1288, |
|
"eval_samples_per_second": 17.27, |
|
"eval_steps_per_second": 4.318, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.703703703703704e-05, |
|
"loss": 0.0697, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.9931133985519409, |
|
"eval_loss": 0.027987554669380188, |
|
"eval_runtime": 1997.1851, |
|
"eval_samples_per_second": 17.304, |
|
"eval_steps_per_second": 4.326, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 8.657407407407408e-05, |
|
"loss": 0.106, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9977430701255798, |
|
"eval_loss": 0.008220946416258812, |
|
"eval_runtime": 2609.175, |
|
"eval_samples_per_second": 13.246, |
|
"eval_steps_per_second": 3.311, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.611111111111112e-05, |
|
"loss": 0.052, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.9971932768821716, |
|
"eval_loss": 0.01051583793014288, |
|
"eval_runtime": 1971.9035, |
|
"eval_samples_per_second": 17.526, |
|
"eval_steps_per_second": 4.382, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.564814814814816e-05, |
|
"loss": 0.047, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.9978588223457336, |
|
"eval_loss": 0.009094738401472569, |
|
"eval_runtime": 1980.5023, |
|
"eval_samples_per_second": 17.45, |
|
"eval_steps_per_second": 4.363, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.518518518518518e-05, |
|
"loss": 0.0495, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.998466432094574, |
|
"eval_loss": 0.006100042257457972, |
|
"eval_runtime": 2070.7502, |
|
"eval_samples_per_second": 16.69, |
|
"eval_steps_per_second": 4.172, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.472222222222222e-05, |
|
"loss": 0.0979, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.9978588223457336, |
|
"eval_loss": 0.009109850972890854, |
|
"eval_runtime": 1970.0999, |
|
"eval_samples_per_second": 17.542, |
|
"eval_steps_per_second": 4.386, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.425925925925926e-05, |
|
"loss": 0.0381, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.9951099753379822, |
|
"eval_loss": 0.021163903176784515, |
|
"eval_runtime": 1977.6331, |
|
"eval_samples_per_second": 17.475, |
|
"eval_steps_per_second": 4.369, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.379629629629629e-05, |
|
"loss": 0.0268, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.9980034828186035, |
|
"eval_loss": 0.008532223291695118, |
|
"eval_runtime": 1971.1529, |
|
"eval_samples_per_second": 17.533, |
|
"eval_steps_per_second": 4.383, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.073, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.9961516261100769, |
|
"eval_loss": 0.017610933631658554, |
|
"eval_runtime": 1957.2329, |
|
"eval_samples_per_second": 17.658, |
|
"eval_steps_per_second": 4.414, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.287037037037037e-05, |
|
"loss": 0.0585, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.9971354007720947, |
|
"eval_loss": 0.011580849066376686, |
|
"eval_runtime": 1962.4064, |
|
"eval_samples_per_second": 17.611, |
|
"eval_steps_per_second": 4.403, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.240740740740741e-05, |
|
"loss": 0.0868, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.9994502067565918, |
|
"eval_loss": 0.00212017516605556, |
|
"eval_runtime": 1982.4259, |
|
"eval_samples_per_second": 17.433, |
|
"eval_steps_per_second": 4.358, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.194444444444445e-05, |
|
"loss": 0.0496, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.9978877305984497, |
|
"eval_loss": 0.008284298703074455, |
|
"eval_runtime": 1983.4898, |
|
"eval_samples_per_second": 17.424, |
|
"eval_steps_per_second": 4.356, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.148148148148148e-05, |
|
"loss": 0.0641, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.9967592358589172, |
|
"eval_loss": 0.013520145788788795, |
|
"eval_runtime": 1998.2946, |
|
"eval_samples_per_second": 17.295, |
|
"eval_steps_per_second": 4.324, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.101851851851853e-05, |
|
"loss": 0.0858, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.9989872574806213, |
|
"eval_loss": 0.003793817013502121, |
|
"eval_runtime": 2125.8264, |
|
"eval_samples_per_second": 16.257, |
|
"eval_steps_per_second": 4.064, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.055555555555556e-05, |
|
"loss": 0.0483, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.9978588223457336, |
|
"eval_loss": 0.009265501983463764, |
|
"eval_runtime": 2260.2096, |
|
"eval_samples_per_second": 15.291, |
|
"eval_steps_per_second": 3.823, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.00925925925926e-05, |
|
"loss": 0.1115, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9990162253379822, |
|
"eval_loss": 0.003752070013433695, |
|
"eval_runtime": 1992.5237, |
|
"eval_samples_per_second": 17.345, |
|
"eval_steps_per_second": 4.336, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 7.962962962962964e-05, |
|
"loss": 0.0486, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.9991897940635681, |
|
"eval_loss": 0.0031358152627944946, |
|
"eval_runtime": 1985.6758, |
|
"eval_samples_per_second": 17.405, |
|
"eval_steps_per_second": 4.351, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 7.916666666666666e-05, |
|
"loss": 0.0166, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_accuracy": 0.9995370507240295, |
|
"eval_loss": 0.002144153229892254, |
|
"eval_runtime": 2034.5738, |
|
"eval_samples_per_second": 16.986, |
|
"eval_steps_per_second": 4.247, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 7.870370370370372e-05, |
|
"loss": 0.0084, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.9986979365348816, |
|
"eval_loss": 0.006190824322402477, |
|
"eval_runtime": 2022.3112, |
|
"eval_samples_per_second": 17.089, |
|
"eval_steps_per_second": 4.272, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 7.824074074074074e-05, |
|
"loss": 0.0205, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_accuracy": 0.999160885810852, |
|
"eval_loss": 0.0034529021941125393, |
|
"eval_runtime": 2036.3231, |
|
"eval_samples_per_second": 16.972, |
|
"eval_steps_per_second": 4.243, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 7.777777777777778e-05, |
|
"loss": 0.0217, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.9973379373550415, |
|
"eval_loss": 0.012433897703886032, |
|
"eval_runtime": 2054.4934, |
|
"eval_samples_per_second": 16.822, |
|
"eval_steps_per_second": 4.205, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 7.731481481481482e-05, |
|
"loss": 0.0407, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_accuracy": 0.9991030097007751, |
|
"eval_loss": 0.004298593383282423, |
|
"eval_runtime": 2009.8166, |
|
"eval_samples_per_second": 17.196, |
|
"eval_steps_per_second": 4.299, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 7.685185185185185e-05, |
|
"loss": 0.0598, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.9981771111488342, |
|
"eval_loss": 0.007797444239258766, |
|
"eval_runtime": 1996.948, |
|
"eval_samples_per_second": 17.306, |
|
"eval_steps_per_second": 4.327, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 7.638888888888889e-05, |
|
"loss": 0.058, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.9981192350387573, |
|
"eval_loss": 0.009161165915429592, |
|
"eval_runtime": 2007.394, |
|
"eval_samples_per_second": 17.216, |
|
"eval_steps_per_second": 4.304, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.592592592592593e-05, |
|
"loss": 0.0119, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.9994502067565918, |
|
"eval_loss": 0.0023240004666149616, |
|
"eval_runtime": 2021.6614, |
|
"eval_samples_per_second": 17.095, |
|
"eval_steps_per_second": 4.274, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.546296296296297e-05, |
|
"loss": 0.08, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_accuracy": 0.9976562261581421, |
|
"eval_loss": 0.009528687223792076, |
|
"eval_runtime": 2024.2247, |
|
"eval_samples_per_second": 17.073, |
|
"eval_steps_per_second": 4.268, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.0336, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.9995949268341064, |
|
"eval_loss": 0.0020153559744358063, |
|
"eval_runtime": 2005.7373, |
|
"eval_samples_per_second": 17.231, |
|
"eval_steps_per_second": 4.308, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.453703703703703e-05, |
|
"loss": 0.0508, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_accuracy": 0.9989004731178284, |
|
"eval_loss": 0.00367682590149343, |
|
"eval_runtime": 2024.401, |
|
"eval_samples_per_second": 17.072, |
|
"eval_steps_per_second": 4.268, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 7.407407407407407e-05, |
|
"loss": 0.0146, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.9992766380310059, |
|
"eval_loss": 0.002618621801957488, |
|
"eval_runtime": 2017.6548, |
|
"eval_samples_per_second": 17.129, |
|
"eval_steps_per_second": 4.282, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 7.361111111111111e-05, |
|
"loss": 0.038, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.9988425970077515, |
|
"eval_loss": 0.00465565687045455, |
|
"eval_runtime": 2002.1508, |
|
"eval_samples_per_second": 17.261, |
|
"eval_steps_per_second": 4.315, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 7.314814814814815e-05, |
|
"loss": 0.0613, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.998379647731781, |
|
"eval_loss": 0.005978360306471586, |
|
"eval_runtime": 2001.292, |
|
"eval_samples_per_second": 17.269, |
|
"eval_steps_per_second": 4.317, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 7.268518518518519e-05, |
|
"loss": 0.0364, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_accuracy": 0.9971064925193787, |
|
"eval_loss": 0.01282673142850399, |
|
"eval_runtime": 2012.1731, |
|
"eval_samples_per_second": 17.175, |
|
"eval_steps_per_second": 4.294, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 7.222222222222222e-05, |
|
"loss": 0.108, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.998379647731781, |
|
"eval_loss": 0.005587506573647261, |
|
"eval_runtime": 2228.721, |
|
"eval_samples_per_second": 15.507, |
|
"eval_steps_per_second": 3.877, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 7.175925925925926e-05, |
|
"loss": 0.0134, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_accuracy": 0.9985821843147278, |
|
"eval_loss": 0.0066048940643668175, |
|
"eval_runtime": 2000.8975, |
|
"eval_samples_per_second": 17.272, |
|
"eval_steps_per_second": 4.318, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 7.12962962962963e-05, |
|
"loss": 0.0389, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_accuracy": 0.9972511529922485, |
|
"eval_loss": 0.012162311002612114, |
|
"eval_runtime": 1997.5848, |
|
"eval_samples_per_second": 17.301, |
|
"eval_steps_per_second": 4.325, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 7.083333333333334e-05, |
|
"loss": 0.0208, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_accuracy": 0.9991030097007751, |
|
"eval_loss": 0.0034532626159489155, |
|
"eval_runtime": 2007.9035, |
|
"eval_samples_per_second": 17.212, |
|
"eval_steps_per_second": 4.303, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.037037037037038e-05, |
|
"loss": 0.0376, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.9991897940635681, |
|
"eval_loss": 0.004356299061328173, |
|
"eval_runtime": 1996.8911, |
|
"eval_samples_per_second": 17.307, |
|
"eval_steps_per_second": 4.327, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 6.99074074074074e-05, |
|
"loss": 0.0346, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.9969907402992249, |
|
"eval_loss": 0.017812130972743034, |
|
"eval_runtime": 2004.911, |
|
"eval_samples_per_second": 17.238, |
|
"eval_steps_per_second": 4.309, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 6.944444444444444e-05, |
|
"loss": 0.0189, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.9987847208976746, |
|
"eval_loss": 0.0057495711371302605, |
|
"eval_runtime": 2011.102, |
|
"eval_samples_per_second": 17.185, |
|
"eval_steps_per_second": 4.296, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 6.898148148148148e-05, |
|
"loss": 0.0141, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.9992766380310059, |
|
"eval_loss": 0.003152304096147418, |
|
"eval_runtime": 1989.9017, |
|
"eval_samples_per_second": 17.368, |
|
"eval_steps_per_second": 4.342, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.851851851851852e-05, |
|
"loss": 0.0719, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_accuracy": 0.9987847208976746, |
|
"eval_loss": 0.005420052912086248, |
|
"eval_runtime": 1969.8998, |
|
"eval_samples_per_second": 17.544, |
|
"eval_steps_per_second": 4.386, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 6.805555555555556e-05, |
|
"loss": 0.0225, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.9971932768821716, |
|
"eval_loss": 0.012641699984669685, |
|
"eval_runtime": 1981.1809, |
|
"eval_samples_per_second": 17.444, |
|
"eval_steps_per_second": 4.361, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 6.759259259259259e-05, |
|
"loss": 0.0682, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_accuracy": 0.9989583492279053, |
|
"eval_loss": 0.003953148610889912, |
|
"eval_runtime": 1973.9678, |
|
"eval_samples_per_second": 17.508, |
|
"eval_steps_per_second": 4.377, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 6.712962962962963e-05, |
|
"loss": 0.0521, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_accuracy": 0.998466432094574, |
|
"eval_loss": 0.005261498969048262, |
|
"eval_runtime": 1989.7692, |
|
"eval_samples_per_second": 17.369, |
|
"eval_steps_per_second": 4.342, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0358, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.9993634223937988, |
|
"eval_loss": 0.002406924497336149, |
|
"eval_runtime": 1975.9496, |
|
"eval_samples_per_second": 17.49, |
|
"eval_steps_per_second": 4.373, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 6.620370370370371e-05, |
|
"loss": 0.0255, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.9984953999519348, |
|
"eval_loss": 0.007655243389308453, |
|
"eval_runtime": 1972.1073, |
|
"eval_samples_per_second": 17.524, |
|
"eval_steps_per_second": 4.381, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 6.574074074074075e-05, |
|
"loss": 0.0424, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.9996238350868225, |
|
"eval_loss": 0.0017167649930343032, |
|
"eval_runtime": 1980.6249, |
|
"eval_samples_per_second": 17.449, |
|
"eval_steps_per_second": 4.362, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 6.527777777777778e-05, |
|
"loss": 0.0214, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.9997106194496155, |
|
"eval_loss": 0.0009764753049239516, |
|
"eval_runtime": 2005.2649, |
|
"eval_samples_per_second": 17.235, |
|
"eval_steps_per_second": 4.309, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.481481481481482e-05, |
|
"loss": 0.0429, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_accuracy": 0.996006965637207, |
|
"eval_loss": 0.019011829048395157, |
|
"eval_runtime": 2045.2435, |
|
"eval_samples_per_second": 16.898, |
|
"eval_steps_per_second": 4.224, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.435185185185186e-05, |
|
"loss": 0.0783, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_accuracy": 0.9976562261581421, |
|
"eval_loss": 0.008234655484557152, |
|
"eval_runtime": 2041.1233, |
|
"eval_samples_per_second": 16.932, |
|
"eval_steps_per_second": 4.233, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 6.388888888888888e-05, |
|
"loss": 0.0141, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.9996238350868225, |
|
"eval_loss": 0.0018950661178678274, |
|
"eval_runtime": 1994.0408, |
|
"eval_samples_per_second": 17.332, |
|
"eval_steps_per_second": 4.333, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 6.342592592592594e-05, |
|
"loss": 0.0203, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.9994502067565918, |
|
"eval_loss": 0.0022274223156273365, |
|
"eval_runtime": 1978.2563, |
|
"eval_samples_per_second": 17.47, |
|
"eval_steps_per_second": 4.367, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.296296296296296e-05, |
|
"loss": 0.0439, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.9979166388511658, |
|
"eval_loss": 0.007150179240852594, |
|
"eval_runtime": 1990.4775, |
|
"eval_samples_per_second": 17.363, |
|
"eval_steps_per_second": 4.341, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0228, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.9973090291023254, |
|
"eval_loss": 0.010999325662851334, |
|
"eval_runtime": 1995.6933, |
|
"eval_samples_per_second": 17.317, |
|
"eval_steps_per_second": 4.329, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.203703703703704e-05, |
|
"loss": 0.0386, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.9996817111968994, |
|
"eval_loss": 0.001689778990112245, |
|
"eval_runtime": 1983.468, |
|
"eval_samples_per_second": 17.424, |
|
"eval_steps_per_second": 4.356, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.157407407407407e-05, |
|
"loss": 0.023, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_accuracy": 0.9997106194496155, |
|
"eval_loss": 0.001407949603162706, |
|
"eval_runtime": 1979.2035, |
|
"eval_samples_per_second": 17.462, |
|
"eval_steps_per_second": 4.365, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 6.111111111111112e-05, |
|
"loss": 0.0188, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.9997395873069763, |
|
"eval_loss": 0.001248441985808313, |
|
"eval_runtime": 1986.8349, |
|
"eval_samples_per_second": 17.395, |
|
"eval_steps_per_second": 4.349, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.0648148148148154e-05, |
|
"loss": 0.0301, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.9998553395271301, |
|
"eval_loss": 0.0005934939254075289, |
|
"eval_runtime": 2006.6093, |
|
"eval_samples_per_second": 17.223, |
|
"eval_steps_per_second": 4.306, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.018518518518519e-05, |
|
"loss": 0.0077, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.9999421238899231, |
|
"eval_loss": 0.00026703893672674894, |
|
"eval_runtime": 2048.5713, |
|
"eval_samples_per_second": 16.87, |
|
"eval_steps_per_second": 4.218, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.972222222222223e-05, |
|
"loss": 0.0291, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.9989872574806213, |
|
"eval_loss": 0.004098657984286547, |
|
"eval_runtime": 2031.7661, |
|
"eval_samples_per_second": 17.01, |
|
"eval_steps_per_second": 4.252, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.925925925925926e-05, |
|
"loss": 0.0274, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.9995949268341064, |
|
"eval_loss": 0.001983657479286194, |
|
"eval_runtime": 2040.4005, |
|
"eval_samples_per_second": 16.938, |
|
"eval_steps_per_second": 4.234, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 5.879629629629629e-05, |
|
"loss": 0.0193, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_accuracy": 0.9998842477798462, |
|
"eval_loss": 0.0003717490180861205, |
|
"eval_runtime": 2041.1098, |
|
"eval_samples_per_second": 16.932, |
|
"eval_steps_per_second": 4.233, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 5.833333333333334e-05, |
|
"loss": 0.0296, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_accuracy": 0.9997684955596924, |
|
"eval_loss": 0.0011291600530967116, |
|
"eval_runtime": 2004.7261, |
|
"eval_samples_per_second": 17.239, |
|
"eval_steps_per_second": 4.31, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.787037037037037e-05, |
|
"loss": 0.0033, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_accuracy": 0.9998553395271301, |
|
"eval_loss": 0.0005978959961794317, |
|
"eval_runtime": 2001.4449, |
|
"eval_samples_per_second": 17.268, |
|
"eval_steps_per_second": 4.317, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 5.740740740740741e-05, |
|
"loss": 0.0218, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_accuracy": 0.999160885810852, |
|
"eval_loss": 0.0025338120758533478, |
|
"eval_runtime": 1990.5201, |
|
"eval_samples_per_second": 17.362, |
|
"eval_steps_per_second": 4.341, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.6944444444444445e-05, |
|
"loss": 0.0238, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_accuracy": 0.999218761920929, |
|
"eval_loss": 0.0033705937676131725, |
|
"eval_runtime": 2019.0619, |
|
"eval_samples_per_second": 17.117, |
|
"eval_steps_per_second": 4.279, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.648148148148148e-05, |
|
"loss": 0.0319, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_accuracy": 0.9994791746139526, |
|
"eval_loss": 0.0017771282000467181, |
|
"eval_runtime": 1997.4844, |
|
"eval_samples_per_second": 17.302, |
|
"eval_steps_per_second": 4.325, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.6018518518518525e-05, |
|
"loss": 0.0465, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.9994502067565918, |
|
"eval_loss": 0.002331700176000595, |
|
"eval_runtime": 2012.3132, |
|
"eval_samples_per_second": 17.174, |
|
"eval_steps_per_second": 4.294, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.555555555555556e-05, |
|
"loss": 0.0412, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_accuracy": 0.9997395873069763, |
|
"eval_loss": 0.001237583113834262, |
|
"eval_runtime": 1999.8191, |
|
"eval_samples_per_second": 17.282, |
|
"eval_steps_per_second": 4.32, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 5.50925925925926e-05, |
|
"loss": 0.02, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_accuracy": 0.9998842477798462, |
|
"eval_loss": 0.0009390079067088664, |
|
"eval_runtime": 2008.6286, |
|
"eval_samples_per_second": 17.206, |
|
"eval_steps_per_second": 4.301, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 5.462962962962963e-05, |
|
"loss": 0.0226, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_accuracy": 0.9995949268341064, |
|
"eval_loss": 0.0017483533592894673, |
|
"eval_runtime": 2005.0151, |
|
"eval_samples_per_second": 17.237, |
|
"eval_steps_per_second": 4.309, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 5.4166666666666664e-05, |
|
"loss": 0.0104, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_accuracy": 0.9997684955596924, |
|
"eval_loss": 0.0008292018319480121, |
|
"eval_runtime": 1992.6545, |
|
"eval_samples_per_second": 17.344, |
|
"eval_steps_per_second": 4.336, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 5.370370370370371e-05, |
|
"loss": 0.0021, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_accuracy": 0.999913215637207, |
|
"eval_loss": 0.000292919430648908, |
|
"eval_runtime": 1987.2006, |
|
"eval_samples_per_second": 17.391, |
|
"eval_steps_per_second": 4.348, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 5.3240740740740744e-05, |
|
"loss": 0.0135, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_accuracy": 0.9987558126449585, |
|
"eval_loss": 0.005596287082880735, |
|
"eval_runtime": 2007.3126, |
|
"eval_samples_per_second": 17.217, |
|
"eval_steps_per_second": 4.304, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.2777777777777784e-05, |
|
"loss": 0.0319, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.9995949268341064, |
|
"eval_loss": 0.001722234534099698, |
|
"eval_runtime": 2007.5056, |
|
"eval_samples_per_second": 17.215, |
|
"eval_steps_per_second": 4.304, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.231481481481482e-05, |
|
"loss": 0.0279, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_accuracy": 0.9997106194496155, |
|
"eval_loss": 0.001128367381170392, |
|
"eval_runtime": 1980.2057, |
|
"eval_samples_per_second": 17.453, |
|
"eval_steps_per_second": 4.363, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.185185185185185e-05, |
|
"loss": 0.0017, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_accuracy": 0.9997106194496155, |
|
"eval_loss": 0.0013848639791831374, |
|
"eval_runtime": 2007.1812, |
|
"eval_samples_per_second": 17.218, |
|
"eval_steps_per_second": 4.305, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.138888888888889e-05, |
|
"loss": 0.0296, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_accuracy": 0.9989583492279053, |
|
"eval_loss": 0.005161995068192482, |
|
"eval_runtime": 1986.761, |
|
"eval_samples_per_second": 17.395, |
|
"eval_steps_per_second": 4.349, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.092592592592593e-05, |
|
"loss": 0.0168, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_accuracy": 0.9997974634170532, |
|
"eval_loss": 0.0004770481900777668, |
|
"eval_runtime": 2003.7003, |
|
"eval_samples_per_second": 17.248, |
|
"eval_steps_per_second": 4.312, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.046296296296297e-05, |
|
"loss": 0.0194, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_accuracy": 0.9997684955596924, |
|
"eval_loss": 0.000735765672288835, |
|
"eval_runtime": 1994.758, |
|
"eval_samples_per_second": 17.325, |
|
"eval_steps_per_second": 4.331, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0006, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.9998263716697693, |
|
"eval_loss": 0.0009093827102333307, |
|
"eval_runtime": 2000.8621, |
|
"eval_samples_per_second": 17.273, |
|
"eval_steps_per_second": 4.318, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.9537037037037035e-05, |
|
"loss": 0.0293, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_accuracy": 0.999913215637207, |
|
"eval_loss": 0.0005157970590516925, |
|
"eval_runtime": 2011.0169, |
|
"eval_samples_per_second": 17.185, |
|
"eval_steps_per_second": 4.296, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.9074074074074075e-05, |
|
"loss": 0.0016, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_accuracy": 0.9994791746139526, |
|
"eval_loss": 0.0025301428977400064, |
|
"eval_runtime": 2000.9133, |
|
"eval_samples_per_second": 17.272, |
|
"eval_steps_per_second": 4.318, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.0069, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.9998842477798462, |
|
"eval_loss": 0.0004891157150268555, |
|
"eval_runtime": 2006.7369, |
|
"eval_samples_per_second": 17.222, |
|
"eval_steps_per_second": 4.305, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.0001, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_accuracy": 0.9999421238899231, |
|
"eval_loss": 0.00020419809152372181, |
|
"eval_runtime": 1993.3725, |
|
"eval_samples_per_second": 17.337, |
|
"eval_steps_per_second": 4.334, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 4.768518518518519e-05, |
|
"loss": 0.0108, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_accuracy": 0.9997974634170532, |
|
"eval_loss": 0.0010758559219539165, |
|
"eval_runtime": 2001.2763, |
|
"eval_samples_per_second": 17.269, |
|
"eval_steps_per_second": 4.317, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.0165, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_accuracy": 0.9998842477798462, |
|
"eval_loss": 0.0006313551566563547, |
|
"eval_runtime": 1995.5247, |
|
"eval_samples_per_second": 17.319, |
|
"eval_steps_per_second": 4.33, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 4.675925925925926e-05, |
|
"loss": 0.0001, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_accuracy": 0.9997974634170532, |
|
"eval_loss": 0.0007648964528925717, |
|
"eval_runtime": 2001.09, |
|
"eval_samples_per_second": 17.271, |
|
"eval_steps_per_second": 4.318, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.0244, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_accuracy": 0.9998553395271301, |
|
"eval_loss": 0.000668107473757118, |
|
"eval_runtime": 2000.7577, |
|
"eval_samples_per_second": 17.273, |
|
"eval_steps_per_second": 4.318, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 0.0312, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_accuracy": 0.9995659589767456, |
|
"eval_loss": 0.001716578146442771, |
|
"eval_runtime": 1997.1256, |
|
"eval_samples_per_second": 17.305, |
|
"eval_steps_per_second": 4.326, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.5370370370370374e-05, |
|
"loss": 0.0191, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_accuracy": 0.9997395873069763, |
|
"eval_loss": 0.0007975550834089518, |
|
"eval_runtime": 1967.8746, |
|
"eval_samples_per_second": 17.562, |
|
"eval_steps_per_second": 4.391, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.490740740740741e-05, |
|
"loss": 0.0005, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_accuracy": 0.9998842477798462, |
|
"eval_loss": 0.0004628011374734342, |
|
"eval_runtime": 1958.7798, |
|
"eval_samples_per_second": 17.644, |
|
"eval_steps_per_second": 4.411, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.0259, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_accuracy": 0.9996238350868225, |
|
"eval_loss": 0.001358355744741857, |
|
"eval_runtime": 1971.0225, |
|
"eval_samples_per_second": 17.534, |
|
"eval_steps_per_second": 4.384, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.3981481481481486e-05, |
|
"loss": 0.0226, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.00019500043708831072, |
|
"eval_runtime": 1970.5019, |
|
"eval_samples_per_second": 17.539, |
|
"eval_steps_per_second": 4.385, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.351851851851852e-05, |
|
"loss": 0.0, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.0002164940524380654, |
|
"eval_runtime": 1961.9305, |
|
"eval_samples_per_second": 17.615, |
|
"eval_steps_per_second": 4.404, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.305555555555556e-05, |
|
"loss": 0.0, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.9999421238899231, |
|
"eval_loss": 0.00010657820530468598, |
|
"eval_runtime": 1973.4403, |
|
"eval_samples_per_second": 17.513, |
|
"eval_steps_per_second": 4.378, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.0145, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 4.758801151183434e-05, |
|
"eval_runtime": 1977.328, |
|
"eval_samples_per_second": 17.478, |
|
"eval_steps_per_second": 4.37, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 4.212962962962963e-05, |
|
"loss": 0.0083, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 0.9995659589767456, |
|
"eval_loss": 0.001972577767446637, |
|
"eval_runtime": 1962.5085, |
|
"eval_samples_per_second": 17.61, |
|
"eval_steps_per_second": 4.403, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.02, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_accuracy": 0.9994791746139526, |
|
"eval_loss": 0.00198388216085732, |
|
"eval_runtime": 1956.5161, |
|
"eval_samples_per_second": 17.664, |
|
"eval_steps_per_second": 4.416, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.1203703703703705e-05, |
|
"loss": 0.0293, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_accuracy": 0.9994212985038757, |
|
"eval_loss": 0.0031591171864420176, |
|
"eval_runtime": 1997.7409, |
|
"eval_samples_per_second": 17.3, |
|
"eval_steps_per_second": 4.325, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.0164, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.9997395873069763, |
|
"eval_loss": 0.0012433998053893447, |
|
"eval_runtime": 2019.1263, |
|
"eval_samples_per_second": 17.116, |
|
"eval_steps_per_second": 4.279, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.027777777777778e-05, |
|
"loss": 0.0147, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.9997684955596924, |
|
"eval_loss": 0.001224155188538134, |
|
"eval_runtime": 2015.6572, |
|
"eval_samples_per_second": 17.146, |
|
"eval_steps_per_second": 4.286, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.981481481481482e-05, |
|
"loss": 0.0112, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_accuracy": 0.999913215637207, |
|
"eval_loss": 0.0008148940978571773, |
|
"eval_runtime": 1991.7022, |
|
"eval_samples_per_second": 17.352, |
|
"eval_steps_per_second": 4.338, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.935185185185186e-05, |
|
"loss": 0.002, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_accuracy": 0.9997395873069763, |
|
"eval_loss": 0.0012871942017227411, |
|
"eval_runtime": 2016.2834, |
|
"eval_samples_per_second": 17.14, |
|
"eval_steps_per_second": 4.285, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.017, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_accuracy": 0.9997106194496155, |
|
"eval_loss": 0.0010973262833431363, |
|
"eval_runtime": 2017.4262, |
|
"eval_samples_per_second": 17.131, |
|
"eval_steps_per_second": 4.283, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.8425925925925924e-05, |
|
"loss": 0.0142, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_accuracy": 0.9996528029441833, |
|
"eval_loss": 0.0019141812808811665, |
|
"eval_runtime": 2002.6757, |
|
"eval_samples_per_second": 17.257, |
|
"eval_steps_per_second": 4.314, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.7962962962962964e-05, |
|
"loss": 0.008, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.9997395873069763, |
|
"eval_loss": 0.00135290517937392, |
|
"eval_runtime": 2020.5372, |
|
"eval_samples_per_second": 17.104, |
|
"eval_steps_per_second": 4.276, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0411, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.9997974634170532, |
|
"eval_loss": 0.000736766669433564, |
|
"eval_runtime": 2162.1556, |
|
"eval_samples_per_second": 15.984, |
|
"eval_steps_per_second": 3.996, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.0262, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_accuracy": 0.9998553395271301, |
|
"eval_loss": 0.000846204929985106, |
|
"eval_runtime": 2119.0303, |
|
"eval_samples_per_second": 16.309, |
|
"eval_steps_per_second": 4.077, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 3.6574074074074076e-05, |
|
"loss": 0.0198, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_accuracy": 0.9997106194496155, |
|
"eval_loss": 0.0010991438757628202, |
|
"eval_runtime": 2095.0628, |
|
"eval_samples_per_second": 16.496, |
|
"eval_steps_per_second": 4.124, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.0178, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_accuracy": 0.999913215637207, |
|
"eval_loss": 0.00029710811213590205, |
|
"eval_runtime": 2130.6792, |
|
"eval_samples_per_second": 16.22, |
|
"eval_steps_per_second": 4.055, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.564814814814815e-05, |
|
"loss": 0.0072, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.00018699387146625668, |
|
"eval_runtime": 2082.0917, |
|
"eval_samples_per_second": 16.599, |
|
"eval_steps_per_second": 4.15, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.0004, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_accuracy": 0.9998263716697693, |
|
"eval_loss": 0.0013777822023257613, |
|
"eval_runtime": 2053.664, |
|
"eval_samples_per_second": 16.828, |
|
"eval_steps_per_second": 4.207, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.0191, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_accuracy": 0.9999421238899231, |
|
"eval_loss": 0.0004184871504548937, |
|
"eval_runtime": 2048.7946, |
|
"eval_samples_per_second": 16.868, |
|
"eval_steps_per_second": 4.217, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 3.425925925925926e-05, |
|
"loss": 0.007, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_accuracy": 0.999913215637207, |
|
"eval_loss": 0.0004062869702465832, |
|
"eval_runtime": 2055.664, |
|
"eval_samples_per_second": 16.812, |
|
"eval_steps_per_second": 4.203, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.3796296296296295e-05, |
|
"loss": 0.0108, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_accuracy": 0.999913215637207, |
|
"eval_loss": 0.00011388419807190076, |
|
"eval_runtime": 2043.6545, |
|
"eval_samples_per_second": 16.911, |
|
"eval_steps_per_second": 4.228, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 7.532363088103011e-05, |
|
"eval_runtime": 2040.9204, |
|
"eval_samples_per_second": 16.934, |
|
"eval_steps_per_second": 4.233, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.2870370370370375e-05, |
|
"loss": 0.0006, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"eval_accuracy": 0.9999421238899231, |
|
"eval_loss": 0.0003408396732993424, |
|
"eval_runtime": 2057.5647, |
|
"eval_samples_per_second": 16.797, |
|
"eval_steps_per_second": 4.199, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.0085, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_accuracy": 0.9992766380310059, |
|
"eval_loss": 0.0034337618853896856, |
|
"eval_runtime": 2027.249, |
|
"eval_samples_per_second": 17.048, |
|
"eval_steps_per_second": 4.262, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.194444444444444e-05, |
|
"loss": 0.0002, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_accuracy": 0.999913215637207, |
|
"eval_loss": 0.0006225552642717957, |
|
"eval_runtime": 2004.1478, |
|
"eval_samples_per_second": 17.244, |
|
"eval_steps_per_second": 4.311, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.0181, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.000251033779932186, |
|
"eval_runtime": 2016.4131, |
|
"eval_samples_per_second": 17.139, |
|
"eval_steps_per_second": 4.285, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.101851851851852e-05, |
|
"loss": 0.0021, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_accuracy": 0.999913215637207, |
|
"eval_loss": 0.00040141510544344783, |
|
"eval_runtime": 2000.3942, |
|
"eval_samples_per_second": 17.277, |
|
"eval_steps_per_second": 4.319, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.0069, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_accuracy": 0.9998842477798462, |
|
"eval_loss": 0.0006463331519626081, |
|
"eval_runtime": 2015.2783, |
|
"eval_samples_per_second": 17.149, |
|
"eval_steps_per_second": 4.287, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.0092592592592593e-05, |
|
"loss": 0.0156, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.0001428252726327628, |
|
"eval_runtime": 1995.7618, |
|
"eval_samples_per_second": 17.317, |
|
"eval_steps_per_second": 4.329, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.0042, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_accuracy": 0.9997974634170532, |
|
"eval_loss": 0.000510143639985472, |
|
"eval_runtime": 2000.972, |
|
"eval_samples_per_second": 17.272, |
|
"eval_steps_per_second": 4.318, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.0233, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.00019888828683178872, |
|
"eval_runtime": 2002.1598, |
|
"eval_samples_per_second": 17.261, |
|
"eval_steps_per_second": 4.315, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 2.8703703703703706e-05, |
|
"loss": 0.003, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_accuracy": 0.9997974634170532, |
|
"eval_loss": 0.0006905001355335116, |
|
"eval_runtime": 2000.419, |
|
"eval_samples_per_second": 17.276, |
|
"eval_steps_per_second": 4.319, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.824074074074074e-05, |
|
"loss": 0.0149, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_accuracy": 0.9998553395271301, |
|
"eval_loss": 0.000585312838666141, |
|
"eval_runtime": 1997.3791, |
|
"eval_samples_per_second": 17.303, |
|
"eval_steps_per_second": 4.326, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0072, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.000229826764552854, |
|
"eval_runtime": 2001.2597, |
|
"eval_samples_per_second": 17.269, |
|
"eval_steps_per_second": 4.317, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.7314814814814816e-05, |
|
"loss": 0.0004, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 5.024338679504581e-05, |
|
"eval_runtime": 2013.5805, |
|
"eval_samples_per_second": 17.163, |
|
"eval_steps_per_second": 4.291, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 2.6851851851851855e-05, |
|
"loss": 0.0001, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_accuracy": 0.999913215637207, |
|
"eval_loss": 0.00017916383512783796, |
|
"eval_runtime": 1994.345, |
|
"eval_samples_per_second": 17.329, |
|
"eval_steps_per_second": 4.332, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 0.0186, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 9.207503353536595e-06, |
|
"eval_runtime": 2056.2161, |
|
"eval_samples_per_second": 16.808, |
|
"eval_steps_per_second": 4.202, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.0115, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.00022165325935930014, |
|
"eval_runtime": 2044.6907, |
|
"eval_samples_per_second": 16.902, |
|
"eval_steps_per_second": 4.226, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.5462962962962965e-05, |
|
"loss": 0.0011, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.00027788631268776953, |
|
"eval_runtime": 2046.6409, |
|
"eval_samples_per_second": 16.886, |
|
"eval_steps_per_second": 4.222, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0048, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 5.909843821427785e-05, |
|
"eval_runtime": 2008.3137, |
|
"eval_samples_per_second": 17.208, |
|
"eval_steps_per_second": 4.302, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.4537037037037038e-05, |
|
"loss": 0.0042, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.828932328062365e-06, |
|
"eval_runtime": 2129.8226, |
|
"eval_samples_per_second": 16.227, |
|
"eval_steps_per_second": 4.057, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.0024, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 8.2383139670128e-06, |
|
"eval_runtime": 2113.6583, |
|
"eval_samples_per_second": 16.351, |
|
"eval_steps_per_second": 4.088, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 2.361111111111111e-05, |
|
"loss": 0.0, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 5.800426606583642e-06, |
|
"eval_runtime": 2122.3997, |
|
"eval_samples_per_second": 16.283, |
|
"eval_steps_per_second": 4.071, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.0003, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.00010272156214341521, |
|
"eval_runtime": 2128.9658, |
|
"eval_samples_per_second": 16.233, |
|
"eval_steps_per_second": 4.058, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.2685185185185187e-05, |
|
"loss": 0.0, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 7.889495464041829e-05, |
|
"eval_runtime": 2147.2327, |
|
"eval_samples_per_second": 16.095, |
|
"eval_steps_per_second": 4.024, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 7.938377530081198e-05, |
|
"eval_runtime": 2139.4855, |
|
"eval_samples_per_second": 16.153, |
|
"eval_steps_per_second": 4.038, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.175925925925926e-05, |
|
"loss": 0.0029, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_accuracy": 0.9998842477798462, |
|
"eval_loss": 0.0005274215945973992, |
|
"eval_runtime": 2142.1862, |
|
"eval_samples_per_second": 16.133, |
|
"eval_steps_per_second": 4.033, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 2.1296296296296296e-05, |
|
"loss": 0.0066, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.00019657429947983474, |
|
"eval_runtime": 2149.2032, |
|
"eval_samples_per_second": 16.08, |
|
"eval_steps_per_second": 4.02, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.0079, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 5.7856173953041434e-05, |
|
"eval_runtime": 2135.9752, |
|
"eval_samples_per_second": 16.18, |
|
"eval_steps_per_second": 4.045, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.0091, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_accuracy": 0.9999421238899231, |
|
"eval_loss": 0.00015575718134641647, |
|
"eval_runtime": 2158.5953, |
|
"eval_samples_per_second": 16.01, |
|
"eval_steps_per_second": 4.003, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.990740740740741e-05, |
|
"loss": 0.0951, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 6.823511648690328e-05, |
|
"eval_runtime": 2106.2766, |
|
"eval_samples_per_second": 16.408, |
|
"eval_steps_per_second": 4.102, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.0578, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_accuracy": 0.9999421238899231, |
|
"eval_loss": 0.00031872568069957197, |
|
"eval_runtime": 2091.5056, |
|
"eval_samples_per_second": 16.524, |
|
"eval_steps_per_second": 4.131, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 1.8981481481481482e-05, |
|
"loss": 0.0171, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_accuracy": 0.9999421238899231, |
|
"eval_loss": 0.0003504869237076491, |
|
"eval_runtime": 2076.2302, |
|
"eval_samples_per_second": 16.646, |
|
"eval_steps_per_second": 4.161, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0305, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.00012279333896003664, |
|
"eval_runtime": 2072.7643, |
|
"eval_samples_per_second": 16.673, |
|
"eval_steps_per_second": 4.168, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 1.8055555555555555e-05, |
|
"loss": 0.0449, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.00021972648391965777, |
|
"eval_runtime": 2090.628, |
|
"eval_samples_per_second": 16.531, |
|
"eval_steps_per_second": 4.133, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1.7592592592592595e-05, |
|
"loss": 0.0161, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 2.7198611860512756e-05, |
|
"eval_runtime": 2085.7289, |
|
"eval_samples_per_second": 16.57, |
|
"eval_steps_per_second": 4.142, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.712962962962963e-05, |
|
"loss": 0.0322, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 2.2180371161084622e-05, |
|
"eval_runtime": 2061.6769, |
|
"eval_samples_per_second": 16.763, |
|
"eval_steps_per_second": 4.191, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0358, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_accuracy": 0.9999710917472839, |
|
"eval_loss": 0.00010751090303529054, |
|
"eval_runtime": 2107.1409, |
|
"eval_samples_per_second": 16.401, |
|
"eval_steps_per_second": 4.1, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.6203703703703704e-05, |
|
"loss": 0.0264, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.194192792463582e-06, |
|
"eval_runtime": 2091.7086, |
|
"eval_samples_per_second": 16.522, |
|
"eval_steps_per_second": 4.131, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 1.574074074074074e-05, |
|
"loss": 0.0199, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.114233656262513e-06, |
|
"eval_runtime": 2093.6259, |
|
"eval_samples_per_second": 16.507, |
|
"eval_steps_per_second": 4.127, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.527777777777778e-05, |
|
"loss": 0.0266, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.532317456731107e-06, |
|
"eval_runtime": 2103.3039, |
|
"eval_samples_per_second": 16.431, |
|
"eval_steps_per_second": 4.108, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.0162, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.056379334040685e-06, |
|
"eval_runtime": 2141.6719, |
|
"eval_samples_per_second": 16.137, |
|
"eval_steps_per_second": 4.034, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.4351851851851853e-05, |
|
"loss": 0.0142, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.732083420502022e-06, |
|
"eval_runtime": 2137.4831, |
|
"eval_samples_per_second": 16.169, |
|
"eval_steps_per_second": 4.042, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0353, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 5.884473466721829e-06, |
|
"eval_runtime": 2111.534, |
|
"eval_samples_per_second": 16.367, |
|
"eval_steps_per_second": 4.092, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 1.3425925925925928e-05, |
|
"loss": 0.0435, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.438468062697211e-06, |
|
"eval_runtime": 2127.2273, |
|
"eval_samples_per_second": 16.247, |
|
"eval_steps_per_second": 4.062, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.0067, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 9.256172234017868e-06, |
|
"eval_runtime": 2183.0463, |
|
"eval_samples_per_second": 15.831, |
|
"eval_steps_per_second": 3.958, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0299, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.904490419401554e-06, |
|
"eval_runtime": 2110.4592, |
|
"eval_samples_per_second": 16.376, |
|
"eval_steps_per_second": 4.094, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.2037037037037037e-05, |
|
"loss": 0.0063, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.991323061811272e-06, |
|
"eval_runtime": 2074.4391, |
|
"eval_samples_per_second": 16.66, |
|
"eval_steps_per_second": 4.165, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.1574074074074075e-05, |
|
"loss": 0.0117, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 5.223146672506118e-06, |
|
"eval_runtime": 2093.0232, |
|
"eval_samples_per_second": 16.512, |
|
"eval_steps_per_second": 4.128, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0107, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 7.764682777633425e-06, |
|
"eval_runtime": 2092.3489, |
|
"eval_samples_per_second": 16.517, |
|
"eval_steps_per_second": 4.129, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.0648148148148148e-05, |
|
"loss": 0.0162, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 5.700497240468394e-06, |
|
"eval_runtime": 2095.1985, |
|
"eval_samples_per_second": 16.495, |
|
"eval_steps_per_second": 4.124, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.0185185185185185e-05, |
|
"loss": 0.0138, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 5.209324172028573e-06, |
|
"eval_runtime": 2073.7497, |
|
"eval_samples_per_second": 16.665, |
|
"eval_steps_per_second": 4.166, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.0124, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 5.243016858003102e-06, |
|
"eval_runtime": 2056.3515, |
|
"eval_samples_per_second": 16.806, |
|
"eval_steps_per_second": 4.202, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.0083, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 5.0634776016522665e-06, |
|
"eval_runtime": 2077.8389, |
|
"eval_samples_per_second": 16.633, |
|
"eval_steps_per_second": 4.158, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 8.796296296296297e-06, |
|
"loss": 0.0066, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 4.925776011077687e-06, |
|
"eval_runtime": 2073.5316, |
|
"eval_samples_per_second": 16.667, |
|
"eval_steps_per_second": 4.167, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0058, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 4.750945663545281e-06, |
|
"eval_runtime": 2057.702, |
|
"eval_samples_per_second": 16.795, |
|
"eval_steps_per_second": 4.199, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.87037037037037e-06, |
|
"loss": 0.0032, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.96109145792434e-06, |
|
"eval_runtime": 2071.5479, |
|
"eval_samples_per_second": 16.683, |
|
"eval_steps_per_second": 4.171, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0205, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 4.608726612786995e-06, |
|
"eval_runtime": 2066.372, |
|
"eval_samples_per_second": 16.725, |
|
"eval_steps_per_second": 4.181, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.0094, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 4.8284973672707565e-06, |
|
"eval_runtime": 2054.9166, |
|
"eval_samples_per_second": 16.818, |
|
"eval_steps_per_second": 4.205, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 6.481481481481481e-06, |
|
"loss": 0.003, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 4.495966550166486e-06, |
|
"eval_runtime": 2072.6571, |
|
"eval_samples_per_second": 16.674, |
|
"eval_steps_per_second": 4.169, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 6.0185185185185185e-06, |
|
"loss": 0.0035, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 5.835635420226026e-06, |
|
"eval_runtime": 2047.8141, |
|
"eval_samples_per_second": 16.877, |
|
"eval_steps_per_second": 4.219, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0257, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 5.829508609167533e-06, |
|
"eval_runtime": 2091.8646, |
|
"eval_samples_per_second": 16.521, |
|
"eval_steps_per_second": 4.13, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 5.092592592592592e-06, |
|
"loss": 0.0019, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.3429124566027895e-06, |
|
"eval_runtime": 2040.9379, |
|
"eval_samples_per_second": 16.933, |
|
"eval_steps_per_second": 4.233, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.0023, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 8.131992217386141e-06, |
|
"eval_runtime": 2048.3614, |
|
"eval_samples_per_second": 16.872, |
|
"eval_steps_per_second": 4.218, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.0062, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 8.594151950092055e-06, |
|
"eval_runtime": 2094.5382, |
|
"eval_samples_per_second": 16.5, |
|
"eval_steps_per_second": 4.125, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.0039, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 7.4294948717579246e-06, |
|
"eval_runtime": 2104.354, |
|
"eval_samples_per_second": 16.423, |
|
"eval_steps_per_second": 4.106, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 3.2407407407407406e-06, |
|
"loss": 0.0144, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.862039299448952e-06, |
|
"eval_runtime": 2101.0817, |
|
"eval_samples_per_second": 16.449, |
|
"eval_steps_per_second": 4.112, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.0109, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.136932825029362e-06, |
|
"eval_runtime": 2119.5964, |
|
"eval_samples_per_second": 16.305, |
|
"eval_steps_per_second": 4.076, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.3148148148148148e-06, |
|
"loss": 0.0148, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 6.497817139461404e-06, |
|
"eval_runtime": 2115.6009, |
|
"eval_samples_per_second": 16.336, |
|
"eval_steps_per_second": 4.084, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.0308, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 7.753816134936642e-06, |
|
"eval_runtime": 2118.9207, |
|
"eval_samples_per_second": 16.31, |
|
"eval_steps_per_second": 4.078, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.388888888888889e-06, |
|
"loss": 0.0023, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 7.5415960054669995e-06, |
|
"eval_runtime": 2120.9953, |
|
"eval_samples_per_second": 16.294, |
|
"eval_steps_per_second": 4.074, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 9.259259259259259e-07, |
|
"loss": 0.0243, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 7.68591053201817e-06, |
|
"eval_runtime": 2120.6941, |
|
"eval_samples_per_second": 16.297, |
|
"eval_steps_per_second": 4.074, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 4.6296296296296297e-07, |
|
"loss": 0.0031, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 7.5350230872572865e-06, |
|
"eval_runtime": 2105.7948, |
|
"eval_samples_per_second": 16.412, |
|
"eval_steps_per_second": 4.103, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0272, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 7.493398243241245e-06, |
|
"eval_runtime": 2100.1734, |
|
"eval_samples_per_second": 16.456, |
|
"eval_steps_per_second": 4.114, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 21600, |
|
"total_flos": 2.295560541703184e+19, |
|
"train_loss": 0.003923701412147946, |
|
"train_runtime": 97975.3145, |
|
"train_samples_per_second": 1.764, |
|
"train_steps_per_second": 0.22 |
|
} |
|
], |
|
"max_steps": 21600, |
|
"num_train_epochs": 5, |
|
"total_flos": 2.295560541703184e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|