|
{ |
|
"best_metric": 0.9781420765027322, |
|
"best_model_checkpoint": "resnet-50-shortSleeveCleanedData/checkpoint-1470", |
|
"epoch": 10.0, |
|
"global_step": 1470, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.0923, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.982817869415808e-05, |
|
"loss": 1.0936, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.948453608247423e-05, |
|
"loss": 1.0859, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.9140893470790375e-05, |
|
"loss": 1.0726, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.879725085910653e-05, |
|
"loss": 1.0655, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.845360824742268e-05, |
|
"loss": 1.0659, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.810996563573883e-05, |
|
"loss": 1.0552, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.776632302405499e-05, |
|
"loss": 1.0441, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.7422680412371134e-05, |
|
"loss": 1.0318, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.7079037800687284e-05, |
|
"loss": 1.0237, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.673539518900344e-05, |
|
"loss": 1.0043, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.639175257731959e-05, |
|
"loss": 1.0046, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.6048109965635736e-05, |
|
"loss": 0.9857, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.570446735395189e-05, |
|
"loss": 0.973, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.726775956284153, |
|
"eval_loss": 0.9370501637458801, |
|
"eval_runtime": 577.0712, |
|
"eval_samples_per_second": 1.586, |
|
"eval_steps_per_second": 0.199, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.536082474226804e-05, |
|
"loss": 0.9592, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.5017182130584194e-05, |
|
"loss": 0.9381, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.467353951890035e-05, |
|
"loss": 0.9292, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.4329896907216494e-05, |
|
"loss": 0.8919, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.3986254295532645e-05, |
|
"loss": 0.8828, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.36426116838488e-05, |
|
"loss": 0.8452, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.329896907216495e-05, |
|
"loss": 0.8414, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.2955326460481096e-05, |
|
"loss": 0.8229, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.261168384879725e-05, |
|
"loss": 0.8034, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.2268041237113404e-05, |
|
"loss": 0.7824, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.1924398625429554e-05, |
|
"loss": 0.7358, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.158075601374571e-05, |
|
"loss": 0.7344, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.1237113402061855e-05, |
|
"loss": 0.6981, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.0893470790378005e-05, |
|
"loss": 0.6869, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.054982817869416e-05, |
|
"loss": 0.6565, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8710382513661202, |
|
"eval_loss": 0.5519522428512573, |
|
"eval_runtime": 27.0894, |
|
"eval_samples_per_second": 33.777, |
|
"eval_steps_per_second": 4.245, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.020618556701031e-05, |
|
"loss": 0.663, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.9862542955326463e-05, |
|
"loss": 0.648, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.9518900343642614e-05, |
|
"loss": 0.6162, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.9175257731958764e-05, |
|
"loss": 0.5981, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.8831615120274915e-05, |
|
"loss": 0.5583, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.848797250859107e-05, |
|
"loss": 0.5563, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.8144329896907216e-05, |
|
"loss": 0.563, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.7800687285223366e-05, |
|
"loss": 0.534, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.745704467353952e-05, |
|
"loss": 0.5228, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.7113402061855674e-05, |
|
"loss": 0.5041, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 3.6769759450171824e-05, |
|
"loss": 0.4817, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.6426116838487974e-05, |
|
"loss": 0.5135, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.6082474226804125e-05, |
|
"loss": 0.4676, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3.5738831615120275e-05, |
|
"loss": 0.4882, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.539518900343643e-05, |
|
"loss": 0.4609, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9278688524590164, |
|
"eval_loss": 0.29834744334220886, |
|
"eval_runtime": 25.7025, |
|
"eval_samples_per_second": 35.6, |
|
"eval_steps_per_second": 4.474, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.5051546391752576e-05, |
|
"loss": 0.4741, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.4707903780068726e-05, |
|
"loss": 0.4635, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.4364261168384884e-05, |
|
"loss": 0.4645, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.4020618556701034e-05, |
|
"loss": 0.4558, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.3676975945017185e-05, |
|
"loss": 0.4318, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.4389, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3.2989690721649485e-05, |
|
"loss": 0.4614, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 3.2646048109965636e-05, |
|
"loss": 0.3683, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3.230240549828179e-05, |
|
"loss": 0.4364, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.1958762886597937e-05, |
|
"loss": 0.4254, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.161512027491409e-05, |
|
"loss": 0.4524, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 3.1271477663230244e-05, |
|
"loss": 0.4369, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 3.0927835051546395e-05, |
|
"loss": 0.3894, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 3.0584192439862545e-05, |
|
"loss": 0.3937, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9486338797814208, |
|
"eval_loss": 0.20506763458251953, |
|
"eval_runtime": 26.8515, |
|
"eval_samples_per_second": 34.076, |
|
"eval_steps_per_second": 4.283, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.0240549828178692e-05, |
|
"loss": 0.4192, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.9896907216494846e-05, |
|
"loss": 0.4003, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 2.9553264604811e-05, |
|
"loss": 0.4544, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 2.920962199312715e-05, |
|
"loss": 0.3996, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.8865979381443297e-05, |
|
"loss": 0.4168, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.852233676975945e-05, |
|
"loss": 0.3964, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 2.81786941580756e-05, |
|
"loss": 0.4113, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.7835051546391755e-05, |
|
"loss": 0.3912, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 2.749140893470791e-05, |
|
"loss": 0.3793, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 2.7147766323024053e-05, |
|
"loss": 0.4175, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 2.6804123711340206e-05, |
|
"loss": 0.3831, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.646048109965636e-05, |
|
"loss": 0.3818, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 2.611683848797251e-05, |
|
"loss": 0.3901, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 2.5773195876288658e-05, |
|
"loss": 0.3541, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 2.542955326460481e-05, |
|
"loss": 0.3723, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9726775956284153, |
|
"eval_loss": 0.15214987099170685, |
|
"eval_runtime": 26.7904, |
|
"eval_samples_per_second": 34.154, |
|
"eval_steps_per_second": 4.293, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.5085910652920962e-05, |
|
"loss": 0.3624, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 2.4742268041237116e-05, |
|
"loss": 0.3777, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 2.4398625429553266e-05, |
|
"loss": 0.3625, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 2.4054982817869417e-05, |
|
"loss": 0.3478, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 2.3711340206185567e-05, |
|
"loss": 0.3732, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 2.336769759450172e-05, |
|
"loss": 0.3508, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 2.3024054982817868e-05, |
|
"loss": 0.373, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 2.268041237113402e-05, |
|
"loss": 0.406, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 2.2336769759450175e-05, |
|
"loss": 0.3409, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 2.1993127147766322e-05, |
|
"loss": 0.3647, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 2.1649484536082476e-05, |
|
"loss": 0.3719, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 2.1305841924398627e-05, |
|
"loss": 0.3631, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 2.0962199312714777e-05, |
|
"loss": 0.3574, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 2.0618556701030927e-05, |
|
"loss": 0.3704, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 2.027491408934708e-05, |
|
"loss": 0.3926, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9672131147540983, |
|
"eval_loss": 0.14902302622795105, |
|
"eval_runtime": 25.8181, |
|
"eval_samples_per_second": 35.44, |
|
"eval_steps_per_second": 4.454, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.9931271477663232e-05, |
|
"loss": 0.3687, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 1.9587628865979382e-05, |
|
"loss": 0.3601, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 1.9243986254295536e-05, |
|
"loss": 0.3455, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 1.8900343642611683e-05, |
|
"loss": 0.3983, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.8556701030927837e-05, |
|
"loss": 0.3627, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 1.8213058419243987e-05, |
|
"loss": 0.3731, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 1.7869415807560138e-05, |
|
"loss": 0.3517, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 1.7525773195876288e-05, |
|
"loss": 0.3834, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.7182130584192442e-05, |
|
"loss": 0.3294, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.6838487972508592e-05, |
|
"loss": 0.301, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 1.6494845360824743e-05, |
|
"loss": 0.3338, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.6151202749140896e-05, |
|
"loss": 0.3375, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.5807560137457044e-05, |
|
"loss": 0.359, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.5463917525773197e-05, |
|
"loss": 0.3326, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9650273224043716, |
|
"eval_loss": 0.1366710066795349, |
|
"eval_runtime": 26.9372, |
|
"eval_samples_per_second": 33.968, |
|
"eval_steps_per_second": 4.269, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.5120274914089346e-05, |
|
"loss": 0.3346, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.47766323024055e-05, |
|
"loss": 0.3585, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.4432989690721649e-05, |
|
"loss": 0.3262, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 1.40893470790378e-05, |
|
"loss": 0.329, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.3745704467353954e-05, |
|
"loss": 0.3365, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1.3402061855670103e-05, |
|
"loss": 0.3348, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 1.3058419243986255e-05, |
|
"loss": 0.3666, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 1.2714776632302406e-05, |
|
"loss": 0.3044, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.2371134020618558e-05, |
|
"loss": 0.3048, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 1.2027491408934708e-05, |
|
"loss": 0.2838, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 1.168384879725086e-05, |
|
"loss": 0.3713, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 1.134020618556701e-05, |
|
"loss": 0.3235, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 1.0996563573883161e-05, |
|
"loss": 0.3703, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 1.0652920962199313e-05, |
|
"loss": 0.3923, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 1.0309278350515464e-05, |
|
"loss": 0.3166, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9737704918032787, |
|
"eval_loss": 0.11094537377357483, |
|
"eval_runtime": 26.7688, |
|
"eval_samples_per_second": 34.182, |
|
"eval_steps_per_second": 4.296, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.965635738831616e-06, |
|
"loss": 0.3415, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 9.621993127147768e-06, |
|
"loss": 0.3692, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 9.278350515463918e-06, |
|
"loss": 0.3247, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 8.934707903780069e-06, |
|
"loss": 0.2912, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 8.591065292096221e-06, |
|
"loss": 0.3685, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 8.247422680412371e-06, |
|
"loss": 0.3102, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 7.903780068728522e-06, |
|
"loss": 0.3595, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 7.560137457044673e-06, |
|
"loss": 0.3045, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 7.216494845360824e-06, |
|
"loss": 0.3092, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 6.872852233676977e-06, |
|
"loss": 0.3247, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 6.529209621993128e-06, |
|
"loss": 0.3052, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 6.185567010309279e-06, |
|
"loss": 0.2978, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 5.84192439862543e-06, |
|
"loss": 0.3043, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 5.498281786941581e-06, |
|
"loss": 0.3189, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 5.154639175257732e-06, |
|
"loss": 0.3492, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9759562841530055, |
|
"eval_loss": 0.1108308881521225, |
|
"eval_runtime": 26.5259, |
|
"eval_samples_per_second": 34.495, |
|
"eval_steps_per_second": 4.335, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 4.810996563573884e-06, |
|
"loss": 0.3084, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 4.467353951890034e-06, |
|
"loss": 0.2987, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 4.123711340206186e-06, |
|
"loss": 0.3322, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 3.7800687285223365e-06, |
|
"loss": 0.293, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 3.4364261168384886e-06, |
|
"loss": 0.338, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 3.0927835051546395e-06, |
|
"loss": 0.304, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 2.7491408934707903e-06, |
|
"loss": 0.3074, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.405498281786942e-06, |
|
"loss": 0.3073, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 2.061855670103093e-06, |
|
"loss": 0.2956, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 1.7182130584192443e-06, |
|
"loss": 0.3137, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 1.3745704467353952e-06, |
|
"loss": 0.3035, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 1.0309278350515464e-06, |
|
"loss": 0.3166, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 6.872852233676976e-07, |
|
"loss": 0.2696, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 3.436426116838488e-07, |
|
"loss": 0.3482, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.3228, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9781420765027322, |
|
"eval_loss": 0.1102537140250206, |
|
"eval_runtime": 26.664, |
|
"eval_samples_per_second": 34.316, |
|
"eval_steps_per_second": 4.313, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 1470, |
|
"total_flos": 1.7471888881490534e+18, |
|
"train_loss": 0.49037140356440123, |
|
"train_runtime": 8760.81, |
|
"train_samples_per_second": 9.391, |
|
"train_steps_per_second": 0.168 |
|
} |
|
], |
|
"max_steps": 1470, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.7471888881490534e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|