|
{ |
|
"best_metric": 0.3248412013053894, |
|
"best_model_checkpoint": "./vit-base-beans/checkpoint-3840", |
|
"epoch": 1.9865494050698396, |
|
"global_step": 3840, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019948266942576307, |
|
"loss": 1.9449, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019896533885152613, |
|
"loss": 1.5813, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001984480082772892, |
|
"loss": 1.3362, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019793067770305226, |
|
"loss": 1.332, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.5205692108667529, |
|
"eval_loss": 1.2994741201400757, |
|
"eval_runtime": 42.6019, |
|
"eval_samples_per_second": 90.724, |
|
"eval_steps_per_second": 11.361, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019741334712881532, |
|
"loss": 1.2472, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019689601655457838, |
|
"loss": 1.2749, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019637868598034144, |
|
"loss": 1.0352, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001958613554061045, |
|
"loss": 1.0517, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.5673997412677878, |
|
"eval_loss": 1.1108654737472534, |
|
"eval_runtime": 42.8705, |
|
"eval_samples_per_second": 90.155, |
|
"eval_steps_per_second": 11.29, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019534402483186757, |
|
"loss": 1.0752, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019482669425763063, |
|
"loss": 1.2143, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001943093636833937, |
|
"loss": 1.0985, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019379203310915678, |
|
"loss": 1.1256, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.6051746442432083, |
|
"eval_loss": 1.0107784271240234, |
|
"eval_runtime": 42.8294, |
|
"eval_samples_per_second": 90.242, |
|
"eval_steps_per_second": 11.301, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001932747025349198, |
|
"loss": 0.9229, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019275737196068287, |
|
"loss": 0.9273, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019224004138644596, |
|
"loss": 0.958, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000191722710812209, |
|
"loss": 0.8958, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.6248382923673997, |
|
"eval_loss": 0.9631242156028748, |
|
"eval_runtime": 42.7586, |
|
"eval_samples_per_second": 90.391, |
|
"eval_steps_per_second": 11.319, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019120538023797209, |
|
"loss": 0.9523, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019068804966373515, |
|
"loss": 1.0217, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019017071908949818, |
|
"loss": 1.0574, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018965338851526127, |
|
"loss": 0.9888, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.6349288486416559, |
|
"eval_loss": 0.9595150947570801, |
|
"eval_runtime": 42.8989, |
|
"eval_samples_per_second": 90.095, |
|
"eval_steps_per_second": 11.282, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001891360579410243, |
|
"loss": 0.8882, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001886187273667874, |
|
"loss": 0.92, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018810139679255046, |
|
"loss": 0.8887, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001875840662183135, |
|
"loss": 0.8887, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.6305304010349289, |
|
"eval_loss": 0.9519457817077637, |
|
"eval_runtime": 42.9821, |
|
"eval_samples_per_second": 89.921, |
|
"eval_steps_per_second": 11.261, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018706673564407658, |
|
"loss": 0.974, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018654940506983964, |
|
"loss": 0.9556, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001860320744956027, |
|
"loss": 0.8191, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018551474392136577, |
|
"loss": 0.7793, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.6677878395860285, |
|
"eval_loss": 0.8867014050483704, |
|
"eval_runtime": 42.8178, |
|
"eval_samples_per_second": 90.266, |
|
"eval_steps_per_second": 11.304, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018499741334712883, |
|
"loss": 0.8582, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001844800827728919, |
|
"loss": 0.8303, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018396275219865495, |
|
"loss": 0.9885, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000183445421624418, |
|
"loss": 0.8471, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.6623544631306598, |
|
"eval_loss": 0.8803606033325195, |
|
"eval_runtime": 42.9624, |
|
"eval_samples_per_second": 89.962, |
|
"eval_steps_per_second": 11.266, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018292809105018107, |
|
"loss": 1.0244, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018241076047594414, |
|
"loss": 0.9719, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001818934299017072, |
|
"loss": 0.8929, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018137609932747026, |
|
"loss": 0.907, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.6675291073738681, |
|
"eval_loss": 0.8609929084777832, |
|
"eval_runtime": 42.7896, |
|
"eval_samples_per_second": 90.326, |
|
"eval_steps_per_second": 11.311, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018085876875323332, |
|
"loss": 0.9411, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018034143817899638, |
|
"loss": 0.7953, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017982410760475944, |
|
"loss": 0.9103, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017930677703052253, |
|
"loss": 0.8575, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.685640362225097, |
|
"eval_loss": 0.8224917650222778, |
|
"eval_runtime": 42.3738, |
|
"eval_samples_per_second": 91.212, |
|
"eval_steps_per_second": 11.422, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017878944645628557, |
|
"loss": 0.8143, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017827211588204863, |
|
"loss": 0.6689, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017775478530781172, |
|
"loss": 0.7662, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017723745473357475, |
|
"loss": 0.7847, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.7073738680465718, |
|
"eval_loss": 0.7917023301124573, |
|
"eval_runtime": 42.3838, |
|
"eval_samples_per_second": 91.191, |
|
"eval_steps_per_second": 11.419, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017672012415933784, |
|
"loss": 0.7556, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017620279358510088, |
|
"loss": 0.7435, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017568546301086394, |
|
"loss": 0.8761, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017516813243662703, |
|
"loss": 0.7827, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.7009055627425614, |
|
"eval_loss": 0.7942800521850586, |
|
"eval_runtime": 42.3643, |
|
"eval_samples_per_second": 91.233, |
|
"eval_steps_per_second": 11.425, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017465080186239006, |
|
"loss": 0.9351, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00017413347128815315, |
|
"loss": 0.805, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001736161407139162, |
|
"loss": 0.9189, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00017309881013967925, |
|
"loss": 0.7886, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.6613195342820181, |
|
"eval_loss": 0.8620208501815796, |
|
"eval_runtime": 42.6346, |
|
"eval_samples_per_second": 90.654, |
|
"eval_steps_per_second": 11.352, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00017258147956544234, |
|
"loss": 0.7991, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001720641489912054, |
|
"loss": 0.8246, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00017154681841696846, |
|
"loss": 0.8205, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00017102948784273152, |
|
"loss": 0.7851, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.6972833117723156, |
|
"eval_loss": 0.7913413047790527, |
|
"eval_runtime": 42.3735, |
|
"eval_samples_per_second": 91.213, |
|
"eval_steps_per_second": 11.422, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00017051215726849456, |
|
"loss": 0.8207, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00016999482669425764, |
|
"loss": 0.7919, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001694774961200207, |
|
"loss": 0.6958, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00016896016554578377, |
|
"loss": 0.9368, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.6957309184993532, |
|
"eval_loss": 0.8293155431747437, |
|
"eval_runtime": 42.2867, |
|
"eval_samples_per_second": 91.4, |
|
"eval_steps_per_second": 11.446, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00016844283497154683, |
|
"loss": 0.7047, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001679255043973099, |
|
"loss": 0.8973, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00016740817382307295, |
|
"loss": 0.8332, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00016689084324883602, |
|
"loss": 0.8284, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.6437257438551099, |
|
"eval_loss": 0.8856919407844543, |
|
"eval_runtime": 42.4054, |
|
"eval_samples_per_second": 91.144, |
|
"eval_steps_per_second": 11.414, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016637351267459908, |
|
"loss": 0.7203, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016585618210036214, |
|
"loss": 0.7151, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001653388515261252, |
|
"loss": 0.6793, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00016482152095188826, |
|
"loss": 0.8299, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.726261319534282, |
|
"eval_loss": 0.7111316323280334, |
|
"eval_runtime": 42.7336, |
|
"eval_samples_per_second": 90.444, |
|
"eval_steps_per_second": 11.326, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016430419037765132, |
|
"loss": 0.7213, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016378685980341439, |
|
"loss": 0.6364, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016326952922917745, |
|
"loss": 0.6296, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001627521986549405, |
|
"loss": 0.7239, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.7304010349288487, |
|
"eval_loss": 0.7160272002220154, |
|
"eval_runtime": 42.6157, |
|
"eval_samples_per_second": 90.694, |
|
"eval_steps_per_second": 11.357, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016223486808070357, |
|
"loss": 0.766, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016171753750646663, |
|
"loss": 0.8078, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001612002069322297, |
|
"loss": 0.8452, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016068287635799278, |
|
"loss": 0.6726, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.7270375161707633, |
|
"eval_loss": 0.7201307415962219, |
|
"eval_runtime": 42.4274, |
|
"eval_samples_per_second": 91.097, |
|
"eval_steps_per_second": 11.408, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016016554578375582, |
|
"loss": 0.7182, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00015964821520951888, |
|
"loss": 0.7185, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015913088463528197, |
|
"loss": 0.6869, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.000158613554061045, |
|
"loss": 0.6081, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.6970245795601553, |
|
"eval_loss": 0.8389468789100647, |
|
"eval_runtime": 42.7093, |
|
"eval_samples_per_second": 90.496, |
|
"eval_steps_per_second": 11.332, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001580962234868081, |
|
"loss": 0.8349, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015757889291257113, |
|
"loss": 0.6436, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001570615623383342, |
|
"loss": 0.7325, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015654423176409728, |
|
"loss": 0.8363, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.7260025873221216, |
|
"eval_loss": 0.7098237872123718, |
|
"eval_runtime": 42.9691, |
|
"eval_samples_per_second": 89.948, |
|
"eval_steps_per_second": 11.264, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001560269011898603, |
|
"loss": 0.6456, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001555095706156234, |
|
"loss": 0.7325, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00015499224004138646, |
|
"loss": 0.6097, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001544749094671495, |
|
"loss": 0.6176, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.7267787839586028, |
|
"eval_loss": 0.7126018404960632, |
|
"eval_runtime": 42.8632, |
|
"eval_samples_per_second": 90.171, |
|
"eval_steps_per_second": 11.292, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00015395757889291259, |
|
"loss": 0.6045, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015344024831867565, |
|
"loss": 0.6202, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001529229177444387, |
|
"loss": 0.753, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015240558717020177, |
|
"loss": 0.852, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.727554980595084, |
|
"eval_loss": 0.715835452079773, |
|
"eval_runtime": 42.7695, |
|
"eval_samples_per_second": 90.368, |
|
"eval_steps_per_second": 11.316, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015188825659596483, |
|
"loss": 0.7012, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001513709260217279, |
|
"loss": 0.5647, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015085359544749096, |
|
"loss": 0.6485, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015033626487325402, |
|
"loss": 0.7937, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.7190168175937904, |
|
"eval_loss": 0.7453812956809998, |
|
"eval_runtime": 43.0359, |
|
"eval_samples_per_second": 89.809, |
|
"eval_steps_per_second": 11.246, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014981893429901708, |
|
"loss": 0.7984, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014930160372478014, |
|
"loss": 0.6896, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001487842731505432, |
|
"loss": 0.6828, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014826694257630627, |
|
"loss": 0.6087, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.745666235446313, |
|
"eval_loss": 0.7019104361534119, |
|
"eval_runtime": 43.12, |
|
"eval_samples_per_second": 89.634, |
|
"eval_steps_per_second": 11.224, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014774961200206933, |
|
"loss": 0.5794, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001472322814278324, |
|
"loss": 0.6007, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014671495085359545, |
|
"loss": 0.6977, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00014619762027935854, |
|
"loss": 0.6523, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.7611901681759379, |
|
"eval_loss": 0.6592049598693848, |
|
"eval_runtime": 43.048, |
|
"eval_samples_per_second": 89.783, |
|
"eval_steps_per_second": 11.243, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00014568028970512157, |
|
"loss": 0.7187, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00014516295913088464, |
|
"loss": 0.7343, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001446456285566477, |
|
"loss": 0.6977, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00014412829798241076, |
|
"loss": 0.6964, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.7428201811125485, |
|
"eval_loss": 0.682680606842041, |
|
"eval_runtime": 43.1365, |
|
"eval_samples_per_second": 89.599, |
|
"eval_steps_per_second": 11.22, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00014361096740817385, |
|
"loss": 0.6316, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00014309363683393688, |
|
"loss": 0.5869, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00014257630625969994, |
|
"loss": 0.734, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00014205897568546303, |
|
"loss": 0.5214, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.7619663648124192, |
|
"eval_loss": 0.6545156240463257, |
|
"eval_runtime": 43.5785, |
|
"eval_samples_per_second": 88.691, |
|
"eval_steps_per_second": 11.106, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00014154164511122607, |
|
"loss": 0.6521, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00014102431453698916, |
|
"loss": 0.7645, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00014050698396275222, |
|
"loss": 0.6585, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00013998965338851525, |
|
"loss": 0.6959, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.7391979301423027, |
|
"eval_loss": 0.6814814805984497, |
|
"eval_runtime": 43.4601, |
|
"eval_samples_per_second": 88.932, |
|
"eval_steps_per_second": 11.137, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00013947232281427834, |
|
"loss": 0.6886, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001389549922400414, |
|
"loss": 0.706, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00013843766166580447, |
|
"loss": 0.7285, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00013792033109156753, |
|
"loss": 0.7318, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.726261319534282, |
|
"eval_loss": 0.7493521571159363, |
|
"eval_runtime": 43.7478, |
|
"eval_samples_per_second": 88.347, |
|
"eval_steps_per_second": 11.063, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00013740300051733056, |
|
"loss": 0.6447, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00013688566994309365, |
|
"loss": 0.6502, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001363683393688567, |
|
"loss": 0.6552, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00013585100879461975, |
|
"loss": 0.4897, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.7353169469598965, |
|
"eval_loss": 0.6954035758972168, |
|
"eval_runtime": 43.8864, |
|
"eval_samples_per_second": 88.068, |
|
"eval_steps_per_second": 11.028, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00013533367822038284, |
|
"loss": 0.7551, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001348163476461459, |
|
"loss": 0.6713, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00013429901707190896, |
|
"loss": 0.6389, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00013378168649767202, |
|
"loss": 0.7711, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.7648124191461837, |
|
"eval_loss": 0.6494836807250977, |
|
"eval_runtime": 43.4576, |
|
"eval_samples_per_second": 88.937, |
|
"eval_steps_per_second": 11.137, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00013326435592343508, |
|
"loss": 0.5356, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00013274702534919814, |
|
"loss": 0.534, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001322296947749612, |
|
"loss": 0.5749, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00013171236420072427, |
|
"loss": 0.5831, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.7689521345407503, |
|
"eval_loss": 0.6288875341415405, |
|
"eval_runtime": 43.2829, |
|
"eval_samples_per_second": 89.296, |
|
"eval_steps_per_second": 11.182, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00013119503362648733, |
|
"loss": 0.6402, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001306777030522504, |
|
"loss": 0.6681, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00013016037247801345, |
|
"loss": 0.616, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00012964304190377652, |
|
"loss": 0.6276, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.7547218628719276, |
|
"eval_loss": 0.6559097766876221, |
|
"eval_runtime": 42.7536, |
|
"eval_samples_per_second": 90.402, |
|
"eval_steps_per_second": 11.321, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00012912571132953958, |
|
"loss": 0.6331, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00012860838075530264, |
|
"loss": 0.6172, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001280910501810657, |
|
"loss": 0.6621, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001275737196068288, |
|
"loss": 0.6204, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.7464424320827943, |
|
"eval_loss": 0.6652135848999023, |
|
"eval_runtime": 42.8306, |
|
"eval_samples_per_second": 90.239, |
|
"eval_steps_per_second": 11.3, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00012705638903259182, |
|
"loss": 0.5672, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00012653905845835489, |
|
"loss": 0.5268, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00012602172788411797, |
|
"loss": 0.5703, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.000125504397309881, |
|
"loss": 0.4628, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.7562742561448901, |
|
"eval_loss": 0.6426355838775635, |
|
"eval_runtime": 42.7732, |
|
"eval_samples_per_second": 90.36, |
|
"eval_steps_per_second": 11.316, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001249870667356441, |
|
"loss": 0.5506, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00012446973616140713, |
|
"loss": 0.6282, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001239524055871702, |
|
"loss": 0.77, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00012343507501293328, |
|
"loss": 0.5973, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.7865459249676585, |
|
"eval_loss": 0.5713614225387573, |
|
"eval_runtime": 42.9276, |
|
"eval_samples_per_second": 90.035, |
|
"eval_steps_per_second": 11.275, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00012291774443869632, |
|
"loss": 0.6429, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001224004138644594, |
|
"loss": 0.66, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012188308329022247, |
|
"loss": 0.6809, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00012136575271598552, |
|
"loss": 0.534, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.8005174644243208, |
|
"eval_loss": 0.5555915236473083, |
|
"eval_runtime": 43.3619, |
|
"eval_samples_per_second": 89.134, |
|
"eval_steps_per_second": 11.162, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00012084842214174858, |
|
"loss": 0.5991, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00012033109156751165, |
|
"loss": 0.5939, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001198137609932747, |
|
"loss": 0.5722, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00011929643041903778, |
|
"loss": 0.5295, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.789391979301423, |
|
"eval_loss": 0.5779016017913818, |
|
"eval_runtime": 43.1951, |
|
"eval_samples_per_second": 89.478, |
|
"eval_steps_per_second": 11.205, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00011877909984480083, |
|
"loss": 0.5853, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00011826176927056389, |
|
"loss": 0.5291, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00011774443869632696, |
|
"loss": 0.7245, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00011722710812209001, |
|
"loss": 0.523, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.7836998706338939, |
|
"eval_loss": 0.5925487875938416, |
|
"eval_runtime": 43.5083, |
|
"eval_samples_per_second": 88.834, |
|
"eval_steps_per_second": 11.124, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00011670977754785309, |
|
"loss": 0.5309, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00011619244697361615, |
|
"loss": 0.6577, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0001156751163993792, |
|
"loss": 0.7373, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00011515778582514227, |
|
"loss": 0.6749, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.7904269081500647, |
|
"eval_loss": 0.5738394856452942, |
|
"eval_runtime": 43.1735, |
|
"eval_samples_per_second": 89.522, |
|
"eval_steps_per_second": 11.211, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00011464045525090535, |
|
"loss": 0.4776, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001141231246766684, |
|
"loss": 0.4709, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00011360579410243146, |
|
"loss": 0.6755, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00011308846352819453, |
|
"loss": 0.6328, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.7875808538163002, |
|
"eval_loss": 0.5803186893463135, |
|
"eval_runtime": 43.3376, |
|
"eval_samples_per_second": 89.184, |
|
"eval_steps_per_second": 11.168, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00011257113295395758, |
|
"loss": 0.579, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00011205380237972066, |
|
"loss": 0.7426, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0001115364718054837, |
|
"loss": 0.5371, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00011101914123124677, |
|
"loss": 0.5914, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.7953428201811126, |
|
"eval_loss": 0.5584585666656494, |
|
"eval_runtime": 43.5623, |
|
"eval_samples_per_second": 88.723, |
|
"eval_steps_per_second": 11.111, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00011050181065700984, |
|
"loss": 0.643, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00010998448008277289, |
|
"loss": 0.6047, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00010946714950853596, |
|
"loss": 0.5757, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00010894981893429903, |
|
"loss": 0.578, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.8005174644243208, |
|
"eval_loss": 0.5448063015937805, |
|
"eval_runtime": 43.3799, |
|
"eval_samples_per_second": 89.096, |
|
"eval_steps_per_second": 11.157, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00010843248836006207, |
|
"loss": 0.4588, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00010791515778582515, |
|
"loss": 0.5981, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00010739782721158822, |
|
"loss": 0.667, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00010688049663735127, |
|
"loss": 0.4411, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.8227684346701164, |
|
"eval_loss": 0.5116038918495178, |
|
"eval_runtime": 42.7036, |
|
"eval_samples_per_second": 90.508, |
|
"eval_steps_per_second": 11.334, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00010636316606311433, |
|
"loss": 0.5035, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00010584583548887738, |
|
"loss": 0.5763, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00010532850491464046, |
|
"loss": 0.5425, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00010481117434040353, |
|
"loss": 0.5106, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.8147477360931435, |
|
"eval_loss": 0.5113465785980225, |
|
"eval_runtime": 42.6403, |
|
"eval_samples_per_second": 90.642, |
|
"eval_steps_per_second": 11.351, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00010429384376616658, |
|
"loss": 0.5484, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00010377651319192964, |
|
"loss": 0.5009, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00010325918261769272, |
|
"loss": 0.6042, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00010274185204345577, |
|
"loss": 0.5546, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.8103492884864165, |
|
"eval_loss": 0.5038859248161316, |
|
"eval_runtime": 42.5887, |
|
"eval_samples_per_second": 90.752, |
|
"eval_steps_per_second": 11.365, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00010222452146921884, |
|
"loss": 0.5231, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001017071908949819, |
|
"loss": 0.5845, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00010118986032074495, |
|
"loss": 0.4398, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00010067252974650803, |
|
"loss": 0.608, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.8142302716688228, |
|
"eval_loss": 0.5111123323440552, |
|
"eval_runtime": 42.6045, |
|
"eval_samples_per_second": 90.718, |
|
"eval_steps_per_second": 11.36, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001001551991722711, |
|
"loss": 0.4246, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.963786859803415e-05, |
|
"loss": 0.4871, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.912053802379721e-05, |
|
"loss": 0.4021, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.860320744956027e-05, |
|
"loss": 0.4014, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.8147477360931435, |
|
"eval_loss": 0.5171140432357788, |
|
"eval_runtime": 42.2155, |
|
"eval_samples_per_second": 91.554, |
|
"eval_steps_per_second": 11.465, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.808587687532334e-05, |
|
"loss": 0.3512, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.75685463010864e-05, |
|
"loss": 0.4632, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.705121572684946e-05, |
|
"loss": 0.4426, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.653388515261252e-05, |
|
"loss": 0.3698, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_accuracy": 0.8098318240620958, |
|
"eval_loss": 0.5332066416740417, |
|
"eval_runtime": 42.367, |
|
"eval_samples_per_second": 91.227, |
|
"eval_steps_per_second": 11.424, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.601655457837558e-05, |
|
"loss": 0.3427, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.549922400413866e-05, |
|
"loss": 0.3389, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.498189342990172e-05, |
|
"loss": 0.2373, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.446456285566477e-05, |
|
"loss": 0.3809, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.8062095730918499, |
|
"eval_loss": 0.5469871759414673, |
|
"eval_runtime": 42.1927, |
|
"eval_samples_per_second": 91.604, |
|
"eval_steps_per_second": 11.471, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.394723228142783e-05, |
|
"loss": 0.3754, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.34299017071909e-05, |
|
"loss": 0.4235, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.291257113295397e-05, |
|
"loss": 0.38, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.239524055871703e-05, |
|
"loss": 0.3148, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_accuracy": 0.8028460543337645, |
|
"eval_loss": 0.5701329708099365, |
|
"eval_runtime": 42.3816, |
|
"eval_samples_per_second": 91.195, |
|
"eval_steps_per_second": 11.42, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.187790998448008e-05, |
|
"loss": 0.4024, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.136057941024315e-05, |
|
"loss": 0.4395, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.084324883600621e-05, |
|
"loss": 0.3876, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.032591826176928e-05, |
|
"loss": 0.343, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.8209573091849935, |
|
"eval_loss": 0.4977104961872101, |
|
"eval_runtime": 42.4591, |
|
"eval_samples_per_second": 91.029, |
|
"eval_steps_per_second": 11.399, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.980858768753234e-05, |
|
"loss": 0.4519, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.92912571132954e-05, |
|
"loss": 0.3997, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.877392653905846e-05, |
|
"loss": 0.2541, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.825659596482152e-05, |
|
"loss": 0.3902, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.8206985769728331, |
|
"eval_loss": 0.5099577903747559, |
|
"eval_runtime": 42.2629, |
|
"eval_samples_per_second": 91.451, |
|
"eval_steps_per_second": 11.452, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.773926539058458e-05, |
|
"loss": 0.3226, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.722193481634765e-05, |
|
"loss": 0.3409, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.670460424211071e-05, |
|
"loss": 0.4179, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.618727366787378e-05, |
|
"loss": 0.4167, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.8175937904269082, |
|
"eval_loss": 0.5081688761711121, |
|
"eval_runtime": 42.3344, |
|
"eval_samples_per_second": 91.297, |
|
"eval_steps_per_second": 11.433, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.566994309363684e-05, |
|
"loss": 0.4263, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.515261251939989e-05, |
|
"loss": 0.3126, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.463528194516295e-05, |
|
"loss": 0.4524, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.411795137092603e-05, |
|
"loss": 0.5353, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.8282018111254851, |
|
"eval_loss": 0.4856567680835724, |
|
"eval_runtime": 42.6599, |
|
"eval_samples_per_second": 90.6, |
|
"eval_steps_per_second": 11.346, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.360062079668909e-05, |
|
"loss": 0.4858, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.308329022245215e-05, |
|
"loss": 0.5258, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.256595964821522e-05, |
|
"loss": 0.3878, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.204862907397828e-05, |
|
"loss": 0.3638, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.8196636481241915, |
|
"eval_loss": 0.496245414018631, |
|
"eval_runtime": 42.5167, |
|
"eval_samples_per_second": 90.905, |
|
"eval_steps_per_second": 11.384, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.153129849974134e-05, |
|
"loss": 0.4835, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.10139679255044e-05, |
|
"loss": 0.3377, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.049663735126746e-05, |
|
"loss": 0.4418, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.997930677703052e-05, |
|
"loss": 0.3683, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.823803363518758, |
|
"eval_loss": 0.5005962252616882, |
|
"eval_runtime": 42.9491, |
|
"eval_samples_per_second": 89.99, |
|
"eval_steps_per_second": 11.269, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.946197620279359e-05, |
|
"loss": 0.3083, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.894464562855665e-05, |
|
"loss": 0.471, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.842731505431972e-05, |
|
"loss": 0.4098, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.790998448008277e-05, |
|
"loss": 0.4013, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.8302716688227685, |
|
"eval_loss": 0.4766274690628052, |
|
"eval_runtime": 42.6182, |
|
"eval_samples_per_second": 90.689, |
|
"eval_steps_per_second": 11.357, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.739265390584583e-05, |
|
"loss": 0.3378, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.687532333160891e-05, |
|
"loss": 0.2741, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.635799275737197e-05, |
|
"loss": 0.2806, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.584066218313503e-05, |
|
"loss": 0.2147, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_accuracy": 0.8315653298835705, |
|
"eval_loss": 0.4876723885536194, |
|
"eval_runtime": 42.6227, |
|
"eval_samples_per_second": 90.679, |
|
"eval_steps_per_second": 11.355, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.532333160889808e-05, |
|
"loss": 0.3557, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.480600103466116e-05, |
|
"loss": 0.3691, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.428867046042422e-05, |
|
"loss": 0.3896, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.377133988618728e-05, |
|
"loss": 0.3973, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.8385510996119017, |
|
"eval_loss": 0.4723876118659973, |
|
"eval_runtime": 42.708, |
|
"eval_samples_per_second": 90.498, |
|
"eval_steps_per_second": 11.333, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.325400931195034e-05, |
|
"loss": 0.2486, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.27366787377134e-05, |
|
"loss": 0.3107, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.221934816347646e-05, |
|
"loss": 0.4066, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.170201758923953e-05, |
|
"loss": 0.3876, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy": 0.84372574385511, |
|
"eval_loss": 0.4504094421863556, |
|
"eval_runtime": 42.6319, |
|
"eval_samples_per_second": 90.66, |
|
"eval_steps_per_second": 11.353, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.118468701500259e-05, |
|
"loss": 0.314, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.066735644076565e-05, |
|
"loss": 0.3722, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 7.015002586652871e-05, |
|
"loss": 0.2722, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.963269529229179e-05, |
|
"loss": 0.2998, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.8499353169469599, |
|
"eval_loss": 0.43631112575531006, |
|
"eval_runtime": 42.596, |
|
"eval_samples_per_second": 90.736, |
|
"eval_steps_per_second": 11.363, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.911536471805485e-05, |
|
"loss": 0.2214, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.85980341438179e-05, |
|
"loss": 0.3157, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.808070356958096e-05, |
|
"loss": 0.4113, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.756337299534403e-05, |
|
"loss": 0.3621, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.8476067270375162, |
|
"eval_loss": 0.44939151406288147, |
|
"eval_runtime": 42.5462, |
|
"eval_samples_per_second": 90.842, |
|
"eval_steps_per_second": 11.376, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.70460424211071e-05, |
|
"loss": 0.3491, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.652871184687016e-05, |
|
"loss": 0.3335, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.60113812726332e-05, |
|
"loss": 0.3568, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.549405069839628e-05, |
|
"loss": 0.3128, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.8478654592496766, |
|
"eval_loss": 0.43925172090530396, |
|
"eval_runtime": 42.5607, |
|
"eval_samples_per_second": 90.811, |
|
"eval_steps_per_second": 11.372, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.497672012415934e-05, |
|
"loss": 0.3334, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.44593895499224e-05, |
|
"loss": 0.3584, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.394205897568547e-05, |
|
"loss": 0.2895, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.342472840144853e-05, |
|
"loss": 0.3283, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_accuracy": 0.8473479948253557, |
|
"eval_loss": 0.44278526306152344, |
|
"eval_runtime": 42.7932, |
|
"eval_samples_per_second": 90.318, |
|
"eval_steps_per_second": 11.31, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.290739782721159e-05, |
|
"loss": 0.4039, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.239006725297465e-05, |
|
"loss": 0.3649, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.187273667873773e-05, |
|
"loss": 0.2828, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.135540610450077e-05, |
|
"loss": 0.4072, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.8455368693402329, |
|
"eval_loss": 0.4490886330604553, |
|
"eval_runtime": 42.8656, |
|
"eval_samples_per_second": 90.166, |
|
"eval_steps_per_second": 11.291, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.0838075530263836e-05, |
|
"loss": 0.3216, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 6.032074495602691e-05, |
|
"loss": 0.3065, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.9803414381789966e-05, |
|
"loss": 0.2062, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.928608380755303e-05, |
|
"loss": 0.2698, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_accuracy": 0.8525226390685641, |
|
"eval_loss": 0.41426753997802734, |
|
"eval_runtime": 42.9118, |
|
"eval_samples_per_second": 90.068, |
|
"eval_steps_per_second": 11.279, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.876875323331609e-05, |
|
"loss": 0.2975, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.825142265907916e-05, |
|
"loss": 0.2441, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.773409208484222e-05, |
|
"loss": 0.3095, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.7216761510605275e-05, |
|
"loss": 0.2922, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.8514877102199224, |
|
"eval_loss": 0.4335246980190277, |
|
"eval_runtime": 42.5699, |
|
"eval_samples_per_second": 90.792, |
|
"eval_steps_per_second": 11.37, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.669943093636835e-05, |
|
"loss": 0.3733, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.6182100362131405e-05, |
|
"loss": 0.2772, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.566476978789447e-05, |
|
"loss": 0.2602, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.514743921365753e-05, |
|
"loss": 0.3662, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_accuracy": 0.8613195342820181, |
|
"eval_loss": 0.41216862201690674, |
|
"eval_runtime": 42.677, |
|
"eval_samples_per_second": 90.564, |
|
"eval_steps_per_second": 11.341, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.46301086394206e-05, |
|
"loss": 0.3444, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.411277806518366e-05, |
|
"loss": 0.2477, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.3595447490946714e-05, |
|
"loss": 0.242, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.3078116916709776e-05, |
|
"loss": 0.3607, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.8633893919793014, |
|
"eval_loss": 0.40597081184387207, |
|
"eval_runtime": 42.553, |
|
"eval_samples_per_second": 90.828, |
|
"eval_steps_per_second": 11.374, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.2560786342472844e-05, |
|
"loss": 0.3836, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.2043455768235906e-05, |
|
"loss": 0.2116, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.152612519399897e-05, |
|
"loss": 0.3695, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.1008794619762036e-05, |
|
"loss": 0.2488, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_accuracy": 0.8633893919793014, |
|
"eval_loss": 0.4011004567146301, |
|
"eval_runtime": 42.6493, |
|
"eval_samples_per_second": 90.623, |
|
"eval_steps_per_second": 11.348, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.049146404552509e-05, |
|
"loss": 0.2717, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.997413347128815e-05, |
|
"loss": 0.3791, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.945680289705122e-05, |
|
"loss": 0.4196, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.8939472322814276e-05, |
|
"loss": 0.3733, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_accuracy": 0.8566623544631307, |
|
"eval_loss": 0.4146381914615631, |
|
"eval_runtime": 42.9227, |
|
"eval_samples_per_second": 90.045, |
|
"eval_steps_per_second": 11.276, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.8422141748577345e-05, |
|
"loss": 0.3268, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.7904811174340407e-05, |
|
"loss": 0.2866, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.738748060010347e-05, |
|
"loss": 0.2063, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.687015002586653e-05, |
|
"loss": 0.3388, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.8657179818887452, |
|
"eval_loss": 0.4066773056983948, |
|
"eval_runtime": 42.7293, |
|
"eval_samples_per_second": 90.453, |
|
"eval_steps_per_second": 11.327, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.635281945162959e-05, |
|
"loss": 0.2401, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.583548887739266e-05, |
|
"loss": 0.3411, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.5318158303155715e-05, |
|
"loss": 0.2449, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.4800827728918784e-05, |
|
"loss": 0.3176, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.8654592496765847, |
|
"eval_loss": 0.3914910852909088, |
|
"eval_runtime": 42.6139, |
|
"eval_samples_per_second": 90.698, |
|
"eval_steps_per_second": 11.358, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.428349715468184e-05, |
|
"loss": 0.3395, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.376616658044491e-05, |
|
"loss": 0.2695, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.324883600620797e-05, |
|
"loss": 0.2911, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.273150543197103e-05, |
|
"loss": 0.3989, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_accuracy": 0.8690815006468305, |
|
"eval_loss": 0.37924402952194214, |
|
"eval_runtime": 42.79, |
|
"eval_samples_per_second": 90.325, |
|
"eval_steps_per_second": 11.311, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.221417485773409e-05, |
|
"loss": 0.1723, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.1696844283497154e-05, |
|
"loss": 0.1942, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.117951370926022e-05, |
|
"loss": 0.359, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.066218313502328e-05, |
|
"loss": 0.2519, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_accuracy": 0.8734799482535576, |
|
"eval_loss": 0.3759077787399292, |
|
"eval_runtime": 42.6891, |
|
"eval_samples_per_second": 90.538, |
|
"eval_steps_per_second": 11.338, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.0144852560786346e-05, |
|
"loss": 0.2928, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.962752198654941e-05, |
|
"loss": 0.2605, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.911019141231247e-05, |
|
"loss": 0.2225, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.859286083807553e-05, |
|
"loss": 0.241, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_accuracy": 0.8716688227684347, |
|
"eval_loss": 0.38423553109169006, |
|
"eval_runtime": 42.5605, |
|
"eval_samples_per_second": 90.812, |
|
"eval_steps_per_second": 11.372, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.807553026383859e-05, |
|
"loss": 0.3516, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.755819968960166e-05, |
|
"loss": 0.2819, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.704086911536472e-05, |
|
"loss": 0.2807, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.6523538541127785e-05, |
|
"loss": 0.2908, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.8698576972833117, |
|
"eval_loss": 0.37994685769081116, |
|
"eval_runtime": 42.6371, |
|
"eval_samples_per_second": 90.649, |
|
"eval_steps_per_second": 11.352, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.600620796689084e-05, |
|
"loss": 0.2328, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.548887739265391e-05, |
|
"loss": 0.1905, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.497154681841697e-05, |
|
"loss": 0.3018, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.445421624418003e-05, |
|
"loss": 0.2793, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.8659767141009056, |
|
"eval_loss": 0.38844797015190125, |
|
"eval_runtime": 42.5682, |
|
"eval_samples_per_second": 90.795, |
|
"eval_steps_per_second": 11.37, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.3936885669943094e-05, |
|
"loss": 0.2598, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.3419555095706156e-05, |
|
"loss": 0.289, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.2902224521469224e-05, |
|
"loss": 0.3378, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.238489394723228e-05, |
|
"loss": 0.2196, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_accuracy": 0.8747736093143597, |
|
"eval_loss": 0.3740740716457367, |
|
"eval_runtime": 42.8776, |
|
"eval_samples_per_second": 90.14, |
|
"eval_steps_per_second": 11.288, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.186756337299535e-05, |
|
"loss": 0.3452, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.13502327987584e-05, |
|
"loss": 0.3273, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.083290222452147e-05, |
|
"loss": 0.3743, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.0315571650284536e-05, |
|
"loss": 0.2614, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_accuracy": 0.8727037516170764, |
|
"eval_loss": 0.3755486309528351, |
|
"eval_runtime": 42.6899, |
|
"eval_samples_per_second": 90.537, |
|
"eval_steps_per_second": 11.338, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.9798241076047595e-05, |
|
"loss": 0.2846, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.928091050181066e-05, |
|
"loss": 0.199, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.8763579927573718e-05, |
|
"loss": 0.2398, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.8246249353336783e-05, |
|
"loss": 0.1883, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_accuracy": 0.8727037516170764, |
|
"eval_loss": 0.3819185793399811, |
|
"eval_runtime": 42.9048, |
|
"eval_samples_per_second": 90.083, |
|
"eval_steps_per_second": 11.281, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.7728918779099845e-05, |
|
"loss": 0.3646, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.721158820486291e-05, |
|
"loss": 0.2332, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.6694257630625975e-05, |
|
"loss": 0.3035, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.6176927056389034e-05, |
|
"loss": 0.247, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.8776196636481242, |
|
"eval_loss": 0.36190494894981384, |
|
"eval_runtime": 43.0956, |
|
"eval_samples_per_second": 89.684, |
|
"eval_steps_per_second": 11.231, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.56595964821521e-05, |
|
"loss": 0.2436, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.5142265907915157e-05, |
|
"loss": 0.305, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.4624935333678222e-05, |
|
"loss": 0.2783, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.4107604759441284e-05, |
|
"loss": 0.1617, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_accuracy": 0.8760672703751617, |
|
"eval_loss": 0.3629147410392761, |
|
"eval_runtime": 42.8946, |
|
"eval_samples_per_second": 90.105, |
|
"eval_steps_per_second": 11.283, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3590274185204346e-05, |
|
"loss": 0.2749, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.307294361096741e-05, |
|
"loss": 0.3613, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2555613036730473e-05, |
|
"loss": 0.2276, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2038282462493534e-05, |
|
"loss": 0.2177, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_accuracy": 0.8846054333764554, |
|
"eval_loss": 0.3531200587749481, |
|
"eval_runtime": 42.8358, |
|
"eval_samples_per_second": 90.228, |
|
"eval_steps_per_second": 11.299, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.1520951888256596e-05, |
|
"loss": 0.378, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.1003621314019658e-05, |
|
"loss": 0.1863, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.048629073978272e-05, |
|
"loss": 0.2241, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9968960165545785e-05, |
|
"loss": 0.265, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.8799482535575679, |
|
"eval_loss": 0.3503970801830292, |
|
"eval_runtime": 42.617, |
|
"eval_samples_per_second": 90.692, |
|
"eval_steps_per_second": 11.357, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9451629591308847e-05, |
|
"loss": 0.3363, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.893429901707191e-05, |
|
"loss": 0.2832, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.8416968442834973e-05, |
|
"loss": 0.2654, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.7899637868598035e-05, |
|
"loss": 0.176, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_accuracy": 0.888745148771022, |
|
"eval_loss": 0.34128451347351074, |
|
"eval_runtime": 42.6366, |
|
"eval_samples_per_second": 90.65, |
|
"eval_steps_per_second": 11.352, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.7382307294361097e-05, |
|
"loss": 0.2136, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.686497672012416e-05, |
|
"loss": 0.2482, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.634764614588722e-05, |
|
"loss": 0.2034, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.5830315571650286e-05, |
|
"loss": 0.1942, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_accuracy": 0.8892626131953428, |
|
"eval_loss": 0.3416860103607178, |
|
"eval_runtime": 42.741, |
|
"eval_samples_per_second": 90.428, |
|
"eval_steps_per_second": 11.324, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.5312984997413347e-05, |
|
"loss": 0.1961, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.479565442317641e-05, |
|
"loss": 0.1949, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.4278323848939472e-05, |
|
"loss": 0.2767, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.3760993274702536e-05, |
|
"loss": 0.2977, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_accuracy": 0.8864165588615782, |
|
"eval_loss": 0.34559765458106995, |
|
"eval_runtime": 42.8519, |
|
"eval_samples_per_second": 90.194, |
|
"eval_steps_per_second": 11.295, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.3243662700465598e-05, |
|
"loss": 0.2409, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.272633212622866e-05, |
|
"loss": 0.2807, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.2209001551991723e-05, |
|
"loss": 0.2191, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.1691670977754786e-05, |
|
"loss": 0.1658, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.8882276843467012, |
|
"eval_loss": 0.3382810950279236, |
|
"eval_runtime": 42.9231, |
|
"eval_samples_per_second": 90.045, |
|
"eval_steps_per_second": 11.276, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1174340403517848e-05, |
|
"loss": 0.1498, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.065700982928091e-05, |
|
"loss": 0.2529, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.0139679255043973e-05, |
|
"loss": 0.1997, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.622348680807037e-06, |
|
"loss": 0.2904, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.891849935316947, |
|
"eval_loss": 0.33569639921188354, |
|
"eval_runtime": 42.8384, |
|
"eval_samples_per_second": 90.223, |
|
"eval_steps_per_second": 11.298, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.105018106570098e-06, |
|
"loss": 0.2314, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 8.58768753233316e-06, |
|
"loss": 0.3056, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.070356958096224e-06, |
|
"loss": 0.1688, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.553026383859286e-06, |
|
"loss": 0.2423, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_accuracy": 0.8944372574385511, |
|
"eval_loss": 0.32754141092300415, |
|
"eval_runtime": 42.7436, |
|
"eval_samples_per_second": 90.423, |
|
"eval_steps_per_second": 11.323, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.035695809622349e-06, |
|
"loss": 0.1978, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.518365235385411e-06, |
|
"loss": 0.2156, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 6.001034661148474e-06, |
|
"loss": 0.1649, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.4837040869115365e-06, |
|
"loss": 0.263, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_accuracy": 0.8921086675291073, |
|
"eval_loss": 0.3273804187774658, |
|
"eval_runtime": 42.7684, |
|
"eval_samples_per_second": 90.37, |
|
"eval_steps_per_second": 11.317, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.966373512674599e-06, |
|
"loss": 0.3802, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.449042938437662e-06, |
|
"loss": 0.2816, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.931712364200724e-06, |
|
"loss": 0.2565, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.4143817899637873e-06, |
|
"loss": 0.2458, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.8923673997412678, |
|
"eval_loss": 0.3263191878795624, |
|
"eval_runtime": 42.9857, |
|
"eval_samples_per_second": 89.914, |
|
"eval_steps_per_second": 11.26, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.8970512157268495e-06, |
|
"loss": 0.3755, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.379720641489912e-06, |
|
"loss": 0.3133, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8623900672529747e-06, |
|
"loss": 0.2631, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.3450594930160373e-06, |
|
"loss": 0.227, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.8923673997412678, |
|
"eval_loss": 0.3248412013053894, |
|
"eval_runtime": 42.9211, |
|
"eval_samples_per_second": 90.049, |
|
"eval_steps_per_second": 11.276, |
|
"step": 3840 |
|
} |
|
], |
|
"max_steps": 3866, |
|
"num_train_epochs": 2, |
|
"total_flos": 4.760709076383676e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|