{
  "best_metric": 0.9592959295929593,
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-1227",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1227,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 8.624119758605957,
      "learning_rate": 4.0650406504065046e-06,
      "loss": 1.1456,
      "step": 10
    },
    {
      "epoch": 0.05,
      "grad_norm": 10.445067405700684,
      "learning_rate": 8.130081300813009e-06,
      "loss": 1.0943,
      "step": 20
    },
    {
      "epoch": 0.07,
      "grad_norm": 8.588050842285156,
      "learning_rate": 1.2195121951219513e-05,
      "loss": 1.0114,
      "step": 30
    },
    {
      "epoch": 0.1,
      "grad_norm": 13.745179176330566,
      "learning_rate": 1.6260162601626018e-05,
      "loss": 0.9088,
      "step": 40
    },
    {
      "epoch": 0.12,
      "grad_norm": 16.390989303588867,
      "learning_rate": 2.032520325203252e-05,
      "loss": 0.8678,
      "step": 50
    },
    {
      "epoch": 0.15,
      "grad_norm": 23.49904441833496,
      "learning_rate": 2.4390243902439026e-05,
      "loss": 0.6711,
      "step": 60
    },
    {
      "epoch": 0.17,
      "grad_norm": 15.184337615966797,
      "learning_rate": 2.8455284552845528e-05,
      "loss": 0.5926,
      "step": 70
    },
    {
      "epoch": 0.2,
      "grad_norm": 31.39984703063965,
      "learning_rate": 3.2520325203252037e-05,
      "loss": 0.5689,
      "step": 80
    },
    {
      "epoch": 0.22,
      "grad_norm": 11.041800498962402,
      "learning_rate": 3.6585365853658535e-05,
      "loss": 0.5543,
      "step": 90
    },
    {
      "epoch": 0.24,
      "grad_norm": 23.220727920532227,
      "learning_rate": 4.065040650406504e-05,
      "loss": 0.4727,
      "step": 100
    },
    {
      "epoch": 0.27,
      "grad_norm": 7.919190406799316,
      "learning_rate": 4.4715447154471546e-05,
      "loss": 0.4859,
      "step": 110
    },
    {
      "epoch": 0.29,
      "grad_norm": 17.01369285583496,
      "learning_rate": 4.878048780487805e-05,
      "loss": 0.4051,
      "step": 120
    },
    {
      "epoch": 0.32,
      "grad_norm": 7.333168983459473,
      "learning_rate": 4.968297101449276e-05,
      "loss": 0.4138,
      "step": 130
    },
    {
      "epoch": 0.34,
      "grad_norm": 11.308248519897461,
      "learning_rate": 4.9230072463768114e-05,
      "loss": 0.4285,
      "step": 140
    },
    {
      "epoch": 0.37,
      "grad_norm": 15.314751625061035,
      "learning_rate": 4.8777173913043476e-05,
      "loss": 0.3061,
      "step": 150
    },
    {
      "epoch": 0.39,
      "grad_norm": 30.280778884887695,
      "learning_rate": 4.8324275362318844e-05,
      "loss": 0.4037,
      "step": 160
    },
    {
      "epoch": 0.42,
      "grad_norm": 9.863381385803223,
      "learning_rate": 4.7871376811594205e-05,
      "loss": 0.2954,
      "step": 170
    },
    {
      "epoch": 0.44,
      "grad_norm": 25.285860061645508,
      "learning_rate": 4.741847826086957e-05,
      "loss": 0.3745,
      "step": 180
    },
    {
      "epoch": 0.46,
      "grad_norm": 9.545634269714355,
      "learning_rate": 4.696557971014493e-05,
      "loss": 0.4062,
      "step": 190
    },
    {
      "epoch": 0.49,
      "grad_norm": 20.92987060546875,
      "learning_rate": 4.651268115942029e-05,
      "loss": 0.365,
      "step": 200
    },
    {
      "epoch": 0.51,
      "grad_norm": 9.27026081085205,
      "learning_rate": 4.6059782608695657e-05,
      "loss": 0.2602,
      "step": 210
    },
    {
      "epoch": 0.54,
      "grad_norm": 12.594367980957031,
      "learning_rate": 4.560688405797102e-05,
      "loss": 0.3196,
      "step": 220
    },
    {
      "epoch": 0.56,
      "grad_norm": 18.245967864990234,
      "learning_rate": 4.515398550724638e-05,
      "loss": 0.3375,
      "step": 230
    },
    {
      "epoch": 0.59,
      "grad_norm": 22.912260055541992,
      "learning_rate": 4.470108695652174e-05,
      "loss": 0.2464,
      "step": 240
    },
    {
      "epoch": 0.61,
      "grad_norm": 18.069555282592773,
      "learning_rate": 4.42481884057971e-05,
      "loss": 0.5225,
      "step": 250
    },
    {
      "epoch": 0.64,
      "grad_norm": 4.701681613922119,
      "learning_rate": 4.379528985507246e-05,
      "loss": 0.4517,
      "step": 260
    },
    {
      "epoch": 0.66,
      "grad_norm": 8.533316612243652,
      "learning_rate": 4.334239130434783e-05,
      "loss": 0.3102,
      "step": 270
    },
    {
      "epoch": 0.68,
      "grad_norm": 13.00133991241455,
      "learning_rate": 4.288949275362319e-05,
      "loss": 0.2733,
      "step": 280
    },
    {
      "epoch": 0.71,
      "grad_norm": 11.808174133300781,
      "learning_rate": 4.243659420289855e-05,
      "loss": 0.3279,
      "step": 290
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.255472183227539,
      "learning_rate": 4.1983695652173914e-05,
      "loss": 0.2222,
      "step": 300
    },
    {
      "epoch": 0.76,
      "grad_norm": 18.238956451416016,
      "learning_rate": 4.1530797101449276e-05,
      "loss": 0.2738,
      "step": 310
    },
    {
      "epoch": 0.78,
      "grad_norm": 6.426773548126221,
      "learning_rate": 4.1077898550724644e-05,
      "loss": 0.2553,
      "step": 320
    },
    {
      "epoch": 0.81,
      "grad_norm": 11.917569160461426,
      "learning_rate": 4.0625000000000005e-05,
      "loss": 0.3049,
      "step": 330
    },
    {
      "epoch": 0.83,
      "grad_norm": 8.527942657470703,
      "learning_rate": 4.017210144927536e-05,
      "loss": 0.3218,
      "step": 340
    },
    {
      "epoch": 0.86,
      "grad_norm": 12.122479438781738,
      "learning_rate": 3.971920289855073e-05,
      "loss": 0.2399,
      "step": 350
    },
    {
      "epoch": 0.88,
      "grad_norm": 9.603090286254883,
      "learning_rate": 3.926630434782609e-05,
      "loss": 0.2811,
      "step": 360
    },
    {
      "epoch": 0.9,
      "grad_norm": 5.066426753997803,
      "learning_rate": 3.881340579710145e-05,
      "loss": 0.2594,
      "step": 370
    },
    {
      "epoch": 0.93,
      "grad_norm": 18.739673614501953,
      "learning_rate": 3.836050724637682e-05,
      "loss": 0.244,
      "step": 380
    },
    {
      "epoch": 0.95,
      "grad_norm": 17.591327667236328,
      "learning_rate": 3.790760869565217e-05,
      "loss": 0.3675,
      "step": 390
    },
    {
      "epoch": 0.98,
      "grad_norm": 12.923001289367676,
      "learning_rate": 3.745471014492754e-05,
      "loss": 0.3065,
      "step": 400
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9471947194719472,
      "eval_loss": 0.15252229571342468,
      "eval_runtime": 6.4388,
      "eval_samples_per_second": 141.175,
      "eval_steps_per_second": 28.266,
      "step": 409
    },
    {
      "epoch": 1.0,
      "grad_norm": 7.585595607757568,
      "learning_rate": 3.70018115942029e-05,
      "loss": 0.2302,
      "step": 410
    },
    {
      "epoch": 1.03,
      "grad_norm": 9.591753005981445,
      "learning_rate": 3.654891304347826e-05,
      "loss": 0.2185,
      "step": 420
    },
    {
      "epoch": 1.05,
      "grad_norm": 18.43389320373535,
      "learning_rate": 3.6096014492753624e-05,
      "loss": 0.1938,
      "step": 430
    },
    {
      "epoch": 1.08,
      "grad_norm": 13.575929641723633,
      "learning_rate": 3.5643115942028985e-05,
      "loss": 0.2622,
      "step": 440
    },
    {
      "epoch": 1.1,
      "grad_norm": 18.06717300415039,
      "learning_rate": 3.5190217391304346e-05,
      "loss": 0.4565,
      "step": 450
    },
    {
      "epoch": 1.12,
      "grad_norm": 8.216936111450195,
      "learning_rate": 3.4737318840579714e-05,
      "loss": 0.2695,
      "step": 460
    },
    {
      "epoch": 1.15,
      "grad_norm": 9.777605056762695,
      "learning_rate": 3.4284420289855076e-05,
      "loss": 0.2196,
      "step": 470
    },
    {
      "epoch": 1.17,
      "grad_norm": 14.722864151000977,
      "learning_rate": 3.383152173913044e-05,
      "loss": 0.3281,
      "step": 480
    },
    {
      "epoch": 1.2,
      "grad_norm": 6.793803691864014,
      "learning_rate": 3.33786231884058e-05,
      "loss": 0.2514,
      "step": 490
    },
    {
      "epoch": 1.22,
      "grad_norm": 12.138315200805664,
      "learning_rate": 3.292572463768116e-05,
      "loss": 0.2029,
      "step": 500
    },
    {
      "epoch": 1.25,
      "grad_norm": 9.416603088378906,
      "learning_rate": 3.247282608695653e-05,
      "loss": 0.2635,
      "step": 510
    },
    {
      "epoch": 1.27,
      "grad_norm": 9.601762771606445,
      "learning_rate": 3.201992753623189e-05,
      "loss": 0.1661,
      "step": 520
    },
    {
      "epoch": 1.3,
      "grad_norm": 5.231688022613525,
      "learning_rate": 3.156702898550725e-05,
      "loss": 0.2205,
      "step": 530
    },
    {
      "epoch": 1.32,
      "grad_norm": 6.3703532218933105,
      "learning_rate": 3.111413043478261e-05,
      "loss": 0.2531,
      "step": 540
    },
    {
      "epoch": 1.34,
      "grad_norm": 14.850451469421387,
      "learning_rate": 3.066123188405797e-05,
      "loss": 0.1995,
      "step": 550
    },
    {
      "epoch": 1.37,
      "grad_norm": 14.940292358398438,
      "learning_rate": 3.0208333333333334e-05,
      "loss": 0.2991,
      "step": 560
    },
    {
      "epoch": 1.39,
      "grad_norm": 18.384620666503906,
      "learning_rate": 2.9755434782608698e-05,
      "loss": 0.1576,
      "step": 570
    },
    {
      "epoch": 1.42,
      "grad_norm": 8.814579963684082,
      "learning_rate": 2.930253623188406e-05,
      "loss": 0.2447,
      "step": 580
    },
    {
      "epoch": 1.44,
      "grad_norm": 9.285147666931152,
      "learning_rate": 2.884963768115942e-05,
      "loss": 0.1615,
      "step": 590
    },
    {
      "epoch": 1.47,
      "grad_norm": 7.262125015258789,
      "learning_rate": 2.8396739130434785e-05,
      "loss": 0.1979,
      "step": 600
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.0271364450454712,
      "learning_rate": 2.7943840579710146e-05,
      "loss": 0.18,
      "step": 610
    },
    {
      "epoch": 1.52,
      "grad_norm": 15.003844261169434,
      "learning_rate": 2.749094202898551e-05,
      "loss": 0.1752,
      "step": 620
    },
    {
      "epoch": 1.54,
      "grad_norm": 25.77374267578125,
      "learning_rate": 2.7038043478260872e-05,
      "loss": 0.1326,
      "step": 630
    },
    {
      "epoch": 1.56,
      "grad_norm": 33.20709228515625,
      "learning_rate": 2.6585144927536234e-05,
      "loss": 0.3093,
      "step": 640
    },
    {
      "epoch": 1.59,
      "grad_norm": 11.408818244934082,
      "learning_rate": 2.6132246376811598e-05,
      "loss": 0.2182,
      "step": 650
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7263596057891846,
      "learning_rate": 2.567934782608696e-05,
      "loss": 0.1294,
      "step": 660
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.28471302986145,
      "learning_rate": 2.5226449275362317e-05,
      "loss": 0.2335,
      "step": 670
    },
    {
      "epoch": 1.66,
      "grad_norm": 9.495030403137207,
      "learning_rate": 2.4773550724637682e-05,
      "loss": 0.191,
      "step": 680
    },
    {
      "epoch": 1.69,
      "grad_norm": 15.677921295166016,
      "learning_rate": 2.4320652173913043e-05,
      "loss": 0.201,
      "step": 690
    },
    {
      "epoch": 1.71,
      "grad_norm": 10.669686317443848,
      "learning_rate": 2.3867753623188408e-05,
      "loss": 0.2334,
      "step": 700
    },
    {
      "epoch": 1.74,
      "grad_norm": 22.738924026489258,
      "learning_rate": 2.341485507246377e-05,
      "loss": 0.2054,
      "step": 710
    },
    {
      "epoch": 1.76,
      "grad_norm": 13.700279235839844,
      "learning_rate": 2.296195652173913e-05,
      "loss": 0.2358,
      "step": 720
    },
    {
      "epoch": 1.78,
      "grad_norm": 8.795063018798828,
      "learning_rate": 2.2509057971014495e-05,
      "loss": 0.2259,
      "step": 730
    },
    {
      "epoch": 1.81,
      "grad_norm": 13.257553100585938,
      "learning_rate": 2.2056159420289856e-05,
      "loss": 0.2174,
      "step": 740
    },
    {
      "epoch": 1.83,
      "grad_norm": 16.352462768554688,
      "learning_rate": 2.1603260869565217e-05,
      "loss": 0.1939,
      "step": 750
    },
    {
      "epoch": 1.86,
      "grad_norm": 9.414634704589844,
      "learning_rate": 2.1150362318840582e-05,
      "loss": 0.1856,
      "step": 760
    },
    {
      "epoch": 1.88,
      "grad_norm": 12.655200958251953,
      "learning_rate": 2.0697463768115943e-05,
      "loss": 0.2237,
      "step": 770
    },
    {
      "epoch": 1.91,
      "grad_norm": 8.855188369750977,
      "learning_rate": 2.0244565217391308e-05,
      "loss": 0.2063,
      "step": 780
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.29552894830703735,
      "learning_rate": 1.9791666666666665e-05,
      "loss": 0.1005,
      "step": 790
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.12908935546875,
      "learning_rate": 1.933876811594203e-05,
      "loss": 0.1538,
      "step": 800
    },
    {
      "epoch": 1.98,
      "grad_norm": 8.586709022521973,
      "learning_rate": 1.888586956521739e-05,
      "loss": 0.1745,
      "step": 810
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9570957095709571,
      "eval_loss": 0.12920857965946198,
      "eval_runtime": 6.4383,
      "eval_samples_per_second": 141.187,
      "eval_steps_per_second": 28.268,
      "step": 818
    },
    {
      "epoch": 2.0,
      "grad_norm": 20.99432373046875,
      "learning_rate": 1.8432971014492756e-05,
      "loss": 0.2031,
      "step": 820
    },
    {
      "epoch": 2.03,
      "grad_norm": 11.791101455688477,
      "learning_rate": 1.7980072463768117e-05,
      "loss": 0.1351,
      "step": 830
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.006465435028076,
      "learning_rate": 1.752717391304348e-05,
      "loss": 0.1164,
      "step": 840
    },
    {
      "epoch": 2.08,
      "grad_norm": 5.286161422729492,
      "learning_rate": 1.7074275362318843e-05,
      "loss": 0.1593,
      "step": 850
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.119263648986816,
      "learning_rate": 1.66213768115942e-05,
      "loss": 0.0819,
      "step": 860
    },
    {
      "epoch": 2.13,
      "grad_norm": 7.2655253410339355,
      "learning_rate": 1.6168478260869565e-05,
      "loss": 0.0936,
      "step": 870
    },
    {
      "epoch": 2.15,
      "grad_norm": 21.422300338745117,
      "learning_rate": 1.571557971014493e-05,
      "loss": 0.1586,
      "step": 880
    },
    {
      "epoch": 2.18,
      "grad_norm": 20.841064453125,
      "learning_rate": 1.526268115942029e-05,
      "loss": 0.2057,
      "step": 890
    },
    {
      "epoch": 2.2,
      "grad_norm": 5.252929210662842,
      "learning_rate": 1.4809782608695653e-05,
      "loss": 0.1781,
      "step": 900
    },
    {
      "epoch": 2.22,
      "grad_norm": 23.310047149658203,
      "learning_rate": 1.4356884057971015e-05,
      "loss": 0.1749,
      "step": 910
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4795796871185303,
      "learning_rate": 1.3903985507246378e-05,
      "loss": 0.2178,
      "step": 920
    },
    {
      "epoch": 2.27,
      "grad_norm": 8.616735458374023,
      "learning_rate": 1.3451086956521738e-05,
      "loss": 0.2184,
      "step": 930
    },
    {
      "epoch": 2.3,
      "grad_norm": 11.104738235473633,
      "learning_rate": 1.2998188405797101e-05,
      "loss": 0.088,
      "step": 940
    },
    {
      "epoch": 2.32,
      "grad_norm": 11.91581916809082,
      "learning_rate": 1.2545289855072464e-05,
      "loss": 0.2445,
      "step": 950
    },
    {
      "epoch": 2.35,
      "grad_norm": 1.7576195001602173,
      "learning_rate": 1.2092391304347827e-05,
      "loss": 0.1298,
      "step": 960
    },
    {
      "epoch": 2.37,
      "grad_norm": 9.120392799377441,
      "learning_rate": 1.163949275362319e-05,
      "loss": 0.1935,
      "step": 970
    },
    {
      "epoch": 2.4,
      "grad_norm": 13.665518760681152,
      "learning_rate": 1.1186594202898551e-05,
      "loss": 0.1689,
      "step": 980
    },
    {
      "epoch": 2.42,
      "grad_norm": 19.75223731994629,
      "learning_rate": 1.0733695652173914e-05,
      "loss": 0.142,
      "step": 990
    },
    {
      "epoch": 2.44,
      "grad_norm": 8.352010726928711,
      "learning_rate": 1.0280797101449275e-05,
      "loss": 0.2239,
      "step": 1000
    },
    {
      "epoch": 2.47,
      "grad_norm": 21.5345516204834,
      "learning_rate": 9.827898550724638e-06,
      "loss": 0.2238,
      "step": 1010
    },
    {
      "epoch": 2.49,
      "grad_norm": 8.045764923095703,
      "learning_rate": 9.375000000000001e-06,
      "loss": 0.1246,
      "step": 1020
    },
    {
      "epoch": 2.52,
      "grad_norm": 8.87646770477295,
      "learning_rate": 8.922101449275362e-06,
      "loss": 0.0863,
      "step": 1030
    },
    {
      "epoch": 2.54,
      "grad_norm": 11.654149055480957,
      "learning_rate": 8.469202898550725e-06,
      "loss": 0.0846,
      "step": 1040
    },
    {
      "epoch": 2.57,
      "grad_norm": 0.8430119156837463,
      "learning_rate": 8.016304347826086e-06,
      "loss": 0.1514,
      "step": 1050
    },
    {
      "epoch": 2.59,
      "grad_norm": 1.686838984489441,
      "learning_rate": 7.56340579710145e-06,
      "loss": 0.0937,
      "step": 1060
    },
    {
      "epoch": 2.62,
      "grad_norm": 31.7119140625,
      "learning_rate": 7.110507246376811e-06,
      "loss": 0.1698,
      "step": 1070
    },
    {
      "epoch": 2.64,
      "grad_norm": 12.519082069396973,
      "learning_rate": 6.657608695652175e-06,
      "loss": 0.1635,
      "step": 1080
    },
    {
      "epoch": 2.67,
      "grad_norm": 0.5338079333305359,
      "learning_rate": 6.204710144927536e-06,
      "loss": 0.0695,
      "step": 1090
    },
    {
      "epoch": 2.69,
      "grad_norm": 22.285619735717773,
      "learning_rate": 5.751811594202898e-06,
      "loss": 0.1957,
      "step": 1100
    },
    {
      "epoch": 2.71,
      "grad_norm": 9.996444702148438,
      "learning_rate": 5.298913043478261e-06,
      "loss": 0.2182,
      "step": 1110
    },
    {
      "epoch": 2.74,
      "grad_norm": 5.337853908538818,
      "learning_rate": 4.846014492753623e-06,
      "loss": 0.089,
      "step": 1120
    },
    {
      "epoch": 2.76,
      "grad_norm": 6.16534948348999,
      "learning_rate": 4.393115942028985e-06,
      "loss": 0.1833,
      "step": 1130
    },
    {
      "epoch": 2.79,
      "grad_norm": 0.6340013742446899,
      "learning_rate": 3.940217391304348e-06,
      "loss": 0.1684,
      "step": 1140
    },
    {
      "epoch": 2.81,
      "grad_norm": 17.78191375732422,
      "learning_rate": 3.4873188405797104e-06,
      "loss": 0.211,
      "step": 1150
    },
    {
      "epoch": 2.84,
      "grad_norm": 6.65162992477417,
      "learning_rate": 3.0344202898550725e-06,
      "loss": 0.1569,
      "step": 1160
    },
    {
      "epoch": 2.86,
      "grad_norm": 14.574379920959473,
      "learning_rate": 2.581521739130435e-06,
      "loss": 0.1173,
      "step": 1170
    },
    {
      "epoch": 2.89,
      "grad_norm": 6.665879249572754,
      "learning_rate": 2.1286231884057975e-06,
      "loss": 0.1795,
      "step": 1180
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.035705327987671,
      "learning_rate": 1.6757246376811596e-06,
      "loss": 0.201,
      "step": 1190
    },
    {
      "epoch": 2.93,
      "grad_norm": 5.481184482574463,
      "learning_rate": 1.2228260869565218e-06,
      "loss": 0.127,
      "step": 1200
    },
    {
      "epoch": 2.96,
      "grad_norm": 19.509849548339844,
      "learning_rate": 7.699275362318841e-07,
      "loss": 0.1226,
      "step": 1210
    },
    {
      "epoch": 2.98,
      "grad_norm": 9.846253395080566,
      "learning_rate": 3.170289855072464e-07,
      "loss": 0.1562,
      "step": 1220
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9592959295929593,
      "eval_loss": 0.11152195930480957,
      "eval_runtime": 6.4385,
      "eval_samples_per_second": 141.182,
      "eval_steps_per_second": 28.267,
      "step": 1227
    },
    {
      "epoch": 3.0,
      "step": 1227,
      "total_flos": 6.096847334287933e+17,
      "train_loss": 0.2716238140671255,
      "train_runtime": 385.934,
      "train_samples_per_second": 63.555,
      "train_steps_per_second": 3.179
    }
  ],
  "logging_steps": 10,
  "max_steps": 1227,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 6.096847334287933e+17,
  "train_batch_size": 5,
  "trial_name": null,
  "trial_params": null
}