|
{ |
|
"best_metric": 0.060996126383543015, |
|
"best_model_checkpoint": "./eurosat_outpus/checkpoint-10125", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 10125, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0049382716049382715, |
|
"grad_norm": 38.450260162353516, |
|
"learning_rate": 1.9980246913580248e-05, |
|
"loss": 0.1979, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.009876543209876543, |
|
"grad_norm": 22.966312408447266, |
|
"learning_rate": 1.9960493827160498e-05, |
|
"loss": 0.3363, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.014814814814814815, |
|
"grad_norm": 73.729248046875, |
|
"learning_rate": 1.9940740740740744e-05, |
|
"loss": 0.323, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.019753086419753086, |
|
"grad_norm": 58.143798828125, |
|
"learning_rate": 1.992098765432099e-05, |
|
"loss": 0.3155, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.024691358024691357, |
|
"grad_norm": 38.21614074707031, |
|
"learning_rate": 1.9901234567901237e-05, |
|
"loss": 0.1625, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02962962962962963, |
|
"grad_norm": 9.119422912597656, |
|
"learning_rate": 1.9881481481481483e-05, |
|
"loss": 0.4599, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0345679012345679, |
|
"grad_norm": 0.6812440156936646, |
|
"learning_rate": 1.986172839506173e-05, |
|
"loss": 0.1372, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03950617283950617, |
|
"grad_norm": 17.003955841064453, |
|
"learning_rate": 1.9841975308641976e-05, |
|
"loss": 0.184, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.044444444444444446, |
|
"grad_norm": 4.515798568725586, |
|
"learning_rate": 1.9822222222222226e-05, |
|
"loss": 0.327, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04938271604938271, |
|
"grad_norm": 134.1114959716797, |
|
"learning_rate": 1.9802469135802472e-05, |
|
"loss": 0.1908, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05432098765432099, |
|
"grad_norm": 61.6785774230957, |
|
"learning_rate": 1.978271604938272e-05, |
|
"loss": 0.4477, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05925925925925926, |
|
"grad_norm": 0.12833823263645172, |
|
"learning_rate": 1.9762962962962965e-05, |
|
"loss": 0.2536, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06419753086419754, |
|
"grad_norm": 108.99272155761719, |
|
"learning_rate": 1.974320987654321e-05, |
|
"loss": 0.3968, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0691358024691358, |
|
"grad_norm": 35.6202507019043, |
|
"learning_rate": 1.9723456790123458e-05, |
|
"loss": 0.268, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07407407407407407, |
|
"grad_norm": 0.6642296314239502, |
|
"learning_rate": 1.9703703703703704e-05, |
|
"loss": 0.2327, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07901234567901234, |
|
"grad_norm": 110.14276123046875, |
|
"learning_rate": 1.968395061728395e-05, |
|
"loss": 0.5562, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08395061728395062, |
|
"grad_norm": 78.44914245605469, |
|
"learning_rate": 1.96641975308642e-05, |
|
"loss": 0.2922, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08888888888888889, |
|
"grad_norm": 63.069766998291016, |
|
"learning_rate": 1.9644444444444447e-05, |
|
"loss": 0.3735, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09382716049382717, |
|
"grad_norm": 246.51309204101562, |
|
"learning_rate": 1.9624691358024693e-05, |
|
"loss": 0.3787, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.09876543209876543, |
|
"grad_norm": 30.573638916015625, |
|
"learning_rate": 1.960493827160494e-05, |
|
"loss": 0.3193, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1037037037037037, |
|
"grad_norm": 0.3898200988769531, |
|
"learning_rate": 1.9585185185185186e-05, |
|
"loss": 0.2929, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.10864197530864197, |
|
"grad_norm": 85.34624481201172, |
|
"learning_rate": 1.9565432098765432e-05, |
|
"loss": 0.3892, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11358024691358025, |
|
"grad_norm": 2.596446990966797, |
|
"learning_rate": 1.954567901234568e-05, |
|
"loss": 0.2047, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.11851851851851852, |
|
"grad_norm": 16.085424423217773, |
|
"learning_rate": 1.952592592592593e-05, |
|
"loss": 0.3302, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12345679012345678, |
|
"grad_norm": 49.19840621948242, |
|
"learning_rate": 1.9506172839506175e-05, |
|
"loss": 0.1766, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12839506172839507, |
|
"grad_norm": 49.17105484008789, |
|
"learning_rate": 1.948641975308642e-05, |
|
"loss": 0.3533, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 31.642642974853516, |
|
"learning_rate": 1.9466666666666668e-05, |
|
"loss": 0.262, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1382716049382716, |
|
"grad_norm": 49.4565544128418, |
|
"learning_rate": 1.9446913580246914e-05, |
|
"loss": 0.3649, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.14320987654320988, |
|
"grad_norm": 33.3835563659668, |
|
"learning_rate": 1.942716049382716e-05, |
|
"loss": 0.2047, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.14814814814814814, |
|
"grad_norm": 30.190998077392578, |
|
"learning_rate": 1.9407407407407407e-05, |
|
"loss": 0.2901, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15308641975308643, |
|
"grad_norm": 73.48704528808594, |
|
"learning_rate": 1.9387654320987657e-05, |
|
"loss": 0.399, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1580246913580247, |
|
"grad_norm": 2.583846092224121, |
|
"learning_rate": 1.9367901234567903e-05, |
|
"loss": 0.3736, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.16296296296296298, |
|
"grad_norm": 100.03057861328125, |
|
"learning_rate": 1.934814814814815e-05, |
|
"loss": 0.2624, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.16790123456790124, |
|
"grad_norm": 0.5729751586914062, |
|
"learning_rate": 1.93283950617284e-05, |
|
"loss": 0.4662, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1728395061728395, |
|
"grad_norm": 133.75845336914062, |
|
"learning_rate": 1.9308641975308646e-05, |
|
"loss": 0.4739, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 0.10531154274940491, |
|
"learning_rate": 1.928888888888889e-05, |
|
"loss": 0.2592, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.18271604938271604, |
|
"grad_norm": 4.886446952819824, |
|
"learning_rate": 1.9269135802469135e-05, |
|
"loss": 0.2601, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.18765432098765433, |
|
"grad_norm": 20.537151336669922, |
|
"learning_rate": 1.9249382716049385e-05, |
|
"loss": 0.1298, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1925925925925926, |
|
"grad_norm": 101.08270263671875, |
|
"learning_rate": 1.922962962962963e-05, |
|
"loss": 0.2034, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.19753086419753085, |
|
"grad_norm": 101.60489654541016, |
|
"learning_rate": 1.9209876543209878e-05, |
|
"loss": 0.5103, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.20246913580246914, |
|
"grad_norm": 4.052034854888916, |
|
"learning_rate": 1.9190123456790124e-05, |
|
"loss": 0.3028, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2074074074074074, |
|
"grad_norm": 21.401437759399414, |
|
"learning_rate": 1.9170370370370374e-05, |
|
"loss": 0.4247, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2123456790123457, |
|
"grad_norm": 24.329212188720703, |
|
"learning_rate": 1.915061728395062e-05, |
|
"loss": 0.3295, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.21728395061728395, |
|
"grad_norm": 6.972232341766357, |
|
"learning_rate": 1.9130864197530867e-05, |
|
"loss": 0.1283, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 2.0192437171936035, |
|
"learning_rate": 1.9111111111111113e-05, |
|
"loss": 0.3414, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2271604938271605, |
|
"grad_norm": 209.81227111816406, |
|
"learning_rate": 1.909135802469136e-05, |
|
"loss": 0.418, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.23209876543209876, |
|
"grad_norm": 64.07678985595703, |
|
"learning_rate": 1.9071604938271606e-05, |
|
"loss": 0.4821, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 6.7498087882995605, |
|
"learning_rate": 1.9051851851851852e-05, |
|
"loss": 0.5004, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2419753086419753, |
|
"grad_norm": 67.15694427490234, |
|
"learning_rate": 1.9032098765432102e-05, |
|
"loss": 0.2125, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.24691358024691357, |
|
"grad_norm": 49.803070068359375, |
|
"learning_rate": 1.901234567901235e-05, |
|
"loss": 0.2238, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2518518518518518, |
|
"grad_norm": 76.97516632080078, |
|
"learning_rate": 1.8992592592592595e-05, |
|
"loss": 0.6817, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.25679012345679014, |
|
"grad_norm": 45.78963088989258, |
|
"learning_rate": 1.897283950617284e-05, |
|
"loss": 0.2176, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2617283950617284, |
|
"grad_norm": 1.871187448501587, |
|
"learning_rate": 1.8953086419753087e-05, |
|
"loss": 0.0952, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 52.718875885009766, |
|
"learning_rate": 1.8933333333333334e-05, |
|
"loss": 0.306, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2716049382716049, |
|
"grad_norm": 23.83916473388672, |
|
"learning_rate": 1.891358024691358e-05, |
|
"loss": 0.306, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.2765432098765432, |
|
"grad_norm": 38.469512939453125, |
|
"learning_rate": 1.889382716049383e-05, |
|
"loss": 0.3775, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2814814814814815, |
|
"grad_norm": 71.19271087646484, |
|
"learning_rate": 1.8874074074074076e-05, |
|
"loss": 0.2412, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.28641975308641976, |
|
"grad_norm": 10.515379905700684, |
|
"learning_rate": 1.8854320987654323e-05, |
|
"loss": 0.3623, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.291358024691358, |
|
"grad_norm": 56.489166259765625, |
|
"learning_rate": 1.883456790123457e-05, |
|
"loss": 0.2472, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2962962962962963, |
|
"grad_norm": 11.128917694091797, |
|
"learning_rate": 1.8814814814814816e-05, |
|
"loss": 0.512, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3012345679012346, |
|
"grad_norm": 2.7650094032287598, |
|
"learning_rate": 1.8795061728395062e-05, |
|
"loss": 0.518, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.30617283950617286, |
|
"grad_norm": 55.65047073364258, |
|
"learning_rate": 1.877530864197531e-05, |
|
"loss": 0.2817, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3111111111111111, |
|
"grad_norm": 8.692935943603516, |
|
"learning_rate": 1.8755555555555558e-05, |
|
"loss": 0.2106, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3160493827160494, |
|
"grad_norm": 2.446716785430908, |
|
"learning_rate": 1.8735802469135805e-05, |
|
"loss": 0.2604, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.32098765432098764, |
|
"grad_norm": 12.735766410827637, |
|
"learning_rate": 1.871604938271605e-05, |
|
"loss": 0.5116, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.32592592592592595, |
|
"grad_norm": 1.8498376607894897, |
|
"learning_rate": 1.8696296296296297e-05, |
|
"loss": 0.0587, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3308641975308642, |
|
"grad_norm": 0.5433443188667297, |
|
"learning_rate": 1.8676543209876544e-05, |
|
"loss": 0.5793, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3358024691358025, |
|
"grad_norm": 0.06385264545679092, |
|
"learning_rate": 1.865679012345679e-05, |
|
"loss": 0.2238, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.34074074074074073, |
|
"grad_norm": 129.50604248046875, |
|
"learning_rate": 1.8637037037037037e-05, |
|
"loss": 0.4826, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.345679012345679, |
|
"grad_norm": 20.5740909576416, |
|
"learning_rate": 1.8617283950617286e-05, |
|
"loss": 0.4072, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3506172839506173, |
|
"grad_norm": 1.4352848529815674, |
|
"learning_rate": 1.8597530864197533e-05, |
|
"loss": 0.1077, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"grad_norm": 1.2378454208374023, |
|
"learning_rate": 1.857777777777778e-05, |
|
"loss": 0.2087, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.36049382716049383, |
|
"grad_norm": 54.489768981933594, |
|
"learning_rate": 1.8558024691358025e-05, |
|
"loss": 0.34, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3654320987654321, |
|
"grad_norm": 85.84687042236328, |
|
"learning_rate": 1.8538271604938275e-05, |
|
"loss": 0.145, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.37037037037037035, |
|
"grad_norm": 20.322895050048828, |
|
"learning_rate": 1.851851851851852e-05, |
|
"loss": 0.4289, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.37530864197530867, |
|
"grad_norm": 1.6802163124084473, |
|
"learning_rate": 1.8498765432098768e-05, |
|
"loss": 0.4687, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3802469135802469, |
|
"grad_norm": 125.9644546508789, |
|
"learning_rate": 1.8479012345679014e-05, |
|
"loss": 0.5029, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.3851851851851852, |
|
"grad_norm": 46.97697830200195, |
|
"learning_rate": 1.845925925925926e-05, |
|
"loss": 0.4326, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.39012345679012345, |
|
"grad_norm": 82.32715606689453, |
|
"learning_rate": 1.8439506172839507e-05, |
|
"loss": 0.3779, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.3950617283950617, |
|
"grad_norm": 48.87428665161133, |
|
"learning_rate": 1.8419753086419754e-05, |
|
"loss": 0.4167, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.4260449707508087, |
|
"learning_rate": 1.8400000000000003e-05, |
|
"loss": 0.3357, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4049382716049383, |
|
"grad_norm": 3.1416447162628174, |
|
"learning_rate": 1.838024691358025e-05, |
|
"loss": 0.0848, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.40987654320987654, |
|
"grad_norm": 0.17075039446353912, |
|
"learning_rate": 1.8360493827160496e-05, |
|
"loss": 0.5083, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4148148148148148, |
|
"grad_norm": 78.5146713256836, |
|
"learning_rate": 1.8340740740740743e-05, |
|
"loss": 0.3346, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.41975308641975306, |
|
"grad_norm": 38.72228240966797, |
|
"learning_rate": 1.832098765432099e-05, |
|
"loss": 0.2796, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4246913580246914, |
|
"grad_norm": 28.315433502197266, |
|
"learning_rate": 1.8301234567901235e-05, |
|
"loss": 0.4984, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.42962962962962964, |
|
"grad_norm": 1.3758037090301514, |
|
"learning_rate": 1.8281481481481482e-05, |
|
"loss": 0.2027, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4345679012345679, |
|
"grad_norm": 33.141361236572266, |
|
"learning_rate": 1.826172839506173e-05, |
|
"loss": 0.1499, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.43950617283950616, |
|
"grad_norm": 48.69041442871094, |
|
"learning_rate": 1.8241975308641978e-05, |
|
"loss": 0.1608, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 35.90753173828125, |
|
"learning_rate": 1.8222222222222224e-05, |
|
"loss": 0.2102, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.44938271604938274, |
|
"grad_norm": 27.275602340698242, |
|
"learning_rate": 1.820246913580247e-05, |
|
"loss": 0.3688, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.454320987654321, |
|
"grad_norm": 14.521764755249023, |
|
"learning_rate": 1.8182716049382717e-05, |
|
"loss": 0.3542, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.45925925925925926, |
|
"grad_norm": 22.390480041503906, |
|
"learning_rate": 1.8162962962962963e-05, |
|
"loss": 0.1098, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4641975308641975, |
|
"grad_norm": 5.19728422164917, |
|
"learning_rate": 1.814320987654321e-05, |
|
"loss": 0.2809, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4691358024691358, |
|
"grad_norm": 0.5096778869628906, |
|
"learning_rate": 1.812345679012346e-05, |
|
"loss": 0.1538, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 107.34992980957031, |
|
"learning_rate": 1.8103703703703706e-05, |
|
"loss": 0.5366, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.47901234567901235, |
|
"grad_norm": 37.320709228515625, |
|
"learning_rate": 1.8083950617283952e-05, |
|
"loss": 0.1668, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.4839506172839506, |
|
"grad_norm": 20.405574798583984, |
|
"learning_rate": 1.80641975308642e-05, |
|
"loss": 0.3588, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.4888888888888889, |
|
"grad_norm": 1.3000644445419312, |
|
"learning_rate": 1.8044444444444445e-05, |
|
"loss": 0.088, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.49382716049382713, |
|
"grad_norm": 37.02173614501953, |
|
"learning_rate": 1.802469135802469e-05, |
|
"loss": 0.2648, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.49876543209876545, |
|
"grad_norm": 48.47230529785156, |
|
"learning_rate": 1.8004938271604938e-05, |
|
"loss": 0.5109, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5037037037037037, |
|
"grad_norm": 51.70542907714844, |
|
"learning_rate": 1.7985185185185188e-05, |
|
"loss": 0.3476, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.508641975308642, |
|
"grad_norm": 2.4657256603240967, |
|
"learning_rate": 1.7965432098765434e-05, |
|
"loss": 0.3445, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5135802469135803, |
|
"grad_norm": 96.39098358154297, |
|
"learning_rate": 1.794567901234568e-05, |
|
"loss": 0.2845, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5185185185185185, |
|
"grad_norm": 45.08651351928711, |
|
"learning_rate": 1.7925925925925927e-05, |
|
"loss": 0.2077, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5234567901234568, |
|
"grad_norm": 0.06106605753302574, |
|
"learning_rate": 1.7906172839506177e-05, |
|
"loss": 0.0399, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.528395061728395, |
|
"grad_norm": 36.55531692504883, |
|
"learning_rate": 1.788641975308642e-05, |
|
"loss": 0.3436, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 2.2626407146453857, |
|
"learning_rate": 1.7866666666666666e-05, |
|
"loss": 0.6299, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5382716049382716, |
|
"grad_norm": 16.667465209960938, |
|
"learning_rate": 1.7846913580246913e-05, |
|
"loss": 0.2992, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5432098765432098, |
|
"grad_norm": 41.49295425415039, |
|
"learning_rate": 1.7827160493827162e-05, |
|
"loss": 0.2554, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5481481481481482, |
|
"grad_norm": 4.2133002281188965, |
|
"learning_rate": 1.780740740740741e-05, |
|
"loss": 0.2452, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5530864197530864, |
|
"grad_norm": 49.12704086303711, |
|
"learning_rate": 1.7787654320987655e-05, |
|
"loss": 0.3656, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5580246913580247, |
|
"grad_norm": 21.075599670410156, |
|
"learning_rate": 1.7767901234567905e-05, |
|
"loss": 0.0648, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.562962962962963, |
|
"grad_norm": 0.5144210457801819, |
|
"learning_rate": 1.774814814814815e-05, |
|
"loss": 0.2793, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5679012345679012, |
|
"grad_norm": 53.27878189086914, |
|
"learning_rate": 1.7728395061728398e-05, |
|
"loss": 0.206, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5728395061728395, |
|
"grad_norm": 36.761356353759766, |
|
"learning_rate": 1.7708641975308644e-05, |
|
"loss": 0.3469, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5777777777777777, |
|
"grad_norm": 3.539717435836792, |
|
"learning_rate": 1.768888888888889e-05, |
|
"loss": 0.2327, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.582716049382716, |
|
"grad_norm": 3.940678596496582, |
|
"learning_rate": 1.7669135802469137e-05, |
|
"loss": 0.2148, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5876543209876544, |
|
"grad_norm": 44.36384963989258, |
|
"learning_rate": 1.7649382716049383e-05, |
|
"loss": 0.3014, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 0.4438416063785553, |
|
"learning_rate": 1.7629629629629633e-05, |
|
"loss": 0.2904, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5975308641975309, |
|
"grad_norm": 0.08722967654466629, |
|
"learning_rate": 1.760987654320988e-05, |
|
"loss": 0.6003, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6024691358024692, |
|
"grad_norm": 3.851921319961548, |
|
"learning_rate": 1.7590123456790126e-05, |
|
"loss": 0.1097, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6074074074074074, |
|
"grad_norm": 2.105475425720215, |
|
"learning_rate": 1.7570370370370372e-05, |
|
"loss": 0.0446, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6123456790123457, |
|
"grad_norm": 1.8762763738632202, |
|
"learning_rate": 1.755061728395062e-05, |
|
"loss": 0.3621, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6172839506172839, |
|
"grad_norm": 0.8981475234031677, |
|
"learning_rate": 1.7530864197530865e-05, |
|
"loss": 0.6072, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6222222222222222, |
|
"grad_norm": 0.05930788442492485, |
|
"learning_rate": 1.751111111111111e-05, |
|
"loss": 0.2451, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6271604938271605, |
|
"grad_norm": 47.51054763793945, |
|
"learning_rate": 1.7491358024691358e-05, |
|
"loss": 0.2657, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6320987654320988, |
|
"grad_norm": 84.59910583496094, |
|
"learning_rate": 1.7471604938271608e-05, |
|
"loss": 0.2821, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6370370370370371, |
|
"grad_norm": 92.97787475585938, |
|
"learning_rate": 1.7451851851851854e-05, |
|
"loss": 0.3573, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6419753086419753, |
|
"grad_norm": 134.259033203125, |
|
"learning_rate": 1.74320987654321e-05, |
|
"loss": 0.2441, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6469135802469136, |
|
"grad_norm": 61.10758972167969, |
|
"learning_rate": 1.7412345679012347e-05, |
|
"loss": 0.3629, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6518518518518519, |
|
"grad_norm": 0.031939879059791565, |
|
"learning_rate": 1.7392592592592593e-05, |
|
"loss": 0.0489, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6567901234567901, |
|
"grad_norm": 52.49007034301758, |
|
"learning_rate": 1.737283950617284e-05, |
|
"loss": 0.2687, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.6617283950617284, |
|
"grad_norm": 4.723176002502441, |
|
"learning_rate": 1.7353086419753086e-05, |
|
"loss": 0.2252, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 2.503265619277954, |
|
"learning_rate": 1.7333333333333336e-05, |
|
"loss": 0.3459, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.671604938271605, |
|
"grad_norm": 68.56127166748047, |
|
"learning_rate": 1.7313580246913582e-05, |
|
"loss": 0.4752, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6765432098765433, |
|
"grad_norm": 96.8653793334961, |
|
"learning_rate": 1.729382716049383e-05, |
|
"loss": 0.1921, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6814814814814815, |
|
"grad_norm": 139.44691467285156, |
|
"learning_rate": 1.7274074074074075e-05, |
|
"loss": 0.2266, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6864197530864198, |
|
"grad_norm": 7.88108491897583, |
|
"learning_rate": 1.725432098765432e-05, |
|
"loss": 0.1687, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.691358024691358, |
|
"grad_norm": 61.542091369628906, |
|
"learning_rate": 1.7234567901234568e-05, |
|
"loss": 0.1411, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6962962962962963, |
|
"grad_norm": 0.7576116919517517, |
|
"learning_rate": 1.7214814814814814e-05, |
|
"loss": 0.4797, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7012345679012346, |
|
"grad_norm": 14.038137435913086, |
|
"learning_rate": 1.7195061728395064e-05, |
|
"loss": 0.0933, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7061728395061728, |
|
"grad_norm": 46.00447463989258, |
|
"learning_rate": 1.717530864197531e-05, |
|
"loss": 0.3333, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 180.21914672851562, |
|
"learning_rate": 1.7155555555555557e-05, |
|
"loss": 0.4032, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7160493827160493, |
|
"grad_norm": 211.60653686523438, |
|
"learning_rate": 1.7135802469135806e-05, |
|
"loss": 0.3602, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7209876543209877, |
|
"grad_norm": 10.442931175231934, |
|
"learning_rate": 1.7116049382716053e-05, |
|
"loss": 0.3413, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.725925925925926, |
|
"grad_norm": 54.73400115966797, |
|
"learning_rate": 1.70962962962963e-05, |
|
"loss": 0.1263, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7308641975308642, |
|
"grad_norm": 7.259425163269043, |
|
"learning_rate": 1.7076543209876542e-05, |
|
"loss": 0.2431, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7358024691358025, |
|
"grad_norm": 96.37651824951172, |
|
"learning_rate": 1.7056790123456792e-05, |
|
"loss": 0.5599, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7407407407407407, |
|
"grad_norm": 9.702010154724121, |
|
"learning_rate": 1.7037037037037038e-05, |
|
"loss": 0.3493, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.745679012345679, |
|
"grad_norm": 29.10769271850586, |
|
"learning_rate": 1.7017283950617285e-05, |
|
"loss": 0.3369, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7506172839506173, |
|
"grad_norm": 77.2637939453125, |
|
"learning_rate": 1.699753086419753e-05, |
|
"loss": 0.5669, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.7555555555555555, |
|
"grad_norm": 0.2619607150554657, |
|
"learning_rate": 1.697777777777778e-05, |
|
"loss": 0.2248, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.7604938271604939, |
|
"grad_norm": 49.25140380859375, |
|
"learning_rate": 1.6958024691358027e-05, |
|
"loss": 0.1465, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.7654320987654321, |
|
"grad_norm": 1.6038424968719482, |
|
"learning_rate": 1.6938271604938274e-05, |
|
"loss": 0.2798, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.7703703703703704, |
|
"grad_norm": 0.2095940262079239, |
|
"learning_rate": 1.691851851851852e-05, |
|
"loss": 0.359, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.7753086419753087, |
|
"grad_norm": 53.154632568359375, |
|
"learning_rate": 1.6898765432098766e-05, |
|
"loss": 0.1937, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.7802469135802469, |
|
"grad_norm": 6.8274006843566895, |
|
"learning_rate": 1.6879012345679013e-05, |
|
"loss": 0.2881, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.7851851851851852, |
|
"grad_norm": 115.4723892211914, |
|
"learning_rate": 1.685925925925926e-05, |
|
"loss": 0.2592, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.7901234567901234, |
|
"grad_norm": 0.015067143365740776, |
|
"learning_rate": 1.683950617283951e-05, |
|
"loss": 0.3331, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7950617283950617, |
|
"grad_norm": 28.81291961669922, |
|
"learning_rate": 1.6819753086419755e-05, |
|
"loss": 0.5361, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.010893706232309341, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 0.1873, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8049382716049382, |
|
"grad_norm": 1.351131796836853, |
|
"learning_rate": 1.6780246913580248e-05, |
|
"loss": 0.2294, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8098765432098766, |
|
"grad_norm": 60.61597442626953, |
|
"learning_rate": 1.6760493827160495e-05, |
|
"loss": 0.2917, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8148148148148148, |
|
"grad_norm": 11.661639213562012, |
|
"learning_rate": 1.674074074074074e-05, |
|
"loss": 0.4087, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8197530864197531, |
|
"grad_norm": 251.9644012451172, |
|
"learning_rate": 1.6720987654320987e-05, |
|
"loss": 0.2226, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8246913580246914, |
|
"grad_norm": 7.840044975280762, |
|
"learning_rate": 1.6701234567901237e-05, |
|
"loss": 0.5515, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8296296296296296, |
|
"grad_norm": 0.08511721342802048, |
|
"learning_rate": 1.6681481481481484e-05, |
|
"loss": 0.2206, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8345679012345679, |
|
"grad_norm": 19.307905197143555, |
|
"learning_rate": 1.666172839506173e-05, |
|
"loss": 0.2081, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8395061728395061, |
|
"grad_norm": 1.045444130897522, |
|
"learning_rate": 1.6641975308641976e-05, |
|
"loss": 0.1815, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8444444444444444, |
|
"grad_norm": 5.953945636749268, |
|
"learning_rate": 1.6622222222222223e-05, |
|
"loss": 0.2312, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.8493827160493828, |
|
"grad_norm": 5.905419826507568, |
|
"learning_rate": 1.660246913580247e-05, |
|
"loss": 0.1001, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.854320987654321, |
|
"grad_norm": 118.84114837646484, |
|
"learning_rate": 1.6582716049382715e-05, |
|
"loss": 0.2355, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.8592592592592593, |
|
"grad_norm": 27.624740600585938, |
|
"learning_rate": 1.6562962962962965e-05, |
|
"loss": 0.1446, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.8641975308641975, |
|
"grad_norm": 126.23757934570312, |
|
"learning_rate": 1.654320987654321e-05, |
|
"loss": 0.2595, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.8691358024691358, |
|
"grad_norm": 2.478506326675415, |
|
"learning_rate": 1.6523456790123458e-05, |
|
"loss": 0.0814, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.8740740740740741, |
|
"grad_norm": 42.80133819580078, |
|
"learning_rate": 1.6503703703703704e-05, |
|
"loss": 0.1934, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.8790123456790123, |
|
"grad_norm": 0.015840064734220505, |
|
"learning_rate": 1.648395061728395e-05, |
|
"loss": 0.2246, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.8839506172839506, |
|
"grad_norm": 112.66703796386719, |
|
"learning_rate": 1.6464197530864197e-05, |
|
"loss": 0.2628, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 33.77766036987305, |
|
"learning_rate": 1.6444444444444444e-05, |
|
"loss": 0.3193, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.8938271604938272, |
|
"grad_norm": 236.83761596679688, |
|
"learning_rate": 1.6424691358024693e-05, |
|
"loss": 0.3724, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.8987654320987655, |
|
"grad_norm": 57.66241455078125, |
|
"learning_rate": 1.640493827160494e-05, |
|
"loss": 0.3194, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9037037037037037, |
|
"grad_norm": 142.6712646484375, |
|
"learning_rate": 1.6385185185185186e-05, |
|
"loss": 0.2389, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.908641975308642, |
|
"grad_norm": 0.11197575181722641, |
|
"learning_rate": 1.6365432098765433e-05, |
|
"loss": 0.268, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9135802469135802, |
|
"grad_norm": 407.26885986328125, |
|
"learning_rate": 1.6345679012345682e-05, |
|
"loss": 0.2186, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9185185185185185, |
|
"grad_norm": 0.057163987308740616, |
|
"learning_rate": 1.632592592592593e-05, |
|
"loss": 0.4509, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9234567901234568, |
|
"grad_norm": 66.4487075805664, |
|
"learning_rate": 1.6306172839506175e-05, |
|
"loss": 0.3745, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.928395061728395, |
|
"grad_norm": 115.2850570678711, |
|
"learning_rate": 1.628641975308642e-05, |
|
"loss": 0.4606, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9333333333333333, |
|
"grad_norm": 66.02615356445312, |
|
"learning_rate": 1.6266666666666668e-05, |
|
"loss": 0.2206, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9382716049382716, |
|
"grad_norm": 2.386338949203491, |
|
"learning_rate": 1.6246913580246914e-05, |
|
"loss": 0.364, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9432098765432099, |
|
"grad_norm": 57.060977935791016, |
|
"learning_rate": 1.622716049382716e-05, |
|
"loss": 0.2837, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 0.7722509503364563, |
|
"learning_rate": 1.620740740740741e-05, |
|
"loss": 0.6167, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.9530864197530864, |
|
"grad_norm": 0.762596845626831, |
|
"learning_rate": 1.6187654320987657e-05, |
|
"loss": 0.1728, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.9580246913580247, |
|
"grad_norm": 40.202091217041016, |
|
"learning_rate": 1.6167901234567903e-05, |
|
"loss": 0.2449, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.9629629629629629, |
|
"grad_norm": 57.35947799682617, |
|
"learning_rate": 1.614814814814815e-05, |
|
"loss": 0.4488, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.9679012345679012, |
|
"grad_norm": 68.08243560791016, |
|
"learning_rate": 1.6128395061728396e-05, |
|
"loss": 0.3488, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.9728395061728395, |
|
"grad_norm": 1.9619942903518677, |
|
"learning_rate": 1.6108641975308642e-05, |
|
"loss": 0.2035, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.9777777777777777, |
|
"grad_norm": 0.8691776990890503, |
|
"learning_rate": 1.608888888888889e-05, |
|
"loss": 0.2162, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.9827160493827161, |
|
"grad_norm": 0.5446602702140808, |
|
"learning_rate": 1.606913580246914e-05, |
|
"loss": 0.4364, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.9876543209876543, |
|
"grad_norm": 10.081711769104004, |
|
"learning_rate": 1.6049382716049385e-05, |
|
"loss": 0.2213, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9925925925925926, |
|
"grad_norm": 0.02493743598461151, |
|
"learning_rate": 1.602962962962963e-05, |
|
"loss": 0.0608, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.9975308641975309, |
|
"grad_norm": 2.9489526748657227, |
|
"learning_rate": 1.6009876543209878e-05, |
|
"loss": 0.3004, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9677777777777777, |
|
"eval_loss": 0.11802458763122559, |
|
"eval_runtime": 32.902, |
|
"eval_samples_per_second": 164.124, |
|
"eval_steps_per_second": 20.515, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.002469135802469, |
|
"grad_norm": 138.59349060058594, |
|
"learning_rate": 1.5990123456790124e-05, |
|
"loss": 0.2046, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0074074074074073, |
|
"grad_norm": 0.05510491877794266, |
|
"learning_rate": 1.597037037037037e-05, |
|
"loss": 0.1356, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.0123456790123457, |
|
"grad_norm": 14.264396667480469, |
|
"learning_rate": 1.5950617283950617e-05, |
|
"loss": 0.1624, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.017283950617284, |
|
"grad_norm": 0.9380566477775574, |
|
"learning_rate": 1.5930864197530867e-05, |
|
"loss": 0.2289, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.0222222222222221, |
|
"grad_norm": 0.017738979309797287, |
|
"learning_rate": 1.5911111111111113e-05, |
|
"loss": 0.4301, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.0271604938271606, |
|
"grad_norm": 0.030082279816269875, |
|
"learning_rate": 1.589135802469136e-05, |
|
"loss": 0.1049, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.0320987654320988, |
|
"grad_norm": 0.28669413924217224, |
|
"learning_rate": 1.5871604938271606e-05, |
|
"loss": 0.1245, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.037037037037037, |
|
"grad_norm": 7.697299003601074, |
|
"learning_rate": 1.5851851851851852e-05, |
|
"loss": 0.6147, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.0419753086419754, |
|
"grad_norm": 99.23163604736328, |
|
"learning_rate": 1.58320987654321e-05, |
|
"loss": 0.1613, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.0469135802469136, |
|
"grad_norm": 52.61363220214844, |
|
"learning_rate": 1.5812345679012345e-05, |
|
"loss": 0.3256, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.0518518518518518, |
|
"grad_norm": 87.68861389160156, |
|
"learning_rate": 1.5792592592592595e-05, |
|
"loss": 0.2956, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.05679012345679, |
|
"grad_norm": 30.490577697753906, |
|
"learning_rate": 1.577283950617284e-05, |
|
"loss": 0.2226, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.0617283950617284, |
|
"grad_norm": 1.5879323482513428, |
|
"learning_rate": 1.5753086419753088e-05, |
|
"loss": 0.3573, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.36435502767562866, |
|
"learning_rate": 1.5733333333333334e-05, |
|
"loss": 0.167, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.0716049382716049, |
|
"grad_norm": 0.3206441104412079, |
|
"learning_rate": 1.5713580246913584e-05, |
|
"loss": 0.2698, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.0765432098765433, |
|
"grad_norm": 17.28899574279785, |
|
"learning_rate": 1.569382716049383e-05, |
|
"loss": 0.383, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.0814814814814815, |
|
"grad_norm": 31.972209930419922, |
|
"learning_rate": 1.5674074074074073e-05, |
|
"loss": 0.2109, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.0864197530864197, |
|
"grad_norm": 35.79594802856445, |
|
"learning_rate": 1.565432098765432e-05, |
|
"loss": 0.2666, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.0913580246913581, |
|
"grad_norm": 3.0720813274383545, |
|
"learning_rate": 1.563456790123457e-05, |
|
"loss": 0.0663, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.0962962962962963, |
|
"grad_norm": 46.16384506225586, |
|
"learning_rate": 1.5614814814814816e-05, |
|
"loss": 0.1775, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.1012345679012345, |
|
"grad_norm": 3.8352577686309814, |
|
"learning_rate": 1.5595061728395062e-05, |
|
"loss": 0.1719, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.106172839506173, |
|
"grad_norm": 24.50127601623535, |
|
"learning_rate": 1.5575308641975312e-05, |
|
"loss": 0.4285, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 45.77573776245117, |
|
"learning_rate": 1.555555555555556e-05, |
|
"loss": 0.3723, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.1160493827160494, |
|
"grad_norm": 51.60211181640625, |
|
"learning_rate": 1.5535802469135805e-05, |
|
"loss": 0.1194, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.1209876543209876, |
|
"grad_norm": 48.674163818359375, |
|
"learning_rate": 1.551604938271605e-05, |
|
"loss": 0.3845, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.125925925925926, |
|
"grad_norm": 0.43790122866630554, |
|
"learning_rate": 1.5496296296296298e-05, |
|
"loss": 0.1622, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.1308641975308642, |
|
"grad_norm": 0.4926997125148773, |
|
"learning_rate": 1.5476543209876544e-05, |
|
"loss": 0.1739, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.1358024691358024, |
|
"grad_norm": 27.840295791625977, |
|
"learning_rate": 1.545679012345679e-05, |
|
"loss": 0.1265, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.1407407407407408, |
|
"grad_norm": 148.9844207763672, |
|
"learning_rate": 1.543703703703704e-05, |
|
"loss": 0.2187, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.145679012345679, |
|
"grad_norm": 63.56736373901367, |
|
"learning_rate": 1.5417283950617286e-05, |
|
"loss": 0.2227, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.1506172839506172, |
|
"grad_norm": 32.42955780029297, |
|
"learning_rate": 1.5397530864197533e-05, |
|
"loss": 0.1863, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.1555555555555554, |
|
"grad_norm": 72.6145248413086, |
|
"learning_rate": 1.537777777777778e-05, |
|
"loss": 0.3744, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.1604938271604939, |
|
"grad_norm": 4.558436393737793, |
|
"learning_rate": 1.5358024691358026e-05, |
|
"loss": 0.2796, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.165432098765432, |
|
"grad_norm": 0.5049192905426025, |
|
"learning_rate": 1.5338271604938272e-05, |
|
"loss": 0.1426, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.1703703703703703, |
|
"grad_norm": 0.11132398992776871, |
|
"learning_rate": 1.531851851851852e-05, |
|
"loss": 0.1231, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.1753086419753087, |
|
"grad_norm": 26.840200424194336, |
|
"learning_rate": 1.5298765432098768e-05, |
|
"loss": 0.2786, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.180246913580247, |
|
"grad_norm": 0.15319669246673584, |
|
"learning_rate": 1.5279012345679015e-05, |
|
"loss": 0.5859, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.1851851851851851, |
|
"grad_norm": 39.83156204223633, |
|
"learning_rate": 1.525925925925926e-05, |
|
"loss": 0.4391, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.1901234567901235, |
|
"grad_norm": 0.38840270042419434, |
|
"learning_rate": 1.5239506172839507e-05, |
|
"loss": 0.1187, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.1950617283950618, |
|
"grad_norm": 0.025911659002304077, |
|
"learning_rate": 1.5219753086419755e-05, |
|
"loss": 0.0865, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 81.05162048339844, |
|
"learning_rate": 1.5200000000000002e-05, |
|
"loss": 0.3289, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.2049382716049384, |
|
"grad_norm": 72.2834701538086, |
|
"learning_rate": 1.5180246913580248e-05, |
|
"loss": 0.5105, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.2098765432098766, |
|
"grad_norm": 0.06509275734424591, |
|
"learning_rate": 1.5160493827160495e-05, |
|
"loss": 0.2435, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.2148148148148148, |
|
"grad_norm": 12.417915344238281, |
|
"learning_rate": 1.5140740740740743e-05, |
|
"loss": 0.3175, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.219753086419753, |
|
"grad_norm": 64.59101104736328, |
|
"learning_rate": 1.5120987654320989e-05, |
|
"loss": 0.4517, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.2246913580246914, |
|
"grad_norm": 43.42831802368164, |
|
"learning_rate": 1.5101234567901236e-05, |
|
"loss": 0.1514, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.2296296296296296, |
|
"grad_norm": 0.5973836779594421, |
|
"learning_rate": 1.5081481481481484e-05, |
|
"loss": 0.1027, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.2345679012345678, |
|
"grad_norm": 41.84488296508789, |
|
"learning_rate": 1.506172839506173e-05, |
|
"loss": 0.2706, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.2395061728395063, |
|
"grad_norm": 135.85255432128906, |
|
"learning_rate": 1.5041975308641976e-05, |
|
"loss": 0.204, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.2444444444444445, |
|
"grad_norm": 14.007678985595703, |
|
"learning_rate": 1.5022222222222223e-05, |
|
"loss": 0.4253, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.2493827160493827, |
|
"grad_norm": 34.2636833190918, |
|
"learning_rate": 1.5002469135802471e-05, |
|
"loss": 0.21, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.2543209876543209, |
|
"grad_norm": 19.363365173339844, |
|
"learning_rate": 1.4982716049382717e-05, |
|
"loss": 0.2031, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.2592592592592593, |
|
"grad_norm": 0.3058103919029236, |
|
"learning_rate": 1.4962962962962964e-05, |
|
"loss": 0.2789, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.2641975308641975, |
|
"grad_norm": 70.8534164428711, |
|
"learning_rate": 1.4943209876543212e-05, |
|
"loss": 0.4306, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.269135802469136, |
|
"grad_norm": 0.1311403512954712, |
|
"learning_rate": 1.4923456790123458e-05, |
|
"loss": 0.4098, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.2740740740740741, |
|
"grad_norm": 84.89444732666016, |
|
"learning_rate": 1.4903703703703705e-05, |
|
"loss": 0.2931, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.2790123456790123, |
|
"grad_norm": 0.9064738154411316, |
|
"learning_rate": 1.4883950617283951e-05, |
|
"loss": 0.3069, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.2839506172839505, |
|
"grad_norm": 0.491811603307724, |
|
"learning_rate": 1.4864197530864199e-05, |
|
"loss": 0.2636, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.2888888888888888, |
|
"grad_norm": 35.797969818115234, |
|
"learning_rate": 1.4844444444444445e-05, |
|
"loss": 0.2673, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.2938271604938272, |
|
"grad_norm": 0.0416533537209034, |
|
"learning_rate": 1.4824691358024692e-05, |
|
"loss": 0.0711, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.2987654320987654, |
|
"grad_norm": 4.76767635345459, |
|
"learning_rate": 1.480493827160494e-05, |
|
"loss": 0.2506, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.3037037037037038, |
|
"grad_norm": 32.206031799316406, |
|
"learning_rate": 1.4785185185185186e-05, |
|
"loss": 0.453, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.308641975308642, |
|
"grad_norm": 131.6813201904297, |
|
"learning_rate": 1.4765432098765433e-05, |
|
"loss": 0.1793, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.3135802469135802, |
|
"grad_norm": 7.119224548339844, |
|
"learning_rate": 1.4745679012345679e-05, |
|
"loss": 0.0779, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.3185185185185184, |
|
"grad_norm": 139.8772735595703, |
|
"learning_rate": 1.4725925925925927e-05, |
|
"loss": 0.4545, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.3234567901234568, |
|
"grad_norm": 0.4141978919506073, |
|
"learning_rate": 1.4706172839506174e-05, |
|
"loss": 0.2352, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.328395061728395, |
|
"grad_norm": 42.8140869140625, |
|
"learning_rate": 1.468641975308642e-05, |
|
"loss": 0.1611, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 16.763948440551758, |
|
"learning_rate": 1.4666666666666666e-05, |
|
"loss": 0.0735, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.3382716049382717, |
|
"grad_norm": 140.94900512695312, |
|
"learning_rate": 1.4646913580246916e-05, |
|
"loss": 0.1474, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.34320987654321, |
|
"grad_norm": 0.9029823541641235, |
|
"learning_rate": 1.4627160493827162e-05, |
|
"loss": 0.0437, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.348148148148148, |
|
"grad_norm": 46.620086669921875, |
|
"learning_rate": 1.4607407407407407e-05, |
|
"loss": 0.1856, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.3530864197530863, |
|
"grad_norm": 64.09046173095703, |
|
"learning_rate": 1.4587654320987657e-05, |
|
"loss": 0.1532, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.3580246913580247, |
|
"grad_norm": 104.23167419433594, |
|
"learning_rate": 1.4567901234567903e-05, |
|
"loss": 0.2386, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.362962962962963, |
|
"grad_norm": 0.36242911219596863, |
|
"learning_rate": 1.454814814814815e-05, |
|
"loss": 0.4831, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.3679012345679014, |
|
"grad_norm": 0.5484885573387146, |
|
"learning_rate": 1.4528395061728396e-05, |
|
"loss": 0.0836, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.3728395061728396, |
|
"grad_norm": 51.26658630371094, |
|
"learning_rate": 1.4508641975308644e-05, |
|
"loss": 0.1736, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.3777777777777778, |
|
"grad_norm": 20.211082458496094, |
|
"learning_rate": 1.448888888888889e-05, |
|
"loss": 0.3063, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.382716049382716, |
|
"grad_norm": 0.7425023913383484, |
|
"learning_rate": 1.4469135802469137e-05, |
|
"loss": 0.1025, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.3876543209876544, |
|
"grad_norm": 159.22314453125, |
|
"learning_rate": 1.4449382716049385e-05, |
|
"loss": 0.2052, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.3925925925925926, |
|
"grad_norm": 47.53805923461914, |
|
"learning_rate": 1.4429629629629631e-05, |
|
"loss": 0.1378, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.3975308641975308, |
|
"grad_norm": 0.2027841955423355, |
|
"learning_rate": 1.4409876543209878e-05, |
|
"loss": 0.0507, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.4024691358024692, |
|
"grad_norm": 0.18290477991104126, |
|
"learning_rate": 1.4390123456790124e-05, |
|
"loss": 0.2193, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.4074074074074074, |
|
"grad_norm": 126.16277313232422, |
|
"learning_rate": 1.4370370370370372e-05, |
|
"loss": 0.3206, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.4123456790123456, |
|
"grad_norm": 127.88780975341797, |
|
"learning_rate": 1.4350617283950619e-05, |
|
"loss": 0.4142, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.4172839506172838, |
|
"grad_norm": 3.724766254425049, |
|
"learning_rate": 1.4330864197530865e-05, |
|
"loss": 0.0783, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 199.94883728027344, |
|
"learning_rate": 1.4311111111111111e-05, |
|
"loss": 0.3896, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.4271604938271605, |
|
"grad_norm": 116.74020385742188, |
|
"learning_rate": 1.429135802469136e-05, |
|
"loss": 0.2982, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.4320987654320987, |
|
"grad_norm": 2.576690673828125, |
|
"learning_rate": 1.4271604938271606e-05, |
|
"loss": 0.1678, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.4370370370370371, |
|
"grad_norm": 95.74549865722656, |
|
"learning_rate": 1.4251851851851852e-05, |
|
"loss": 0.2808, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.4419753086419753, |
|
"grad_norm": 43.24068069458008, |
|
"learning_rate": 1.42320987654321e-05, |
|
"loss": 0.3589, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.4469135802469135, |
|
"grad_norm": 40.1359977722168, |
|
"learning_rate": 1.4212345679012347e-05, |
|
"loss": 0.1566, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.4518518518518517, |
|
"grad_norm": 7.546663284301758, |
|
"learning_rate": 1.4192592592592593e-05, |
|
"loss": 0.1562, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.4567901234567902, |
|
"grad_norm": 117.94816589355469, |
|
"learning_rate": 1.417283950617284e-05, |
|
"loss": 0.3526, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.4617283950617284, |
|
"grad_norm": 107.50965881347656, |
|
"learning_rate": 1.4153086419753088e-05, |
|
"loss": 0.2148, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.4666666666666668, |
|
"grad_norm": 16.908262252807617, |
|
"learning_rate": 1.4133333333333334e-05, |
|
"loss": 0.451, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.471604938271605, |
|
"grad_norm": 53.356773376464844, |
|
"learning_rate": 1.411358024691358e-05, |
|
"loss": 0.3616, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.4765432098765432, |
|
"grad_norm": 44.207054138183594, |
|
"learning_rate": 1.4093827160493829e-05, |
|
"loss": 0.0903, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.4814814814814814, |
|
"grad_norm": 78.0193862915039, |
|
"learning_rate": 1.4074074074074075e-05, |
|
"loss": 0.2323, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.4864197530864198, |
|
"grad_norm": 1.2068320512771606, |
|
"learning_rate": 1.4054320987654321e-05, |
|
"loss": 0.2748, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.491358024691358, |
|
"grad_norm": 15.009058952331543, |
|
"learning_rate": 1.4034567901234568e-05, |
|
"loss": 0.2607, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.4962962962962962, |
|
"grad_norm": 1.3016469478607178, |
|
"learning_rate": 1.4014814814814816e-05, |
|
"loss": 0.0402, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.5012345679012347, |
|
"grad_norm": 64.81990814208984, |
|
"learning_rate": 1.3995061728395062e-05, |
|
"loss": 0.4051, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.5061728395061729, |
|
"grad_norm": 18.911441802978516, |
|
"learning_rate": 1.3975308641975309e-05, |
|
"loss": 0.2663, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.511111111111111, |
|
"grad_norm": 89.58609771728516, |
|
"learning_rate": 1.3955555555555558e-05, |
|
"loss": 0.2006, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.5160493827160493, |
|
"grad_norm": 84.76557922363281, |
|
"learning_rate": 1.3935802469135805e-05, |
|
"loss": 0.1644, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.5209876543209877, |
|
"grad_norm": 0.690521240234375, |
|
"learning_rate": 1.391604938271605e-05, |
|
"loss": 0.3695, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.525925925925926, |
|
"grad_norm": 0.9079038500785828, |
|
"learning_rate": 1.3896296296296296e-05, |
|
"loss": 0.1316, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.5308641975308643, |
|
"grad_norm": 0.0010949569987133145, |
|
"learning_rate": 1.3876543209876546e-05, |
|
"loss": 0.1599, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.5358024691358025, |
|
"grad_norm": 0.017062200233340263, |
|
"learning_rate": 1.3856790123456792e-05, |
|
"loss": 0.2102, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.5407407407407407, |
|
"grad_norm": 54.44521713256836, |
|
"learning_rate": 1.3837037037037038e-05, |
|
"loss": 0.2856, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.545679012345679, |
|
"grad_norm": 124.57701873779297, |
|
"learning_rate": 1.3817283950617285e-05, |
|
"loss": 0.6973, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.5506172839506172, |
|
"grad_norm": 73.95056915283203, |
|
"learning_rate": 1.3797530864197533e-05, |
|
"loss": 0.134, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 114.4755630493164, |
|
"learning_rate": 1.377777777777778e-05, |
|
"loss": 0.4007, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.5604938271604938, |
|
"grad_norm": 5.708268165588379, |
|
"learning_rate": 1.3758024691358026e-05, |
|
"loss": 0.2191, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.5654320987654322, |
|
"grad_norm": 39.35977554321289, |
|
"learning_rate": 1.3738271604938274e-05, |
|
"loss": 0.1217, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.5703703703703704, |
|
"grad_norm": 1.868407130241394, |
|
"learning_rate": 1.371851851851852e-05, |
|
"loss": 0.1177, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.5753086419753086, |
|
"grad_norm": 7.092827320098877, |
|
"learning_rate": 1.3698765432098767e-05, |
|
"loss": 0.1979, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.5802469135802468, |
|
"grad_norm": 0.005435746628791094, |
|
"learning_rate": 1.3679012345679013e-05, |
|
"loss": 0.1564, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.585185185185185, |
|
"grad_norm": 80.7311019897461, |
|
"learning_rate": 1.3659259259259261e-05, |
|
"loss": 0.2003, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.5901234567901235, |
|
"grad_norm": 0.9620011448860168, |
|
"learning_rate": 1.3639506172839507e-05, |
|
"loss": 0.1261, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.5950617283950619, |
|
"grad_norm": 95.69831085205078, |
|
"learning_rate": 1.3619753086419754e-05, |
|
"loss": 0.1809, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 61.438812255859375, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 0.506, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.6049382716049383, |
|
"grad_norm": 325.63250732421875, |
|
"learning_rate": 1.3580246913580248e-05, |
|
"loss": 0.3093, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.6098765432098765, |
|
"grad_norm": 17.00379180908203, |
|
"learning_rate": 1.3560493827160495e-05, |
|
"loss": 0.2099, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.6148148148148147, |
|
"grad_norm": 100.260498046875, |
|
"learning_rate": 1.3540740740740741e-05, |
|
"loss": 0.6063, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.6197530864197531, |
|
"grad_norm": 0.09998781979084015, |
|
"learning_rate": 1.352098765432099e-05, |
|
"loss": 0.3561, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.6246913580246913, |
|
"grad_norm": 0.34626302123069763, |
|
"learning_rate": 1.3501234567901236e-05, |
|
"loss": 0.0074, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.6296296296296298, |
|
"grad_norm": 0.034202978014945984, |
|
"learning_rate": 1.3481481481481482e-05, |
|
"loss": 0.4788, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.634567901234568, |
|
"grad_norm": 18.52402687072754, |
|
"learning_rate": 1.346172839506173e-05, |
|
"loss": 0.1834, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.6395061728395062, |
|
"grad_norm": 1.5653138160705566, |
|
"learning_rate": 1.3441975308641976e-05, |
|
"loss": 0.354, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.6444444444444444, |
|
"grad_norm": 69.99710845947266, |
|
"learning_rate": 1.3422222222222223e-05, |
|
"loss": 0.3642, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.6493827160493826, |
|
"grad_norm": 50.67994689941406, |
|
"learning_rate": 1.340246913580247e-05, |
|
"loss": 0.1864, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.654320987654321, |
|
"grad_norm": 0.31549400091171265, |
|
"learning_rate": 1.3382716049382717e-05, |
|
"loss": 0.3157, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.6592592592592592, |
|
"grad_norm": 111.24998474121094, |
|
"learning_rate": 1.3362962962962964e-05, |
|
"loss": 0.6087, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.6641975308641976, |
|
"grad_norm": 23.009380340576172, |
|
"learning_rate": 1.334320987654321e-05, |
|
"loss": 0.1866, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.6691358024691358, |
|
"grad_norm": 88.22378540039062, |
|
"learning_rate": 1.3323456790123456e-05, |
|
"loss": 0.1728, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.674074074074074, |
|
"grad_norm": 0.3229973316192627, |
|
"learning_rate": 1.3303703703703705e-05, |
|
"loss": 0.4118, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.6790123456790123, |
|
"grad_norm": 5.422463893890381, |
|
"learning_rate": 1.3283950617283951e-05, |
|
"loss": 0.2223, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.6839506172839505, |
|
"grad_norm": 0.07091034948825836, |
|
"learning_rate": 1.3264197530864197e-05, |
|
"loss": 0.5162, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.6888888888888889, |
|
"grad_norm": 0.41538941860198975, |
|
"learning_rate": 1.3244444444444447e-05, |
|
"loss": 0.4052, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.6938271604938273, |
|
"grad_norm": 7.8336181640625, |
|
"learning_rate": 1.3224691358024694e-05, |
|
"loss": 0.1862, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.6987654320987655, |
|
"grad_norm": 7.325730800628662, |
|
"learning_rate": 1.3204938271604938e-05, |
|
"loss": 0.1988, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.7037037037037037, |
|
"grad_norm": 39.67108154296875, |
|
"learning_rate": 1.3185185185185185e-05, |
|
"loss": 0.3016, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.708641975308642, |
|
"grad_norm": 0.42901355028152466, |
|
"learning_rate": 1.3165432098765434e-05, |
|
"loss": 0.008, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.7135802469135801, |
|
"grad_norm": 99.74118041992188, |
|
"learning_rate": 1.314567901234568e-05, |
|
"loss": 0.3562, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.7185185185185186, |
|
"grad_norm": 41.35346221923828, |
|
"learning_rate": 1.3125925925925927e-05, |
|
"loss": 0.2514, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.7234567901234568, |
|
"grad_norm": 59.84602355957031, |
|
"learning_rate": 1.3106172839506175e-05, |
|
"loss": 0.3048, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.7283950617283952, |
|
"grad_norm": 2.039802312850952, |
|
"learning_rate": 1.3086419753086422e-05, |
|
"loss": 0.2926, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.7333333333333334, |
|
"grad_norm": 66.14095306396484, |
|
"learning_rate": 1.3066666666666668e-05, |
|
"loss": 0.3515, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.7382716049382716, |
|
"grad_norm": 5.856687068939209, |
|
"learning_rate": 1.3046913580246914e-05, |
|
"loss": 0.2199, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.7432098765432098, |
|
"grad_norm": 89.60210418701172, |
|
"learning_rate": 1.3027160493827163e-05, |
|
"loss": 0.3104, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.748148148148148, |
|
"grad_norm": 2.4179534912109375, |
|
"learning_rate": 1.3007407407407409e-05, |
|
"loss": 0.2304, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.7530864197530864, |
|
"grad_norm": 39.764408111572266, |
|
"learning_rate": 1.2987654320987655e-05, |
|
"loss": 0.3049, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.7580246913580246, |
|
"grad_norm": 66.1130599975586, |
|
"learning_rate": 1.2967901234567903e-05, |
|
"loss": 0.1726, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.762962962962963, |
|
"grad_norm": 33.54975509643555, |
|
"learning_rate": 1.294814814814815e-05, |
|
"loss": 0.2627, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.7679012345679013, |
|
"grad_norm": 0.5882616639137268, |
|
"learning_rate": 1.2928395061728396e-05, |
|
"loss": 0.1133, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.7728395061728395, |
|
"grad_norm": 0.09102596342563629, |
|
"learning_rate": 1.2908641975308643e-05, |
|
"loss": 0.1391, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 0.2745858430862427, |
|
"learning_rate": 1.288888888888889e-05, |
|
"loss": 0.1178, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.7827160493827159, |
|
"grad_norm": 0.22387881577014923, |
|
"learning_rate": 1.2869135802469137e-05, |
|
"loss": 0.2893, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.7876543209876543, |
|
"grad_norm": 0.3061552047729492, |
|
"learning_rate": 1.2849382716049383e-05, |
|
"loss": 0.2718, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.7925925925925927, |
|
"grad_norm": 40.53445053100586, |
|
"learning_rate": 1.282962962962963e-05, |
|
"loss": 0.0972, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.797530864197531, |
|
"grad_norm": 0.2346036285161972, |
|
"learning_rate": 1.2809876543209878e-05, |
|
"loss": 0.1796, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.8024691358024691, |
|
"grad_norm": 84.19086456298828, |
|
"learning_rate": 1.2790123456790124e-05, |
|
"loss": 0.2555, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.8074074074074074, |
|
"grad_norm": 26.573976516723633, |
|
"learning_rate": 1.277037037037037e-05, |
|
"loss": 0.1533, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.8123456790123456, |
|
"grad_norm": 0.0031530587002635, |
|
"learning_rate": 1.2750617283950619e-05, |
|
"loss": 0.1559, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.817283950617284, |
|
"grad_norm": 72.7174072265625, |
|
"learning_rate": 1.2730864197530865e-05, |
|
"loss": 0.1383, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.8222222222222222, |
|
"grad_norm": 0.07971396297216415, |
|
"learning_rate": 1.2711111111111112e-05, |
|
"loss": 0.3888, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.8271604938271606, |
|
"grad_norm": 82.53282165527344, |
|
"learning_rate": 1.2691358024691358e-05, |
|
"loss": 0.113, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.8320987654320988, |
|
"grad_norm": 0.34782519936561584, |
|
"learning_rate": 1.2671604938271606e-05, |
|
"loss": 0.2208, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.837037037037037, |
|
"grad_norm": 6.04480504989624, |
|
"learning_rate": 1.2651851851851852e-05, |
|
"loss": 0.3451, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.8419753086419752, |
|
"grad_norm": 15.001103401184082, |
|
"learning_rate": 1.2632098765432099e-05, |
|
"loss": 0.0905, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.8469135802469134, |
|
"grad_norm": 47.090877532958984, |
|
"learning_rate": 1.2612345679012347e-05, |
|
"loss": 0.2327, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"grad_norm": 0.032411132007837296, |
|
"learning_rate": 1.2592592592592593e-05, |
|
"loss": 0.268, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.8567901234567903, |
|
"grad_norm": 54.430667877197266, |
|
"learning_rate": 1.257283950617284e-05, |
|
"loss": 0.213, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.8617283950617285, |
|
"grad_norm": 0.37125247716903687, |
|
"learning_rate": 1.2553086419753086e-05, |
|
"loss": 0.1433, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.8666666666666667, |
|
"grad_norm": 0.05495602637529373, |
|
"learning_rate": 1.2533333333333336e-05, |
|
"loss": 0.3747, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.871604938271605, |
|
"grad_norm": 35.28487777709961, |
|
"learning_rate": 1.2513580246913582e-05, |
|
"loss": 0.503, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.876543209876543, |
|
"grad_norm": 60.75400924682617, |
|
"learning_rate": 1.2493827160493827e-05, |
|
"loss": 0.1602, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.8814814814814815, |
|
"grad_norm": 137.60702514648438, |
|
"learning_rate": 1.2474074074074073e-05, |
|
"loss": 0.2931, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.8864197530864197, |
|
"grad_norm": 60.11787796020508, |
|
"learning_rate": 1.2454320987654323e-05, |
|
"loss": 0.354, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.8913580246913582, |
|
"grad_norm": 19.017499923706055, |
|
"learning_rate": 1.243456790123457e-05, |
|
"loss": 0.1684, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.8962962962962964, |
|
"grad_norm": 43.31821823120117, |
|
"learning_rate": 1.2414814814814816e-05, |
|
"loss": 0.1728, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.9012345679012346, |
|
"grad_norm": 602.893798828125, |
|
"learning_rate": 1.2395061728395064e-05, |
|
"loss": 0.2077, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.9061728395061728, |
|
"grad_norm": 12.869080543518066, |
|
"learning_rate": 1.237530864197531e-05, |
|
"loss": 0.2419, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.911111111111111, |
|
"grad_norm": 0.9421246647834778, |
|
"learning_rate": 1.2355555555555557e-05, |
|
"loss": 0.3389, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.9160493827160494, |
|
"grad_norm": 3.65885591506958, |
|
"learning_rate": 1.2335802469135803e-05, |
|
"loss": 0.5007, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.9209876543209876, |
|
"grad_norm": 3.625490665435791, |
|
"learning_rate": 1.2316049382716051e-05, |
|
"loss": 0.1538, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.925925925925926, |
|
"grad_norm": 92.34613800048828, |
|
"learning_rate": 1.2296296296296298e-05, |
|
"loss": 0.4137, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.9308641975308642, |
|
"grad_norm": 0.5257686376571655, |
|
"learning_rate": 1.2276543209876544e-05, |
|
"loss": 0.2876, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.9358024691358025, |
|
"grad_norm": 0.39652788639068604, |
|
"learning_rate": 1.2256790123456792e-05, |
|
"loss": 0.254, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.9407407407407407, |
|
"grad_norm": 26.36481285095215, |
|
"learning_rate": 1.2237037037037039e-05, |
|
"loss": 0.271, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.9456790123456789, |
|
"grad_norm": 0.03053528629243374, |
|
"learning_rate": 1.2217283950617285e-05, |
|
"loss": 0.0742, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.9506172839506173, |
|
"grad_norm": 0.09434489160776138, |
|
"learning_rate": 1.2197530864197531e-05, |
|
"loss": 0.1895, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.9555555555555557, |
|
"grad_norm": 69.78058624267578, |
|
"learning_rate": 1.217777777777778e-05, |
|
"loss": 0.4513, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.960493827160494, |
|
"grad_norm": 0.07707086950540543, |
|
"learning_rate": 1.2158024691358026e-05, |
|
"loss": 0.054, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.9654320987654321, |
|
"grad_norm": 37.1689453125, |
|
"learning_rate": 1.2138271604938272e-05, |
|
"loss": 0.0594, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.9703703703703703, |
|
"grad_norm": 48.61039352416992, |
|
"learning_rate": 1.211851851851852e-05, |
|
"loss": 0.1572, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.9753086419753085, |
|
"grad_norm": 163.54615783691406, |
|
"learning_rate": 1.2098765432098767e-05, |
|
"loss": 0.1604, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.980246913580247, |
|
"grad_norm": 85.144775390625, |
|
"learning_rate": 1.2079012345679013e-05, |
|
"loss": 0.1157, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.9851851851851852, |
|
"grad_norm": 15.836172103881836, |
|
"learning_rate": 1.205925925925926e-05, |
|
"loss": 0.1904, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.9901234567901236, |
|
"grad_norm": 2.649322748184204, |
|
"learning_rate": 1.2039506172839508e-05, |
|
"loss": 0.2893, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.9950617283950618, |
|
"grad_norm": 1.9400321245193481, |
|
"learning_rate": 1.2019753086419754e-05, |
|
"loss": 0.2295, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 74.16377258300781, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.539, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9798148148148148, |
|
"eval_loss": 0.08341296017169952, |
|
"eval_runtime": 32.2756, |
|
"eval_samples_per_second": 167.309, |
|
"eval_steps_per_second": 20.914, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.004938271604938, |
|
"grad_norm": 0.23466235399246216, |
|
"learning_rate": 1.1980246913580247e-05, |
|
"loss": 0.0651, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.0098765432098764, |
|
"grad_norm": 10.602593421936035, |
|
"learning_rate": 1.1960493827160495e-05, |
|
"loss": 0.2293, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.0148148148148146, |
|
"grad_norm": 101.87135314941406, |
|
"learning_rate": 1.1940740740740741e-05, |
|
"loss": 0.3077, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.0197530864197533, |
|
"grad_norm": 27.52354621887207, |
|
"learning_rate": 1.1920987654320988e-05, |
|
"loss": 0.1848, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.0246913580246915, |
|
"grad_norm": 90.54155731201172, |
|
"learning_rate": 1.1901234567901236e-05, |
|
"loss": 0.2108, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.0296296296296297, |
|
"grad_norm": 0.018464339897036552, |
|
"learning_rate": 1.1881481481481482e-05, |
|
"loss": 0.1732, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.034567901234568, |
|
"grad_norm": 0.21476837992668152, |
|
"learning_rate": 1.1861728395061728e-05, |
|
"loss": 0.5227, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.039506172839506, |
|
"grad_norm": 95.82560729980469, |
|
"learning_rate": 1.1841975308641975e-05, |
|
"loss": 0.1769, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.0444444444444443, |
|
"grad_norm": 6.9548468589782715, |
|
"learning_rate": 1.1822222222222225e-05, |
|
"loss": 0.2134, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.049382716049383, |
|
"grad_norm": 80.2332763671875, |
|
"learning_rate": 1.180246913580247e-05, |
|
"loss": 0.2451, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.054320987654321, |
|
"grad_norm": 19.164928436279297, |
|
"learning_rate": 1.1782716049382716e-05, |
|
"loss": 0.1896, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.0592592592592593, |
|
"grad_norm": 0.12828746438026428, |
|
"learning_rate": 1.1762962962962965e-05, |
|
"loss": 0.077, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.0641975308641975, |
|
"grad_norm": 3.3232741355895996, |
|
"learning_rate": 1.1743209876543212e-05, |
|
"loss": 0.0855, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.0691358024691358, |
|
"grad_norm": 0.32502618432044983, |
|
"learning_rate": 1.1723456790123458e-05, |
|
"loss": 0.2269, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.074074074074074, |
|
"grad_norm": 1.072849154472351, |
|
"learning_rate": 1.1703703703703703e-05, |
|
"loss": 0.2473, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.079012345679012, |
|
"grad_norm": 3.3251664638519287, |
|
"learning_rate": 1.1683950617283953e-05, |
|
"loss": 0.2367, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.083950617283951, |
|
"grad_norm": 0.1870512068271637, |
|
"learning_rate": 1.1664197530864199e-05, |
|
"loss": 0.2782, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.088888888888889, |
|
"grad_norm": 3.8792381286621094, |
|
"learning_rate": 1.1644444444444446e-05, |
|
"loss": 0.1886, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.093827160493827, |
|
"grad_norm": 47.594451904296875, |
|
"learning_rate": 1.1624691358024694e-05, |
|
"loss": 0.3145, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.0987654320987654, |
|
"grad_norm": 158.525634765625, |
|
"learning_rate": 1.160493827160494e-05, |
|
"loss": 0.3143, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.1037037037037036, |
|
"grad_norm": 74.01322174072266, |
|
"learning_rate": 1.1585185185185186e-05, |
|
"loss": 0.1924, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.108641975308642, |
|
"grad_norm": 75.74314880371094, |
|
"learning_rate": 1.1565432098765433e-05, |
|
"loss": 0.3617, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.11358024691358, |
|
"grad_norm": 22.196048736572266, |
|
"learning_rate": 1.1545679012345681e-05, |
|
"loss": 0.2283, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.1185185185185187, |
|
"grad_norm": 0.7152767777442932, |
|
"learning_rate": 1.1525925925925927e-05, |
|
"loss": 0.3129, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.123456790123457, |
|
"grad_norm": 0.11401913315057755, |
|
"learning_rate": 1.1506172839506174e-05, |
|
"loss": 0.2689, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.128395061728395, |
|
"grad_norm": 52.53899002075195, |
|
"learning_rate": 1.148641975308642e-05, |
|
"loss": 0.0563, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 42.3081169128418, |
|
"learning_rate": 1.1466666666666668e-05, |
|
"loss": 0.2296, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.1382716049382715, |
|
"grad_norm": 10.208148002624512, |
|
"learning_rate": 1.1446913580246915e-05, |
|
"loss": 0.3501, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.1432098765432097, |
|
"grad_norm": 20.181745529174805, |
|
"learning_rate": 1.1427160493827161e-05, |
|
"loss": 0.0309, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.148148148148148, |
|
"grad_norm": 0.01720772311091423, |
|
"learning_rate": 1.1407407407407409e-05, |
|
"loss": 0.1887, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.1530864197530866, |
|
"grad_norm": 6.094252109527588, |
|
"learning_rate": 1.1387654320987655e-05, |
|
"loss": 0.0933, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.1580246913580248, |
|
"grad_norm": 0.02691926248371601, |
|
"learning_rate": 1.1367901234567902e-05, |
|
"loss": 0.1443, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.162962962962963, |
|
"grad_norm": 0.3429844081401825, |
|
"learning_rate": 1.1348148148148148e-05, |
|
"loss": 0.253, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.167901234567901, |
|
"grad_norm": 36.565834045410156, |
|
"learning_rate": 1.1328395061728396e-05, |
|
"loss": 0.3124, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.1728395061728394, |
|
"grad_norm": 0.1142088919878006, |
|
"learning_rate": 1.1308641975308643e-05, |
|
"loss": 0.2102, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.1777777777777776, |
|
"grad_norm": 1.0915874242782593, |
|
"learning_rate": 1.1288888888888889e-05, |
|
"loss": 0.117, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.1827160493827162, |
|
"grad_norm": 0.015154359862208366, |
|
"learning_rate": 1.1269135802469137e-05, |
|
"loss": 0.2591, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.1876543209876544, |
|
"grad_norm": 0.0378662571310997, |
|
"learning_rate": 1.1249382716049384e-05, |
|
"loss": 0.4314, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.1925925925925926, |
|
"grad_norm": 39.53334045410156, |
|
"learning_rate": 1.122962962962963e-05, |
|
"loss": 0.0796, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.197530864197531, |
|
"grad_norm": 33.39299011230469, |
|
"learning_rate": 1.1209876543209876e-05, |
|
"loss": 0.0708, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.202469135802469, |
|
"grad_norm": 32.73172378540039, |
|
"learning_rate": 1.1190123456790124e-05, |
|
"loss": 0.0602, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.2074074074074073, |
|
"grad_norm": 27.3021297454834, |
|
"learning_rate": 1.117037037037037e-05, |
|
"loss": 0.0563, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.212345679012346, |
|
"grad_norm": 33.85374450683594, |
|
"learning_rate": 1.1150617283950617e-05, |
|
"loss": 0.3346, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.217283950617284, |
|
"grad_norm": 46.218204498291016, |
|
"learning_rate": 1.1130864197530864e-05, |
|
"loss": 0.2087, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 47.22572326660156, |
|
"learning_rate": 1.1111111111111113e-05, |
|
"loss": 0.2552, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.2271604938271605, |
|
"grad_norm": 0.1430201381444931, |
|
"learning_rate": 1.1091358024691358e-05, |
|
"loss": 0.2517, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.2320987654320987, |
|
"grad_norm": 11.38235092163086, |
|
"learning_rate": 1.1071604938271604e-05, |
|
"loss": 0.1918, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.237037037037037, |
|
"grad_norm": 37.20140838623047, |
|
"learning_rate": 1.1051851851851854e-05, |
|
"loss": 0.1549, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.241975308641975, |
|
"grad_norm": 0.10535780340433121, |
|
"learning_rate": 1.10320987654321e-05, |
|
"loss": 0.0271, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.246913580246914, |
|
"grad_norm": 0.6121019124984741, |
|
"learning_rate": 1.1012345679012347e-05, |
|
"loss": 0.389, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.251851851851852, |
|
"grad_norm": 35.94973373413086, |
|
"learning_rate": 1.0992592592592592e-05, |
|
"loss": 0.3603, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.25679012345679, |
|
"grad_norm": 95.45260620117188, |
|
"learning_rate": 1.0972839506172841e-05, |
|
"loss": 0.4025, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.2617283950617284, |
|
"grad_norm": 0.17219342291355133, |
|
"learning_rate": 1.0953086419753088e-05, |
|
"loss": 0.2335, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.2666666666666666, |
|
"grad_norm": 1.9040601253509521, |
|
"learning_rate": 1.0933333333333334e-05, |
|
"loss": 0.3124, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.271604938271605, |
|
"grad_norm": 77.7896957397461, |
|
"learning_rate": 1.0913580246913582e-05, |
|
"loss": 0.2387, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.276543209876543, |
|
"grad_norm": 0.5370518565177917, |
|
"learning_rate": 1.0893827160493829e-05, |
|
"loss": 0.1187, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.2814814814814817, |
|
"grad_norm": 113.6650619506836, |
|
"learning_rate": 1.0874074074074075e-05, |
|
"loss": 0.3598, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.28641975308642, |
|
"grad_norm": 0.025056390091776848, |
|
"learning_rate": 1.0854320987654322e-05, |
|
"loss": 0.1631, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.291358024691358, |
|
"grad_norm": 0.0650627464056015, |
|
"learning_rate": 1.083456790123457e-05, |
|
"loss": 0.257, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.2962962962962963, |
|
"grad_norm": 34.378414154052734, |
|
"learning_rate": 1.0814814814814816e-05, |
|
"loss": 0.2349, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.3012345679012345, |
|
"grad_norm": 0.046463072299957275, |
|
"learning_rate": 1.0795061728395062e-05, |
|
"loss": 0.0695, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.3061728395061727, |
|
"grad_norm": 81.86966705322266, |
|
"learning_rate": 1.077530864197531e-05, |
|
"loss": 0.2093, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.311111111111111, |
|
"grad_norm": 0.004781852941960096, |
|
"learning_rate": 1.0755555555555557e-05, |
|
"loss": 0.1424, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.3160493827160495, |
|
"grad_norm": 0.817314624786377, |
|
"learning_rate": 1.0735802469135803e-05, |
|
"loss": 0.0413, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.3209876543209877, |
|
"grad_norm": 5.055154800415039, |
|
"learning_rate": 1.071604938271605e-05, |
|
"loss": 0.0046, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.325925925925926, |
|
"grad_norm": 133.45437622070312, |
|
"learning_rate": 1.0696296296296298e-05, |
|
"loss": 0.3131, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.330864197530864, |
|
"grad_norm": 0.014058091677725315, |
|
"learning_rate": 1.0676543209876544e-05, |
|
"loss": 0.1227, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.3358024691358024, |
|
"grad_norm": 4.482833385467529, |
|
"learning_rate": 1.065679012345679e-05, |
|
"loss": 0.1694, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.3407407407407406, |
|
"grad_norm": 0.8238074779510498, |
|
"learning_rate": 1.0637037037037037e-05, |
|
"loss": 0.1315, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.3456790123456788, |
|
"grad_norm": 55.907318115234375, |
|
"learning_rate": 1.0617283950617285e-05, |
|
"loss": 0.0988, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.3506172839506174, |
|
"grad_norm": 119.31465911865234, |
|
"learning_rate": 1.0597530864197531e-05, |
|
"loss": 0.2308, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.3555555555555556, |
|
"grad_norm": 5.956635475158691, |
|
"learning_rate": 1.0577777777777778e-05, |
|
"loss": 0.1726, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.360493827160494, |
|
"grad_norm": 1.8036092519760132, |
|
"learning_rate": 1.0558024691358026e-05, |
|
"loss": 0.2904, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.365432098765432, |
|
"grad_norm": 16.762969970703125, |
|
"learning_rate": 1.0538271604938272e-05, |
|
"loss": 0.039, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.3703703703703702, |
|
"grad_norm": 0.5352030992507935, |
|
"learning_rate": 1.0518518518518519e-05, |
|
"loss": 0.6986, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.375308641975309, |
|
"grad_norm": 72.20184326171875, |
|
"learning_rate": 1.0498765432098765e-05, |
|
"loss": 0.1986, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.380246913580247, |
|
"grad_norm": 39.09406661987305, |
|
"learning_rate": 1.0479012345679013e-05, |
|
"loss": 0.2384, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.3851851851851853, |
|
"grad_norm": 101.78142547607422, |
|
"learning_rate": 1.045925925925926e-05, |
|
"loss": 0.1049, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.3901234567901235, |
|
"grad_norm": 31.242937088012695, |
|
"learning_rate": 1.0439506172839506e-05, |
|
"loss": 0.3993, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.3950617283950617, |
|
"grad_norm": 107.1478271484375, |
|
"learning_rate": 1.0419753086419756e-05, |
|
"loss": 0.1895, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.6550659537315369, |
|
"learning_rate": 1.04e-05, |
|
"loss": 0.2174, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.404938271604938, |
|
"grad_norm": 37.14043045043945, |
|
"learning_rate": 1.0380246913580247e-05, |
|
"loss": 0.2233, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.4098765432098768, |
|
"grad_norm": 10.13899040222168, |
|
"learning_rate": 1.0360493827160493e-05, |
|
"loss": 0.4372, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.414814814814815, |
|
"grad_norm": 0.8044024705886841, |
|
"learning_rate": 1.0340740740740743e-05, |
|
"loss": 0.3235, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.419753086419753, |
|
"grad_norm": 0.08543165773153305, |
|
"learning_rate": 1.032098765432099e-05, |
|
"loss": 0.0319, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.4246913580246914, |
|
"grad_norm": 25.276649475097656, |
|
"learning_rate": 1.0301234567901236e-05, |
|
"loss": 0.2608, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.4296296296296296, |
|
"grad_norm": 53.250003814697266, |
|
"learning_rate": 1.0281481481481484e-05, |
|
"loss": 0.2555, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.434567901234568, |
|
"grad_norm": 0.0675877258181572, |
|
"learning_rate": 1.026172839506173e-05, |
|
"loss": 0.1439, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.439506172839506, |
|
"grad_norm": 0.07533666491508484, |
|
"learning_rate": 1.0241975308641977e-05, |
|
"loss": 0.2685, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.4444444444444446, |
|
"grad_norm": 0.0232541486620903, |
|
"learning_rate": 1.0222222222222223e-05, |
|
"loss": 0.1896, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.449382716049383, |
|
"grad_norm": 0.4157695770263672, |
|
"learning_rate": 1.0202469135802471e-05, |
|
"loss": 0.4117, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.454320987654321, |
|
"grad_norm": 6.473262786865234, |
|
"learning_rate": 1.0182716049382717e-05, |
|
"loss": 0.1608, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.4592592592592593, |
|
"grad_norm": 47.35124588012695, |
|
"learning_rate": 1.0162962962962964e-05, |
|
"loss": 0.1861, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.4641975308641975, |
|
"grad_norm": 0.0442415289580822, |
|
"learning_rate": 1.014320987654321e-05, |
|
"loss": 0.2317, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.4691358024691357, |
|
"grad_norm": 0.02038310095667839, |
|
"learning_rate": 1.0123456790123458e-05, |
|
"loss": 0.5267, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.474074074074074, |
|
"grad_norm": 166.4259033203125, |
|
"learning_rate": 1.0103703703703705e-05, |
|
"loss": 0.2363, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.4790123456790125, |
|
"grad_norm": 68.62043762207031, |
|
"learning_rate": 1.0083950617283951e-05, |
|
"loss": 0.2097, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.4839506172839507, |
|
"grad_norm": 2.836273431777954, |
|
"learning_rate": 1.00641975308642e-05, |
|
"loss": 0.2101, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.488888888888889, |
|
"grad_norm": 4.900826930999756, |
|
"learning_rate": 1.0044444444444446e-05, |
|
"loss": 0.191, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.493827160493827, |
|
"grad_norm": 22.4804744720459, |
|
"learning_rate": 1.0024691358024692e-05, |
|
"loss": 0.182, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.4987654320987653, |
|
"grad_norm": 0.00806320272386074, |
|
"learning_rate": 1.0004938271604938e-05, |
|
"loss": 0.0106, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.5037037037037035, |
|
"grad_norm": 0.13981568813323975, |
|
"learning_rate": 9.985185185185185e-06, |
|
"loss": 0.2085, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.5086419753086417, |
|
"grad_norm": 115.363037109375, |
|
"learning_rate": 9.965432098765433e-06, |
|
"loss": 0.3881, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.5135802469135804, |
|
"grad_norm": 0.5273131132125854, |
|
"learning_rate": 9.945679012345681e-06, |
|
"loss": 0.3149, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.5185185185185186, |
|
"grad_norm": 0.044860485941171646, |
|
"learning_rate": 9.925925925925927e-06, |
|
"loss": 0.1487, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.523456790123457, |
|
"grad_norm": 0.0039957864210009575, |
|
"learning_rate": 9.906172839506174e-06, |
|
"loss": 0.1385, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.528395061728395, |
|
"grad_norm": 0.014863072894513607, |
|
"learning_rate": 9.88641975308642e-06, |
|
"loss": 0.1111, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.533333333333333, |
|
"grad_norm": 75.10174560546875, |
|
"learning_rate": 9.866666666666668e-06, |
|
"loss": 0.1741, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.538271604938272, |
|
"grad_norm": 0.048640429973602295, |
|
"learning_rate": 9.846913580246915e-06, |
|
"loss": 0.1827, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.5432098765432096, |
|
"grad_norm": 0.25287771224975586, |
|
"learning_rate": 9.827160493827161e-06, |
|
"loss": 0.2889, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.5481481481481483, |
|
"grad_norm": 3.0355021953582764, |
|
"learning_rate": 9.807407407407407e-06, |
|
"loss": 0.0549, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.5530864197530865, |
|
"grad_norm": 0.008490847423672676, |
|
"learning_rate": 9.787654320987655e-06, |
|
"loss": 0.1945, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.5580246913580247, |
|
"grad_norm": 0.055667582899332047, |
|
"learning_rate": 9.767901234567902e-06, |
|
"loss": 0.178, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.562962962962963, |
|
"grad_norm": 2.11090350151062, |
|
"learning_rate": 9.748148148148148e-06, |
|
"loss": 0.1497, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.567901234567901, |
|
"grad_norm": 48.44843292236328, |
|
"learning_rate": 9.728395061728396e-06, |
|
"loss": 0.3233, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.5728395061728397, |
|
"grad_norm": 16.53707504272461, |
|
"learning_rate": 9.708641975308643e-06, |
|
"loss": 0.0269, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.5777777777777775, |
|
"grad_norm": 85.8476791381836, |
|
"learning_rate": 9.688888888888889e-06, |
|
"loss": 0.4162, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.582716049382716, |
|
"grad_norm": 333.21466064453125, |
|
"learning_rate": 9.669135802469136e-06, |
|
"loss": 0.161, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.5876543209876544, |
|
"grad_norm": 46.150047302246094, |
|
"learning_rate": 9.649382716049384e-06, |
|
"loss": 0.1367, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.5925925925925926, |
|
"grad_norm": 23.23380470275879, |
|
"learning_rate": 9.62962962962963e-06, |
|
"loss": 0.049, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.5975308641975308, |
|
"grad_norm": 0.01312983874231577, |
|
"learning_rate": 9.609876543209878e-06, |
|
"loss": 0.4376, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.602469135802469, |
|
"grad_norm": 0.1367645114660263, |
|
"learning_rate": 9.590123456790124e-06, |
|
"loss": 0.0646, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.6074074074074076, |
|
"grad_norm": 0.16247719526290894, |
|
"learning_rate": 9.570370370370371e-06, |
|
"loss": 0.3247, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.612345679012346, |
|
"grad_norm": 140.21865844726562, |
|
"learning_rate": 9.550617283950619e-06, |
|
"loss": 0.4135, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.617283950617284, |
|
"grad_norm": 60.00096893310547, |
|
"learning_rate": 9.530864197530865e-06, |
|
"loss": 0.1836, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.6222222222222222, |
|
"grad_norm": 0.0217946358025074, |
|
"learning_rate": 9.511111111111112e-06, |
|
"loss": 0.3697, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.6271604938271604, |
|
"grad_norm": 75.67610931396484, |
|
"learning_rate": 9.491358024691358e-06, |
|
"loss": 0.0953, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.6320987654320986, |
|
"grad_norm": 19.351255416870117, |
|
"learning_rate": 9.471604938271606e-06, |
|
"loss": 0.0855, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.637037037037037, |
|
"grad_norm": 7.155949115753174, |
|
"learning_rate": 9.451851851851853e-06, |
|
"loss": 0.5717, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.6419753086419755, |
|
"grad_norm": 143.97991943359375, |
|
"learning_rate": 9.432098765432099e-06, |
|
"loss": 0.0894, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.6469135802469137, |
|
"grad_norm": 66.95204162597656, |
|
"learning_rate": 9.412345679012347e-06, |
|
"loss": 0.1136, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.651851851851852, |
|
"grad_norm": 5.1548590660095215, |
|
"learning_rate": 9.392592592592593e-06, |
|
"loss": 0.066, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.65679012345679, |
|
"grad_norm": 164.66404724121094, |
|
"learning_rate": 9.37283950617284e-06, |
|
"loss": 0.2865, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.6617283950617283, |
|
"grad_norm": 200.15574645996094, |
|
"learning_rate": 9.353086419753086e-06, |
|
"loss": 0.2895, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 233.70343017578125, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.052, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.6716049382716047, |
|
"grad_norm": 140.56007385253906, |
|
"learning_rate": 9.31358024691358e-06, |
|
"loss": 0.2882, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.6765432098765434, |
|
"grad_norm": 281.7587585449219, |
|
"learning_rate": 9.293827160493827e-06, |
|
"loss": 0.1121, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.6814814814814816, |
|
"grad_norm": 0.00958334095776081, |
|
"learning_rate": 9.274074074074075e-06, |
|
"loss": 0.0447, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.68641975308642, |
|
"grad_norm": 0.6552028059959412, |
|
"learning_rate": 9.254320987654322e-06, |
|
"loss": 0.0727, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.691358024691358, |
|
"grad_norm": 0.01010242011398077, |
|
"learning_rate": 9.23456790123457e-06, |
|
"loss": 0.1756, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.696296296296296, |
|
"grad_norm": 0.013218900188803673, |
|
"learning_rate": 9.214814814814816e-06, |
|
"loss": 0.2895, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.701234567901235, |
|
"grad_norm": 44.7857780456543, |
|
"learning_rate": 9.195061728395062e-06, |
|
"loss": 0.323, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.7061728395061726, |
|
"grad_norm": 2.435910701751709, |
|
"learning_rate": 9.175308641975309e-06, |
|
"loss": 0.473, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.7111111111111112, |
|
"grad_norm": 5.467461585998535, |
|
"learning_rate": 9.155555555555557e-06, |
|
"loss": 0.4263, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.7160493827160495, |
|
"grad_norm": 0.020925594493746758, |
|
"learning_rate": 9.135802469135803e-06, |
|
"loss": 0.1927, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.7209876543209877, |
|
"grad_norm": 0.850062906742096, |
|
"learning_rate": 9.11604938271605e-06, |
|
"loss": 0.2724, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.725925925925926, |
|
"grad_norm": 0.8104738593101501, |
|
"learning_rate": 9.096296296296298e-06, |
|
"loss": 0.0688, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.730864197530864, |
|
"grad_norm": 183.3977813720703, |
|
"learning_rate": 9.076543209876544e-06, |
|
"loss": 0.403, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.7358024691358027, |
|
"grad_norm": 0.39399421215057373, |
|
"learning_rate": 9.05679012345679e-06, |
|
"loss": 0.2956, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.7407407407407405, |
|
"grad_norm": 17.86000633239746, |
|
"learning_rate": 9.037037037037037e-06, |
|
"loss": 0.2467, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.745679012345679, |
|
"grad_norm": 0.007520174607634544, |
|
"learning_rate": 9.017283950617285e-06, |
|
"loss": 0.0734, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.7506172839506173, |
|
"grad_norm": 42.2265739440918, |
|
"learning_rate": 8.997530864197531e-06, |
|
"loss": 0.1445, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.7555555555555555, |
|
"grad_norm": 55.289222717285156, |
|
"learning_rate": 8.977777777777778e-06, |
|
"loss": 0.1346, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.7604938271604937, |
|
"grad_norm": 1.1563366651535034, |
|
"learning_rate": 8.958024691358024e-06, |
|
"loss": 0.1427, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.765432098765432, |
|
"grad_norm": 31.966625213623047, |
|
"learning_rate": 8.938271604938272e-06, |
|
"loss": 0.1432, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.7703703703703706, |
|
"grad_norm": 26.22989273071289, |
|
"learning_rate": 8.91851851851852e-06, |
|
"loss": 0.1465, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.775308641975309, |
|
"grad_norm": 2.2528607845306396, |
|
"learning_rate": 8.898765432098767e-06, |
|
"loss": 0.1046, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.780246913580247, |
|
"grad_norm": 41.7017707824707, |
|
"learning_rate": 8.879012345679013e-06, |
|
"loss": 0.3095, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.785185185185185, |
|
"grad_norm": 80.6755142211914, |
|
"learning_rate": 8.85925925925926e-06, |
|
"loss": 0.1785, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.7901234567901234, |
|
"grad_norm": 49.54252624511719, |
|
"learning_rate": 8.839506172839508e-06, |
|
"loss": 0.1924, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.7950617283950616, |
|
"grad_norm": 0.05363411456346512, |
|
"learning_rate": 8.819753086419754e-06, |
|
"loss": 0.1327, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 8.126516342163086, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.121, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.8049382716049385, |
|
"grad_norm": 0.02661011926829815, |
|
"learning_rate": 8.780246913580249e-06, |
|
"loss": 0.0073, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.8098765432098767, |
|
"grad_norm": 8.132286071777344, |
|
"learning_rate": 8.760493827160495e-06, |
|
"loss": 0.1296, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.814814814814815, |
|
"grad_norm": 62.083099365234375, |
|
"learning_rate": 8.740740740740741e-06, |
|
"loss": 0.2036, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.819753086419753, |
|
"grad_norm": 17.057275772094727, |
|
"learning_rate": 8.720987654320988e-06, |
|
"loss": 0.236, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.8246913580246913, |
|
"grad_norm": 0.07913421094417572, |
|
"learning_rate": 8.701234567901236e-06, |
|
"loss": 0.0186, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.8296296296296295, |
|
"grad_norm": 59.11501693725586, |
|
"learning_rate": 8.681481481481482e-06, |
|
"loss": 0.2352, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.8345679012345677, |
|
"grad_norm": 0.05783538892865181, |
|
"learning_rate": 8.661728395061729e-06, |
|
"loss": 0.325, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.8395061728395063, |
|
"grad_norm": 0.07834266871213913, |
|
"learning_rate": 8.641975308641975e-06, |
|
"loss": 0.0508, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.8444444444444446, |
|
"grad_norm": 2.788255214691162, |
|
"learning_rate": 8.622222222222223e-06, |
|
"loss": 0.0728, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.8493827160493828, |
|
"grad_norm": 41.630611419677734, |
|
"learning_rate": 8.602469135802471e-06, |
|
"loss": 0.2255, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.854320987654321, |
|
"grad_norm": 0.47825512290000916, |
|
"learning_rate": 8.582716049382716e-06, |
|
"loss": 0.1858, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.859259259259259, |
|
"grad_norm": 0.4730166494846344, |
|
"learning_rate": 8.562962962962964e-06, |
|
"loss": 0.0417, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.8641975308641974, |
|
"grad_norm": 0.00964848231524229, |
|
"learning_rate": 8.54320987654321e-06, |
|
"loss": 0.2487, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.8691358024691356, |
|
"grad_norm": 4.990635395050049, |
|
"learning_rate": 8.523456790123458e-06, |
|
"loss": 0.1967, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.8740740740740742, |
|
"grad_norm": 0.06853197515010834, |
|
"learning_rate": 8.503703703703705e-06, |
|
"loss": 0.1847, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.8790123456790124, |
|
"grad_norm": 14.369994163513184, |
|
"learning_rate": 8.483950617283951e-06, |
|
"loss": 0.4819, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.8839506172839506, |
|
"grad_norm": 1.4478572607040405, |
|
"learning_rate": 8.464197530864198e-06, |
|
"loss": 0.2011, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"grad_norm": 197.60943603515625, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 0.2301, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.893827160493827, |
|
"grad_norm": 0.3465060293674469, |
|
"learning_rate": 8.424691358024692e-06, |
|
"loss": 0.0814, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.8987654320987657, |
|
"grad_norm": 0.22260437905788422, |
|
"learning_rate": 8.404938271604938e-06, |
|
"loss": 0.1913, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.9037037037037035, |
|
"grad_norm": 3.2895030975341797, |
|
"learning_rate": 8.385185185185187e-06, |
|
"loss": 0.161, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.908641975308642, |
|
"grad_norm": 75.78804016113281, |
|
"learning_rate": 8.365432098765433e-06, |
|
"loss": 0.2146, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.9135802469135803, |
|
"grad_norm": 37.905670166015625, |
|
"learning_rate": 8.34567901234568e-06, |
|
"loss": 0.0171, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.9185185185185185, |
|
"grad_norm": 1.2207163572311401, |
|
"learning_rate": 8.325925925925926e-06, |
|
"loss": 0.0008, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.9234567901234567, |
|
"grad_norm": 0.26251447200775146, |
|
"learning_rate": 8.306172839506174e-06, |
|
"loss": 0.2391, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.928395061728395, |
|
"grad_norm": 184.48342895507812, |
|
"learning_rate": 8.28641975308642e-06, |
|
"loss": 0.4721, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.9333333333333336, |
|
"grad_norm": 2.430443048477173, |
|
"learning_rate": 8.266666666666667e-06, |
|
"loss": 0.3217, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.9382716049382713, |
|
"grad_norm": 167.15850830078125, |
|
"learning_rate": 8.246913580246915e-06, |
|
"loss": 0.1661, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.94320987654321, |
|
"grad_norm": 3.9648666381835938, |
|
"learning_rate": 8.227160493827161e-06, |
|
"loss": 0.0846, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.948148148148148, |
|
"grad_norm": 0.18866649270057678, |
|
"learning_rate": 8.207407407407409e-06, |
|
"loss": 0.0355, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.9530864197530864, |
|
"grad_norm": 0.19261124730110168, |
|
"learning_rate": 8.187654320987654e-06, |
|
"loss": 0.1842, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.9580246913580246, |
|
"grad_norm": 0.13655029237270355, |
|
"learning_rate": 8.167901234567902e-06, |
|
"loss": 0.0244, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"grad_norm": 0.24857792258262634, |
|
"learning_rate": 8.148148148148148e-06, |
|
"loss": 0.2052, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.9679012345679014, |
|
"grad_norm": 85.19855499267578, |
|
"learning_rate": 8.128395061728396e-06, |
|
"loss": 0.187, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.9728395061728397, |
|
"grad_norm": 190.1832733154297, |
|
"learning_rate": 8.108641975308643e-06, |
|
"loss": 0.5081, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.977777777777778, |
|
"grad_norm": 0.0004998709191568196, |
|
"learning_rate": 8.08888888888889e-06, |
|
"loss": 0.4187, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.982716049382716, |
|
"grad_norm": 0.019353624433279037, |
|
"learning_rate": 8.069135802469137e-06, |
|
"loss": 0.0796, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.9876543209876543, |
|
"grad_norm": 0.00627252459526062, |
|
"learning_rate": 8.049382716049384e-06, |
|
"loss": 0.4005, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.9925925925925925, |
|
"grad_norm": 159.71725463867188, |
|
"learning_rate": 8.02962962962963e-06, |
|
"loss": 0.028, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.9975308641975307, |
|
"grad_norm": 2.6106536388397217, |
|
"learning_rate": 8.009876543209876e-06, |
|
"loss": 0.024, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9831481481481481, |
|
"eval_loss": 0.07001630961894989, |
|
"eval_runtime": 32.6621, |
|
"eval_samples_per_second": 165.329, |
|
"eval_steps_per_second": 20.666, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 3.0024691358024693, |
|
"grad_norm": 1.3359025716781616, |
|
"learning_rate": 7.990123456790125e-06, |
|
"loss": 0.0996, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 3.0074074074074075, |
|
"grad_norm": 0.05273491516709328, |
|
"learning_rate": 7.970370370370371e-06, |
|
"loss": 0.012, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 3.0123456790123457, |
|
"grad_norm": 0.23167039453983307, |
|
"learning_rate": 7.950617283950617e-06, |
|
"loss": 0.1334, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.017283950617284, |
|
"grad_norm": 0.03928215801715851, |
|
"learning_rate": 7.930864197530865e-06, |
|
"loss": 0.1258, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 3.022222222222222, |
|
"grad_norm": 109.73241424560547, |
|
"learning_rate": 7.911111111111112e-06, |
|
"loss": 0.1747, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 3.0271604938271603, |
|
"grad_norm": 2.945659637451172, |
|
"learning_rate": 7.89135802469136e-06, |
|
"loss": 0.1064, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 3.0320987654320986, |
|
"grad_norm": 19.941844940185547, |
|
"learning_rate": 7.871604938271605e-06, |
|
"loss": 0.1372, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 3.037037037037037, |
|
"grad_norm": 0.11880356073379517, |
|
"learning_rate": 7.851851851851853e-06, |
|
"loss": 0.0212, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 3.0419753086419754, |
|
"grad_norm": 1.0245414972305298, |
|
"learning_rate": 7.832098765432099e-06, |
|
"loss": 0.2392, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 3.0469135802469136, |
|
"grad_norm": 0.23312650620937347, |
|
"learning_rate": 7.812345679012347e-06, |
|
"loss": 0.0706, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 3.051851851851852, |
|
"grad_norm": 63.500797271728516, |
|
"learning_rate": 7.792592592592594e-06, |
|
"loss": 0.2912, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 3.05679012345679, |
|
"grad_norm": 4.3201727867126465, |
|
"learning_rate": 7.77283950617284e-06, |
|
"loss": 0.1027, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 3.0617283950617282, |
|
"grad_norm": 0.009072243236005306, |
|
"learning_rate": 7.753086419753088e-06, |
|
"loss": 0.0177, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.066666666666667, |
|
"grad_norm": 7.860177993774414, |
|
"learning_rate": 7.733333333333334e-06, |
|
"loss": 0.1266, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 3.071604938271605, |
|
"grad_norm": 125.65026092529297, |
|
"learning_rate": 7.71358024691358e-06, |
|
"loss": 0.1102, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 3.0765432098765433, |
|
"grad_norm": 64.10157012939453, |
|
"learning_rate": 7.693827160493827e-06, |
|
"loss": 0.2813, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 3.0814814814814815, |
|
"grad_norm": 0.023331521078944206, |
|
"learning_rate": 7.674074074074075e-06, |
|
"loss": 0.4718, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 3.0864197530864197, |
|
"grad_norm": 0.9373367428779602, |
|
"learning_rate": 7.654320987654322e-06, |
|
"loss": 0.1784, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 3.091358024691358, |
|
"grad_norm": 0.09618625789880753, |
|
"learning_rate": 7.634567901234568e-06, |
|
"loss": 0.1675, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 3.096296296296296, |
|
"grad_norm": 53.146034240722656, |
|
"learning_rate": 7.614814814814816e-06, |
|
"loss": 0.3012, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 3.1012345679012348, |
|
"grad_norm": 0.9176463484764099, |
|
"learning_rate": 7.5950617283950625e-06, |
|
"loss": 0.0438, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 3.106172839506173, |
|
"grad_norm": 0.6210525035858154, |
|
"learning_rate": 7.57530864197531e-06, |
|
"loss": 0.2127, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 3.111111111111111, |
|
"grad_norm": 171.12738037109375, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 0.3021, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.1160493827160494, |
|
"grad_norm": 0.15432004630565643, |
|
"learning_rate": 7.535802469135803e-06, |
|
"loss": 0.2258, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 3.1209876543209876, |
|
"grad_norm": 6.785965919494629, |
|
"learning_rate": 7.51604938271605e-06, |
|
"loss": 0.0524, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 3.1259259259259258, |
|
"grad_norm": 14.042142868041992, |
|
"learning_rate": 7.496296296296297e-06, |
|
"loss": 0.1315, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 3.1308641975308644, |
|
"grad_norm": 0.005698219407349825, |
|
"learning_rate": 7.476543209876543e-06, |
|
"loss": 0.59, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 3.1358024691358026, |
|
"grad_norm": 0.2984008193016052, |
|
"learning_rate": 7.456790123456791e-06, |
|
"loss": 0.1643, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 3.140740740740741, |
|
"grad_norm": 33.20651626586914, |
|
"learning_rate": 7.437037037037038e-06, |
|
"loss": 0.0757, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 3.145679012345679, |
|
"grad_norm": 39.41627883911133, |
|
"learning_rate": 7.417283950617284e-06, |
|
"loss": 0.2282, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 3.1506172839506172, |
|
"grad_norm": 0.06810309737920761, |
|
"learning_rate": 7.3975308641975315e-06, |
|
"loss": 0.0338, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 3.1555555555555554, |
|
"grad_norm": 0.4489476680755615, |
|
"learning_rate": 7.377777777777778e-06, |
|
"loss": 0.258, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 3.1604938271604937, |
|
"grad_norm": 3.387746572494507, |
|
"learning_rate": 7.358024691358025e-06, |
|
"loss": 0.0842, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.1654320987654323, |
|
"grad_norm": 2.4589788913726807, |
|
"learning_rate": 7.3382716049382715e-06, |
|
"loss": 0.1025, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 3.1703703703703705, |
|
"grad_norm": 11.912010192871094, |
|
"learning_rate": 7.31851851851852e-06, |
|
"loss": 0.1159, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 3.1753086419753087, |
|
"grad_norm": 0.0014852778986096382, |
|
"learning_rate": 7.298765432098765e-06, |
|
"loss": 0.1174, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 3.180246913580247, |
|
"grad_norm": 0.23326246440410614, |
|
"learning_rate": 7.279012345679013e-06, |
|
"loss": 0.1595, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 3.185185185185185, |
|
"grad_norm": 0.023275885730981827, |
|
"learning_rate": 7.2592592592592605e-06, |
|
"loss": 0.3177, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 3.1901234567901233, |
|
"grad_norm": 0.0346212200820446, |
|
"learning_rate": 7.239506172839507e-06, |
|
"loss": 0.1329, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 3.1950617283950615, |
|
"grad_norm": 0.14802587032318115, |
|
"learning_rate": 7.219753086419754e-06, |
|
"loss": 0.0812, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.2590476870536804, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.0625, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 3.2049382716049384, |
|
"grad_norm": 0.7991506457328796, |
|
"learning_rate": 7.180246913580248e-06, |
|
"loss": 0.0811, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 3.2098765432098766, |
|
"grad_norm": 76.12113189697266, |
|
"learning_rate": 7.160493827160494e-06, |
|
"loss": 0.0727, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.214814814814815, |
|
"grad_norm": 24.764394760131836, |
|
"learning_rate": 7.140740740740741e-06, |
|
"loss": 0.3267, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 3.219753086419753, |
|
"grad_norm": 59.69222640991211, |
|
"learning_rate": 7.120987654320988e-06, |
|
"loss": 0.1661, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 3.224691358024691, |
|
"grad_norm": 0.007727318909019232, |
|
"learning_rate": 7.101234567901235e-06, |
|
"loss": 0.1388, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 3.2296296296296294, |
|
"grad_norm": 1.3282524347305298, |
|
"learning_rate": 7.081481481481482e-06, |
|
"loss": 0.0129, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 3.234567901234568, |
|
"grad_norm": 58.830318450927734, |
|
"learning_rate": 7.061728395061729e-06, |
|
"loss": 0.075, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 3.2395061728395063, |
|
"grad_norm": 0.0027803820557892323, |
|
"learning_rate": 7.041975308641976e-06, |
|
"loss": 0.0688, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 3.2444444444444445, |
|
"grad_norm": 7.03369140625, |
|
"learning_rate": 7.022222222222222e-06, |
|
"loss": 0.2156, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 3.2493827160493827, |
|
"grad_norm": 0.3327115476131439, |
|
"learning_rate": 7.0024691358024695e-06, |
|
"loss": 0.2499, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 3.254320987654321, |
|
"grad_norm": 0.007271229289472103, |
|
"learning_rate": 6.982716049382716e-06, |
|
"loss": 0.1749, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 3.259259259259259, |
|
"grad_norm": 0.011601006612181664, |
|
"learning_rate": 6.962962962962964e-06, |
|
"loss": 0.4342, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.2641975308641973, |
|
"grad_norm": 1.5765591859817505, |
|
"learning_rate": 6.943209876543211e-06, |
|
"loss": 0.02, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 3.269135802469136, |
|
"grad_norm": 10.005110740661621, |
|
"learning_rate": 6.923456790123458e-06, |
|
"loss": 0.1143, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 3.274074074074074, |
|
"grad_norm": 0.1242939829826355, |
|
"learning_rate": 6.903703703703705e-06, |
|
"loss": 0.3571, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 3.2790123456790123, |
|
"grad_norm": 57.85032272338867, |
|
"learning_rate": 6.883950617283951e-06, |
|
"loss": 0.3811, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 3.2839506172839505, |
|
"grad_norm": 1.068203091621399, |
|
"learning_rate": 6.8641975308641985e-06, |
|
"loss": 0.1045, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 3.2888888888888888, |
|
"grad_norm": 0.03020775318145752, |
|
"learning_rate": 6.844444444444445e-06, |
|
"loss": 0.0945, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 3.2938271604938274, |
|
"grad_norm": 18.36736297607422, |
|
"learning_rate": 6.824691358024692e-06, |
|
"loss": 0.2015, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 3.2987654320987656, |
|
"grad_norm": 0.0009854953968897462, |
|
"learning_rate": 6.8049382716049385e-06, |
|
"loss": 0.2278, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 3.303703703703704, |
|
"grad_norm": 0.02513027749955654, |
|
"learning_rate": 6.785185185185186e-06, |
|
"loss": 0.2392, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 3.308641975308642, |
|
"grad_norm": 29.72653579711914, |
|
"learning_rate": 6.765432098765433e-06, |
|
"loss": 0.2054, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.31358024691358, |
|
"grad_norm": 0.006469042040407658, |
|
"learning_rate": 6.745679012345679e-06, |
|
"loss": 0.0055, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 3.3185185185185184, |
|
"grad_norm": 129.7929229736328, |
|
"learning_rate": 6.725925925925927e-06, |
|
"loss": 0.0842, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 3.3234567901234566, |
|
"grad_norm": 0.4482802748680115, |
|
"learning_rate": 6.706172839506173e-06, |
|
"loss": 0.1121, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 3.3283950617283953, |
|
"grad_norm": 10.919482231140137, |
|
"learning_rate": 6.68641975308642e-06, |
|
"loss": 0.2323, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.10504257678985596, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.1925, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 3.3382716049382717, |
|
"grad_norm": 26.70441436767578, |
|
"learning_rate": 6.646913580246914e-06, |
|
"loss": 0.3749, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 3.34320987654321, |
|
"grad_norm": 1.2347007989883423, |
|
"learning_rate": 6.62716049382716e-06, |
|
"loss": 0.1701, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 3.348148148148148, |
|
"grad_norm": 6.345317840576172, |
|
"learning_rate": 6.6074074074074075e-06, |
|
"loss": 0.0607, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 3.3530864197530863, |
|
"grad_norm": 13.622949600219727, |
|
"learning_rate": 6.587654320987656e-06, |
|
"loss": 0.1763, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 3.3580246913580245, |
|
"grad_norm": 16.68195152282715, |
|
"learning_rate": 6.567901234567902e-06, |
|
"loss": 0.2754, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.362962962962963, |
|
"grad_norm": 0.2912677526473999, |
|
"learning_rate": 6.548148148148149e-06, |
|
"loss": 0.2011, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 3.3679012345679014, |
|
"grad_norm": 76.45751953125, |
|
"learning_rate": 6.528395061728396e-06, |
|
"loss": 0.3157, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 3.3728395061728396, |
|
"grad_norm": 0.0012998235179111362, |
|
"learning_rate": 6.508641975308643e-06, |
|
"loss": 0.1313, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 3.3777777777777778, |
|
"grad_norm": 170.02474975585938, |
|
"learning_rate": 6.488888888888889e-06, |
|
"loss": 0.1319, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 3.382716049382716, |
|
"grad_norm": 87.3119888305664, |
|
"learning_rate": 6.4691358024691365e-06, |
|
"loss": 0.2838, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 3.387654320987654, |
|
"grad_norm": 25.350370407104492, |
|
"learning_rate": 6.449382716049383e-06, |
|
"loss": 0.1525, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 3.3925925925925924, |
|
"grad_norm": 0.22812433540821075, |
|
"learning_rate": 6.42962962962963e-06, |
|
"loss": 0.0099, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 3.397530864197531, |
|
"grad_norm": 0.06566119194030762, |
|
"learning_rate": 6.409876543209877e-06, |
|
"loss": 0.0049, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 3.4024691358024692, |
|
"grad_norm": 0.003955530468374491, |
|
"learning_rate": 6.390123456790124e-06, |
|
"loss": 0.3611, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 3.4074074074074074, |
|
"grad_norm": 46.40278244018555, |
|
"learning_rate": 6.370370370370371e-06, |
|
"loss": 0.2929, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.4123456790123456, |
|
"grad_norm": 0.0017953283386304975, |
|
"learning_rate": 6.350617283950617e-06, |
|
"loss": 0.0162, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 3.417283950617284, |
|
"grad_norm": 0.001457493519410491, |
|
"learning_rate": 6.330864197530865e-06, |
|
"loss": 0.1854, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 3.422222222222222, |
|
"grad_norm": 0.0005978038534522057, |
|
"learning_rate": 6.311111111111111e-06, |
|
"loss": 0.2118, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 3.4271604938271603, |
|
"grad_norm": 3.947251558303833, |
|
"learning_rate": 6.291358024691358e-06, |
|
"loss": 0.0656, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 3.432098765432099, |
|
"grad_norm": 13.78681755065918, |
|
"learning_rate": 6.271604938271606e-06, |
|
"loss": 0.0259, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 3.437037037037037, |
|
"grad_norm": 0.04035714268684387, |
|
"learning_rate": 6.251851851851852e-06, |
|
"loss": 0.0107, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 3.4419753086419753, |
|
"grad_norm": 0.024245211854577065, |
|
"learning_rate": 6.2320987654321e-06, |
|
"loss": 0.1175, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 3.4469135802469135, |
|
"grad_norm": 0.04458506777882576, |
|
"learning_rate": 6.212345679012346e-06, |
|
"loss": 0.2044, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 3.4518518518518517, |
|
"grad_norm": 161.80392456054688, |
|
"learning_rate": 6.192592592592594e-06, |
|
"loss": 0.2394, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 3.45679012345679, |
|
"grad_norm": 0.04583211988210678, |
|
"learning_rate": 6.17283950617284e-06, |
|
"loss": 0.0821, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.4617283950617286, |
|
"grad_norm": 0.14376536011695862, |
|
"learning_rate": 6.153086419753087e-06, |
|
"loss": 0.3085, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 3.466666666666667, |
|
"grad_norm": 92.59646606445312, |
|
"learning_rate": 6.133333333333334e-06, |
|
"loss": 0.2538, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 3.471604938271605, |
|
"grad_norm": 83.26078033447266, |
|
"learning_rate": 6.113580246913581e-06, |
|
"loss": 0.3145, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 3.476543209876543, |
|
"grad_norm": 74.77570343017578, |
|
"learning_rate": 6.093827160493828e-06, |
|
"loss": 0.1775, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 3.4814814814814814, |
|
"grad_norm": 0.038955166935920715, |
|
"learning_rate": 6.0740740740740745e-06, |
|
"loss": 0.1509, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 3.4864197530864196, |
|
"grad_norm": 97.1812973022461, |
|
"learning_rate": 6.054320987654322e-06, |
|
"loss": 0.2155, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 3.4913580246913583, |
|
"grad_norm": 73.86189270019531, |
|
"learning_rate": 6.034567901234568e-06, |
|
"loss": 0.2615, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 3.4962962962962965, |
|
"grad_norm": 0.0055229514837265015, |
|
"learning_rate": 6.014814814814815e-06, |
|
"loss": 0.2428, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 3.5012345679012347, |
|
"grad_norm": 0.0022700978443026543, |
|
"learning_rate": 5.995061728395062e-06, |
|
"loss": 0.2049, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 3.506172839506173, |
|
"grad_norm": 1.260072946548462, |
|
"learning_rate": 5.975308641975309e-06, |
|
"loss": 0.181, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.511111111111111, |
|
"grad_norm": 1.283315896987915, |
|
"learning_rate": 5.955555555555555e-06, |
|
"loss": 0.0807, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 3.5160493827160493, |
|
"grad_norm": 82.1073989868164, |
|
"learning_rate": 5.935802469135803e-06, |
|
"loss": 0.1029, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 3.5209876543209875, |
|
"grad_norm": 8.620868682861328, |
|
"learning_rate": 5.916049382716051e-06, |
|
"loss": 0.2403, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 3.525925925925926, |
|
"grad_norm": 6.648277282714844, |
|
"learning_rate": 5.896296296296296e-06, |
|
"loss": 0.0663, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 3.5308641975308643, |
|
"grad_norm": 0.3625084459781647, |
|
"learning_rate": 5.876543209876544e-06, |
|
"loss": 0.1895, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 3.5358024691358025, |
|
"grad_norm": 25.613967895507812, |
|
"learning_rate": 5.856790123456791e-06, |
|
"loss": 0.0466, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 3.5407407407407407, |
|
"grad_norm": 0.6308773756027222, |
|
"learning_rate": 5.837037037037038e-06, |
|
"loss": 0.0887, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 3.545679012345679, |
|
"grad_norm": 28.219980239868164, |
|
"learning_rate": 5.817283950617284e-06, |
|
"loss": 0.071, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 3.550617283950617, |
|
"grad_norm": 42.56242752075195, |
|
"learning_rate": 5.797530864197532e-06, |
|
"loss": 0.345, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 0.07085005193948746, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.3513, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.560493827160494, |
|
"grad_norm": 0.4435485005378723, |
|
"learning_rate": 5.758024691358025e-06, |
|
"loss": 0.0908, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 3.565432098765432, |
|
"grad_norm": 0.009900487028062344, |
|
"learning_rate": 5.7382716049382725e-06, |
|
"loss": 0.1456, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 3.5703703703703704, |
|
"grad_norm": 0.001979109598323703, |
|
"learning_rate": 5.718518518518519e-06, |
|
"loss": 0.0493, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 3.5753086419753086, |
|
"grad_norm": 0.20845463871955872, |
|
"learning_rate": 5.698765432098766e-06, |
|
"loss": 0.264, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 3.580246913580247, |
|
"grad_norm": 0.7934794425964355, |
|
"learning_rate": 5.6790123456790125e-06, |
|
"loss": 0.0509, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 3.585185185185185, |
|
"grad_norm": 0.045501917600631714, |
|
"learning_rate": 5.65925925925926e-06, |
|
"loss": 0.0933, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 3.5901234567901232, |
|
"grad_norm": 0.040048014372587204, |
|
"learning_rate": 5.639506172839506e-06, |
|
"loss": 0.0733, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 3.595061728395062, |
|
"grad_norm": 197.66177368164062, |
|
"learning_rate": 5.619753086419753e-06, |
|
"loss": 0.2655, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.03324214369058609, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.0812, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 3.6049382716049383, |
|
"grad_norm": 124.81009674072266, |
|
"learning_rate": 5.580246913580247e-06, |
|
"loss": 0.1874, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.6098765432098765, |
|
"grad_norm": 14.227179527282715, |
|
"learning_rate": 5.560493827160495e-06, |
|
"loss": 0.1483, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 3.6148148148148147, |
|
"grad_norm": 28.93998146057129, |
|
"learning_rate": 5.540740740740741e-06, |
|
"loss": 0.2179, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 3.6197530864197534, |
|
"grad_norm": 109.27143096923828, |
|
"learning_rate": 5.520987654320989e-06, |
|
"loss": 0.2175, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 3.624691358024691, |
|
"grad_norm": 3.306696653366089, |
|
"learning_rate": 5.501234567901234e-06, |
|
"loss": 0.1275, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 3.6296296296296298, |
|
"grad_norm": 53.0710563659668, |
|
"learning_rate": 5.481481481481482e-06, |
|
"loss": 0.2602, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 3.634567901234568, |
|
"grad_norm": 0.00018215861928183585, |
|
"learning_rate": 5.461728395061729e-06, |
|
"loss": 0.1973, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 3.639506172839506, |
|
"grad_norm": 14.688875198364258, |
|
"learning_rate": 5.441975308641976e-06, |
|
"loss": 0.1937, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 3.6444444444444444, |
|
"grad_norm": 121.82637023925781, |
|
"learning_rate": 5.422222222222223e-06, |
|
"loss": 0.1325, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 3.6493827160493826, |
|
"grad_norm": 0.004047624301165342, |
|
"learning_rate": 5.40246913580247e-06, |
|
"loss": 0.1085, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 3.6543209876543212, |
|
"grad_norm": 108.3661880493164, |
|
"learning_rate": 5.382716049382717e-06, |
|
"loss": 0.2458, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.659259259259259, |
|
"grad_norm": 0.029978841543197632, |
|
"learning_rate": 5.362962962962963e-06, |
|
"loss": 0.2308, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 3.6641975308641976, |
|
"grad_norm": 32.663150787353516, |
|
"learning_rate": 5.3432098765432105e-06, |
|
"loss": 0.192, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 3.669135802469136, |
|
"grad_norm": 0.000704328587744385, |
|
"learning_rate": 5.323456790123457e-06, |
|
"loss": 0.2431, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 3.674074074074074, |
|
"grad_norm": 81.13653564453125, |
|
"learning_rate": 5.303703703703704e-06, |
|
"loss": 0.1404, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 3.6790123456790123, |
|
"grad_norm": 0.0007958766655065119, |
|
"learning_rate": 5.2839506172839505e-06, |
|
"loss": 0.0767, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 3.6839506172839505, |
|
"grad_norm": 112.87112426757812, |
|
"learning_rate": 5.264197530864198e-06, |
|
"loss": 0.1402, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 3.688888888888889, |
|
"grad_norm": 41.893638610839844, |
|
"learning_rate": 5.244444444444445e-06, |
|
"loss": 0.1472, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 3.6938271604938273, |
|
"grad_norm": 3.585242748260498, |
|
"learning_rate": 5.224691358024691e-06, |
|
"loss": 0.0414, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 3.6987654320987655, |
|
"grad_norm": 69.6523208618164, |
|
"learning_rate": 5.2049382716049394e-06, |
|
"loss": 0.1479, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 3.7037037037037037, |
|
"grad_norm": 0.9416589736938477, |
|
"learning_rate": 5.185185185185185e-06, |
|
"loss": 0.1811, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.708641975308642, |
|
"grad_norm": 193.36740112304688, |
|
"learning_rate": 5.165432098765433e-06, |
|
"loss": 0.1734, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 3.71358024691358, |
|
"grad_norm": 83.2663803100586, |
|
"learning_rate": 5.145679012345679e-06, |
|
"loss": 0.3664, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 3.7185185185185183, |
|
"grad_norm": 1.504310131072998, |
|
"learning_rate": 5.125925925925927e-06, |
|
"loss": 0.3391, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 3.723456790123457, |
|
"grad_norm": 63.3848876953125, |
|
"learning_rate": 5.106172839506173e-06, |
|
"loss": 0.2681, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 3.728395061728395, |
|
"grad_norm": 0.005675642751157284, |
|
"learning_rate": 5.08641975308642e-06, |
|
"loss": 0.193, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 3.7333333333333334, |
|
"grad_norm": 0.013251741416752338, |
|
"learning_rate": 5.0666666666666676e-06, |
|
"loss": 0.1873, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 3.7382716049382716, |
|
"grad_norm": 0.0012360225664451718, |
|
"learning_rate": 5.046913580246914e-06, |
|
"loss": 0.2134, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 3.74320987654321, |
|
"grad_norm": 127.34367370605469, |
|
"learning_rate": 5.027160493827161e-06, |
|
"loss": 0.4686, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 3.748148148148148, |
|
"grad_norm": 0.01218173187226057, |
|
"learning_rate": 5.007407407407408e-06, |
|
"loss": 0.0103, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 3.753086419753086, |
|
"grad_norm": 0.03588619455695152, |
|
"learning_rate": 4.987654320987655e-06, |
|
"loss": 0.0478, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.758024691358025, |
|
"grad_norm": 126.76322937011719, |
|
"learning_rate": 4.967901234567902e-06, |
|
"loss": 0.1531, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 3.762962962962963, |
|
"grad_norm": 39.57160949707031, |
|
"learning_rate": 4.9481481481481485e-06, |
|
"loss": 0.0445, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 3.7679012345679013, |
|
"grad_norm": 0.4843272566795349, |
|
"learning_rate": 4.928395061728396e-06, |
|
"loss": 0.0298, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 3.7728395061728395, |
|
"grad_norm": 33.181583404541016, |
|
"learning_rate": 4.908641975308642e-06, |
|
"loss": 0.3563, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 3.7777777777777777, |
|
"grad_norm": 27.694658279418945, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.142, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 3.782716049382716, |
|
"grad_norm": 0.008271468803286552, |
|
"learning_rate": 4.869135802469136e-06, |
|
"loss": 0.1445, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 3.787654320987654, |
|
"grad_norm": 180.6202850341797, |
|
"learning_rate": 4.849382716049383e-06, |
|
"loss": 0.3204, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 3.7925925925925927, |
|
"grad_norm": 58.78599548339844, |
|
"learning_rate": 4.82962962962963e-06, |
|
"loss": 0.2717, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 3.797530864197531, |
|
"grad_norm": 48.85298538208008, |
|
"learning_rate": 4.8098765432098774e-06, |
|
"loss": 0.3752, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 3.802469135802469, |
|
"grad_norm": 119.5743637084961, |
|
"learning_rate": 4.790123456790124e-06, |
|
"loss": 0.266, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.8074074074074074, |
|
"grad_norm": 38.25589370727539, |
|
"learning_rate": 4.770370370370371e-06, |
|
"loss": 0.1581, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 3.8123456790123456, |
|
"grad_norm": 4.294593811035156, |
|
"learning_rate": 4.7506172839506175e-06, |
|
"loss": 0.0615, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 3.817283950617284, |
|
"grad_norm": 0.23868466913700104, |
|
"learning_rate": 4.730864197530865e-06, |
|
"loss": 0.2377, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 3.822222222222222, |
|
"grad_norm": 1.3772286176681519, |
|
"learning_rate": 4.711111111111111e-06, |
|
"loss": 0.1767, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 3.8271604938271606, |
|
"grad_norm": 0.004857083782553673, |
|
"learning_rate": 4.691358024691358e-06, |
|
"loss": 0.081, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 3.832098765432099, |
|
"grad_norm": 62.059326171875, |
|
"learning_rate": 4.6716049382716056e-06, |
|
"loss": 0.1362, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 3.837037037037037, |
|
"grad_norm": 0.022881271317601204, |
|
"learning_rate": 4.651851851851853e-06, |
|
"loss": 0.0713, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 3.8419753086419752, |
|
"grad_norm": 39.1450309753418, |
|
"learning_rate": 4.632098765432099e-06, |
|
"loss": 0.0745, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 3.8469135802469134, |
|
"grad_norm": 4.154773712158203, |
|
"learning_rate": 4.6123456790123464e-06, |
|
"loss": 0.1029, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 3.851851851851852, |
|
"grad_norm": 68.09147644042969, |
|
"learning_rate": 4.592592592592593e-06, |
|
"loss": 0.0558, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.8567901234567903, |
|
"grad_norm": 0.14514310657978058, |
|
"learning_rate": 4.57283950617284e-06, |
|
"loss": 0.0501, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 3.8617283950617285, |
|
"grad_norm": 1.0181536674499512, |
|
"learning_rate": 4.5530864197530865e-06, |
|
"loss": 0.0579, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 3.8666666666666667, |
|
"grad_norm": 141.15499877929688, |
|
"learning_rate": 4.533333333333334e-06, |
|
"loss": 0.2657, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 3.871604938271605, |
|
"grad_norm": 0.6955594420433044, |
|
"learning_rate": 4.513580246913581e-06, |
|
"loss": 0.2284, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 3.876543209876543, |
|
"grad_norm": 125.45293426513672, |
|
"learning_rate": 4.493827160493827e-06, |
|
"loss": 0.446, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 3.8814814814814813, |
|
"grad_norm": 0.0857425257563591, |
|
"learning_rate": 4.4740740740740746e-06, |
|
"loss": 0.0597, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 3.88641975308642, |
|
"grad_norm": 44.19774627685547, |
|
"learning_rate": 4.454320987654322e-06, |
|
"loss": 0.2066, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 3.891358024691358, |
|
"grad_norm": 61.00041580200195, |
|
"learning_rate": 4.434567901234568e-06, |
|
"loss": 0.1439, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 3.8962962962962964, |
|
"grad_norm": 0.8123835325241089, |
|
"learning_rate": 4.4148148148148154e-06, |
|
"loss": 0.2655, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 3.9012345679012346, |
|
"grad_norm": 0.0009880654979497194, |
|
"learning_rate": 4.395061728395062e-06, |
|
"loss": 0.1153, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.906172839506173, |
|
"grad_norm": 0.0027614731807261705, |
|
"learning_rate": 4.375308641975309e-06, |
|
"loss": 0.0693, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 3.911111111111111, |
|
"grad_norm": 99.65026092529297, |
|
"learning_rate": 4.3555555555555555e-06, |
|
"loss": 0.3998, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 3.916049382716049, |
|
"grad_norm": 17.23603057861328, |
|
"learning_rate": 4.335802469135803e-06, |
|
"loss": 0.2811, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 3.920987654320988, |
|
"grad_norm": 13.379603385925293, |
|
"learning_rate": 4.31604938271605e-06, |
|
"loss": 0.0989, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 3.925925925925926, |
|
"grad_norm": 0.12741827964782715, |
|
"learning_rate": 4.296296296296296e-06, |
|
"loss": 0.0431, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 3.9308641975308642, |
|
"grad_norm": 164.3784637451172, |
|
"learning_rate": 4.2765432098765436e-06, |
|
"loss": 0.3376, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 3.9358024691358025, |
|
"grad_norm": 0.002450704574584961, |
|
"learning_rate": 4.256790123456791e-06, |
|
"loss": 0.039, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 3.9407407407407407, |
|
"grad_norm": 9.37784194946289, |
|
"learning_rate": 4.237037037037037e-06, |
|
"loss": 0.3296, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 3.945679012345679, |
|
"grad_norm": 0.9755693078041077, |
|
"learning_rate": 4.2172839506172844e-06, |
|
"loss": 0.2798, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 3.950617283950617, |
|
"grad_norm": 17.373695373535156, |
|
"learning_rate": 4.197530864197531e-06, |
|
"loss": 0.044, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.9555555555555557, |
|
"grad_norm": 40.896148681640625, |
|
"learning_rate": 4.177777777777778e-06, |
|
"loss": 0.1641, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 3.960493827160494, |
|
"grad_norm": 7.210272312164307, |
|
"learning_rate": 4.158024691358025e-06, |
|
"loss": 0.0641, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 3.965432098765432, |
|
"grad_norm": 0.3746698498725891, |
|
"learning_rate": 4.138271604938272e-06, |
|
"loss": 0.1236, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 3.9703703703703703, |
|
"grad_norm": 2.9503226280212402, |
|
"learning_rate": 4.118518518518519e-06, |
|
"loss": 0.0634, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 3.9753086419753085, |
|
"grad_norm": 1.2919955253601074, |
|
"learning_rate": 4.098765432098766e-06, |
|
"loss": 0.0069, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 3.980246913580247, |
|
"grad_norm": 0.08173320442438126, |
|
"learning_rate": 4.0790123456790126e-06, |
|
"loss": 0.1177, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 3.985185185185185, |
|
"grad_norm": 0.10468322783708572, |
|
"learning_rate": 4.05925925925926e-06, |
|
"loss": 0.0602, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 3.9901234567901236, |
|
"grad_norm": 0.1967976987361908, |
|
"learning_rate": 4.039506172839506e-06, |
|
"loss": 0.1996, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 3.995061728395062, |
|
"grad_norm": 16.828914642333984, |
|
"learning_rate": 4.0197530864197534e-06, |
|
"loss": 0.0063, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 142.36537170410156, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.2674, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9833333333333333, |
|
"eval_loss": 0.06853805482387543, |
|
"eval_runtime": 32.7103, |
|
"eval_samples_per_second": 165.086, |
|
"eval_steps_per_second": 20.636, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.004938271604939, |
|
"grad_norm": 0.028582552447915077, |
|
"learning_rate": 3.980246913580247e-06, |
|
"loss": 0.3409, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 4.009876543209876, |
|
"grad_norm": 0.12553012371063232, |
|
"learning_rate": 3.960493827160494e-06, |
|
"loss": 0.1076, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 4.014814814814815, |
|
"grad_norm": 0.08727646619081497, |
|
"learning_rate": 3.940740740740741e-06, |
|
"loss": 0.2658, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 4.019753086419753, |
|
"grad_norm": 40.70219802856445, |
|
"learning_rate": 3.920987654320988e-06, |
|
"loss": 0.1109, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 4.0246913580246915, |
|
"grad_norm": 0.04967527464032173, |
|
"learning_rate": 3.901234567901235e-06, |
|
"loss": 0.2816, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 4.029629629629629, |
|
"grad_norm": 4.632954120635986, |
|
"learning_rate": 3.8814814814814816e-06, |
|
"loss": 0.0101, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 4.034567901234568, |
|
"grad_norm": 11.988831520080566, |
|
"learning_rate": 3.861728395061729e-06, |
|
"loss": 0.1071, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 4.0395061728395065, |
|
"grad_norm": 0.002083718776702881, |
|
"learning_rate": 3.841975308641976e-06, |
|
"loss": 0.3421, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 4.044444444444444, |
|
"grad_norm": 7.259564399719238, |
|
"learning_rate": 3.8222222222222224e-06, |
|
"loss": 0.0545, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 4.049382716049383, |
|
"grad_norm": 0.12477586418390274, |
|
"learning_rate": 3.8024691358024697e-06, |
|
"loss": 0.056, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.054320987654321, |
|
"grad_norm": 131.77743530273438, |
|
"learning_rate": 3.7827160493827165e-06, |
|
"loss": 0.1617, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 4.059259259259259, |
|
"grad_norm": 0.1798364818096161, |
|
"learning_rate": 3.7629629629629633e-06, |
|
"loss": 0.0063, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 4.064197530864197, |
|
"grad_norm": 111.68184661865234, |
|
"learning_rate": 3.74320987654321e-06, |
|
"loss": 0.071, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 4.069135802469136, |
|
"grad_norm": 75.00855255126953, |
|
"learning_rate": 3.723456790123457e-06, |
|
"loss": 0.4207, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 4.074074074074074, |
|
"grad_norm": 0.0791148990392685, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.0377, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 4.079012345679012, |
|
"grad_norm": 123.85789489746094, |
|
"learning_rate": 3.6839506172839506e-06, |
|
"loss": 0.282, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 4.083950617283951, |
|
"grad_norm": 0.0917818695306778, |
|
"learning_rate": 3.6641975308641982e-06, |
|
"loss": 0.2107, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 4.088888888888889, |
|
"grad_norm": 93.7401123046875, |
|
"learning_rate": 3.644444444444445e-06, |
|
"loss": 0.4766, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 4.093827160493827, |
|
"grad_norm": 4.973775863647461, |
|
"learning_rate": 3.624691358024692e-06, |
|
"loss": 0.1966, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 4.098765432098766, |
|
"grad_norm": 13.099119186401367, |
|
"learning_rate": 3.6049382716049387e-06, |
|
"loss": 0.0284, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 4.103703703703704, |
|
"grad_norm": 0.14128296077251434, |
|
"learning_rate": 3.5851851851851855e-06, |
|
"loss": 0.3451, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 4.108641975308642, |
|
"grad_norm": 19.09874153137207, |
|
"learning_rate": 3.5654320987654323e-06, |
|
"loss": 0.3137, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 4.11358024691358, |
|
"grad_norm": 33.85554504394531, |
|
"learning_rate": 3.545679012345679e-06, |
|
"loss": 0.1776, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 4.118518518518519, |
|
"grad_norm": 0.02345465123653412, |
|
"learning_rate": 3.525925925925926e-06, |
|
"loss": 0.2006, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 4.1234567901234565, |
|
"grad_norm": 90.08519744873047, |
|
"learning_rate": 3.5061728395061736e-06, |
|
"loss": 0.2977, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 4.128395061728395, |
|
"grad_norm": 41.20042037963867, |
|
"learning_rate": 3.4864197530864204e-06, |
|
"loss": 0.2238, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 4.133333333333334, |
|
"grad_norm": 1.0883228778839111, |
|
"learning_rate": 3.4666666666666672e-06, |
|
"loss": 0.0693, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 4.1382716049382715, |
|
"grad_norm": 0.03349454700946808, |
|
"learning_rate": 3.446913580246914e-06, |
|
"loss": 0.1569, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 4.14320987654321, |
|
"grad_norm": 18.927202224731445, |
|
"learning_rate": 3.427160493827161e-06, |
|
"loss": 0.2259, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 4.148148148148148, |
|
"grad_norm": 41.818538665771484, |
|
"learning_rate": 3.4074074074074077e-06, |
|
"loss": 0.2041, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.153086419753087, |
|
"grad_norm": 0.26372233033180237, |
|
"learning_rate": 3.3876543209876545e-06, |
|
"loss": 0.1225, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 4.158024691358024, |
|
"grad_norm": 45.54108810424805, |
|
"learning_rate": 3.3679012345679013e-06, |
|
"loss": 0.2084, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 4.162962962962963, |
|
"grad_norm": 0.014255751855671406, |
|
"learning_rate": 3.348148148148148e-06, |
|
"loss": 0.0153, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 4.167901234567902, |
|
"grad_norm": 0.8963614106178284, |
|
"learning_rate": 3.3283950617283953e-06, |
|
"loss": 0.0802, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 4.172839506172839, |
|
"grad_norm": 32.044166564941406, |
|
"learning_rate": 3.3086419753086426e-06, |
|
"loss": 0.1971, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 4.177777777777778, |
|
"grad_norm": 0.006651794072240591, |
|
"learning_rate": 3.2888888888888894e-06, |
|
"loss": 0.0366, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 4.182716049382716, |
|
"grad_norm": 1.5995298624038696, |
|
"learning_rate": 3.2691358024691362e-06, |
|
"loss": 0.2041, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 4.187654320987654, |
|
"grad_norm": 0.07189402729272842, |
|
"learning_rate": 3.249382716049383e-06, |
|
"loss": 0.1008, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 4.192592592592592, |
|
"grad_norm": 0.014369451440870762, |
|
"learning_rate": 3.22962962962963e-06, |
|
"loss": 0.1384, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 4.197530864197531, |
|
"grad_norm": 2.7586021423339844, |
|
"learning_rate": 3.2098765432098767e-06, |
|
"loss": 0.1149, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.2024691358024695, |
|
"grad_norm": 0.25027868151664734, |
|
"learning_rate": 3.1901234567901235e-06, |
|
"loss": 0.1085, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 4.207407407407407, |
|
"grad_norm": 21.993419647216797, |
|
"learning_rate": 3.1703703703703707e-06, |
|
"loss": 0.1086, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 4.212345679012346, |
|
"grad_norm": 108.14185333251953, |
|
"learning_rate": 3.1506172839506175e-06, |
|
"loss": 0.274, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 4.217283950617284, |
|
"grad_norm": 0.006499402225017548, |
|
"learning_rate": 3.1308641975308648e-06, |
|
"loss": 0.1101, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 4.222222222222222, |
|
"grad_norm": 25.40144920349121, |
|
"learning_rate": 3.1111111111111116e-06, |
|
"loss": 0.3034, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 4.22716049382716, |
|
"grad_norm": 0.04093475639820099, |
|
"learning_rate": 3.0913580246913584e-06, |
|
"loss": 0.1373, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 4.232098765432099, |
|
"grad_norm": 0.3943523168563843, |
|
"learning_rate": 3.0716049382716052e-06, |
|
"loss": 0.1059, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 4.237037037037037, |
|
"grad_norm": 34.58479309082031, |
|
"learning_rate": 3.051851851851852e-06, |
|
"loss": 0.1032, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 4.241975308641975, |
|
"grad_norm": 79.955810546875, |
|
"learning_rate": 3.032098765432099e-06, |
|
"loss": 0.1232, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 4.246913580246914, |
|
"grad_norm": 47.233482360839844, |
|
"learning_rate": 3.012345679012346e-06, |
|
"loss": 0.1098, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.2518518518518515, |
|
"grad_norm": 138.7650909423828, |
|
"learning_rate": 2.992592592592593e-06, |
|
"loss": 0.1554, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 4.25679012345679, |
|
"grad_norm": 34.47438430786133, |
|
"learning_rate": 2.9728395061728397e-06, |
|
"loss": 0.1909, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 4.261728395061729, |
|
"grad_norm": 0.10936783254146576, |
|
"learning_rate": 2.953086419753087e-06, |
|
"loss": 0.1279, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 4.266666666666667, |
|
"grad_norm": 66.3951416015625, |
|
"learning_rate": 2.9333333333333338e-06, |
|
"loss": 0.4878, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 4.271604938271605, |
|
"grad_norm": 0.7240855097770691, |
|
"learning_rate": 2.9135802469135806e-06, |
|
"loss": 0.171, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 4.276543209876543, |
|
"grad_norm": 84.10567474365234, |
|
"learning_rate": 2.8938271604938274e-06, |
|
"loss": 0.265, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 4.281481481481482, |
|
"grad_norm": 0.03191656991839409, |
|
"learning_rate": 2.874074074074074e-06, |
|
"loss": 0.3997, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 4.286419753086419, |
|
"grad_norm": 0.05699067562818527, |
|
"learning_rate": 2.854320987654321e-06, |
|
"loss": 0.0334, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 4.291358024691358, |
|
"grad_norm": 0.03787963092327118, |
|
"learning_rate": 2.8345679012345683e-06, |
|
"loss": 0.0026, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 4.296296296296296, |
|
"grad_norm": 0.32715028524398804, |
|
"learning_rate": 2.814814814814815e-06, |
|
"loss": 0.0851, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.3012345679012345, |
|
"grad_norm": 1.704313039779663, |
|
"learning_rate": 2.795061728395062e-06, |
|
"loss": 0.2827, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 4.306172839506173, |
|
"grad_norm": 35.010746002197266, |
|
"learning_rate": 2.7753086419753087e-06, |
|
"loss": 0.307, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 4.311111111111111, |
|
"grad_norm": 50.50590133666992, |
|
"learning_rate": 2.755555555555556e-06, |
|
"loss": 0.1594, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 4.3160493827160495, |
|
"grad_norm": 31.76420783996582, |
|
"learning_rate": 2.7358024691358028e-06, |
|
"loss": 0.1536, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 4.320987654320987, |
|
"grad_norm": 0.11124283820390701, |
|
"learning_rate": 2.7160493827160496e-06, |
|
"loss": 0.1278, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 4.325925925925926, |
|
"grad_norm": 29.00436019897461, |
|
"learning_rate": 2.6962962962962964e-06, |
|
"loss": 0.0417, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 4.330864197530865, |
|
"grad_norm": 0.002402759389951825, |
|
"learning_rate": 2.6765432098765436e-06, |
|
"loss": 0.077, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 4.335802469135802, |
|
"grad_norm": 5.55736780166626, |
|
"learning_rate": 2.6567901234567904e-06, |
|
"loss": 0.1247, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 4.340740740740741, |
|
"grad_norm": 0.024351775646209717, |
|
"learning_rate": 2.6370370370370373e-06, |
|
"loss": 0.1003, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 4.345679012345679, |
|
"grad_norm": 0.009600900113582611, |
|
"learning_rate": 2.617283950617284e-06, |
|
"loss": 0.1143, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.350617283950617, |
|
"grad_norm": 0.001896082772873342, |
|
"learning_rate": 2.597530864197531e-06, |
|
"loss": 0.0972, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 4.355555555555555, |
|
"grad_norm": 0.0376252606511116, |
|
"learning_rate": 2.577777777777778e-06, |
|
"loss": 0.1537, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 4.360493827160494, |
|
"grad_norm": 0.010516272857785225, |
|
"learning_rate": 2.558024691358025e-06, |
|
"loss": 0.0149, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 4.3654320987654325, |
|
"grad_norm": 30.120134353637695, |
|
"learning_rate": 2.5382716049382718e-06, |
|
"loss": 0.0042, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 4.37037037037037, |
|
"grad_norm": 0.48482951521873474, |
|
"learning_rate": 2.5185185185185186e-06, |
|
"loss": 0.1258, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 4.375308641975309, |
|
"grad_norm": 9.926421165466309, |
|
"learning_rate": 2.4987654320987654e-06, |
|
"loss": 0.1866, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 4.380246913580247, |
|
"grad_norm": 0.024937864392995834, |
|
"learning_rate": 2.4790123456790126e-06, |
|
"loss": 0.0231, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 4.385185185185185, |
|
"grad_norm": 0.40552499890327454, |
|
"learning_rate": 2.4592592592592594e-06, |
|
"loss": 0.0423, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 4.390123456790123, |
|
"grad_norm": 1.134421944618225, |
|
"learning_rate": 2.4395061728395063e-06, |
|
"loss": 0.1767, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 4.395061728395062, |
|
"grad_norm": 0.06691499054431915, |
|
"learning_rate": 2.419753086419753e-06, |
|
"loss": 0.2377, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 1.1887983083724976, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.1737, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 4.404938271604938, |
|
"grad_norm": 1.4004325866699219, |
|
"learning_rate": 2.380246913580247e-06, |
|
"loss": 0.162, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 4.409876543209877, |
|
"grad_norm": 5.580018520355225, |
|
"learning_rate": 2.360493827160494e-06, |
|
"loss": 0.251, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 4.4148148148148145, |
|
"grad_norm": 0.007224132306873798, |
|
"learning_rate": 2.3407407407407408e-06, |
|
"loss": 0.1454, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 4.419753086419753, |
|
"grad_norm": 154.13819885253906, |
|
"learning_rate": 2.3209876543209876e-06, |
|
"loss": 0.3889, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 4.424691358024692, |
|
"grad_norm": 32.98945236206055, |
|
"learning_rate": 2.301234567901235e-06, |
|
"loss": 0.2466, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 4.42962962962963, |
|
"grad_norm": 0.0013707876205444336, |
|
"learning_rate": 2.2814814814814816e-06, |
|
"loss": 0.2529, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 4.434567901234568, |
|
"grad_norm": 80.57937622070312, |
|
"learning_rate": 2.2617283950617284e-06, |
|
"loss": 0.1712, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 4.439506172839506, |
|
"grad_norm": 129.87698364257812, |
|
"learning_rate": 2.2419753086419753e-06, |
|
"loss": 0.1409, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 61.0521354675293, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.1277, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.449382716049382, |
|
"grad_norm": 0.05561920627951622, |
|
"learning_rate": 2.2024691358024693e-06, |
|
"loss": 0.1921, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 4.454320987654321, |
|
"grad_norm": 0.02089673839509487, |
|
"learning_rate": 2.182716049382716e-06, |
|
"loss": 0.0877, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 4.459259259259259, |
|
"grad_norm": 0.0033945185132324696, |
|
"learning_rate": 2.162962962962963e-06, |
|
"loss": 0.1127, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 4.4641975308641975, |
|
"grad_norm": 0.00884201843291521, |
|
"learning_rate": 2.1432098765432098e-06, |
|
"loss": 0.1677, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 4.469135802469136, |
|
"grad_norm": 16.309391021728516, |
|
"learning_rate": 2.123456790123457e-06, |
|
"loss": 0.1119, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 4.474074074074074, |
|
"grad_norm": 0.035716574639081955, |
|
"learning_rate": 2.103703703703704e-06, |
|
"loss": 0.068, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 4.4790123456790125, |
|
"grad_norm": 0.009720105677843094, |
|
"learning_rate": 2.0839506172839506e-06, |
|
"loss": 0.0933, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 4.48395061728395, |
|
"grad_norm": 0.2953310012817383, |
|
"learning_rate": 2.0641975308641974e-06, |
|
"loss": 0.0775, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 4.488888888888889, |
|
"grad_norm": 4.523210525512695, |
|
"learning_rate": 2.0444444444444447e-06, |
|
"loss": 0.2808, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 4.493827160493828, |
|
"grad_norm": 2.265265464782715, |
|
"learning_rate": 2.0246913580246915e-06, |
|
"loss": 0.0274, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 4.498765432098765, |
|
"grad_norm": 2.9944541454315186, |
|
"learning_rate": 2.0049382716049383e-06, |
|
"loss": 0.1563, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 4.503703703703704, |
|
"grad_norm": 15.32995891571045, |
|
"learning_rate": 1.985185185185185e-06, |
|
"loss": 0.0304, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 4.508641975308642, |
|
"grad_norm": 124.7613754272461, |
|
"learning_rate": 1.9654320987654324e-06, |
|
"loss": 0.2997, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 4.51358024691358, |
|
"grad_norm": 0.20713317394256592, |
|
"learning_rate": 1.945679012345679e-06, |
|
"loss": 0.1026, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 4.518518518518518, |
|
"grad_norm": 38.10224533081055, |
|
"learning_rate": 1.925925925925926e-06, |
|
"loss": 0.0983, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 4.523456790123457, |
|
"grad_norm": 0.042433250695466995, |
|
"learning_rate": 1.906172839506173e-06, |
|
"loss": 0.0291, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 4.528395061728395, |
|
"grad_norm": 3.1156327724456787, |
|
"learning_rate": 1.8864197530864198e-06, |
|
"loss": 0.0577, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 4.533333333333333, |
|
"grad_norm": 0.026819046586751938, |
|
"learning_rate": 1.8666666666666669e-06, |
|
"loss": 0.1211, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 4.538271604938272, |
|
"grad_norm": 0.4800088107585907, |
|
"learning_rate": 1.8469135802469137e-06, |
|
"loss": 0.0023, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 4.54320987654321, |
|
"grad_norm": 0.050341859459877014, |
|
"learning_rate": 1.8271604938271605e-06, |
|
"loss": 0.036, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.548148148148148, |
|
"grad_norm": 0.11272630095481873, |
|
"learning_rate": 1.8074074074074075e-06, |
|
"loss": 0.0335, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 4.553086419753086, |
|
"grad_norm": 44.774688720703125, |
|
"learning_rate": 1.7876543209876545e-06, |
|
"loss": 0.1142, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 4.558024691358025, |
|
"grad_norm": 0.0022994689643383026, |
|
"learning_rate": 1.7679012345679014e-06, |
|
"loss": 0.0641, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 4.562962962962963, |
|
"grad_norm": 0.9468904733657837, |
|
"learning_rate": 1.7481481481481482e-06, |
|
"loss": 0.1574, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 4.567901234567901, |
|
"grad_norm": 0.022345565259456635, |
|
"learning_rate": 1.7283950617283952e-06, |
|
"loss": 0.1025, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 4.57283950617284, |
|
"grad_norm": 12.888065338134766, |
|
"learning_rate": 1.7086419753086422e-06, |
|
"loss": 0.1864, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 4.5777777777777775, |
|
"grad_norm": 94.58697509765625, |
|
"learning_rate": 1.688888888888889e-06, |
|
"loss": 0.1861, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 4.582716049382716, |
|
"grad_norm": 66.434326171875, |
|
"learning_rate": 1.6691358024691359e-06, |
|
"loss": 0.0646, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 4.587654320987655, |
|
"grad_norm": 0.005768710281699896, |
|
"learning_rate": 1.6493827160493827e-06, |
|
"loss": 0.1047, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 4.592592592592593, |
|
"grad_norm": 0.08475484699010849, |
|
"learning_rate": 1.62962962962963e-06, |
|
"loss": 0.1706, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.597530864197531, |
|
"grad_norm": 0.871222972869873, |
|
"learning_rate": 1.6098765432098767e-06, |
|
"loss": 0.0384, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 4.602469135802469, |
|
"grad_norm": 35.023040771484375, |
|
"learning_rate": 1.5901234567901235e-06, |
|
"loss": 0.1562, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 4.607407407407408, |
|
"grad_norm": 0.08310205489397049, |
|
"learning_rate": 1.5703703703703704e-06, |
|
"loss": 0.1636, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 4.612345679012345, |
|
"grad_norm": 0.008625690825283527, |
|
"learning_rate": 1.5506172839506172e-06, |
|
"loss": 0.1299, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 4.617283950617284, |
|
"grad_norm": 0.07079397141933441, |
|
"learning_rate": 1.5308641975308644e-06, |
|
"loss": 0.2401, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 4.622222222222222, |
|
"grad_norm": 0.002696413081139326, |
|
"learning_rate": 1.5111111111111112e-06, |
|
"loss": 0.1377, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 4.62716049382716, |
|
"grad_norm": 52.69441604614258, |
|
"learning_rate": 1.491358024691358e-06, |
|
"loss": 0.3121, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 4.632098765432099, |
|
"grad_norm": 192.6532745361328, |
|
"learning_rate": 1.4716049382716049e-06, |
|
"loss": 0.0441, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 4.637037037037037, |
|
"grad_norm": 249.43846130371094, |
|
"learning_rate": 1.451851851851852e-06, |
|
"loss": 0.299, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 4.6419753086419755, |
|
"grad_norm": 0.05828845128417015, |
|
"learning_rate": 1.432098765432099e-06, |
|
"loss": 0.0683, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 4.646913580246913, |
|
"grad_norm": 176.3085174560547, |
|
"learning_rate": 1.4123456790123457e-06, |
|
"loss": 0.0396, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 4.651851851851852, |
|
"grad_norm": 3.0951056480407715, |
|
"learning_rate": 1.3925925925925925e-06, |
|
"loss": 0.0874, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 4.6567901234567906, |
|
"grad_norm": 1.2149375677108765, |
|
"learning_rate": 1.3728395061728398e-06, |
|
"loss": 0.1504, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 4.661728395061728, |
|
"grad_norm": 0.05385606735944748, |
|
"learning_rate": 1.3530864197530866e-06, |
|
"loss": 0.0918, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 4.666666666666667, |
|
"grad_norm": 11.512873649597168, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.0947, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 4.671604938271605, |
|
"grad_norm": 0.024780087172985077, |
|
"learning_rate": 1.3135802469135802e-06, |
|
"loss": 0.0753, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 4.676543209876543, |
|
"grad_norm": 0.2996337115764618, |
|
"learning_rate": 1.2938271604938275e-06, |
|
"loss": 0.171, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 4.681481481481481, |
|
"grad_norm": 0.09016973525285721, |
|
"learning_rate": 1.2740740740740743e-06, |
|
"loss": 0.0803, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 4.68641975308642, |
|
"grad_norm": 0.24141840636730194, |
|
"learning_rate": 1.254320987654321e-06, |
|
"loss": 0.1636, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 4.6913580246913575, |
|
"grad_norm": 0.0026981073897331953, |
|
"learning_rate": 1.234567901234568e-06, |
|
"loss": 0.1209, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.696296296296296, |
|
"grad_norm": 0.0028422910254448652, |
|
"learning_rate": 1.214814814814815e-06, |
|
"loss": 0.0334, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 4.701234567901235, |
|
"grad_norm": 100.68513488769531, |
|
"learning_rate": 1.1950617283950618e-06, |
|
"loss": 0.3581, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 4.706172839506173, |
|
"grad_norm": 0.001111358986236155, |
|
"learning_rate": 1.1753086419753088e-06, |
|
"loss": 0.0474, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 4.711111111111111, |
|
"grad_norm": 60.36039733886719, |
|
"learning_rate": 1.1555555555555556e-06, |
|
"loss": 0.4299, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 4.716049382716049, |
|
"grad_norm": 0.0019079376943409443, |
|
"learning_rate": 1.1358024691358026e-06, |
|
"loss": 0.0945, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 4.720987654320988, |
|
"grad_norm": 0.46460771560668945, |
|
"learning_rate": 1.1160493827160494e-06, |
|
"loss": 0.312, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 4.725925925925926, |
|
"grad_norm": 1.906554937362671, |
|
"learning_rate": 1.0962962962962965e-06, |
|
"loss": 0.0951, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 4.730864197530864, |
|
"grad_norm": 1.5617965459823608, |
|
"learning_rate": 1.0765432098765433e-06, |
|
"loss": 0.1714, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 4.735802469135803, |
|
"grad_norm": 5.5619893074035645, |
|
"learning_rate": 1.0567901234567903e-06, |
|
"loss": 0.008, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 4.7407407407407405, |
|
"grad_norm": 0.01501123234629631, |
|
"learning_rate": 1.0370370370370371e-06, |
|
"loss": 0.4485, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.745679012345679, |
|
"grad_norm": 22.644359588623047, |
|
"learning_rate": 1.0172839506172842e-06, |
|
"loss": 0.0708, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 4.750617283950618, |
|
"grad_norm": 0.0668986439704895, |
|
"learning_rate": 9.97530864197531e-07, |
|
"loss": 0.2169, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 4.7555555555555555, |
|
"grad_norm": 0.5103172063827515, |
|
"learning_rate": 9.77777777777778e-07, |
|
"loss": 0.1709, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 4.760493827160494, |
|
"grad_norm": 63.763214111328125, |
|
"learning_rate": 9.580246913580248e-07, |
|
"loss": 0.3668, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 4.765432098765432, |
|
"grad_norm": 0.013139153830707073, |
|
"learning_rate": 9.382716049382717e-07, |
|
"loss": 0.0545, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 4.770370370370371, |
|
"grad_norm": 0.009220450185239315, |
|
"learning_rate": 9.185185185185185e-07, |
|
"loss": 0.1341, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 4.775308641975308, |
|
"grad_norm": 0.03191829100251198, |
|
"learning_rate": 8.987654320987656e-07, |
|
"loss": 0.1266, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 4.780246913580247, |
|
"grad_norm": 37.74824523925781, |
|
"learning_rate": 8.790123456790124e-07, |
|
"loss": 0.1043, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 4.785185185185185, |
|
"grad_norm": 0.002283359644934535, |
|
"learning_rate": 8.592592592592593e-07, |
|
"loss": 0.033, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 4.790123456790123, |
|
"grad_norm": 0.457742840051651, |
|
"learning_rate": 8.395061728395062e-07, |
|
"loss": 0.1186, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 4.795061728395062, |
|
"grad_norm": 0.031063128262758255, |
|
"learning_rate": 8.197530864197531e-07, |
|
"loss": 0.1125, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.012924841605126858, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.0156, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 4.8049382716049385, |
|
"grad_norm": 0.11566291004419327, |
|
"learning_rate": 7.802469135802469e-07, |
|
"loss": 0.1286, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 4.809876543209876, |
|
"grad_norm": 0.0004868748364970088, |
|
"learning_rate": 7.604938271604939e-07, |
|
"loss": 0.0012, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 4.814814814814815, |
|
"grad_norm": 81.78207397460938, |
|
"learning_rate": 7.407407407407407e-07, |
|
"loss": 0.1942, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 4.8197530864197535, |
|
"grad_norm": 35.868988037109375, |
|
"learning_rate": 7.209876543209878e-07, |
|
"loss": 0.0298, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 4.824691358024691, |
|
"grad_norm": 147.98873901367188, |
|
"learning_rate": 7.012345679012346e-07, |
|
"loss": 0.2483, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 4.82962962962963, |
|
"grad_norm": 0.013545212335884571, |
|
"learning_rate": 6.814814814814816e-07, |
|
"loss": 0.1292, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 4.834567901234568, |
|
"grad_norm": 0.09124937653541565, |
|
"learning_rate": 6.617283950617284e-07, |
|
"loss": 0.0697, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 4.839506172839506, |
|
"grad_norm": 0.005743750836700201, |
|
"learning_rate": 6.419753086419754e-07, |
|
"loss": 0.0975, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 4.844444444444444, |
|
"grad_norm": 60.90267562866211, |
|
"learning_rate": 6.222222222222223e-07, |
|
"loss": 0.0291, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 4.849382716049383, |
|
"grad_norm": 0.005148892290890217, |
|
"learning_rate": 6.024691358024692e-07, |
|
"loss": 0.0886, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 4.8543209876543205, |
|
"grad_norm": 134.9575958251953, |
|
"learning_rate": 5.827160493827161e-07, |
|
"loss": 0.2035, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 4.859259259259259, |
|
"grad_norm": 3.4503517150878906, |
|
"learning_rate": 5.62962962962963e-07, |
|
"loss": 0.1403, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 4.864197530864198, |
|
"grad_norm": 0.5870628356933594, |
|
"learning_rate": 5.432098765432099e-07, |
|
"loss": 0.0645, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 4.869135802469136, |
|
"grad_norm": 2.804311513900757, |
|
"learning_rate": 5.234567901234569e-07, |
|
"loss": 0.0234, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 4.874074074074074, |
|
"grad_norm": 0.07958123087882996, |
|
"learning_rate": 5.037037037037038e-07, |
|
"loss": 0.0569, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 4.879012345679012, |
|
"grad_norm": 0.013184885494410992, |
|
"learning_rate": 4.839506172839507e-07, |
|
"loss": 0.0514, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 4.883950617283951, |
|
"grad_norm": 0.04747697710990906, |
|
"learning_rate": 4.6419753086419757e-07, |
|
"loss": 0.0002, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"grad_norm": 3.1284358501434326, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 0.0066, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 4.893827160493827, |
|
"grad_norm": 0.6298085451126099, |
|
"learning_rate": 4.246913580246914e-07, |
|
"loss": 0.0583, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 4.898765432098766, |
|
"grad_norm": 0.012326150201261044, |
|
"learning_rate": 4.0493827160493833e-07, |
|
"loss": 0.0099, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 4.9037037037037035, |
|
"grad_norm": 2.6905531883239746, |
|
"learning_rate": 3.8518518518518525e-07, |
|
"loss": 0.1259, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 4.908641975308642, |
|
"grad_norm": 120.47846221923828, |
|
"learning_rate": 3.6543209876543217e-07, |
|
"loss": 0.1349, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 4.91358024691358, |
|
"grad_norm": 0.0025870108511298895, |
|
"learning_rate": 3.45679012345679e-07, |
|
"loss": 0.1368, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 4.9185185185185185, |
|
"grad_norm": 0.8233745694160461, |
|
"learning_rate": 3.259259259259259e-07, |
|
"loss": 0.1123, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 4.923456790123457, |
|
"grad_norm": 0.0019518863409757614, |
|
"learning_rate": 3.061728395061729e-07, |
|
"loss": 0.285, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 4.928395061728395, |
|
"grad_norm": 0.3376046121120453, |
|
"learning_rate": 2.864197530864198e-07, |
|
"loss": 0.4414, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 4.933333333333334, |
|
"grad_norm": 0.006334675010293722, |
|
"learning_rate": 2.666666666666667e-07, |
|
"loss": 0.0841, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 4.938271604938271, |
|
"grad_norm": 0.002394834766164422, |
|
"learning_rate": 2.469135802469136e-07, |
|
"loss": 0.0574, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.94320987654321, |
|
"grad_norm": 0.0032636672258377075, |
|
"learning_rate": 2.2716049382716051e-07, |
|
"loss": 0.2294, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 4.948148148148148, |
|
"grad_norm": 113.65235137939453, |
|
"learning_rate": 2.074074074074074e-07, |
|
"loss": 0.1907, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 4.953086419753086, |
|
"grad_norm": 0.006610922981053591, |
|
"learning_rate": 1.8765432098765433e-07, |
|
"loss": 0.2999, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 4.958024691358025, |
|
"grad_norm": 21.574785232543945, |
|
"learning_rate": 1.6790123456790125e-07, |
|
"loss": 0.1753, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 4.962962962962963, |
|
"grad_norm": 0.019113583490252495, |
|
"learning_rate": 1.4814814814814817e-07, |
|
"loss": 0.1539, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 4.9679012345679014, |
|
"grad_norm": 142.03480529785156, |
|
"learning_rate": 1.2839506172839507e-07, |
|
"loss": 0.201, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 4.972839506172839, |
|
"grad_norm": 0.005930395796895027, |
|
"learning_rate": 1.0864197530864197e-07, |
|
"loss": 0.3736, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 4.977777777777778, |
|
"grad_norm": 0.011048276908695698, |
|
"learning_rate": 8.88888888888889e-08, |
|
"loss": 0.1982, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 4.9827160493827165, |
|
"grad_norm": 0.11679836362600327, |
|
"learning_rate": 6.913580246913582e-08, |
|
"loss": 0.2382, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 4.987654320987654, |
|
"grad_norm": 114.29679870605469, |
|
"learning_rate": 4.938271604938272e-08, |
|
"loss": 0.5543, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 4.992592592592593, |
|
"grad_norm": 0.07527362555265427, |
|
"learning_rate": 2.9629629629629632e-08, |
|
"loss": 0.0568, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 4.997530864197531, |
|
"grad_norm": 1.4482346773147583, |
|
"learning_rate": 9.876543209876544e-09, |
|
"loss": 0.2086, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9862962962962963, |
|
"eval_loss": 0.060996126383543015, |
|
"eval_runtime": 32.7337, |
|
"eval_samples_per_second": 164.968, |
|
"eval_steps_per_second": 20.621, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 10125, |
|
"total_flos": 2.013785167306752e+18, |
|
"train_loss": 0.2160879238260289, |
|
"train_runtime": 1485.1852, |
|
"train_samples_per_second": 54.539, |
|
"train_steps_per_second": 6.817 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10125, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.013785167306752e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|