diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4933 @@ +{ + "best_metric": 0.9518218623481781, + "best_model_checkpoint": "swin-tiny-patch4-window7-224-hotel_images_classifier_v2/checkpoint-3470", + "epoch": 4.9946023749550195, + "eval_steps": 500, + "global_step": 3470, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 4.747580528259277, + "learning_rate": 7.204610951008646e-07, + "loss": 1.9842, + "step": 5 + }, + { + "epoch": 0.01, + "grad_norm": 5.086691379547119, + "learning_rate": 1.4409221902017292e-06, + "loss": 1.9811, + "step": 10 + }, + { + "epoch": 0.02, + "grad_norm": 4.6829681396484375, + "learning_rate": 2.161383285302594e-06, + "loss": 1.9832, + "step": 15 + }, + { + "epoch": 0.03, + "grad_norm": 4.578265190124512, + "learning_rate": 2.8818443804034585e-06, + "loss": 1.9501, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 4.904880523681641, + "learning_rate": 3.602305475504323e-06, + "loss": 1.917, + "step": 25 + }, + { + "epoch": 0.04, + "grad_norm": 5.561208248138428, + "learning_rate": 4.322766570605188e-06, + "loss": 1.868, + "step": 30 + }, + { + "epoch": 0.05, + "grad_norm": 4.837254047393799, + "learning_rate": 5.043227665706052e-06, + "loss": 1.8244, + "step": 35 + }, + { + "epoch": 0.06, + "grad_norm": 4.515142440795898, + "learning_rate": 5.763688760806917e-06, + "loss": 1.7756, + "step": 40 + }, + { + "epoch": 0.06, + "grad_norm": 6.021132946014404, + "learning_rate": 6.484149855907781e-06, + "loss": 1.7231, + "step": 45 + }, + { + "epoch": 0.07, + "grad_norm": 6.325433254241943, + "learning_rate": 7.204610951008646e-06, + "loss": 1.6389, + "step": 50 + }, + { + "epoch": 0.08, + "grad_norm": 6.28499698638916, + "learning_rate": 7.92507204610951e-06, + "loss": 1.5651, + "step": 55 + }, + { + "epoch": 0.09, + "grad_norm": 4.940927982330322, + "learning_rate": 8.645533141210376e-06, + "loss": 1.5001, + "step": 60 + }, + { + "epoch": 0.09, + "grad_norm": 4.624394416809082, + "learning_rate": 9.36599423631124e-06, + "loss": 1.3998, + "step": 65 + }, + { + "epoch": 0.1, + "grad_norm": 4.9989118576049805, + "learning_rate": 1.0086455331412104e-05, + "loss": 1.2913, + "step": 70 + }, + { + "epoch": 0.11, + "grad_norm": 6.203399658203125, + "learning_rate": 1.0806916426512968e-05, + "loss": 1.1994, + "step": 75 + }, + { + "epoch": 0.12, + "grad_norm": 4.825283050537109, + "learning_rate": 1.1527377521613834e-05, + "loss": 1.0557, + "step": 80 + }, + { + "epoch": 0.12, + "grad_norm": 6.639811992645264, + "learning_rate": 1.2247838616714698e-05, + "loss": 1.0077, + "step": 85 + }, + { + "epoch": 0.13, + "grad_norm": 5.167383670806885, + "learning_rate": 1.2968299711815562e-05, + "loss": 0.9303, + "step": 90 + }, + { + "epoch": 0.14, + "grad_norm": 6.177196979522705, + "learning_rate": 1.3688760806916426e-05, + "loss": 0.7967, + "step": 95 + }, + { + "epoch": 0.14, + "grad_norm": 5.489429950714111, + "learning_rate": 1.4409221902017291e-05, + "loss": 0.7269, + "step": 100 + }, + { + "epoch": 0.15, + "grad_norm": 5.555374622344971, + "learning_rate": 1.5129682997118155e-05, + "loss": 0.7176, + "step": 105 + }, + { + "epoch": 0.16, + "grad_norm": 11.141295433044434, + "learning_rate": 1.585014409221902e-05, + "loss": 0.6796, + "step": 110 + }, + { + "epoch": 0.17, + "grad_norm": 7.412641525268555, + "learning_rate": 1.6570605187319883e-05, + "loss": 0.6028, + "step": 115 + }, + { + "epoch": 0.17, + "grad_norm": 6.904923439025879, + "learning_rate": 1.7291066282420752e-05, + "loss": 0.6348, + "step": 120 + }, + { + "epoch": 0.18, + "grad_norm": 11.165042877197266, + "learning_rate": 1.8011527377521615e-05, + "loss": 0.5814, + "step": 125 + }, + { + "epoch": 0.19, + "grad_norm": 7.367648124694824, + "learning_rate": 1.873198847262248e-05, + "loss": 0.5858, + "step": 130 + }, + { + "epoch": 0.19, + "grad_norm": 7.115988254547119, + "learning_rate": 1.9452449567723343e-05, + "loss": 0.5316, + "step": 135 + }, + { + "epoch": 0.2, + "grad_norm": 6.44365119934082, + "learning_rate": 2.017291066282421e-05, + "loss": 0.5049, + "step": 140 + }, + { + "epoch": 0.21, + "grad_norm": 7.195384502410889, + "learning_rate": 2.0893371757925074e-05, + "loss": 0.5511, + "step": 145 + }, + { + "epoch": 0.22, + "grad_norm": 17.6825008392334, + "learning_rate": 2.1613832853025936e-05, + "loss": 0.5124, + "step": 150 + }, + { + "epoch": 0.22, + "grad_norm": 7.656848907470703, + "learning_rate": 2.2334293948126802e-05, + "loss": 0.4794, + "step": 155 + }, + { + "epoch": 0.23, + "grad_norm": 7.221956729888916, + "learning_rate": 2.3054755043227668e-05, + "loss": 0.4773, + "step": 160 + }, + { + "epoch": 0.24, + "grad_norm": 16.787612915039062, + "learning_rate": 2.3775216138328533e-05, + "loss": 0.4746, + "step": 165 + }, + { + "epoch": 0.24, + "grad_norm": 7.123960494995117, + "learning_rate": 2.4495677233429396e-05, + "loss": 0.4734, + "step": 170 + }, + { + "epoch": 0.25, + "grad_norm": 7.737701416015625, + "learning_rate": 2.5216138328530258e-05, + "loss": 0.4613, + "step": 175 + }, + { + "epoch": 0.26, + "grad_norm": 7.011651515960693, + "learning_rate": 2.5936599423631124e-05, + "loss": 0.4886, + "step": 180 + }, + { + "epoch": 0.27, + "grad_norm": 8.571374893188477, + "learning_rate": 2.6657060518731993e-05, + "loss": 0.4702, + "step": 185 + }, + { + "epoch": 0.27, + "grad_norm": 7.675159454345703, + "learning_rate": 2.737752161383285e-05, + "loss": 0.5365, + "step": 190 + }, + { + "epoch": 0.28, + "grad_norm": 5.9088239669799805, + "learning_rate": 2.8097982708933717e-05, + "loss": 0.3823, + "step": 195 + }, + { + "epoch": 0.29, + "grad_norm": 5.840087413787842, + "learning_rate": 2.8818443804034583e-05, + "loss": 0.408, + "step": 200 + }, + { + "epoch": 0.3, + "grad_norm": 6.880429267883301, + "learning_rate": 2.953890489913545e-05, + "loss": 0.4787, + "step": 205 + }, + { + "epoch": 0.3, + "grad_norm": 5.355893611907959, + "learning_rate": 3.025936599423631e-05, + "loss": 0.377, + "step": 210 + }, + { + "epoch": 0.31, + "grad_norm": 7.921416759490967, + "learning_rate": 3.097982708933718e-05, + "loss": 0.4504, + "step": 215 + }, + { + "epoch": 0.32, + "grad_norm": 5.329736232757568, + "learning_rate": 3.170028818443804e-05, + "loss": 0.4056, + "step": 220 + }, + { + "epoch": 0.32, + "grad_norm": 5.699007034301758, + "learning_rate": 3.242074927953891e-05, + "loss": 0.395, + "step": 225 + }, + { + "epoch": 0.33, + "grad_norm": 10.29712963104248, + "learning_rate": 3.314121037463977e-05, + "loss": 0.433, + "step": 230 + }, + { + "epoch": 0.34, + "grad_norm": 8.653733253479004, + "learning_rate": 3.3861671469740636e-05, + "loss": 0.3733, + "step": 235 + }, + { + "epoch": 0.35, + "grad_norm": 4.476428508758545, + "learning_rate": 3.4582132564841505e-05, + "loss": 0.3758, + "step": 240 + }, + { + "epoch": 0.35, + "grad_norm": 7.4768571853637695, + "learning_rate": 3.530259365994236e-05, + "loss": 0.4628, + "step": 245 + }, + { + "epoch": 0.36, + "grad_norm": 7.058348655700684, + "learning_rate": 3.602305475504323e-05, + "loss": 0.3855, + "step": 250 + }, + { + "epoch": 0.37, + "grad_norm": 7.238952159881592, + "learning_rate": 3.674351585014409e-05, + "loss": 0.389, + "step": 255 + }, + { + "epoch": 0.37, + "grad_norm": 7.494441032409668, + "learning_rate": 3.746397694524496e-05, + "loss": 0.4285, + "step": 260 + }, + { + "epoch": 0.38, + "grad_norm": 6.927433490753174, + "learning_rate": 3.818443804034582e-05, + "loss": 0.357, + "step": 265 + }, + { + "epoch": 0.39, + "grad_norm": 8.478387832641602, + "learning_rate": 3.8904899135446685e-05, + "loss": 0.3612, + "step": 270 + }, + { + "epoch": 0.4, + "grad_norm": 9.04246997833252, + "learning_rate": 3.9625360230547554e-05, + "loss": 0.3068, + "step": 275 + }, + { + "epoch": 0.4, + "grad_norm": 7.052452087402344, + "learning_rate": 4.034582132564842e-05, + "loss": 0.3388, + "step": 280 + }, + { + "epoch": 0.41, + "grad_norm": 6.3666510581970215, + "learning_rate": 4.106628242074928e-05, + "loss": 0.398, + "step": 285 + }, + { + "epoch": 0.42, + "grad_norm": 7.982662200927734, + "learning_rate": 4.178674351585015e-05, + "loss": 0.3796, + "step": 290 + }, + { + "epoch": 0.42, + "grad_norm": 6.020977973937988, + "learning_rate": 4.250720461095101e-05, + "loss": 0.4266, + "step": 295 + }, + { + "epoch": 0.43, + "grad_norm": 7.010791778564453, + "learning_rate": 4.322766570605187e-05, + "loss": 0.4219, + "step": 300 + }, + { + "epoch": 0.44, + "grad_norm": 5.0191216468811035, + "learning_rate": 4.394812680115274e-05, + "loss": 0.3489, + "step": 305 + }, + { + "epoch": 0.45, + "grad_norm": 5.907705307006836, + "learning_rate": 4.4668587896253604e-05, + "loss": 0.3821, + "step": 310 + }, + { + "epoch": 0.45, + "grad_norm": 6.560094356536865, + "learning_rate": 4.538904899135447e-05, + "loss": 0.3874, + "step": 315 + }, + { + "epoch": 0.46, + "grad_norm": 6.429476737976074, + "learning_rate": 4.6109510086455335e-05, + "loss": 0.4096, + "step": 320 + }, + { + "epoch": 0.47, + "grad_norm": 7.065363883972168, + "learning_rate": 4.68299711815562e-05, + "loss": 0.3778, + "step": 325 + }, + { + "epoch": 0.47, + "grad_norm": 7.916449069976807, + "learning_rate": 4.7550432276657067e-05, + "loss": 0.3491, + "step": 330 + }, + { + "epoch": 0.48, + "grad_norm": 5.4434709548950195, + "learning_rate": 4.827089337175792e-05, + "loss": 0.3645, + "step": 335 + }, + { + "epoch": 0.49, + "grad_norm": 6.34391975402832, + "learning_rate": 4.899135446685879e-05, + "loss": 0.3778, + "step": 340 + }, + { + "epoch": 0.5, + "grad_norm": 6.070534706115723, + "learning_rate": 4.971181556195966e-05, + "loss": 0.4307, + "step": 345 + }, + { + "epoch": 0.5, + "grad_norm": 8.251782417297363, + "learning_rate": 4.995196926032661e-05, + "loss": 0.3964, + "step": 350 + }, + { + "epoch": 0.51, + "grad_norm": 5.293612957000732, + "learning_rate": 4.9871918027537626e-05, + "loss": 0.3379, + "step": 355 + }, + { + "epoch": 0.52, + "grad_norm": 7.164644241333008, + "learning_rate": 4.979186679474864e-05, + "loss": 0.3969, + "step": 360 + }, + { + "epoch": 0.53, + "grad_norm": 4.961303234100342, + "learning_rate": 4.971181556195966e-05, + "loss": 0.3697, + "step": 365 + }, + { + "epoch": 0.53, + "grad_norm": 6.196359157562256, + "learning_rate": 4.9631764329170674e-05, + "loss": 0.3448, + "step": 370 + }, + { + "epoch": 0.54, + "grad_norm": 5.836663722991943, + "learning_rate": 4.955171309638169e-05, + "loss": 0.3939, + "step": 375 + }, + { + "epoch": 0.55, + "grad_norm": 5.845285892486572, + "learning_rate": 4.94716618635927e-05, + "loss": 0.356, + "step": 380 + }, + { + "epoch": 0.55, + "grad_norm": 3.937917947769165, + "learning_rate": 4.9391610630803715e-05, + "loss": 0.3033, + "step": 385 + }, + { + "epoch": 0.56, + "grad_norm": 6.883370399475098, + "learning_rate": 4.9311559398014736e-05, + "loss": 0.415, + "step": 390 + }, + { + "epoch": 0.57, + "grad_norm": 6.164604663848877, + "learning_rate": 4.923150816522575e-05, + "loss": 0.3794, + "step": 395 + }, + { + "epoch": 0.58, + "grad_norm": 10.906937599182129, + "learning_rate": 4.9151456932436764e-05, + "loss": 0.3967, + "step": 400 + }, + { + "epoch": 0.58, + "grad_norm": 3.428271532058716, + "learning_rate": 4.907140569964778e-05, + "loss": 0.3312, + "step": 405 + }, + { + "epoch": 0.59, + "grad_norm": 7.288811206817627, + "learning_rate": 4.899135446685879e-05, + "loss": 0.3681, + "step": 410 + }, + { + "epoch": 0.6, + "grad_norm": 8.319820404052734, + "learning_rate": 4.8911303234069805e-05, + "loss": 0.3265, + "step": 415 + }, + { + "epoch": 0.6, + "grad_norm": 6.9813232421875, + "learning_rate": 4.883125200128082e-05, + "loss": 0.3233, + "step": 420 + }, + { + "epoch": 0.61, + "grad_norm": 5.874197959899902, + "learning_rate": 4.875120076849184e-05, + "loss": 0.3689, + "step": 425 + }, + { + "epoch": 0.62, + "grad_norm": 5.609955787658691, + "learning_rate": 4.867114953570285e-05, + "loss": 0.4213, + "step": 430 + }, + { + "epoch": 0.63, + "grad_norm": 5.877446174621582, + "learning_rate": 4.859109830291387e-05, + "loss": 0.419, + "step": 435 + }, + { + "epoch": 0.63, + "grad_norm": 6.771636962890625, + "learning_rate": 4.851104707012488e-05, + "loss": 0.3529, + "step": 440 + }, + { + "epoch": 0.64, + "grad_norm": 9.461392402648926, + "learning_rate": 4.8430995837335894e-05, + "loss": 0.3725, + "step": 445 + }, + { + "epoch": 0.65, + "grad_norm": 5.563230991363525, + "learning_rate": 4.835094460454691e-05, + "loss": 0.3675, + "step": 450 + }, + { + "epoch": 0.65, + "grad_norm": 6.4672465324401855, + "learning_rate": 4.827089337175792e-05, + "loss": 0.3156, + "step": 455 + }, + { + "epoch": 0.66, + "grad_norm": 3.8499579429626465, + "learning_rate": 4.819084213896894e-05, + "loss": 0.322, + "step": 460 + }, + { + "epoch": 0.67, + "grad_norm": 5.031641960144043, + "learning_rate": 4.8110790906179956e-05, + "loss": 0.3936, + "step": 465 + }, + { + "epoch": 0.68, + "grad_norm": 5.684152603149414, + "learning_rate": 4.803073967339097e-05, + "loss": 0.3898, + "step": 470 + }, + { + "epoch": 0.68, + "grad_norm": 5.913132190704346, + "learning_rate": 4.7950688440601984e-05, + "loss": 0.3331, + "step": 475 + }, + { + "epoch": 0.69, + "grad_norm": 5.199942588806152, + "learning_rate": 4.7870637207813005e-05, + "loss": 0.3303, + "step": 480 + }, + { + "epoch": 0.7, + "grad_norm": 3.992769956588745, + "learning_rate": 4.779058597502402e-05, + "loss": 0.3422, + "step": 485 + }, + { + "epoch": 0.71, + "grad_norm": 6.158402919769287, + "learning_rate": 4.771053474223503e-05, + "loss": 0.3152, + "step": 490 + }, + { + "epoch": 0.71, + "grad_norm": 4.361845016479492, + "learning_rate": 4.763048350944605e-05, + "loss": 0.3057, + "step": 495 + }, + { + "epoch": 0.72, + "grad_norm": 4.663881301879883, + "learning_rate": 4.7550432276657067e-05, + "loss": 0.3461, + "step": 500 + }, + { + "epoch": 0.73, + "grad_norm": 7.09819221496582, + "learning_rate": 4.747038104386808e-05, + "loss": 0.3675, + "step": 505 + }, + { + "epoch": 0.73, + "grad_norm": 5.0237956047058105, + "learning_rate": 4.7390329811079094e-05, + "loss": 0.3274, + "step": 510 + }, + { + "epoch": 0.74, + "grad_norm": 5.483020782470703, + "learning_rate": 4.731027857829011e-05, + "loss": 0.3055, + "step": 515 + }, + { + "epoch": 0.75, + "grad_norm": 4.972677707672119, + "learning_rate": 4.723022734550112e-05, + "loss": 0.3236, + "step": 520 + }, + { + "epoch": 0.76, + "grad_norm": 7.017973899841309, + "learning_rate": 4.7150176112712136e-05, + "loss": 0.3543, + "step": 525 + }, + { + "epoch": 0.76, + "grad_norm": 8.219503402709961, + "learning_rate": 4.7070124879923156e-05, + "loss": 0.3791, + "step": 530 + }, + { + "epoch": 0.77, + "grad_norm": 5.836394309997559, + "learning_rate": 4.699007364713417e-05, + "loss": 0.2882, + "step": 535 + }, + { + "epoch": 0.78, + "grad_norm": 6.394532680511475, + "learning_rate": 4.6910022414345184e-05, + "loss": 0.3741, + "step": 540 + }, + { + "epoch": 0.78, + "grad_norm": 5.4533843994140625, + "learning_rate": 4.68299711815562e-05, + "loss": 0.3852, + "step": 545 + }, + { + "epoch": 0.79, + "grad_norm": 6.065195083618164, + "learning_rate": 4.674991994876721e-05, + "loss": 0.3589, + "step": 550 + }, + { + "epoch": 0.8, + "grad_norm": 4.000141620635986, + "learning_rate": 4.6669868715978225e-05, + "loss": 0.2865, + "step": 555 + }, + { + "epoch": 0.81, + "grad_norm": 6.05587100982666, + "learning_rate": 4.658981748318924e-05, + "loss": 0.316, + "step": 560 + }, + { + "epoch": 0.81, + "grad_norm": 5.1732892990112305, + "learning_rate": 4.650976625040026e-05, + "loss": 0.2768, + "step": 565 + }, + { + "epoch": 0.82, + "grad_norm": 4.745729446411133, + "learning_rate": 4.642971501761127e-05, + "loss": 0.2796, + "step": 570 + }, + { + "epoch": 0.83, + "grad_norm": 4.964130878448486, + "learning_rate": 4.634966378482229e-05, + "loss": 0.3268, + "step": 575 + }, + { + "epoch": 0.83, + "grad_norm": 5.333953857421875, + "learning_rate": 4.62696125520333e-05, + "loss": 0.321, + "step": 580 + }, + { + "epoch": 0.84, + "grad_norm": 4.004300117492676, + "learning_rate": 4.6189561319244315e-05, + "loss": 0.3371, + "step": 585 + }, + { + "epoch": 0.85, + "grad_norm": 6.5950751304626465, + "learning_rate": 4.6109510086455335e-05, + "loss": 0.3028, + "step": 590 + }, + { + "epoch": 0.86, + "grad_norm": 4.516002655029297, + "learning_rate": 4.602945885366635e-05, + "loss": 0.3539, + "step": 595 + }, + { + "epoch": 0.86, + "grad_norm": 5.180628776550293, + "learning_rate": 4.594940762087736e-05, + "loss": 0.35, + "step": 600 + }, + { + "epoch": 0.87, + "grad_norm": 3.2567028999328613, + "learning_rate": 4.586935638808838e-05, + "loss": 0.323, + "step": 605 + }, + { + "epoch": 0.88, + "grad_norm": 3.9456095695495605, + "learning_rate": 4.57893051552994e-05, + "loss": 0.3378, + "step": 610 + }, + { + "epoch": 0.89, + "grad_norm": 3.6121273040771484, + "learning_rate": 4.570925392251041e-05, + "loss": 0.2565, + "step": 615 + }, + { + "epoch": 0.89, + "grad_norm": 4.358009338378906, + "learning_rate": 4.5629202689721425e-05, + "loss": 0.3147, + "step": 620 + }, + { + "epoch": 0.9, + "grad_norm": 7.531122207641602, + "learning_rate": 4.554915145693244e-05, + "loss": 0.3346, + "step": 625 + }, + { + "epoch": 0.91, + "grad_norm": 5.810347557067871, + "learning_rate": 4.546910022414345e-05, + "loss": 0.3196, + "step": 630 + }, + { + "epoch": 0.91, + "grad_norm": 6.805031776428223, + "learning_rate": 4.538904899135447e-05, + "loss": 0.2952, + "step": 635 + }, + { + "epoch": 0.92, + "grad_norm": 4.857294082641602, + "learning_rate": 4.530899775856549e-05, + "loss": 0.315, + "step": 640 + }, + { + "epoch": 0.93, + "grad_norm": 4.595619201660156, + "learning_rate": 4.52289465257765e-05, + "loss": 0.3231, + "step": 645 + }, + { + "epoch": 0.94, + "grad_norm": 5.075206279754639, + "learning_rate": 4.5148895292987514e-05, + "loss": 0.3019, + "step": 650 + }, + { + "epoch": 0.94, + "grad_norm": 4.71131706237793, + "learning_rate": 4.506884406019853e-05, + "loss": 0.3249, + "step": 655 + }, + { + "epoch": 0.95, + "grad_norm": 5.032394886016846, + "learning_rate": 4.498879282740954e-05, + "loss": 0.2653, + "step": 660 + }, + { + "epoch": 0.96, + "grad_norm": 6.4502997398376465, + "learning_rate": 4.4908741594620556e-05, + "loss": 0.309, + "step": 665 + }, + { + "epoch": 0.96, + "grad_norm": 5.608312129974365, + "learning_rate": 4.4828690361831576e-05, + "loss": 0.2943, + "step": 670 + }, + { + "epoch": 0.97, + "grad_norm": 5.454727649688721, + "learning_rate": 4.474863912904259e-05, + "loss": 0.3037, + "step": 675 + }, + { + "epoch": 0.98, + "grad_norm": 4.60232400894165, + "learning_rate": 4.4668587896253604e-05, + "loss": 0.2739, + "step": 680 + }, + { + "epoch": 0.99, + "grad_norm": 5.319153308868408, + "learning_rate": 4.458853666346462e-05, + "loss": 0.2811, + "step": 685 + }, + { + "epoch": 0.99, + "grad_norm": 4.6785054206848145, + "learning_rate": 4.450848543067563e-05, + "loss": 0.2929, + "step": 690 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9385627530364372, + "eval_loss": 0.16880780458450317, + "eval_runtime": 32.041, + "eval_samples_per_second": 308.355, + "eval_steps_per_second": 9.644, + "step": 694 + }, + { + "epoch": 1.0, + "grad_norm": 6.902053356170654, + "learning_rate": 4.4428434197886645e-05, + "loss": 0.3839, + "step": 695 + }, + { + "epoch": 1.01, + "grad_norm": 4.171269416809082, + "learning_rate": 4.434838296509766e-05, + "loss": 0.3153, + "step": 700 + }, + { + "epoch": 1.01, + "grad_norm": 3.1970090866088867, + "learning_rate": 4.426833173230868e-05, + "loss": 0.2399, + "step": 705 + }, + { + "epoch": 1.02, + "grad_norm": 5.519264221191406, + "learning_rate": 4.4188280499519693e-05, + "loss": 0.2775, + "step": 710 + }, + { + "epoch": 1.03, + "grad_norm": 4.797208786010742, + "learning_rate": 4.4108229266730714e-05, + "loss": 0.2805, + "step": 715 + }, + { + "epoch": 1.04, + "grad_norm": 9.114941596984863, + "learning_rate": 4.402817803394173e-05, + "loss": 0.2846, + "step": 720 + }, + { + "epoch": 1.04, + "grad_norm": 4.987404823303223, + "learning_rate": 4.394812680115274e-05, + "loss": 0.2849, + "step": 725 + }, + { + "epoch": 1.05, + "grad_norm": 6.2959136962890625, + "learning_rate": 4.3868075568363755e-05, + "loss": 0.3129, + "step": 730 + }, + { + "epoch": 1.06, + "grad_norm": 4.492276668548584, + "learning_rate": 4.378802433557477e-05, + "loss": 0.2384, + "step": 735 + }, + { + "epoch": 1.07, + "grad_norm": 3.5424952507019043, + "learning_rate": 4.370797310278579e-05, + "loss": 0.2183, + "step": 740 + }, + { + "epoch": 1.07, + "grad_norm": 7.594015598297119, + "learning_rate": 4.3627921869996804e-05, + "loss": 0.2657, + "step": 745 + }, + { + "epoch": 1.08, + "grad_norm": 6.9036431312561035, + "learning_rate": 4.354787063720782e-05, + "loss": 0.2678, + "step": 750 + }, + { + "epoch": 1.09, + "grad_norm": 7.780063629150391, + "learning_rate": 4.346781940441883e-05, + "loss": 0.3054, + "step": 755 + }, + { + "epoch": 1.09, + "grad_norm": 5.562774181365967, + "learning_rate": 4.3387768171629845e-05, + "loss": 0.272, + "step": 760 + }, + { + "epoch": 1.1, + "grad_norm": 7.2162861824035645, + "learning_rate": 4.330771693884086e-05, + "loss": 0.2678, + "step": 765 + }, + { + "epoch": 1.11, + "grad_norm": 5.875248432159424, + "learning_rate": 4.322766570605187e-05, + "loss": 0.2691, + "step": 770 + }, + { + "epoch": 1.12, + "grad_norm": 4.324618339538574, + "learning_rate": 4.314761447326289e-05, + "loss": 0.3025, + "step": 775 + }, + { + "epoch": 1.12, + "grad_norm": 4.129276275634766, + "learning_rate": 4.306756324047391e-05, + "loss": 0.2596, + "step": 780 + }, + { + "epoch": 1.13, + "grad_norm": 3.086761713027954, + "learning_rate": 4.298751200768492e-05, + "loss": 0.2528, + "step": 785 + }, + { + "epoch": 1.14, + "grad_norm": 4.340246200561523, + "learning_rate": 4.2907460774895934e-05, + "loss": 0.223, + "step": 790 + }, + { + "epoch": 1.14, + "grad_norm": 3.6360461711883545, + "learning_rate": 4.282740954210695e-05, + "loss": 0.2547, + "step": 795 + }, + { + "epoch": 1.15, + "grad_norm": 4.182173252105713, + "learning_rate": 4.274735830931796e-05, + "loss": 0.287, + "step": 800 + }, + { + "epoch": 1.16, + "grad_norm": 4.418725490570068, + "learning_rate": 4.2667307076528976e-05, + "loss": 0.2888, + "step": 805 + }, + { + "epoch": 1.17, + "grad_norm": 4.325172424316406, + "learning_rate": 4.2587255843739996e-05, + "loss": 0.2634, + "step": 810 + }, + { + "epoch": 1.17, + "grad_norm": 5.551906585693359, + "learning_rate": 4.250720461095101e-05, + "loss": 0.2651, + "step": 815 + }, + { + "epoch": 1.18, + "grad_norm": 3.631472110748291, + "learning_rate": 4.2427153378162024e-05, + "loss": 0.2745, + "step": 820 + }, + { + "epoch": 1.19, + "grad_norm": 3.5533196926116943, + "learning_rate": 4.234710214537304e-05, + "loss": 0.2936, + "step": 825 + }, + { + "epoch": 1.19, + "grad_norm": 4.504055023193359, + "learning_rate": 4.226705091258406e-05, + "loss": 0.3025, + "step": 830 + }, + { + "epoch": 1.2, + "grad_norm": 4.739752292633057, + "learning_rate": 4.218699967979507e-05, + "loss": 0.303, + "step": 835 + }, + { + "epoch": 1.21, + "grad_norm": 5.039779186248779, + "learning_rate": 4.2106948447006086e-05, + "loss": 0.2663, + "step": 840 + }, + { + "epoch": 1.22, + "grad_norm": 3.7070090770721436, + "learning_rate": 4.2026897214217107e-05, + "loss": 0.2663, + "step": 845 + }, + { + "epoch": 1.22, + "grad_norm": 4.351013660430908, + "learning_rate": 4.194684598142812e-05, + "loss": 0.2454, + "step": 850 + }, + { + "epoch": 1.23, + "grad_norm": 5.0032830238342285, + "learning_rate": 4.1866794748639134e-05, + "loss": 0.245, + "step": 855 + }, + { + "epoch": 1.24, + "grad_norm": 3.203274965286255, + "learning_rate": 4.178674351585015e-05, + "loss": 0.3036, + "step": 860 + }, + { + "epoch": 1.25, + "grad_norm": 4.47341775894165, + "learning_rate": 4.170669228306116e-05, + "loss": 0.3336, + "step": 865 + }, + { + "epoch": 1.25, + "grad_norm": 4.188334941864014, + "learning_rate": 4.1626641050272176e-05, + "loss": 0.2529, + "step": 870 + }, + { + "epoch": 1.26, + "grad_norm": 3.3264882564544678, + "learning_rate": 4.154658981748319e-05, + "loss": 0.2258, + "step": 875 + }, + { + "epoch": 1.27, + "grad_norm": 4.058962821960449, + "learning_rate": 4.146653858469421e-05, + "loss": 0.3129, + "step": 880 + }, + { + "epoch": 1.27, + "grad_norm": 4.271402359008789, + "learning_rate": 4.1386487351905224e-05, + "loss": 0.2605, + "step": 885 + }, + { + "epoch": 1.28, + "grad_norm": 8.134669303894043, + "learning_rate": 4.130643611911624e-05, + "loss": 0.3072, + "step": 890 + }, + { + "epoch": 1.29, + "grad_norm": 5.065728664398193, + "learning_rate": 4.122638488632725e-05, + "loss": 0.2557, + "step": 895 + }, + { + "epoch": 1.3, + "grad_norm": 4.518153190612793, + "learning_rate": 4.1146333653538265e-05, + "loss": 0.2591, + "step": 900 + }, + { + "epoch": 1.3, + "grad_norm": 6.0956926345825195, + "learning_rate": 4.106628242074928e-05, + "loss": 0.3001, + "step": 905 + }, + { + "epoch": 1.31, + "grad_norm": 4.715207099914551, + "learning_rate": 4.098623118796029e-05, + "loss": 0.2882, + "step": 910 + }, + { + "epoch": 1.32, + "grad_norm": 6.3927435874938965, + "learning_rate": 4.090617995517131e-05, + "loss": 0.2733, + "step": 915 + }, + { + "epoch": 1.32, + "grad_norm": 3.886277198791504, + "learning_rate": 4.082612872238233e-05, + "loss": 0.2558, + "step": 920 + }, + { + "epoch": 1.33, + "grad_norm": 6.690213203430176, + "learning_rate": 4.074607748959334e-05, + "loss": 0.2411, + "step": 925 + }, + { + "epoch": 1.34, + "grad_norm": 5.04226016998291, + "learning_rate": 4.0666026256804355e-05, + "loss": 0.2814, + "step": 930 + }, + { + "epoch": 1.35, + "grad_norm": 6.361902236938477, + "learning_rate": 4.058597502401537e-05, + "loss": 0.1918, + "step": 935 + }, + { + "epoch": 1.35, + "grad_norm": 6.6365227699279785, + "learning_rate": 4.050592379122638e-05, + "loss": 0.2714, + "step": 940 + }, + { + "epoch": 1.36, + "grad_norm": 4.794340133666992, + "learning_rate": 4.04258725584374e-05, + "loss": 0.269, + "step": 945 + }, + { + "epoch": 1.37, + "grad_norm": 5.207016468048096, + "learning_rate": 4.034582132564842e-05, + "loss": 0.2955, + "step": 950 + }, + { + "epoch": 1.37, + "grad_norm": 5.347695350646973, + "learning_rate": 4.026577009285944e-05, + "loss": 0.2341, + "step": 955 + }, + { + "epoch": 1.38, + "grad_norm": 7.788352966308594, + "learning_rate": 4.018571886007045e-05, + "loss": 0.2228, + "step": 960 + }, + { + "epoch": 1.39, + "grad_norm": 4.078495025634766, + "learning_rate": 4.0105667627281465e-05, + "loss": 0.2408, + "step": 965 + }, + { + "epoch": 1.4, + "grad_norm": 5.237365245819092, + "learning_rate": 4.002561639449248e-05, + "loss": 0.2891, + "step": 970 + }, + { + "epoch": 1.4, + "grad_norm": 5.711833953857422, + "learning_rate": 3.994556516170349e-05, + "loss": 0.323, + "step": 975 + }, + { + "epoch": 1.41, + "grad_norm": 3.250711679458618, + "learning_rate": 3.9865513928914506e-05, + "loss": 0.2945, + "step": 980 + }, + { + "epoch": 1.42, + "grad_norm": 6.933974266052246, + "learning_rate": 3.978546269612553e-05, + "loss": 0.2507, + "step": 985 + }, + { + "epoch": 1.42, + "grad_norm": 4.515052795410156, + "learning_rate": 3.970541146333654e-05, + "loss": 0.265, + "step": 990 + }, + { + "epoch": 1.43, + "grad_norm": 4.89296293258667, + "learning_rate": 3.9625360230547554e-05, + "loss": 0.2868, + "step": 995 + }, + { + "epoch": 1.44, + "grad_norm": 4.629034996032715, + "learning_rate": 3.954530899775857e-05, + "loss": 0.2773, + "step": 1000 + }, + { + "epoch": 1.45, + "grad_norm": 3.881559371948242, + "learning_rate": 3.946525776496958e-05, + "loss": 0.3336, + "step": 1005 + }, + { + "epoch": 1.45, + "grad_norm": 3.4768316745758057, + "learning_rate": 3.9385206532180596e-05, + "loss": 0.2212, + "step": 1010 + }, + { + "epoch": 1.46, + "grad_norm": 5.582344055175781, + "learning_rate": 3.930515529939161e-05, + "loss": 0.3031, + "step": 1015 + }, + { + "epoch": 1.47, + "grad_norm": 3.73008131980896, + "learning_rate": 3.922510406660262e-05, + "loss": 0.2557, + "step": 1020 + }, + { + "epoch": 1.48, + "grad_norm": 5.319180011749268, + "learning_rate": 3.9145052833813644e-05, + "loss": 0.2679, + "step": 1025 + }, + { + "epoch": 1.48, + "grad_norm": 6.709672451019287, + "learning_rate": 3.906500160102466e-05, + "loss": 0.2471, + "step": 1030 + }, + { + "epoch": 1.49, + "grad_norm": 5.294819355010986, + "learning_rate": 3.898495036823567e-05, + "loss": 0.2661, + "step": 1035 + }, + { + "epoch": 1.5, + "grad_norm": 3.2995288372039795, + "learning_rate": 3.8904899135446685e-05, + "loss": 0.2789, + "step": 1040 + }, + { + "epoch": 1.5, + "grad_norm": 4.34086799621582, + "learning_rate": 3.88248479026577e-05, + "loss": 0.2789, + "step": 1045 + }, + { + "epoch": 1.51, + "grad_norm": 5.209534168243408, + "learning_rate": 3.874479666986871e-05, + "loss": 0.3002, + "step": 1050 + }, + { + "epoch": 1.52, + "grad_norm": 5.175271034240723, + "learning_rate": 3.8664745437079733e-05, + "loss": 0.2631, + "step": 1055 + }, + { + "epoch": 1.53, + "grad_norm": 4.909916400909424, + "learning_rate": 3.858469420429075e-05, + "loss": 0.25, + "step": 1060 + }, + { + "epoch": 1.53, + "grad_norm": 3.8786613941192627, + "learning_rate": 3.850464297150176e-05, + "loss": 0.226, + "step": 1065 + }, + { + "epoch": 1.54, + "grad_norm": 4.349425315856934, + "learning_rate": 3.842459173871278e-05, + "loss": 0.2635, + "step": 1070 + }, + { + "epoch": 1.55, + "grad_norm": 5.107605934143066, + "learning_rate": 3.8344540505923795e-05, + "loss": 0.2536, + "step": 1075 + }, + { + "epoch": 1.55, + "grad_norm": 5.436495780944824, + "learning_rate": 3.826448927313481e-05, + "loss": 0.2911, + "step": 1080 + }, + { + "epoch": 1.56, + "grad_norm": 5.1116156578063965, + "learning_rate": 3.818443804034582e-05, + "loss": 0.3064, + "step": 1085 + }, + { + "epoch": 1.57, + "grad_norm": 4.1365742683410645, + "learning_rate": 3.810438680755684e-05, + "loss": 0.2003, + "step": 1090 + }, + { + "epoch": 1.58, + "grad_norm": 5.43222188949585, + "learning_rate": 3.802433557476786e-05, + "loss": 0.291, + "step": 1095 + }, + { + "epoch": 1.58, + "grad_norm": 6.062341690063477, + "learning_rate": 3.794428434197887e-05, + "loss": 0.2325, + "step": 1100 + }, + { + "epoch": 1.59, + "grad_norm": 4.5507097244262695, + "learning_rate": 3.7864233109189885e-05, + "loss": 0.2493, + "step": 1105 + }, + { + "epoch": 1.6, + "grad_norm": 3.3975865840911865, + "learning_rate": 3.77841818764009e-05, + "loss": 0.2349, + "step": 1110 + }, + { + "epoch": 1.6, + "grad_norm": 3.967979907989502, + "learning_rate": 3.770413064361191e-05, + "loss": 0.2364, + "step": 1115 + }, + { + "epoch": 1.61, + "grad_norm": 4.541342735290527, + "learning_rate": 3.7624079410822926e-05, + "loss": 0.2285, + "step": 1120 + }, + { + "epoch": 1.62, + "grad_norm": 4.848491668701172, + "learning_rate": 3.754402817803394e-05, + "loss": 0.235, + "step": 1125 + }, + { + "epoch": 1.63, + "grad_norm": 5.879725933074951, + "learning_rate": 3.746397694524496e-05, + "loss": 0.2759, + "step": 1130 + }, + { + "epoch": 1.63, + "grad_norm": 6.01210880279541, + "learning_rate": 3.7383925712455975e-05, + "loss": 0.3345, + "step": 1135 + }, + { + "epoch": 1.64, + "grad_norm": 4.760444641113281, + "learning_rate": 3.730387447966699e-05, + "loss": 0.2708, + "step": 1140 + }, + { + "epoch": 1.65, + "grad_norm": 4.630128860473633, + "learning_rate": 3.7223823246878e-05, + "loss": 0.3049, + "step": 1145 + }, + { + "epoch": 1.66, + "grad_norm": 4.3284101486206055, + "learning_rate": 3.7143772014089016e-05, + "loss": 0.2822, + "step": 1150 + }, + { + "epoch": 1.66, + "grad_norm": 6.679904937744141, + "learning_rate": 3.706372078130003e-05, + "loss": 0.2764, + "step": 1155 + }, + { + "epoch": 1.67, + "grad_norm": 5.192065238952637, + "learning_rate": 3.6983669548511043e-05, + "loss": 0.2479, + "step": 1160 + }, + { + "epoch": 1.68, + "grad_norm": 4.901111125946045, + "learning_rate": 3.6903618315722064e-05, + "loss": 0.2849, + "step": 1165 + }, + { + "epoch": 1.68, + "grad_norm": 6.2184977531433105, + "learning_rate": 3.682356708293308e-05, + "loss": 0.2667, + "step": 1170 + }, + { + "epoch": 1.69, + "grad_norm": 5.900247573852539, + "learning_rate": 3.674351585014409e-05, + "loss": 0.2992, + "step": 1175 + }, + { + "epoch": 1.7, + "grad_norm": 3.7004477977752686, + "learning_rate": 3.666346461735511e-05, + "loss": 0.2791, + "step": 1180 + }, + { + "epoch": 1.71, + "grad_norm": 4.646676063537598, + "learning_rate": 3.6583413384566126e-05, + "loss": 0.2525, + "step": 1185 + }, + { + "epoch": 1.71, + "grad_norm": 4.426496982574463, + "learning_rate": 3.650336215177714e-05, + "loss": 0.2624, + "step": 1190 + }, + { + "epoch": 1.72, + "grad_norm": 4.333110809326172, + "learning_rate": 3.6423310918988154e-05, + "loss": 0.2777, + "step": 1195 + }, + { + "epoch": 1.73, + "grad_norm": 3.7483744621276855, + "learning_rate": 3.6343259686199174e-05, + "loss": 0.2897, + "step": 1200 + }, + { + "epoch": 1.73, + "grad_norm": 5.556215286254883, + "learning_rate": 3.626320845341019e-05, + "loss": 0.3432, + "step": 1205 + }, + { + "epoch": 1.74, + "grad_norm": 4.707242965698242, + "learning_rate": 3.61831572206212e-05, + "loss": 0.2439, + "step": 1210 + }, + { + "epoch": 1.75, + "grad_norm": 4.767390251159668, + "learning_rate": 3.6103105987832216e-05, + "loss": 0.2744, + "step": 1215 + }, + { + "epoch": 1.76, + "grad_norm": 4.1662492752075195, + "learning_rate": 3.602305475504323e-05, + "loss": 0.267, + "step": 1220 + }, + { + "epoch": 1.76, + "grad_norm": 4.437891006469727, + "learning_rate": 3.594300352225424e-05, + "loss": 0.2354, + "step": 1225 + }, + { + "epoch": 1.77, + "grad_norm": 5.63749361038208, + "learning_rate": 3.586295228946526e-05, + "loss": 0.2557, + "step": 1230 + }, + { + "epoch": 1.78, + "grad_norm": 6.398256778717041, + "learning_rate": 3.578290105667628e-05, + "loss": 0.2697, + "step": 1235 + }, + { + "epoch": 1.78, + "grad_norm": 4.15376091003418, + "learning_rate": 3.570284982388729e-05, + "loss": 0.2672, + "step": 1240 + }, + { + "epoch": 1.79, + "grad_norm": 8.952369689941406, + "learning_rate": 3.5622798591098305e-05, + "loss": 0.2992, + "step": 1245 + }, + { + "epoch": 1.8, + "grad_norm": 7.161625385284424, + "learning_rate": 3.554274735830932e-05, + "loss": 0.3067, + "step": 1250 + }, + { + "epoch": 1.81, + "grad_norm": 3.848027467727661, + "learning_rate": 3.546269612552033e-05, + "loss": 0.3165, + "step": 1255 + }, + { + "epoch": 1.81, + "grad_norm": 5.625514507293701, + "learning_rate": 3.5382644892731347e-05, + "loss": 0.2792, + "step": 1260 + }, + { + "epoch": 1.82, + "grad_norm": 3.829505681991577, + "learning_rate": 3.530259365994236e-05, + "loss": 0.258, + "step": 1265 + }, + { + "epoch": 1.83, + "grad_norm": 4.038649559020996, + "learning_rate": 3.522254242715338e-05, + "loss": 0.2668, + "step": 1270 + }, + { + "epoch": 1.84, + "grad_norm": 3.746533155441284, + "learning_rate": 3.5142491194364395e-05, + "loss": 0.2571, + "step": 1275 + }, + { + "epoch": 1.84, + "grad_norm": 3.9205687046051025, + "learning_rate": 3.506243996157541e-05, + "loss": 0.2148, + "step": 1280 + }, + { + "epoch": 1.85, + "grad_norm": 5.464355945587158, + "learning_rate": 3.498238872878642e-05, + "loss": 0.2707, + "step": 1285 + }, + { + "epoch": 1.86, + "grad_norm": 4.321130752563477, + "learning_rate": 3.4902337495997436e-05, + "loss": 0.2434, + "step": 1290 + }, + { + "epoch": 1.86, + "grad_norm": 6.3836588859558105, + "learning_rate": 3.482228626320846e-05, + "loss": 0.2601, + "step": 1295 + }, + { + "epoch": 1.87, + "grad_norm": 2.9065053462982178, + "learning_rate": 3.474223503041947e-05, + "loss": 0.2033, + "step": 1300 + }, + { + "epoch": 1.88, + "grad_norm": 4.280132293701172, + "learning_rate": 3.4662183797630484e-05, + "loss": 0.2708, + "step": 1305 + }, + { + "epoch": 1.89, + "grad_norm": 5.5674262046813965, + "learning_rate": 3.4582132564841505e-05, + "loss": 0.2899, + "step": 1310 + }, + { + "epoch": 1.89, + "grad_norm": 4.071995735168457, + "learning_rate": 3.450208133205252e-05, + "loss": 0.2714, + "step": 1315 + }, + { + "epoch": 1.9, + "grad_norm": 6.83046817779541, + "learning_rate": 3.442203009926353e-05, + "loss": 0.2563, + "step": 1320 + }, + { + "epoch": 1.91, + "grad_norm": 4.866962432861328, + "learning_rate": 3.4341978866474546e-05, + "loss": 0.2898, + "step": 1325 + }, + { + "epoch": 1.91, + "grad_norm": 6.10991096496582, + "learning_rate": 3.426192763368556e-05, + "loss": 0.2927, + "step": 1330 + }, + { + "epoch": 1.92, + "grad_norm": 8.084212303161621, + "learning_rate": 3.4181876400896574e-05, + "loss": 0.2668, + "step": 1335 + }, + { + "epoch": 1.93, + "grad_norm": 2.702385902404785, + "learning_rate": 3.4101825168107594e-05, + "loss": 0.2617, + "step": 1340 + }, + { + "epoch": 1.94, + "grad_norm": 5.180947303771973, + "learning_rate": 3.402177393531861e-05, + "loss": 0.2411, + "step": 1345 + }, + { + "epoch": 1.94, + "grad_norm": 3.0766685009002686, + "learning_rate": 3.394172270252962e-05, + "loss": 0.2723, + "step": 1350 + }, + { + "epoch": 1.95, + "grad_norm": 3.833108901977539, + "learning_rate": 3.3861671469740636e-05, + "loss": 0.2237, + "step": 1355 + }, + { + "epoch": 1.96, + "grad_norm": 4.505425930023193, + "learning_rate": 3.378162023695165e-05, + "loss": 0.2685, + "step": 1360 + }, + { + "epoch": 1.96, + "grad_norm": 3.9498701095581055, + "learning_rate": 3.370156900416266e-05, + "loss": 0.2637, + "step": 1365 + }, + { + "epoch": 1.97, + "grad_norm": 6.345920562744141, + "learning_rate": 3.362151777137368e-05, + "loss": 0.2745, + "step": 1370 + }, + { + "epoch": 1.98, + "grad_norm": 4.702010154724121, + "learning_rate": 3.35414665385847e-05, + "loss": 0.2837, + "step": 1375 + }, + { + "epoch": 1.99, + "grad_norm": 4.943043231964111, + "learning_rate": 3.346141530579571e-05, + "loss": 0.2525, + "step": 1380 + }, + { + "epoch": 1.99, + "grad_norm": 3.8749611377716064, + "learning_rate": 3.3381364073006725e-05, + "loss": 0.2499, + "step": 1385 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9425101214574899, + "eval_loss": 0.1516382098197937, + "eval_runtime": 31.6984, + "eval_samples_per_second": 311.687, + "eval_steps_per_second": 9.748, + "step": 1389 + }, + { + "epoch": 2.0, + "grad_norm": 6.219438076019287, + "learning_rate": 3.330131284021774e-05, + "loss": 0.3071, + "step": 1390 + }, + { + "epoch": 2.01, + "grad_norm": 4.6552629470825195, + "learning_rate": 3.322126160742875e-05, + "loss": 0.2364, + "step": 1395 + }, + { + "epoch": 2.02, + "grad_norm": 3.997241497039795, + "learning_rate": 3.314121037463977e-05, + "loss": 0.249, + "step": 1400 + }, + { + "epoch": 2.02, + "grad_norm": 3.6796419620513916, + "learning_rate": 3.306115914185078e-05, + "loss": 0.2261, + "step": 1405 + }, + { + "epoch": 2.03, + "grad_norm": 3.0016541481018066, + "learning_rate": 3.29811079090618e-05, + "loss": 0.2127, + "step": 1410 + }, + { + "epoch": 2.04, + "grad_norm": 4.60055685043335, + "learning_rate": 3.2901056676272815e-05, + "loss": 0.2207, + "step": 1415 + }, + { + "epoch": 2.04, + "grad_norm": 6.432025909423828, + "learning_rate": 3.2821005443483835e-05, + "loss": 0.2088, + "step": 1420 + }, + { + "epoch": 2.05, + "grad_norm": 4.049763202667236, + "learning_rate": 3.274095421069485e-05, + "loss": 0.2193, + "step": 1425 + }, + { + "epoch": 2.06, + "grad_norm": 4.77670955657959, + "learning_rate": 3.266090297790586e-05, + "loss": 0.2666, + "step": 1430 + }, + { + "epoch": 2.07, + "grad_norm": 3.876225709915161, + "learning_rate": 3.258085174511688e-05, + "loss": 0.222, + "step": 1435 + }, + { + "epoch": 2.07, + "grad_norm": 2.917393207550049, + "learning_rate": 3.250080051232789e-05, + "loss": 0.2481, + "step": 1440 + }, + { + "epoch": 2.08, + "grad_norm": 3.858349084854126, + "learning_rate": 3.242074927953891e-05, + "loss": 0.2929, + "step": 1445 + }, + { + "epoch": 2.09, + "grad_norm": 4.08052921295166, + "learning_rate": 3.2340698046749925e-05, + "loss": 0.2081, + "step": 1450 + }, + { + "epoch": 2.09, + "grad_norm": 3.8843398094177246, + "learning_rate": 3.226064681396094e-05, + "loss": 0.1917, + "step": 1455 + }, + { + "epoch": 2.1, + "grad_norm": 4.356058597564697, + "learning_rate": 3.218059558117195e-05, + "loss": 0.2211, + "step": 1460 + }, + { + "epoch": 2.11, + "grad_norm": 5.629312038421631, + "learning_rate": 3.2100544348382966e-05, + "loss": 0.2704, + "step": 1465 + }, + { + "epoch": 2.12, + "grad_norm": 3.8312325477600098, + "learning_rate": 3.202049311559398e-05, + "loss": 0.2418, + "step": 1470 + }, + { + "epoch": 2.12, + "grad_norm": 3.1079790592193604, + "learning_rate": 3.1940441882804994e-05, + "loss": 0.1948, + "step": 1475 + }, + { + "epoch": 2.13, + "grad_norm": 4.682496547698975, + "learning_rate": 3.1860390650016015e-05, + "loss": 0.2023, + "step": 1480 + }, + { + "epoch": 2.14, + "grad_norm": 4.4082489013671875, + "learning_rate": 3.178033941722703e-05, + "loss": 0.2346, + "step": 1485 + }, + { + "epoch": 2.14, + "grad_norm": 5.721102714538574, + "learning_rate": 3.170028818443804e-05, + "loss": 0.2294, + "step": 1490 + }, + { + "epoch": 2.15, + "grad_norm": 3.2310311794281006, + "learning_rate": 3.1620236951649056e-05, + "loss": 0.2074, + "step": 1495 + }, + { + "epoch": 2.16, + "grad_norm": 5.734870433807373, + "learning_rate": 3.154018571886007e-05, + "loss": 0.2244, + "step": 1500 + }, + { + "epoch": 2.17, + "grad_norm": 4.256961822509766, + "learning_rate": 3.1460134486071084e-05, + "loss": 0.2208, + "step": 1505 + }, + { + "epoch": 2.17, + "grad_norm": 6.9470696449279785, + "learning_rate": 3.13800832532821e-05, + "loss": 0.2736, + "step": 1510 + }, + { + "epoch": 2.18, + "grad_norm": 2.8514010906219482, + "learning_rate": 3.130003202049312e-05, + "loss": 0.1989, + "step": 1515 + }, + { + "epoch": 2.19, + "grad_norm": 4.2279744148254395, + "learning_rate": 3.121998078770413e-05, + "loss": 0.2753, + "step": 1520 + }, + { + "epoch": 2.2, + "grad_norm": 3.349268674850464, + "learning_rate": 3.1139929554915145e-05, + "loss": 0.181, + "step": 1525 + }, + { + "epoch": 2.2, + "grad_norm": 4.550454616546631, + "learning_rate": 3.105987832212616e-05, + "loss": 0.2536, + "step": 1530 + }, + { + "epoch": 2.21, + "grad_norm": 3.7860782146453857, + "learning_rate": 3.097982708933718e-05, + "loss": 0.2331, + "step": 1535 + }, + { + "epoch": 2.22, + "grad_norm": 4.5719170570373535, + "learning_rate": 3.0899775856548194e-05, + "loss": 0.2408, + "step": 1540 + }, + { + "epoch": 2.22, + "grad_norm": 4.448012828826904, + "learning_rate": 3.081972462375921e-05, + "loss": 0.2219, + "step": 1545 + }, + { + "epoch": 2.23, + "grad_norm": 3.7972702980041504, + "learning_rate": 3.073967339097023e-05, + "loss": 0.2691, + "step": 1550 + }, + { + "epoch": 2.24, + "grad_norm": 4.268452167510986, + "learning_rate": 3.065962215818124e-05, + "loss": 0.2215, + "step": 1555 + }, + { + "epoch": 2.25, + "grad_norm": 4.145329475402832, + "learning_rate": 3.0579570925392256e-05, + "loss": 0.2488, + "step": 1560 + }, + { + "epoch": 2.25, + "grad_norm": 5.501221656799316, + "learning_rate": 3.049951969260327e-05, + "loss": 0.2441, + "step": 1565 + }, + { + "epoch": 2.26, + "grad_norm": 4.3408203125, + "learning_rate": 3.0419468459814283e-05, + "loss": 0.2308, + "step": 1570 + }, + { + "epoch": 2.27, + "grad_norm": 4.104162216186523, + "learning_rate": 3.0339417227025297e-05, + "loss": 0.2538, + "step": 1575 + }, + { + "epoch": 2.27, + "grad_norm": 5.441348075866699, + "learning_rate": 3.025936599423631e-05, + "loss": 0.2742, + "step": 1580 + }, + { + "epoch": 2.28, + "grad_norm": 3.3526971340179443, + "learning_rate": 3.017931476144733e-05, + "loss": 0.1934, + "step": 1585 + }, + { + "epoch": 2.29, + "grad_norm": 3.5918030738830566, + "learning_rate": 3.0099263528658345e-05, + "loss": 0.256, + "step": 1590 + }, + { + "epoch": 2.3, + "grad_norm": 3.9758517742156982, + "learning_rate": 3.001921229586936e-05, + "loss": 0.2096, + "step": 1595 + }, + { + "epoch": 2.3, + "grad_norm": 2.7759931087493896, + "learning_rate": 2.9939161063080373e-05, + "loss": 0.2545, + "step": 1600 + }, + { + "epoch": 2.31, + "grad_norm": 6.958917140960693, + "learning_rate": 2.9859109830291387e-05, + "loss": 0.2293, + "step": 1605 + }, + { + "epoch": 2.32, + "grad_norm": 4.162193775177002, + "learning_rate": 2.97790585975024e-05, + "loss": 0.2095, + "step": 1610 + }, + { + "epoch": 2.32, + "grad_norm": 3.701801061630249, + "learning_rate": 2.9699007364713418e-05, + "loss": 0.2339, + "step": 1615 + }, + { + "epoch": 2.33, + "grad_norm": 3.290947437286377, + "learning_rate": 2.9618956131924435e-05, + "loss": 0.209, + "step": 1620 + }, + { + "epoch": 2.34, + "grad_norm": 4.3231024742126465, + "learning_rate": 2.953890489913545e-05, + "loss": 0.2791, + "step": 1625 + }, + { + "epoch": 2.35, + "grad_norm": 3.6642446517944336, + "learning_rate": 2.9458853666346466e-05, + "loss": 0.2382, + "step": 1630 + }, + { + "epoch": 2.35, + "grad_norm": 6.942342281341553, + "learning_rate": 2.937880243355748e-05, + "loss": 0.2406, + "step": 1635 + }, + { + "epoch": 2.36, + "grad_norm": 3.886199712753296, + "learning_rate": 2.9298751200768493e-05, + "loss": 0.218, + "step": 1640 + }, + { + "epoch": 2.37, + "grad_norm": 3.8468515872955322, + "learning_rate": 2.9218699967979507e-05, + "loss": 0.2449, + "step": 1645 + }, + { + "epoch": 2.37, + "grad_norm": 3.2598648071289062, + "learning_rate": 2.913864873519052e-05, + "loss": 0.2276, + "step": 1650 + }, + { + "epoch": 2.38, + "grad_norm": 3.9356770515441895, + "learning_rate": 2.905859750240154e-05, + "loss": 0.2481, + "step": 1655 + }, + { + "epoch": 2.39, + "grad_norm": 5.803495407104492, + "learning_rate": 2.8978546269612555e-05, + "loss": 0.2699, + "step": 1660 + }, + { + "epoch": 2.4, + "grad_norm": 3.3325111865997314, + "learning_rate": 2.889849503682357e-05, + "loss": 0.2206, + "step": 1665 + }, + { + "epoch": 2.4, + "grad_norm": 5.40475606918335, + "learning_rate": 2.8818443804034583e-05, + "loss": 0.2295, + "step": 1670 + }, + { + "epoch": 2.41, + "grad_norm": 4.207846164703369, + "learning_rate": 2.8738392571245597e-05, + "loss": 0.2268, + "step": 1675 + }, + { + "epoch": 2.42, + "grad_norm": 3.405880928039551, + "learning_rate": 2.8658341338456614e-05, + "loss": 0.2773, + "step": 1680 + }, + { + "epoch": 2.43, + "grad_norm": 4.502201557159424, + "learning_rate": 2.8578290105667628e-05, + "loss": 0.2459, + "step": 1685 + }, + { + "epoch": 2.43, + "grad_norm": 2.8585033416748047, + "learning_rate": 2.8498238872878645e-05, + "loss": 0.2626, + "step": 1690 + }, + { + "epoch": 2.44, + "grad_norm": 4.774590015411377, + "learning_rate": 2.8418187640089662e-05, + "loss": 0.2242, + "step": 1695 + }, + { + "epoch": 2.45, + "grad_norm": 6.423954010009766, + "learning_rate": 2.8338136407300676e-05, + "loss": 0.2711, + "step": 1700 + }, + { + "epoch": 2.45, + "grad_norm": 5.023673057556152, + "learning_rate": 2.825808517451169e-05, + "loss": 0.2191, + "step": 1705 + }, + { + "epoch": 2.46, + "grad_norm": 3.246953010559082, + "learning_rate": 2.8178033941722703e-05, + "loss": 0.2032, + "step": 1710 + }, + { + "epoch": 2.47, + "grad_norm": 4.740121364593506, + "learning_rate": 2.8097982708933717e-05, + "loss": 0.2257, + "step": 1715 + }, + { + "epoch": 2.48, + "grad_norm": 4.652435302734375, + "learning_rate": 2.801793147614473e-05, + "loss": 0.2441, + "step": 1720 + }, + { + "epoch": 2.48, + "grad_norm": 3.7246835231781006, + "learning_rate": 2.7937880243355745e-05, + "loss": 0.2064, + "step": 1725 + }, + { + "epoch": 2.49, + "grad_norm": 2.8556969165802, + "learning_rate": 2.7857829010566765e-05, + "loss": 0.2002, + "step": 1730 + }, + { + "epoch": 2.5, + "grad_norm": 3.9338796138763428, + "learning_rate": 2.777777777777778e-05, + "loss": 0.2608, + "step": 1735 + }, + { + "epoch": 2.5, + "grad_norm": 3.847045660018921, + "learning_rate": 2.7697726544988796e-05, + "loss": 0.2167, + "step": 1740 + }, + { + "epoch": 2.51, + "grad_norm": 3.5335538387298584, + "learning_rate": 2.761767531219981e-05, + "loss": 0.1966, + "step": 1745 + }, + { + "epoch": 2.52, + "grad_norm": 3.702679395675659, + "learning_rate": 2.7537624079410824e-05, + "loss": 0.1865, + "step": 1750 + }, + { + "epoch": 2.53, + "grad_norm": 3.013113498687744, + "learning_rate": 2.7457572846621838e-05, + "loss": 0.199, + "step": 1755 + }, + { + "epoch": 2.53, + "grad_norm": 3.300877809524536, + "learning_rate": 2.737752161383285e-05, + "loss": 0.2504, + "step": 1760 + }, + { + "epoch": 2.54, + "grad_norm": 5.806422233581543, + "learning_rate": 2.7297470381043872e-05, + "loss": 0.2362, + "step": 1765 + }, + { + "epoch": 2.55, + "grad_norm": 6.372203826904297, + "learning_rate": 2.7217419148254886e-05, + "loss": 0.2298, + "step": 1770 + }, + { + "epoch": 2.55, + "grad_norm": 6.462773323059082, + "learning_rate": 2.71373679154659e-05, + "loss": 0.2367, + "step": 1775 + }, + { + "epoch": 2.56, + "grad_norm": 5.330246448516846, + "learning_rate": 2.7057316682676913e-05, + "loss": 0.2543, + "step": 1780 + }, + { + "epoch": 2.57, + "grad_norm": 4.1171956062316895, + "learning_rate": 2.6977265449887927e-05, + "loss": 0.2057, + "step": 1785 + }, + { + "epoch": 2.58, + "grad_norm": 3.247389316558838, + "learning_rate": 2.6897214217098944e-05, + "loss": 0.1965, + "step": 1790 + }, + { + "epoch": 2.58, + "grad_norm": 2.7912063598632812, + "learning_rate": 2.6817162984309958e-05, + "loss": 0.2103, + "step": 1795 + }, + { + "epoch": 2.59, + "grad_norm": 2.85927152633667, + "learning_rate": 2.6737111751520975e-05, + "loss": 0.2226, + "step": 1800 + }, + { + "epoch": 2.6, + "grad_norm": 3.5677337646484375, + "learning_rate": 2.6657060518731993e-05, + "loss": 0.2193, + "step": 1805 + }, + { + "epoch": 2.61, + "grad_norm": 5.31620979309082, + "learning_rate": 2.6577009285943006e-05, + "loss": 0.2569, + "step": 1810 + }, + { + "epoch": 2.61, + "grad_norm": 5.1970038414001465, + "learning_rate": 2.649695805315402e-05, + "loss": 0.2235, + "step": 1815 + }, + { + "epoch": 2.62, + "grad_norm": 3.6116130352020264, + "learning_rate": 2.6416906820365034e-05, + "loss": 0.2353, + "step": 1820 + }, + { + "epoch": 2.63, + "grad_norm": 4.2939043045043945, + "learning_rate": 2.6336855587576048e-05, + "loss": 0.2448, + "step": 1825 + }, + { + "epoch": 2.63, + "grad_norm": 3.7755072116851807, + "learning_rate": 2.625680435478706e-05, + "loss": 0.2131, + "step": 1830 + }, + { + "epoch": 2.64, + "grad_norm": 4.578812122344971, + "learning_rate": 2.6176753121998082e-05, + "loss": 0.2167, + "step": 1835 + }, + { + "epoch": 2.65, + "grad_norm": 4.904923439025879, + "learning_rate": 2.6096701889209096e-05, + "loss": 0.2228, + "step": 1840 + }, + { + "epoch": 2.66, + "grad_norm": 5.128912448883057, + "learning_rate": 2.601665065642011e-05, + "loss": 0.2888, + "step": 1845 + }, + { + "epoch": 2.66, + "grad_norm": 5.788363933563232, + "learning_rate": 2.5936599423631124e-05, + "loss": 0.2421, + "step": 1850 + }, + { + "epoch": 2.67, + "grad_norm": 4.001156806945801, + "learning_rate": 2.585654819084214e-05, + "loss": 0.1997, + "step": 1855 + }, + { + "epoch": 2.68, + "grad_norm": 4.3057475090026855, + "learning_rate": 2.5776496958053155e-05, + "loss": 0.2434, + "step": 1860 + }, + { + "epoch": 2.68, + "grad_norm": 3.524348258972168, + "learning_rate": 2.5696445725264168e-05, + "loss": 0.2188, + "step": 1865 + }, + { + "epoch": 2.69, + "grad_norm": 6.004559516906738, + "learning_rate": 2.561639449247519e-05, + "loss": 0.2426, + "step": 1870 + }, + { + "epoch": 2.7, + "grad_norm": 4.429930686950684, + "learning_rate": 2.5536343259686203e-05, + "loss": 0.2306, + "step": 1875 + }, + { + "epoch": 2.71, + "grad_norm": 5.706151008605957, + "learning_rate": 2.5456292026897216e-05, + "loss": 0.2194, + "step": 1880 + }, + { + "epoch": 2.71, + "grad_norm": 4.148650169372559, + "learning_rate": 2.537624079410823e-05, + "loss": 0.2683, + "step": 1885 + }, + { + "epoch": 2.72, + "grad_norm": 3.2449026107788086, + "learning_rate": 2.5296189561319244e-05, + "loss": 0.2539, + "step": 1890 + }, + { + "epoch": 2.73, + "grad_norm": 3.6404850482940674, + "learning_rate": 2.5216138328530258e-05, + "loss": 0.2221, + "step": 1895 + }, + { + "epoch": 2.73, + "grad_norm": 3.1382288932800293, + "learning_rate": 2.513608709574127e-05, + "loss": 0.2266, + "step": 1900 + }, + { + "epoch": 2.74, + "grad_norm": 8.027711868286133, + "learning_rate": 2.5056035862952292e-05, + "loss": 0.2944, + "step": 1905 + }, + { + "epoch": 2.75, + "grad_norm": 7.140124797821045, + "learning_rate": 2.4975984630163306e-05, + "loss": 0.2036, + "step": 1910 + }, + { + "epoch": 2.76, + "grad_norm": 3.4655325412750244, + "learning_rate": 2.489593339737432e-05, + "loss": 0.1955, + "step": 1915 + }, + { + "epoch": 2.76, + "grad_norm": 3.295433759689331, + "learning_rate": 2.4815882164585337e-05, + "loss": 0.2114, + "step": 1920 + }, + { + "epoch": 2.77, + "grad_norm": 3.806304931640625, + "learning_rate": 2.473583093179635e-05, + "loss": 0.206, + "step": 1925 + }, + { + "epoch": 2.78, + "grad_norm": 4.674000263214111, + "learning_rate": 2.4655779699007368e-05, + "loss": 0.2215, + "step": 1930 + }, + { + "epoch": 2.79, + "grad_norm": 3.5063233375549316, + "learning_rate": 2.4575728466218382e-05, + "loss": 0.2583, + "step": 1935 + }, + { + "epoch": 2.79, + "grad_norm": 3.4132816791534424, + "learning_rate": 2.4495677233429396e-05, + "loss": 0.2388, + "step": 1940 + }, + { + "epoch": 2.8, + "grad_norm": 3.2140300273895264, + "learning_rate": 2.441562600064041e-05, + "loss": 0.2395, + "step": 1945 + }, + { + "epoch": 2.81, + "grad_norm": 4.795976638793945, + "learning_rate": 2.4335574767851427e-05, + "loss": 0.2206, + "step": 1950 + }, + { + "epoch": 2.81, + "grad_norm": 3.491682767868042, + "learning_rate": 2.425552353506244e-05, + "loss": 0.2553, + "step": 1955 + }, + { + "epoch": 2.82, + "grad_norm": 4.174879550933838, + "learning_rate": 2.4175472302273454e-05, + "loss": 0.1969, + "step": 1960 + }, + { + "epoch": 2.83, + "grad_norm": 3.776137590408325, + "learning_rate": 2.409542106948447e-05, + "loss": 0.2276, + "step": 1965 + }, + { + "epoch": 2.84, + "grad_norm": 3.7050764560699463, + "learning_rate": 2.4015369836695485e-05, + "loss": 0.2001, + "step": 1970 + }, + { + "epoch": 2.84, + "grad_norm": 3.4648373126983643, + "learning_rate": 2.3935318603906502e-05, + "loss": 0.2538, + "step": 1975 + }, + { + "epoch": 2.85, + "grad_norm": 4.3064727783203125, + "learning_rate": 2.3855267371117516e-05, + "loss": 0.2579, + "step": 1980 + }, + { + "epoch": 2.86, + "grad_norm": 2.671032428741455, + "learning_rate": 2.3775216138328533e-05, + "loss": 0.2443, + "step": 1985 + }, + { + "epoch": 2.86, + "grad_norm": 4.2159013748168945, + "learning_rate": 2.3695164905539547e-05, + "loss": 0.2373, + "step": 1990 + }, + { + "epoch": 2.87, + "grad_norm": 3.787076711654663, + "learning_rate": 2.361511367275056e-05, + "loss": 0.2179, + "step": 1995 + }, + { + "epoch": 2.88, + "grad_norm": 3.971762180328369, + "learning_rate": 2.3535062439961578e-05, + "loss": 0.2356, + "step": 2000 + }, + { + "epoch": 2.89, + "grad_norm": 5.022749900817871, + "learning_rate": 2.3455011207172592e-05, + "loss": 0.2167, + "step": 2005 + }, + { + "epoch": 2.89, + "grad_norm": 4.616547107696533, + "learning_rate": 2.3374959974383606e-05, + "loss": 0.2266, + "step": 2010 + }, + { + "epoch": 2.9, + "grad_norm": 4.522019386291504, + "learning_rate": 2.329490874159462e-05, + "loss": 0.247, + "step": 2015 + }, + { + "epoch": 2.91, + "grad_norm": 5.141051292419434, + "learning_rate": 2.3214857508805637e-05, + "loss": 0.2028, + "step": 2020 + }, + { + "epoch": 2.91, + "grad_norm": 3.577793836593628, + "learning_rate": 2.313480627601665e-05, + "loss": 0.1924, + "step": 2025 + }, + { + "epoch": 2.92, + "grad_norm": 5.1364665031433105, + "learning_rate": 2.3054755043227668e-05, + "loss": 0.226, + "step": 2030 + }, + { + "epoch": 2.93, + "grad_norm": 3.8625662326812744, + "learning_rate": 2.297470381043868e-05, + "loss": 0.2329, + "step": 2035 + }, + { + "epoch": 2.94, + "grad_norm": 4.119937419891357, + "learning_rate": 2.28946525776497e-05, + "loss": 0.2037, + "step": 2040 + }, + { + "epoch": 2.94, + "grad_norm": 3.1188371181488037, + "learning_rate": 2.2814601344860712e-05, + "loss": 0.231, + "step": 2045 + }, + { + "epoch": 2.95, + "grad_norm": 4.263334274291992, + "learning_rate": 2.2734550112071726e-05, + "loss": 0.219, + "step": 2050 + }, + { + "epoch": 2.96, + "grad_norm": 4.002464771270752, + "learning_rate": 2.2654498879282743e-05, + "loss": 0.1927, + "step": 2055 + }, + { + "epoch": 2.97, + "grad_norm": 3.5694775581359863, + "learning_rate": 2.2574447646493757e-05, + "loss": 0.1803, + "step": 2060 + }, + { + "epoch": 2.97, + "grad_norm": 4.048843860626221, + "learning_rate": 2.249439641370477e-05, + "loss": 0.1837, + "step": 2065 + }, + { + "epoch": 2.98, + "grad_norm": 4.335817337036133, + "learning_rate": 2.2414345180915788e-05, + "loss": 0.227, + "step": 2070 + }, + { + "epoch": 2.99, + "grad_norm": 4.292420864105225, + "learning_rate": 2.2334293948126802e-05, + "loss": 0.2535, + "step": 2075 + }, + { + "epoch": 2.99, + "grad_norm": 3.625598430633545, + "learning_rate": 2.2254242715337816e-05, + "loss": 0.1633, + "step": 2080 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9487854251012146, + "eval_loss": 0.1372506320476532, + "eval_runtime": 31.8832, + "eval_samples_per_second": 309.881, + "eval_steps_per_second": 9.692, + "step": 2084 + }, + { + "epoch": 3.0, + "grad_norm": 4.9075140953063965, + "learning_rate": 2.217419148254883e-05, + "loss": 0.19, + "step": 2085 + }, + { + "epoch": 3.01, + "grad_norm": 4.76453971862793, + "learning_rate": 2.2094140249759847e-05, + "loss": 0.214, + "step": 2090 + }, + { + "epoch": 3.02, + "grad_norm": 3.710191011428833, + "learning_rate": 2.2014089016970864e-05, + "loss": 0.2197, + "step": 2095 + }, + { + "epoch": 3.02, + "grad_norm": 3.287574529647827, + "learning_rate": 2.1934037784181878e-05, + "loss": 0.1939, + "step": 2100 + }, + { + "epoch": 3.03, + "grad_norm": 3.7616758346557617, + "learning_rate": 2.1853986551392895e-05, + "loss": 0.209, + "step": 2105 + }, + { + "epoch": 3.04, + "grad_norm": 3.6096699237823486, + "learning_rate": 2.177393531860391e-05, + "loss": 0.2195, + "step": 2110 + }, + { + "epoch": 3.04, + "grad_norm": 4.259820461273193, + "learning_rate": 2.1693884085814922e-05, + "loss": 0.1813, + "step": 2115 + }, + { + "epoch": 3.05, + "grad_norm": 4.710832118988037, + "learning_rate": 2.1613832853025936e-05, + "loss": 0.2054, + "step": 2120 + }, + { + "epoch": 3.06, + "grad_norm": 2.757356882095337, + "learning_rate": 2.1533781620236953e-05, + "loss": 0.2276, + "step": 2125 + }, + { + "epoch": 3.07, + "grad_norm": 4.743321418762207, + "learning_rate": 2.1453730387447967e-05, + "loss": 0.1603, + "step": 2130 + }, + { + "epoch": 3.07, + "grad_norm": 3.536240339279175, + "learning_rate": 2.137367915465898e-05, + "loss": 0.1888, + "step": 2135 + }, + { + "epoch": 3.08, + "grad_norm": 3.635094404220581, + "learning_rate": 2.1293627921869998e-05, + "loss": 0.1841, + "step": 2140 + }, + { + "epoch": 3.09, + "grad_norm": 4.491457939147949, + "learning_rate": 2.1213576689081012e-05, + "loss": 0.2013, + "step": 2145 + }, + { + "epoch": 3.09, + "grad_norm": 5.20548152923584, + "learning_rate": 2.113352545629203e-05, + "loss": 0.1618, + "step": 2150 + }, + { + "epoch": 3.1, + "grad_norm": 3.6702117919921875, + "learning_rate": 2.1053474223503043e-05, + "loss": 0.2106, + "step": 2155 + }, + { + "epoch": 3.11, + "grad_norm": 3.9622325897216797, + "learning_rate": 2.097342299071406e-05, + "loss": 0.2488, + "step": 2160 + }, + { + "epoch": 3.12, + "grad_norm": 7.823854923248291, + "learning_rate": 2.0893371757925074e-05, + "loss": 0.2107, + "step": 2165 + }, + { + "epoch": 3.12, + "grad_norm": 5.4744791984558105, + "learning_rate": 2.0813320525136088e-05, + "loss": 0.1888, + "step": 2170 + }, + { + "epoch": 3.13, + "grad_norm": 3.024887800216675, + "learning_rate": 2.0733269292347105e-05, + "loss": 0.2051, + "step": 2175 + }, + { + "epoch": 3.14, + "grad_norm": 3.444693088531494, + "learning_rate": 2.065321805955812e-05, + "loss": 0.2404, + "step": 2180 + }, + { + "epoch": 3.15, + "grad_norm": 4.3029656410217285, + "learning_rate": 2.0573166826769133e-05, + "loss": 0.215, + "step": 2185 + }, + { + "epoch": 3.15, + "grad_norm": 4.038111209869385, + "learning_rate": 2.0493115593980146e-05, + "loss": 0.2003, + "step": 2190 + }, + { + "epoch": 3.16, + "grad_norm": 4.064023494720459, + "learning_rate": 2.0413064361191164e-05, + "loss": 0.1961, + "step": 2195 + }, + { + "epoch": 3.17, + "grad_norm": 5.2245707511901855, + "learning_rate": 2.0333013128402177e-05, + "loss": 0.2172, + "step": 2200 + }, + { + "epoch": 3.17, + "grad_norm": 4.670438289642334, + "learning_rate": 2.025296189561319e-05, + "loss": 0.1992, + "step": 2205 + }, + { + "epoch": 3.18, + "grad_norm": 4.39680290222168, + "learning_rate": 2.017291066282421e-05, + "loss": 0.2174, + "step": 2210 + }, + { + "epoch": 3.19, + "grad_norm": 6.914219379425049, + "learning_rate": 2.0092859430035225e-05, + "loss": 0.1968, + "step": 2215 + }, + { + "epoch": 3.2, + "grad_norm": 3.2190115451812744, + "learning_rate": 2.001280819724624e-05, + "loss": 0.1939, + "step": 2220 + }, + { + "epoch": 3.2, + "grad_norm": 3.638925075531006, + "learning_rate": 1.9932756964457253e-05, + "loss": 0.2431, + "step": 2225 + }, + { + "epoch": 3.21, + "grad_norm": 5.030416965484619, + "learning_rate": 1.985270573166827e-05, + "loss": 0.2094, + "step": 2230 + }, + { + "epoch": 3.22, + "grad_norm": 5.105839729309082, + "learning_rate": 1.9772654498879284e-05, + "loss": 0.2165, + "step": 2235 + }, + { + "epoch": 3.22, + "grad_norm": 4.913294315338135, + "learning_rate": 1.9692603266090298e-05, + "loss": 0.2171, + "step": 2240 + }, + { + "epoch": 3.23, + "grad_norm": 4.230659008026123, + "learning_rate": 1.961255203330131e-05, + "loss": 0.2088, + "step": 2245 + }, + { + "epoch": 3.24, + "grad_norm": 4.271526336669922, + "learning_rate": 1.953250080051233e-05, + "loss": 0.215, + "step": 2250 + }, + { + "epoch": 3.25, + "grad_norm": 6.460733413696289, + "learning_rate": 1.9452449567723343e-05, + "loss": 0.2241, + "step": 2255 + }, + { + "epoch": 3.25, + "grad_norm": 2.8896567821502686, + "learning_rate": 1.9372398334934356e-05, + "loss": 0.1587, + "step": 2260 + }, + { + "epoch": 3.26, + "grad_norm": 3.2169876098632812, + "learning_rate": 1.9292347102145374e-05, + "loss": 0.1587, + "step": 2265 + }, + { + "epoch": 3.27, + "grad_norm": 4.299535274505615, + "learning_rate": 1.921229586935639e-05, + "loss": 0.1819, + "step": 2270 + }, + { + "epoch": 3.27, + "grad_norm": 3.9862189292907715, + "learning_rate": 1.9132244636567405e-05, + "loss": 0.2099, + "step": 2275 + }, + { + "epoch": 3.28, + "grad_norm": 5.323502540588379, + "learning_rate": 1.905219340377842e-05, + "loss": 0.222, + "step": 2280 + }, + { + "epoch": 3.29, + "grad_norm": 3.4311234951019287, + "learning_rate": 1.8972142170989436e-05, + "loss": 0.1956, + "step": 2285 + }, + { + "epoch": 3.3, + "grad_norm": 4.878343105316162, + "learning_rate": 1.889209093820045e-05, + "loss": 0.1814, + "step": 2290 + }, + { + "epoch": 3.3, + "grad_norm": 2.903064489364624, + "learning_rate": 1.8812039705411463e-05, + "loss": 0.2397, + "step": 2295 + }, + { + "epoch": 3.31, + "grad_norm": 5.286783695220947, + "learning_rate": 1.873198847262248e-05, + "loss": 0.2362, + "step": 2300 + }, + { + "epoch": 3.32, + "grad_norm": 4.201813220977783, + "learning_rate": 1.8651937239833494e-05, + "loss": 0.2235, + "step": 2305 + }, + { + "epoch": 3.32, + "grad_norm": 3.4148082733154297, + "learning_rate": 1.8571886007044508e-05, + "loss": 0.1922, + "step": 2310 + }, + { + "epoch": 3.33, + "grad_norm": 4.562300682067871, + "learning_rate": 1.8491834774255522e-05, + "loss": 0.2013, + "step": 2315 + }, + { + "epoch": 3.34, + "grad_norm": 6.004905700683594, + "learning_rate": 1.841178354146654e-05, + "loss": 0.2215, + "step": 2320 + }, + { + "epoch": 3.35, + "grad_norm": 4.642991065979004, + "learning_rate": 1.8331732308677556e-05, + "loss": 0.2085, + "step": 2325 + }, + { + "epoch": 3.35, + "grad_norm": 2.796497344970703, + "learning_rate": 1.825168107588857e-05, + "loss": 0.2126, + "step": 2330 + }, + { + "epoch": 3.36, + "grad_norm": 6.009349346160889, + "learning_rate": 1.8171629843099587e-05, + "loss": 0.1906, + "step": 2335 + }, + { + "epoch": 3.37, + "grad_norm": 4.415472507476807, + "learning_rate": 1.80915786103106e-05, + "loss": 0.2013, + "step": 2340 + }, + { + "epoch": 3.38, + "grad_norm": 2.890207529067993, + "learning_rate": 1.8011527377521615e-05, + "loss": 0.2017, + "step": 2345 + }, + { + "epoch": 3.38, + "grad_norm": 3.2712149620056152, + "learning_rate": 1.793147614473263e-05, + "loss": 0.1997, + "step": 2350 + }, + { + "epoch": 3.39, + "grad_norm": 4.87721061706543, + "learning_rate": 1.7851424911943646e-05, + "loss": 0.1944, + "step": 2355 + }, + { + "epoch": 3.4, + "grad_norm": 5.590481281280518, + "learning_rate": 1.777137367915466e-05, + "loss": 0.1749, + "step": 2360 + }, + { + "epoch": 3.4, + "grad_norm": 3.1477975845336914, + "learning_rate": 1.7691322446365673e-05, + "loss": 0.1734, + "step": 2365 + }, + { + "epoch": 3.41, + "grad_norm": 4.50333309173584, + "learning_rate": 1.761127121357669e-05, + "loss": 0.244, + "step": 2370 + }, + { + "epoch": 3.42, + "grad_norm": 4.189910411834717, + "learning_rate": 1.7531219980787704e-05, + "loss": 0.2015, + "step": 2375 + }, + { + "epoch": 3.43, + "grad_norm": 4.48671817779541, + "learning_rate": 1.7451168747998718e-05, + "loss": 0.1994, + "step": 2380 + }, + { + "epoch": 3.43, + "grad_norm": 3.9251739978790283, + "learning_rate": 1.7371117515209735e-05, + "loss": 0.1798, + "step": 2385 + }, + { + "epoch": 3.44, + "grad_norm": 2.792525291442871, + "learning_rate": 1.7291066282420752e-05, + "loss": 0.1628, + "step": 2390 + }, + { + "epoch": 3.45, + "grad_norm": 3.325592041015625, + "learning_rate": 1.7211015049631766e-05, + "loss": 0.2069, + "step": 2395 + }, + { + "epoch": 3.45, + "grad_norm": 3.9942626953125, + "learning_rate": 1.713096381684278e-05, + "loss": 0.1866, + "step": 2400 + }, + { + "epoch": 3.46, + "grad_norm": 5.486047267913818, + "learning_rate": 1.7050912584053797e-05, + "loss": 0.2185, + "step": 2405 + }, + { + "epoch": 3.47, + "grad_norm": 3.5321319103240967, + "learning_rate": 1.697086135126481e-05, + "loss": 0.2068, + "step": 2410 + }, + { + "epoch": 3.48, + "grad_norm": 4.118142127990723, + "learning_rate": 1.6890810118475825e-05, + "loss": 0.2076, + "step": 2415 + }, + { + "epoch": 3.48, + "grad_norm": 4.678371906280518, + "learning_rate": 1.681075888568684e-05, + "loss": 0.1948, + "step": 2420 + }, + { + "epoch": 3.49, + "grad_norm": 5.298951148986816, + "learning_rate": 1.6730707652897856e-05, + "loss": 0.2142, + "step": 2425 + }, + { + "epoch": 3.5, + "grad_norm": 4.5779900550842285, + "learning_rate": 1.665065642010887e-05, + "loss": 0.1994, + "step": 2430 + }, + { + "epoch": 3.5, + "grad_norm": 4.762623310089111, + "learning_rate": 1.6570605187319883e-05, + "loss": 0.2213, + "step": 2435 + }, + { + "epoch": 3.51, + "grad_norm": 4.956728458404541, + "learning_rate": 1.64905539545309e-05, + "loss": 0.1818, + "step": 2440 + }, + { + "epoch": 3.52, + "grad_norm": 3.7195310592651367, + "learning_rate": 1.6410502721741918e-05, + "loss": 0.2171, + "step": 2445 + }, + { + "epoch": 3.53, + "grad_norm": 3.115422010421753, + "learning_rate": 1.633045148895293e-05, + "loss": 0.1873, + "step": 2450 + }, + { + "epoch": 3.53, + "grad_norm": 2.4611568450927734, + "learning_rate": 1.6250400256163945e-05, + "loss": 0.1999, + "step": 2455 + }, + { + "epoch": 3.54, + "grad_norm": 7.129974842071533, + "learning_rate": 1.6170349023374962e-05, + "loss": 0.2039, + "step": 2460 + }, + { + "epoch": 3.55, + "grad_norm": 3.4364309310913086, + "learning_rate": 1.6090297790585976e-05, + "loss": 0.2019, + "step": 2465 + }, + { + "epoch": 3.56, + "grad_norm": 7.869508266448975, + "learning_rate": 1.601024655779699e-05, + "loss": 0.1678, + "step": 2470 + }, + { + "epoch": 3.56, + "grad_norm": 4.7185378074646, + "learning_rate": 1.5930195325008007e-05, + "loss": 0.1934, + "step": 2475 + }, + { + "epoch": 3.57, + "grad_norm": 7.357175350189209, + "learning_rate": 1.585014409221902e-05, + "loss": 0.1998, + "step": 2480 + }, + { + "epoch": 3.58, + "grad_norm": 3.6080660820007324, + "learning_rate": 1.5770092859430035e-05, + "loss": 0.1949, + "step": 2485 + }, + { + "epoch": 3.58, + "grad_norm": 2.9534220695495605, + "learning_rate": 1.569004162664105e-05, + "loss": 0.1772, + "step": 2490 + }, + { + "epoch": 3.59, + "grad_norm": 4.7188401222229, + "learning_rate": 1.5609990393852066e-05, + "loss": 0.2164, + "step": 2495 + }, + { + "epoch": 3.6, + "grad_norm": 5.8504180908203125, + "learning_rate": 1.552993916106308e-05, + "loss": 0.2283, + "step": 2500 + }, + { + "epoch": 3.61, + "grad_norm": 4.23643684387207, + "learning_rate": 1.5449887928274097e-05, + "loss": 0.2003, + "step": 2505 + }, + { + "epoch": 3.61, + "grad_norm": 2.19675350189209, + "learning_rate": 1.5369836695485114e-05, + "loss": 0.1997, + "step": 2510 + }, + { + "epoch": 3.62, + "grad_norm": 5.1381330490112305, + "learning_rate": 1.5289785462696128e-05, + "loss": 0.195, + "step": 2515 + }, + { + "epoch": 3.63, + "grad_norm": 3.739199161529541, + "learning_rate": 1.5209734229907142e-05, + "loss": 0.1596, + "step": 2520 + }, + { + "epoch": 3.63, + "grad_norm": 4.581226348876953, + "learning_rate": 1.5129682997118155e-05, + "loss": 0.2086, + "step": 2525 + }, + { + "epoch": 3.64, + "grad_norm": 5.416107177734375, + "learning_rate": 1.5049631764329173e-05, + "loss": 0.2517, + "step": 2530 + }, + { + "epoch": 3.65, + "grad_norm": 6.070262908935547, + "learning_rate": 1.4969580531540186e-05, + "loss": 0.1801, + "step": 2535 + }, + { + "epoch": 3.66, + "grad_norm": 4.063976764678955, + "learning_rate": 1.48895292987512e-05, + "loss": 0.2302, + "step": 2540 + }, + { + "epoch": 3.66, + "grad_norm": 3.717087745666504, + "learning_rate": 1.4809478065962217e-05, + "loss": 0.2185, + "step": 2545 + }, + { + "epoch": 3.67, + "grad_norm": 3.2319772243499756, + "learning_rate": 1.4729426833173233e-05, + "loss": 0.2609, + "step": 2550 + }, + { + "epoch": 3.68, + "grad_norm": 3.7224340438842773, + "learning_rate": 1.4649375600384247e-05, + "loss": 0.1906, + "step": 2555 + }, + { + "epoch": 3.68, + "grad_norm": 6.972284317016602, + "learning_rate": 1.456932436759526e-05, + "loss": 0.2232, + "step": 2560 + }, + { + "epoch": 3.69, + "grad_norm": 3.514923095703125, + "learning_rate": 1.4489273134806278e-05, + "loss": 0.2081, + "step": 2565 + }, + { + "epoch": 3.7, + "grad_norm": 5.140145301818848, + "learning_rate": 1.4409221902017291e-05, + "loss": 0.2099, + "step": 2570 + }, + { + "epoch": 3.71, + "grad_norm": 2.977041482925415, + "learning_rate": 1.4329170669228307e-05, + "loss": 0.1689, + "step": 2575 + }, + { + "epoch": 3.71, + "grad_norm": 2.9438095092773438, + "learning_rate": 1.4249119436439322e-05, + "loss": 0.1788, + "step": 2580 + }, + { + "epoch": 3.72, + "grad_norm": 3.311598777770996, + "learning_rate": 1.4169068203650338e-05, + "loss": 0.1787, + "step": 2585 + }, + { + "epoch": 3.73, + "grad_norm": 4.066298961639404, + "learning_rate": 1.4089016970861352e-05, + "loss": 0.2049, + "step": 2590 + }, + { + "epoch": 3.74, + "grad_norm": 3.8641276359558105, + "learning_rate": 1.4008965738072365e-05, + "loss": 0.2064, + "step": 2595 + }, + { + "epoch": 3.74, + "grad_norm": 4.785098075866699, + "learning_rate": 1.3928914505283383e-05, + "loss": 0.213, + "step": 2600 + }, + { + "epoch": 3.75, + "grad_norm": 3.3832712173461914, + "learning_rate": 1.3848863272494398e-05, + "loss": 0.203, + "step": 2605 + }, + { + "epoch": 3.76, + "grad_norm": 3.8471434116363525, + "learning_rate": 1.3768812039705412e-05, + "loss": 0.2192, + "step": 2610 + }, + { + "epoch": 3.76, + "grad_norm": 4.769313335418701, + "learning_rate": 1.3688760806916426e-05, + "loss": 0.2191, + "step": 2615 + }, + { + "epoch": 3.77, + "grad_norm": 3.5882818698883057, + "learning_rate": 1.3608709574127443e-05, + "loss": 0.1952, + "step": 2620 + }, + { + "epoch": 3.78, + "grad_norm": 4.177798271179199, + "learning_rate": 1.3528658341338457e-05, + "loss": 0.2209, + "step": 2625 + }, + { + "epoch": 3.79, + "grad_norm": 5.218222618103027, + "learning_rate": 1.3448607108549472e-05, + "loss": 0.1953, + "step": 2630 + }, + { + "epoch": 3.79, + "grad_norm": 4.669002056121826, + "learning_rate": 1.3368555875760488e-05, + "loss": 0.2017, + "step": 2635 + }, + { + "epoch": 3.8, + "grad_norm": 4.992402076721191, + "learning_rate": 1.3288504642971503e-05, + "loss": 0.2702, + "step": 2640 + }, + { + "epoch": 3.81, + "grad_norm": 3.818152666091919, + "learning_rate": 1.3208453410182517e-05, + "loss": 0.2195, + "step": 2645 + }, + { + "epoch": 3.81, + "grad_norm": 3.825201988220215, + "learning_rate": 1.312840217739353e-05, + "loss": 0.2086, + "step": 2650 + }, + { + "epoch": 3.82, + "grad_norm": 3.2888553142547607, + "learning_rate": 1.3048350944604548e-05, + "loss": 0.1899, + "step": 2655 + }, + { + "epoch": 3.83, + "grad_norm": 4.896663665771484, + "learning_rate": 1.2968299711815562e-05, + "loss": 0.2154, + "step": 2660 + }, + { + "epoch": 3.84, + "grad_norm": 3.9895691871643066, + "learning_rate": 1.2888248479026577e-05, + "loss": 0.2251, + "step": 2665 + }, + { + "epoch": 3.84, + "grad_norm": 3.9652981758117676, + "learning_rate": 1.2808197246237594e-05, + "loss": 0.2116, + "step": 2670 + }, + { + "epoch": 3.85, + "grad_norm": 4.93154764175415, + "learning_rate": 1.2728146013448608e-05, + "loss": 0.2597, + "step": 2675 + }, + { + "epoch": 3.86, + "grad_norm": 4.236401081085205, + "learning_rate": 1.2648094780659622e-05, + "loss": 0.2312, + "step": 2680 + }, + { + "epoch": 3.86, + "grad_norm": 3.95443058013916, + "learning_rate": 1.2568043547870636e-05, + "loss": 0.1696, + "step": 2685 + }, + { + "epoch": 3.87, + "grad_norm": 2.7311601638793945, + "learning_rate": 1.2487992315081653e-05, + "loss": 0.1625, + "step": 2690 + }, + { + "epoch": 3.88, + "grad_norm": 3.6803927421569824, + "learning_rate": 1.2407941082292668e-05, + "loss": 0.2069, + "step": 2695 + }, + { + "epoch": 3.89, + "grad_norm": 3.391956329345703, + "learning_rate": 1.2327889849503684e-05, + "loss": 0.1779, + "step": 2700 + }, + { + "epoch": 3.89, + "grad_norm": 3.478215456008911, + "learning_rate": 1.2247838616714698e-05, + "loss": 0.1874, + "step": 2705 + }, + { + "epoch": 3.9, + "grad_norm": 2.4775846004486084, + "learning_rate": 1.2167787383925713e-05, + "loss": 0.1953, + "step": 2710 + }, + { + "epoch": 3.91, + "grad_norm": 4.715533256530762, + "learning_rate": 1.2087736151136727e-05, + "loss": 0.1863, + "step": 2715 + }, + { + "epoch": 3.92, + "grad_norm": 4.083915710449219, + "learning_rate": 1.2007684918347743e-05, + "loss": 0.1871, + "step": 2720 + }, + { + "epoch": 3.92, + "grad_norm": 2.535428285598755, + "learning_rate": 1.1927633685558758e-05, + "loss": 0.2084, + "step": 2725 + }, + { + "epoch": 3.93, + "grad_norm": 5.987590789794922, + "learning_rate": 1.1847582452769774e-05, + "loss": 0.172, + "step": 2730 + }, + { + "epoch": 3.94, + "grad_norm": 4.185674667358398, + "learning_rate": 1.1767531219980789e-05, + "loss": 0.2106, + "step": 2735 + }, + { + "epoch": 3.94, + "grad_norm": 3.0659992694854736, + "learning_rate": 1.1687479987191803e-05, + "loss": 0.1839, + "step": 2740 + }, + { + "epoch": 3.95, + "grad_norm": 8.405370712280273, + "learning_rate": 1.1607428754402818e-05, + "loss": 0.2449, + "step": 2745 + }, + { + "epoch": 3.96, + "grad_norm": 5.262624740600586, + "learning_rate": 1.1527377521613834e-05, + "loss": 0.1982, + "step": 2750 + }, + { + "epoch": 3.97, + "grad_norm": 3.3970797061920166, + "learning_rate": 1.144732628882485e-05, + "loss": 0.2383, + "step": 2755 + }, + { + "epoch": 3.97, + "grad_norm": 4.604133129119873, + "learning_rate": 1.1367275056035863e-05, + "loss": 0.211, + "step": 2760 + }, + { + "epoch": 3.98, + "grad_norm": 4.767920970916748, + "learning_rate": 1.1287223823246879e-05, + "loss": 0.2111, + "step": 2765 + }, + { + "epoch": 3.99, + "grad_norm": 4.075857162475586, + "learning_rate": 1.1207172590457894e-05, + "loss": 0.2011, + "step": 2770 + }, + { + "epoch": 3.99, + "grad_norm": 3.293419599533081, + "learning_rate": 1.1127121357668908e-05, + "loss": 0.1943, + "step": 2775 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9510121457489878, + "eval_loss": 0.131936714053154, + "eval_runtime": 31.7023, + "eval_samples_per_second": 311.649, + "eval_steps_per_second": 9.747, + "step": 2779 + }, + { + "epoch": 4.0, + "grad_norm": 2.756840229034424, + "learning_rate": 1.1047070124879923e-05, + "loss": 0.2012, + "step": 2780 + }, + { + "epoch": 4.01, + "grad_norm": 4.239038467407227, + "learning_rate": 1.0967018892090939e-05, + "loss": 0.1637, + "step": 2785 + }, + { + "epoch": 4.02, + "grad_norm": 3.6597139835357666, + "learning_rate": 1.0886967659301954e-05, + "loss": 0.1848, + "step": 2790 + }, + { + "epoch": 4.02, + "grad_norm": 3.050875425338745, + "learning_rate": 1.0806916426512968e-05, + "loss": 0.1565, + "step": 2795 + }, + { + "epoch": 4.03, + "grad_norm": 4.3006463050842285, + "learning_rate": 1.0726865193723984e-05, + "loss": 0.2116, + "step": 2800 + }, + { + "epoch": 4.04, + "grad_norm": 4.682863712310791, + "learning_rate": 1.0646813960934999e-05, + "loss": 0.1974, + "step": 2805 + }, + { + "epoch": 4.04, + "grad_norm": 3.8604190349578857, + "learning_rate": 1.0566762728146015e-05, + "loss": 0.1972, + "step": 2810 + }, + { + "epoch": 4.05, + "grad_norm": 4.325167655944824, + "learning_rate": 1.048671149535703e-05, + "loss": 0.1732, + "step": 2815 + }, + { + "epoch": 4.06, + "grad_norm": 6.881094932556152, + "learning_rate": 1.0406660262568044e-05, + "loss": 0.2527, + "step": 2820 + }, + { + "epoch": 4.07, + "grad_norm": 6.374682426452637, + "learning_rate": 1.032660902977906e-05, + "loss": 0.224, + "step": 2825 + }, + { + "epoch": 4.07, + "grad_norm": 3.154886245727539, + "learning_rate": 1.0246557796990073e-05, + "loss": 0.1613, + "step": 2830 + }, + { + "epoch": 4.08, + "grad_norm": 5.165164470672607, + "learning_rate": 1.0166506564201089e-05, + "loss": 0.225, + "step": 2835 + }, + { + "epoch": 4.09, + "grad_norm": 3.388165235519409, + "learning_rate": 1.0086455331412104e-05, + "loss": 0.2189, + "step": 2840 + }, + { + "epoch": 4.09, + "grad_norm": 4.795779705047607, + "learning_rate": 1.000640409862312e-05, + "loss": 0.2027, + "step": 2845 + }, + { + "epoch": 4.1, + "grad_norm": 3.341182231903076, + "learning_rate": 9.926352865834135e-06, + "loss": 0.1931, + "step": 2850 + }, + { + "epoch": 4.11, + "grad_norm": 1.956528902053833, + "learning_rate": 9.846301633045149e-06, + "loss": 0.2208, + "step": 2855 + }, + { + "epoch": 4.12, + "grad_norm": 6.8234076499938965, + "learning_rate": 9.766250400256164e-06, + "loss": 0.2388, + "step": 2860 + }, + { + "epoch": 4.12, + "grad_norm": 2.924370527267456, + "learning_rate": 9.686199167467178e-06, + "loss": 0.2046, + "step": 2865 + }, + { + "epoch": 4.13, + "grad_norm": 5.049492359161377, + "learning_rate": 9.606147934678195e-06, + "loss": 0.1876, + "step": 2870 + }, + { + "epoch": 4.14, + "grad_norm": 4.749929428100586, + "learning_rate": 9.52609670188921e-06, + "loss": 0.1649, + "step": 2875 + }, + { + "epoch": 4.15, + "grad_norm": 3.702878475189209, + "learning_rate": 9.446045469100225e-06, + "loss": 0.2309, + "step": 2880 + }, + { + "epoch": 4.15, + "grad_norm": 6.8818745613098145, + "learning_rate": 9.36599423631124e-06, + "loss": 0.2012, + "step": 2885 + }, + { + "epoch": 4.16, + "grad_norm": 3.418677568435669, + "learning_rate": 9.285943003522254e-06, + "loss": 0.2209, + "step": 2890 + }, + { + "epoch": 4.17, + "grad_norm": 3.8437540531158447, + "learning_rate": 9.20589177073327e-06, + "loss": 0.1668, + "step": 2895 + }, + { + "epoch": 4.17, + "grad_norm": 3.2534446716308594, + "learning_rate": 9.125840537944285e-06, + "loss": 0.2346, + "step": 2900 + }, + { + "epoch": 4.18, + "grad_norm": 4.049452781677246, + "learning_rate": 9.0457893051553e-06, + "loss": 0.1752, + "step": 2905 + }, + { + "epoch": 4.19, + "grad_norm": 4.121111869812012, + "learning_rate": 8.965738072366314e-06, + "loss": 0.2057, + "step": 2910 + }, + { + "epoch": 4.2, + "grad_norm": 5.423705577850342, + "learning_rate": 8.88568683957733e-06, + "loss": 0.1958, + "step": 2915 + }, + { + "epoch": 4.2, + "grad_norm": 3.153987407684326, + "learning_rate": 8.805635606788345e-06, + "loss": 0.1547, + "step": 2920 + }, + { + "epoch": 4.21, + "grad_norm": 3.7586491107940674, + "learning_rate": 8.725584373999359e-06, + "loss": 0.224, + "step": 2925 + }, + { + "epoch": 4.22, + "grad_norm": 4.077225208282471, + "learning_rate": 8.645533141210376e-06, + "loss": 0.2113, + "step": 2930 + }, + { + "epoch": 4.22, + "grad_norm": 6.970191478729248, + "learning_rate": 8.56548190842139e-06, + "loss": 0.2032, + "step": 2935 + }, + { + "epoch": 4.23, + "grad_norm": 4.3456926345825195, + "learning_rate": 8.485430675632405e-06, + "loss": 0.2111, + "step": 2940 + }, + { + "epoch": 4.24, + "grad_norm": 3.5162301063537598, + "learning_rate": 8.40537944284342e-06, + "loss": 0.1873, + "step": 2945 + }, + { + "epoch": 4.25, + "grad_norm": 5.653372764587402, + "learning_rate": 8.325328210054435e-06, + "loss": 0.178, + "step": 2950 + }, + { + "epoch": 4.25, + "grad_norm": 2.084319829940796, + "learning_rate": 8.24527697726545e-06, + "loss": 0.185, + "step": 2955 + }, + { + "epoch": 4.26, + "grad_norm": 3.9863054752349854, + "learning_rate": 8.165225744476466e-06, + "loss": 0.1945, + "step": 2960 + }, + { + "epoch": 4.27, + "grad_norm": 6.000556468963623, + "learning_rate": 8.085174511687481e-06, + "loss": 0.1823, + "step": 2965 + }, + { + "epoch": 4.27, + "grad_norm": 3.515742778778076, + "learning_rate": 8.005123278898495e-06, + "loss": 0.1957, + "step": 2970 + }, + { + "epoch": 4.28, + "grad_norm": 2.8108863830566406, + "learning_rate": 7.92507204610951e-06, + "loss": 0.1838, + "step": 2975 + }, + { + "epoch": 4.29, + "grad_norm": 5.262875556945801, + "learning_rate": 7.845020813320524e-06, + "loss": 0.2389, + "step": 2980 + }, + { + "epoch": 4.3, + "grad_norm": 5.4690752029418945, + "learning_rate": 7.76496958053154e-06, + "loss": 0.1823, + "step": 2985 + }, + { + "epoch": 4.3, + "grad_norm": 2.1274213790893555, + "learning_rate": 7.684918347742557e-06, + "loss": 0.1233, + "step": 2990 + }, + { + "epoch": 4.31, + "grad_norm": 6.855415344238281, + "learning_rate": 7.604867114953571e-06, + "loss": 0.2284, + "step": 2995 + }, + { + "epoch": 4.32, + "grad_norm": 5.152151584625244, + "learning_rate": 7.524815882164586e-06, + "loss": 0.1856, + "step": 3000 + }, + { + "epoch": 4.33, + "grad_norm": 4.211722373962402, + "learning_rate": 7.4447646493756e-06, + "loss": 0.2111, + "step": 3005 + }, + { + "epoch": 4.33, + "grad_norm": 4.821152210235596, + "learning_rate": 7.364713416586616e-06, + "loss": 0.1541, + "step": 3010 + }, + { + "epoch": 4.34, + "grad_norm": 3.2400951385498047, + "learning_rate": 7.28466218379763e-06, + "loss": 0.21, + "step": 3015 + }, + { + "epoch": 4.35, + "grad_norm": 3.82334566116333, + "learning_rate": 7.204610951008646e-06, + "loss": 0.1835, + "step": 3020 + }, + { + "epoch": 4.35, + "grad_norm": 4.301241397857666, + "learning_rate": 7.124559718219661e-06, + "loss": 0.2246, + "step": 3025 + }, + { + "epoch": 4.36, + "grad_norm": 3.4558205604553223, + "learning_rate": 7.044508485430676e-06, + "loss": 0.1766, + "step": 3030 + }, + { + "epoch": 4.37, + "grad_norm": 3.872791290283203, + "learning_rate": 6.964457252641691e-06, + "loss": 0.2126, + "step": 3035 + }, + { + "epoch": 4.38, + "grad_norm": 2.319420099258423, + "learning_rate": 6.884406019852706e-06, + "loss": 0.179, + "step": 3040 + }, + { + "epoch": 4.38, + "grad_norm": 6.737104892730713, + "learning_rate": 6.8043547870637215e-06, + "loss": 0.1882, + "step": 3045 + }, + { + "epoch": 4.39, + "grad_norm": 4.559133052825928, + "learning_rate": 6.724303554274736e-06, + "loss": 0.1808, + "step": 3050 + }, + { + "epoch": 4.4, + "grad_norm": 3.060370922088623, + "learning_rate": 6.644252321485752e-06, + "loss": 0.1923, + "step": 3055 + }, + { + "epoch": 4.4, + "grad_norm": 5.091296672821045, + "learning_rate": 6.564201088696765e-06, + "loss": 0.2012, + "step": 3060 + }, + { + "epoch": 4.41, + "grad_norm": 2.942782163619995, + "learning_rate": 6.484149855907781e-06, + "loss": 0.1731, + "step": 3065 + }, + { + "epoch": 4.42, + "grad_norm": 4.692785263061523, + "learning_rate": 6.404098623118797e-06, + "loss": 0.1765, + "step": 3070 + }, + { + "epoch": 4.43, + "grad_norm": 4.15416145324707, + "learning_rate": 6.324047390329811e-06, + "loss": 0.168, + "step": 3075 + }, + { + "epoch": 4.43, + "grad_norm": 4.836540699005127, + "learning_rate": 6.2439961575408265e-06, + "loss": 0.1884, + "step": 3080 + }, + { + "epoch": 4.44, + "grad_norm": 5.723465442657471, + "learning_rate": 6.163944924751842e-06, + "loss": 0.2006, + "step": 3085 + }, + { + "epoch": 4.45, + "grad_norm": 3.738910675048828, + "learning_rate": 6.083893691962857e-06, + "loss": 0.152, + "step": 3090 + }, + { + "epoch": 4.45, + "grad_norm": 4.6227641105651855, + "learning_rate": 6.003842459173871e-06, + "loss": 0.1885, + "step": 3095 + }, + { + "epoch": 4.46, + "grad_norm": 4.877871036529541, + "learning_rate": 5.923791226384887e-06, + "loss": 0.1635, + "step": 3100 + }, + { + "epoch": 4.47, + "grad_norm": 3.391716480255127, + "learning_rate": 5.843739993595901e-06, + "loss": 0.1917, + "step": 3105 + }, + { + "epoch": 4.48, + "grad_norm": 3.0858306884765625, + "learning_rate": 5.763688760806917e-06, + "loss": 0.1981, + "step": 3110 + }, + { + "epoch": 4.48, + "grad_norm": 3.075488805770874, + "learning_rate": 5.6836375280179315e-06, + "loss": 0.175, + "step": 3115 + }, + { + "epoch": 4.49, + "grad_norm": 4.415194988250732, + "learning_rate": 5.603586295228947e-06, + "loss": 0.2039, + "step": 3120 + }, + { + "epoch": 4.5, + "grad_norm": 4.507144451141357, + "learning_rate": 5.523535062439962e-06, + "loss": 0.1816, + "step": 3125 + }, + { + "epoch": 4.51, + "grad_norm": 4.327670097351074, + "learning_rate": 5.443483829650977e-06, + "loss": 0.2072, + "step": 3130 + }, + { + "epoch": 4.51, + "grad_norm": 3.314438819885254, + "learning_rate": 5.363432596861992e-06, + "loss": 0.1997, + "step": 3135 + }, + { + "epoch": 4.52, + "grad_norm": 3.981945753097534, + "learning_rate": 5.283381364073007e-06, + "loss": 0.1643, + "step": 3140 + }, + { + "epoch": 4.53, + "grad_norm": 3.4533607959747314, + "learning_rate": 5.203330131284022e-06, + "loss": 0.1503, + "step": 3145 + }, + { + "epoch": 4.53, + "grad_norm": 3.6115882396698, + "learning_rate": 5.123278898495037e-06, + "loss": 0.1712, + "step": 3150 + }, + { + "epoch": 4.54, + "grad_norm": 2.636838912963867, + "learning_rate": 5.043227665706052e-06, + "loss": 0.1828, + "step": 3155 + }, + { + "epoch": 4.55, + "grad_norm": 3.045761823654175, + "learning_rate": 4.9631764329170676e-06, + "loss": 0.167, + "step": 3160 + }, + { + "epoch": 4.56, + "grad_norm": 5.738334655761719, + "learning_rate": 4.883125200128082e-06, + "loss": 0.2237, + "step": 3165 + }, + { + "epoch": 4.56, + "grad_norm": 2.163240909576416, + "learning_rate": 4.803073967339098e-06, + "loss": 0.1411, + "step": 3170 + }, + { + "epoch": 4.57, + "grad_norm": 5.213181495666504, + "learning_rate": 4.723022734550112e-06, + "loss": 0.1874, + "step": 3175 + }, + { + "epoch": 4.58, + "grad_norm": 3.869131565093994, + "learning_rate": 4.642971501761127e-06, + "loss": 0.1756, + "step": 3180 + }, + { + "epoch": 4.58, + "grad_norm": 3.244732618331909, + "learning_rate": 4.5629202689721425e-06, + "loss": 0.1829, + "step": 3185 + }, + { + "epoch": 4.59, + "grad_norm": 3.5364272594451904, + "learning_rate": 4.482869036183157e-06, + "loss": 0.1861, + "step": 3190 + }, + { + "epoch": 4.6, + "grad_norm": 2.5283873081207275, + "learning_rate": 4.402817803394173e-06, + "loss": 0.1931, + "step": 3195 + }, + { + "epoch": 4.61, + "grad_norm": 3.36181902885437, + "learning_rate": 4.322766570605188e-06, + "loss": 0.2183, + "step": 3200 + }, + { + "epoch": 4.61, + "grad_norm": 5.513607025146484, + "learning_rate": 4.242715337816203e-06, + "loss": 0.1717, + "step": 3205 + }, + { + "epoch": 4.62, + "grad_norm": 5.976490497589111, + "learning_rate": 4.162664105027217e-06, + "loss": 0.202, + "step": 3210 + }, + { + "epoch": 4.63, + "grad_norm": 3.3449387550354004, + "learning_rate": 4.082612872238233e-06, + "loss": 0.2165, + "step": 3215 + }, + { + "epoch": 4.63, + "grad_norm": 3.3972129821777344, + "learning_rate": 4.0025616394492475e-06, + "loss": 0.1994, + "step": 3220 + }, + { + "epoch": 4.64, + "grad_norm": 4.022273540496826, + "learning_rate": 3.922510406660262e-06, + "loss": 0.168, + "step": 3225 + }, + { + "epoch": 4.65, + "grad_norm": 3.2063329219818115, + "learning_rate": 3.8424591738712785e-06, + "loss": 0.1862, + "step": 3230 + }, + { + "epoch": 4.66, + "grad_norm": 3.1869962215423584, + "learning_rate": 3.762407941082293e-06, + "loss": 0.1583, + "step": 3235 + }, + { + "epoch": 4.66, + "grad_norm": 3.648125171661377, + "learning_rate": 3.682356708293308e-06, + "loss": 0.2026, + "step": 3240 + }, + { + "epoch": 4.67, + "grad_norm": 4.182619571685791, + "learning_rate": 3.602305475504323e-06, + "loss": 0.1711, + "step": 3245 + }, + { + "epoch": 4.68, + "grad_norm": 3.2886900901794434, + "learning_rate": 3.522254242715338e-06, + "loss": 0.1778, + "step": 3250 + }, + { + "epoch": 4.69, + "grad_norm": 3.8204097747802734, + "learning_rate": 3.442203009926353e-06, + "loss": 0.1906, + "step": 3255 + }, + { + "epoch": 4.69, + "grad_norm": 4.073367595672607, + "learning_rate": 3.362151777137368e-06, + "loss": 0.1693, + "step": 3260 + }, + { + "epoch": 4.7, + "grad_norm": 4.779504299163818, + "learning_rate": 3.2821005443483827e-06, + "loss": 0.2031, + "step": 3265 + }, + { + "epoch": 4.71, + "grad_norm": 4.730034828186035, + "learning_rate": 3.2020493115593986e-06, + "loss": 0.1731, + "step": 3270 + }, + { + "epoch": 4.71, + "grad_norm": 4.198641300201416, + "learning_rate": 3.1219980787704133e-06, + "loss": 0.1982, + "step": 3275 + }, + { + "epoch": 4.72, + "grad_norm": 3.796201229095459, + "learning_rate": 3.0419468459814283e-06, + "loss": 0.2502, + "step": 3280 + }, + { + "epoch": 4.73, + "grad_norm": 3.4022860527038574, + "learning_rate": 2.9618956131924434e-06, + "loss": 0.1746, + "step": 3285 + }, + { + "epoch": 4.74, + "grad_norm": 3.493821859359741, + "learning_rate": 2.8818443804034585e-06, + "loss": 0.1862, + "step": 3290 + }, + { + "epoch": 4.74, + "grad_norm": 4.883081436157227, + "learning_rate": 2.8017931476144735e-06, + "loss": 0.1832, + "step": 3295 + }, + { + "epoch": 4.75, + "grad_norm": 4.014003753662109, + "learning_rate": 2.7217419148254886e-06, + "loss": 0.2232, + "step": 3300 + }, + { + "epoch": 4.76, + "grad_norm": 3.3797993659973145, + "learning_rate": 2.6416906820365037e-06, + "loss": 0.1791, + "step": 3305 + }, + { + "epoch": 4.76, + "grad_norm": 2.9076929092407227, + "learning_rate": 2.5616394492475183e-06, + "loss": 0.1557, + "step": 3310 + }, + { + "epoch": 4.77, + "grad_norm": 5.119110584259033, + "learning_rate": 2.4815882164585338e-06, + "loss": 0.1989, + "step": 3315 + }, + { + "epoch": 4.78, + "grad_norm": 3.889577627182007, + "learning_rate": 2.401536983669549e-06, + "loss": 0.1771, + "step": 3320 + }, + { + "epoch": 4.79, + "grad_norm": 2.979879379272461, + "learning_rate": 2.3214857508805635e-06, + "loss": 0.2187, + "step": 3325 + }, + { + "epoch": 4.79, + "grad_norm": 4.31455135345459, + "learning_rate": 2.2414345180915786e-06, + "loss": 0.1818, + "step": 3330 + }, + { + "epoch": 4.8, + "grad_norm": 5.267322540283203, + "learning_rate": 2.161383285302594e-06, + "loss": 0.1564, + "step": 3335 + }, + { + "epoch": 4.81, + "grad_norm": 4.620851516723633, + "learning_rate": 2.0813320525136087e-06, + "loss": 0.2058, + "step": 3340 + }, + { + "epoch": 4.81, + "grad_norm": 3.6133904457092285, + "learning_rate": 2.0012808197246238e-06, + "loss": 0.1678, + "step": 3345 + }, + { + "epoch": 4.82, + "grad_norm": 2.955531358718872, + "learning_rate": 1.9212295869356392e-06, + "loss": 0.1771, + "step": 3350 + }, + { + "epoch": 4.83, + "grad_norm": 5.3159403800964355, + "learning_rate": 1.841178354146654e-06, + "loss": 0.2387, + "step": 3355 + }, + { + "epoch": 4.84, + "grad_norm": 3.5263235569000244, + "learning_rate": 1.761127121357669e-06, + "loss": 0.2061, + "step": 3360 + }, + { + "epoch": 4.84, + "grad_norm": 3.794788122177124, + "learning_rate": 1.681075888568684e-06, + "loss": 0.1975, + "step": 3365 + }, + { + "epoch": 4.85, + "grad_norm": 3.7242631912231445, + "learning_rate": 1.6010246557796993e-06, + "loss": 0.202, + "step": 3370 + }, + { + "epoch": 4.86, + "grad_norm": 3.291221857070923, + "learning_rate": 1.5209734229907142e-06, + "loss": 0.1749, + "step": 3375 + }, + { + "epoch": 4.87, + "grad_norm": 7.191506385803223, + "learning_rate": 1.4409221902017292e-06, + "loss": 0.1787, + "step": 3380 + }, + { + "epoch": 4.87, + "grad_norm": 3.5962772369384766, + "learning_rate": 1.3608709574127443e-06, + "loss": 0.1894, + "step": 3385 + }, + { + "epoch": 4.88, + "grad_norm": 3.013857126235962, + "learning_rate": 1.2808197246237591e-06, + "loss": 0.1439, + "step": 3390 + }, + { + "epoch": 4.89, + "grad_norm": 3.8775179386138916, + "learning_rate": 1.2007684918347744e-06, + "loss": 0.1709, + "step": 3395 + }, + { + "epoch": 4.89, + "grad_norm": 5.876482963562012, + "learning_rate": 1.1207172590457893e-06, + "loss": 0.1823, + "step": 3400 + }, + { + "epoch": 4.9, + "grad_norm": 3.76519513130188, + "learning_rate": 1.0406660262568043e-06, + "loss": 0.1932, + "step": 3405 + }, + { + "epoch": 4.91, + "grad_norm": 3.4437146186828613, + "learning_rate": 9.606147934678196e-07, + "loss": 0.2059, + "step": 3410 + }, + { + "epoch": 4.92, + "grad_norm": 4.459022045135498, + "learning_rate": 8.805635606788345e-07, + "loss": 0.2016, + "step": 3415 + }, + { + "epoch": 4.92, + "grad_norm": 3.656373977661133, + "learning_rate": 8.005123278898497e-07, + "loss": 0.1869, + "step": 3420 + }, + { + "epoch": 4.93, + "grad_norm": 2.2337965965270996, + "learning_rate": 7.204610951008646e-07, + "loss": 0.1501, + "step": 3425 + }, + { + "epoch": 4.94, + "grad_norm": 5.598134994506836, + "learning_rate": 6.404098623118796e-07, + "loss": 0.1659, + "step": 3430 + }, + { + "epoch": 4.94, + "grad_norm": 4.543219089508057, + "learning_rate": 5.603586295228946e-07, + "loss": 0.2164, + "step": 3435 + }, + { + "epoch": 4.95, + "grad_norm": 4.817913055419922, + "learning_rate": 4.803073967339098e-07, + "loss": 0.1332, + "step": 3440 + }, + { + "epoch": 4.96, + "grad_norm": 6.280834674835205, + "learning_rate": 4.002561639449248e-07, + "loss": 0.2054, + "step": 3445 + }, + { + "epoch": 4.97, + "grad_norm": 3.0518364906311035, + "learning_rate": 3.202049311559398e-07, + "loss": 0.1904, + "step": 3450 + }, + { + "epoch": 4.97, + "grad_norm": 3.695298910140991, + "learning_rate": 2.401536983669549e-07, + "loss": 0.1784, + "step": 3455 + }, + { + "epoch": 4.98, + "grad_norm": 6.226070880889893, + "learning_rate": 1.601024655779699e-07, + "loss": 0.1871, + "step": 3460 + }, + { + "epoch": 4.99, + "grad_norm": 4.446568489074707, + "learning_rate": 8.005123278898495e-08, + "loss": 0.2494, + "step": 3465 + }, + { + "epoch": 4.99, + "grad_norm": 5.050913333892822, + "learning_rate": 0.0, + "loss": 0.2138, + "step": 3470 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9518218623481781, + "eval_loss": 0.1259032040834427, + "eval_runtime": 31.3409, + "eval_samples_per_second": 315.243, + "eval_steps_per_second": 9.859, + "step": 3470 + }, + { + "epoch": 4.99, + "step": 3470, + "total_flos": 1.1039888050539651e+19, + "train_loss": 0.287632371283402, + "train_runtime": 2790.3597, + "train_samples_per_second": 159.318, + "train_steps_per_second": 1.244 + } + ], + "logging_steps": 5, + "max_steps": 3470, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "total_flos": 1.1039888050539651e+19, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +}