detr-resnet-50-finetuned-boat-dataset 30 epoch
266e328 verified
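The JSON below is the Trainer state saved with this checkpoint; its `log_history` records the training loss, gradient norm, and learning rate every 50 optimizer steps. A minimal sketch for inspecting it, assuming the file is saved locally as `trainer_state.json` (the filename and the plotting choices are illustrative, not part of the checkpoint):

```python
# Minimal sketch: load the trainer_state.json shown below and summarize /
# plot the logged training loss. Assumes the standard Hugging Face Trainer
# log layout (each periodic entry carries "step", "loss", "learning_rate").
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the periodic training logs (entries that carry a "loss" value).
train_logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in train_logs]
losses = [entry["loss"] for entry in train_logs]
lrs = [entry["learning_rate"] for entry in train_logs]

print(f"logged points: {len(train_logs)}")
print(f"first loss: {losses[0]:.4f} at step {steps[0]}")
print(f"last loss:  {losses[-1]:.4f} at step {steps[-1]}")

# Loss curve over optimizer steps.
fig, ax1 = plt.subplots(figsize=(8, 4))
ax1.plot(steps, losses, label="training loss")
ax1.set_xlabel("step")
ax1.set_ylabel("loss")

# Learning rate on a secondary axis to show the linear decay schedule.
ax2 = ax1.twinx()
ax2.plot(steps, lrs, color="tab:orange", alpha=0.5, label="learning rate")
ax2.set_ylabel("learning rate")

fig.tight_layout()
fig.savefig("loss_curve.png")
```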
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.999740596627756,
"eval_steps": 500,
"global_step": 38548,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00648508430609598,
"grad_norm": 75.29356384277344,
"learning_rate": 9.997838305231302e-06,
"loss": 3.9392,
"step": 50
},
{
"epoch": 0.01297016861219196,
"grad_norm": 76.29737091064453,
"learning_rate": 9.995676610462604e-06,
"loss": 3.2764,
"step": 100
},
{
"epoch": 0.019455252918287938,
"grad_norm": 41.812530517578125,
"learning_rate": 9.993514915693906e-06,
"loss": 2.7916,
"step": 150
},
{
"epoch": 0.02594033722438392,
"grad_norm": 73.83484649658203,
"learning_rate": 9.991353220925206e-06,
"loss": 2.5989,
"step": 200
},
{
"epoch": 0.0324254215304799,
"grad_norm": 117.84382629394531,
"learning_rate": 9.989191526156507e-06,
"loss": 2.5196,
"step": 250
},
{
"epoch": 0.038910505836575876,
"grad_norm": 66.67141723632812,
"learning_rate": 9.987029831387809e-06,
"loss": 2.4034,
"step": 300
},
{
"epoch": 0.04539559014267185,
"grad_norm": 87.57711791992188,
"learning_rate": 9.98486813661911e-06,
"loss": 2.4226,
"step": 350
},
{
"epoch": 0.05188067444876784,
"grad_norm": 40.56406784057617,
"learning_rate": 9.982706441850412e-06,
"loss": 2.3475,
"step": 400
},
{
"epoch": 0.058365758754863814,
"grad_norm": 168.55384826660156,
"learning_rate": 9.980544747081713e-06,
"loss": 2.306,
"step": 450
},
{
"epoch": 0.0648508430609598,
"grad_norm": 72.2697982788086,
"learning_rate": 9.978383052313015e-06,
"loss": 2.3191,
"step": 500
},
{
"epoch": 0.07133592736705577,
"grad_norm": 140.37997436523438,
"learning_rate": 9.976221357544315e-06,
"loss": 2.2995,
"step": 550
},
{
"epoch": 0.07782101167315175,
"grad_norm": 48.368682861328125,
"learning_rate": 9.974059662775617e-06,
"loss": 2.2649,
"step": 600
},
{
"epoch": 0.08430609597924774,
"grad_norm": 49.51039123535156,
"learning_rate": 9.971897968006918e-06,
"loss": 2.273,
"step": 650
},
{
"epoch": 0.0907911802853437,
"grad_norm": 95.86265563964844,
"learning_rate": 9.96973627323822e-06,
"loss": 2.2113,
"step": 700
},
{
"epoch": 0.09727626459143969,
"grad_norm": 59.293392181396484,
"learning_rate": 9.967574578469521e-06,
"loss": 2.2497,
"step": 750
},
{
"epoch": 0.10376134889753567,
"grad_norm": 96.03455352783203,
"learning_rate": 9.965412883700823e-06,
"loss": 2.1745,
"step": 800
},
{
"epoch": 0.11024643320363164,
"grad_norm": 58.87235641479492,
"learning_rate": 9.963251188932125e-06,
"loss": 2.2074,
"step": 850
},
{
"epoch": 0.11673151750972763,
"grad_norm": 60.09347152709961,
"learning_rate": 9.961089494163424e-06,
"loss": 2.2143,
"step": 900
},
{
"epoch": 0.12321660181582361,
"grad_norm": 33.75955581665039,
"learning_rate": 9.958927799394726e-06,
"loss": 2.1443,
"step": 950
},
{
"epoch": 0.1297016861219196,
"grad_norm": 67.69246673583984,
"learning_rate": 9.956766104626028e-06,
"loss": 2.1568,
"step": 1000
},
{
"epoch": 0.13618677042801555,
"grad_norm": 61.85429763793945,
"learning_rate": 9.95460440985733e-06,
"loss": 2.1036,
"step": 1050
},
{
"epoch": 0.14267185473411154,
"grad_norm": 47.63957595825195,
"learning_rate": 9.95244271508863e-06,
"loss": 2.0988,
"step": 1100
},
{
"epoch": 0.14915693904020752,
"grad_norm": 72.68657684326172,
"learning_rate": 9.950281020319932e-06,
"loss": 2.0674,
"step": 1150
},
{
"epoch": 0.1556420233463035,
"grad_norm": 137.7786865234375,
"learning_rate": 9.948119325551234e-06,
"loss": 2.0437,
"step": 1200
},
{
"epoch": 0.1621271076523995,
"grad_norm": 28.063282012939453,
"learning_rate": 9.945957630782534e-06,
"loss": 2.0778,
"step": 1250
},
{
"epoch": 0.16861219195849547,
"grad_norm": 183.83932495117188,
"learning_rate": 9.943795936013836e-06,
"loss": 2.0364,
"step": 1300
},
{
"epoch": 0.17509727626459143,
"grad_norm": 69.06026458740234,
"learning_rate": 9.941634241245137e-06,
"loss": 2.0518,
"step": 1350
},
{
"epoch": 0.1815823605706874,
"grad_norm": 46.897403717041016,
"learning_rate": 9.939472546476439e-06,
"loss": 2.0278,
"step": 1400
},
{
"epoch": 0.1880674448767834,
"grad_norm": 58.688289642333984,
"learning_rate": 9.93731085170774e-06,
"loss": 2.0535,
"step": 1450
},
{
"epoch": 0.19455252918287938,
"grad_norm": 45.544673919677734,
"learning_rate": 9.935149156939042e-06,
"loss": 2.0326,
"step": 1500
},
{
"epoch": 0.20103761348897536,
"grad_norm": 52.032257080078125,
"learning_rate": 9.932987462170342e-06,
"loss": 2.0506,
"step": 1550
},
{
"epoch": 0.20752269779507135,
"grad_norm": 85.55571746826172,
"learning_rate": 9.930825767401643e-06,
"loss": 2.0022,
"step": 1600
},
{
"epoch": 0.2140077821011673,
"grad_norm": 82.76732635498047,
"learning_rate": 9.928664072632945e-06,
"loss": 2.0245,
"step": 1650
},
{
"epoch": 0.2204928664072633,
"grad_norm": 132.71685791015625,
"learning_rate": 9.926502377864247e-06,
"loss": 2.034,
"step": 1700
},
{
"epoch": 0.22697795071335927,
"grad_norm": 37.891571044921875,
"learning_rate": 9.924340683095548e-06,
"loss": 2.0265,
"step": 1750
},
{
"epoch": 0.23346303501945526,
"grad_norm": 74.43115234375,
"learning_rate": 9.92217898832685e-06,
"loss": 1.9993,
"step": 1800
},
{
"epoch": 0.23994811932555124,
"grad_norm": 43.991119384765625,
"learning_rate": 9.920017293558151e-06,
"loss": 1.95,
"step": 1850
},
{
"epoch": 0.24643320363164722,
"grad_norm": 71.00599670410156,
"learning_rate": 9.917855598789451e-06,
"loss": 1.9378,
"step": 1900
},
{
"epoch": 0.2529182879377432,
"grad_norm": 99.50753784179688,
"learning_rate": 9.915693904020753e-06,
"loss": 1.9923,
"step": 1950
},
{
"epoch": 0.2594033722438392,
"grad_norm": 139.31951904296875,
"learning_rate": 9.913532209252054e-06,
"loss": 1.9471,
"step": 2000
},
{
"epoch": 0.26588845654993515,
"grad_norm": 58.99448776245117,
"learning_rate": 9.911370514483356e-06,
"loss": 1.8731,
"step": 2050
},
{
"epoch": 0.2723735408560311,
"grad_norm": 72.88382720947266,
"learning_rate": 9.909208819714658e-06,
"loss": 1.9395,
"step": 2100
},
{
"epoch": 0.2788586251621271,
"grad_norm": 91.64574432373047,
"learning_rate": 9.90704712494596e-06,
"loss": 1.835,
"step": 2150
},
{
"epoch": 0.2853437094682231,
"grad_norm": 69.67208862304688,
"learning_rate": 9.90488543017726e-06,
"loss": 1.8692,
"step": 2200
},
{
"epoch": 0.2918287937743191,
"grad_norm": 81.73461151123047,
"learning_rate": 9.90272373540856e-06,
"loss": 1.8508,
"step": 2250
},
{
"epoch": 0.29831387808041504,
"grad_norm": 89.83018493652344,
"learning_rate": 9.900562040639862e-06,
"loss": 1.8786,
"step": 2300
},
{
"epoch": 0.30479896238651105,
"grad_norm": 63.75005340576172,
"learning_rate": 9.898400345871164e-06,
"loss": 1.8883,
"step": 2350
},
{
"epoch": 0.311284046692607,
"grad_norm": 52.06178283691406,
"learning_rate": 9.896238651102465e-06,
"loss": 1.8696,
"step": 2400
},
{
"epoch": 0.31776913099870296,
"grad_norm": 164.74493408203125,
"learning_rate": 9.894076956333767e-06,
"loss": 1.8852,
"step": 2450
},
{
"epoch": 0.324254215304799,
"grad_norm": 132.9710235595703,
"learning_rate": 9.891915261565069e-06,
"loss": 1.8436,
"step": 2500
},
{
"epoch": 0.33073929961089493,
"grad_norm": 26.191741943359375,
"learning_rate": 9.88975356679637e-06,
"loss": 1.8739,
"step": 2550
},
{
"epoch": 0.33722438391699094,
"grad_norm": 39.41121292114258,
"learning_rate": 9.88759187202767e-06,
"loss": 1.8541,
"step": 2600
},
{
"epoch": 0.3437094682230869,
"grad_norm": 162.25184631347656,
"learning_rate": 9.885430177258972e-06,
"loss": 1.838,
"step": 2650
},
{
"epoch": 0.35019455252918286,
"grad_norm": 45.588375091552734,
"learning_rate": 9.883268482490273e-06,
"loss": 1.8151,
"step": 2700
},
{
"epoch": 0.35667963683527887,
"grad_norm": 58.42121887207031,
"learning_rate": 9.881106787721575e-06,
"loss": 1.8489,
"step": 2750
},
{
"epoch": 0.3631647211413748,
"grad_norm": 58.45185470581055,
"learning_rate": 9.878945092952877e-06,
"loss": 1.834,
"step": 2800
},
{
"epoch": 0.36964980544747084,
"grad_norm": 43.55636215209961,
"learning_rate": 9.876783398184178e-06,
"loss": 1.8172,
"step": 2850
},
{
"epoch": 0.3761348897535668,
"grad_norm": 111.3084487915039,
"learning_rate": 9.87462170341548e-06,
"loss": 1.7982,
"step": 2900
},
{
"epoch": 0.38261997405966275,
"grad_norm": 48.10725402832031,
"learning_rate": 9.87246000864678e-06,
"loss": 1.815,
"step": 2950
},
{
"epoch": 0.38910505836575876,
"grad_norm": 204.3961639404297,
"learning_rate": 9.870298313878081e-06,
"loss": 1.7829,
"step": 3000
},
{
"epoch": 0.3955901426718547,
"grad_norm": 57.22758483886719,
"learning_rate": 9.868136619109383e-06,
"loss": 1.8291,
"step": 3050
},
{
"epoch": 0.40207522697795073,
"grad_norm": 129.09658813476562,
"learning_rate": 9.865974924340684e-06,
"loss": 1.8161,
"step": 3100
},
{
"epoch": 0.4085603112840467,
"grad_norm": 36.433780670166016,
"learning_rate": 9.863813229571986e-06,
"loss": 1.7766,
"step": 3150
},
{
"epoch": 0.4150453955901427,
"grad_norm": 61.64483642578125,
"learning_rate": 9.861651534803288e-06,
"loss": 1.8117,
"step": 3200
},
{
"epoch": 0.42153047989623865,
"grad_norm": 107.76203918457031,
"learning_rate": 9.85948984003459e-06,
"loss": 1.7823,
"step": 3250
},
{
"epoch": 0.4280155642023346,
"grad_norm": 118.72686004638672,
"learning_rate": 9.857328145265889e-06,
"loss": 1.7688,
"step": 3300
},
{
"epoch": 0.4345006485084306,
"grad_norm": 166.0326385498047,
"learning_rate": 9.85516645049719e-06,
"loss": 1.759,
"step": 3350
},
{
"epoch": 0.4409857328145266,
"grad_norm": 81.56283569335938,
"learning_rate": 9.853004755728492e-06,
"loss": 1.7628,
"step": 3400
},
{
"epoch": 0.4474708171206226,
"grad_norm": 80.8810043334961,
"learning_rate": 9.850843060959794e-06,
"loss": 1.7637,
"step": 3450
},
{
"epoch": 0.45395590142671854,
"grad_norm": 61.64249801635742,
"learning_rate": 9.848681366191095e-06,
"loss": 1.779,
"step": 3500
},
{
"epoch": 0.4604409857328145,
"grad_norm": 88.62084197998047,
"learning_rate": 9.846519671422397e-06,
"loss": 1.7477,
"step": 3550
},
{
"epoch": 0.4669260700389105,
"grad_norm": 52.68846893310547,
"learning_rate": 9.844357976653699e-06,
"loss": 1.7579,
"step": 3600
},
{
"epoch": 0.47341115434500647,
"grad_norm": 100.87641143798828,
"learning_rate": 9.842196281884999e-06,
"loss": 1.7269,
"step": 3650
},
{
"epoch": 0.4798962386511025,
"grad_norm": 110.61991882324219,
"learning_rate": 9.8400345871163e-06,
"loss": 1.718,
"step": 3700
},
{
"epoch": 0.48638132295719844,
"grad_norm": 191.92201232910156,
"learning_rate": 9.8378728923476e-06,
"loss": 1.7211,
"step": 3750
},
{
"epoch": 0.49286640726329445,
"grad_norm": 92.11019897460938,
"learning_rate": 9.835711197578902e-06,
"loss": 1.6597,
"step": 3800
},
{
"epoch": 0.4993514915693904,
"grad_norm": 113.60994720458984,
"learning_rate": 9.833549502810203e-06,
"loss": 1.702,
"step": 3850
},
{
"epoch": 0.5058365758754864,
"grad_norm": 36.078678131103516,
"learning_rate": 9.831387808041505e-06,
"loss": 1.6789,
"step": 3900
},
{
"epoch": 0.5123216601815823,
"grad_norm": 36.602169036865234,
"learning_rate": 9.829226113272806e-06,
"loss": 1.6852,
"step": 3950
},
{
"epoch": 0.5188067444876784,
"grad_norm": 35.471885681152344,
"learning_rate": 9.827064418504108e-06,
"loss": 1.7237,
"step": 4000
},
{
"epoch": 0.5252918287937743,
"grad_norm": 82.1302261352539,
"learning_rate": 9.82490272373541e-06,
"loss": 1.6826,
"step": 4050
},
{
"epoch": 0.5317769130998703,
"grad_norm": 88.32125854492188,
"learning_rate": 9.82274102896671e-06,
"loss": 1.6928,
"step": 4100
},
{
"epoch": 0.5382619974059663,
"grad_norm": 46.689208984375,
"learning_rate": 9.820579334198011e-06,
"loss": 1.6711,
"step": 4150
},
{
"epoch": 0.5447470817120622,
"grad_norm": 51.88264465332031,
"learning_rate": 9.818417639429313e-06,
"loss": 1.661,
"step": 4200
},
{
"epoch": 0.5512321660181583,
"grad_norm": 26.311506271362305,
"learning_rate": 9.816255944660614e-06,
"loss": 1.647,
"step": 4250
},
{
"epoch": 0.5577172503242542,
"grad_norm": 56.531463623046875,
"learning_rate": 9.814094249891916e-06,
"loss": 1.6632,
"step": 4300
},
{
"epoch": 0.5642023346303502,
"grad_norm": 38.8277702331543,
"learning_rate": 9.811932555123218e-06,
"loss": 1.6396,
"step": 4350
},
{
"epoch": 0.5706874189364461,
"grad_norm": 110.19149017333984,
"learning_rate": 9.809770860354519e-06,
"loss": 1.631,
"step": 4400
},
{
"epoch": 0.5771725032425421,
"grad_norm": 31.21686553955078,
"learning_rate": 9.807609165585819e-06,
"loss": 1.6272,
"step": 4450
},
{
"epoch": 0.5836575875486382,
"grad_norm": 54.84867477416992,
"learning_rate": 9.80544747081712e-06,
"loss": 1.6055,
"step": 4500
},
{
"epoch": 0.5901426718547341,
"grad_norm": 73.87213134765625,
"learning_rate": 9.803285776048422e-06,
"loss": 1.5888,
"step": 4550
},
{
"epoch": 0.5966277561608301,
"grad_norm": 33.05928039550781,
"learning_rate": 9.801124081279724e-06,
"loss": 1.6019,
"step": 4600
},
{
"epoch": 0.603112840466926,
"grad_norm": 58.189022064208984,
"learning_rate": 9.798962386511025e-06,
"loss": 1.5611,
"step": 4650
},
{
"epoch": 0.6095979247730221,
"grad_norm": 98.57378387451172,
"learning_rate": 9.796800691742327e-06,
"loss": 1.597,
"step": 4700
},
{
"epoch": 0.6160830090791181,
"grad_norm": 40.484683990478516,
"learning_rate": 9.794638996973629e-06,
"loss": 1.5792,
"step": 4750
},
{
"epoch": 0.622568093385214,
"grad_norm": 137.65402221679688,
"learning_rate": 9.792477302204928e-06,
"loss": 1.5859,
"step": 4800
},
{
"epoch": 0.62905317769131,
"grad_norm": 222.86753845214844,
"learning_rate": 9.79031560743623e-06,
"loss": 1.5545,
"step": 4850
},
{
"epoch": 0.6355382619974059,
"grad_norm": 61.394309997558594,
"learning_rate": 9.788153912667532e-06,
"loss": 1.5275,
"step": 4900
},
{
"epoch": 0.642023346303502,
"grad_norm": 56.79536437988281,
"learning_rate": 9.785992217898833e-06,
"loss": 1.5688,
"step": 4950
},
{
"epoch": 0.648508430609598,
"grad_norm": 35.67102813720703,
"learning_rate": 9.783830523130135e-06,
"loss": 1.5359,
"step": 5000
},
{
"epoch": 0.6549935149156939,
"grad_norm": 106.68746948242188,
"learning_rate": 9.781668828361436e-06,
"loss": 1.5443,
"step": 5050
},
{
"epoch": 0.6614785992217899,
"grad_norm": 118.8326644897461,
"learning_rate": 9.779507133592736e-06,
"loss": 1.5223,
"step": 5100
},
{
"epoch": 0.6679636835278858,
"grad_norm": 98.10018920898438,
"learning_rate": 9.777345438824038e-06,
"loss": 1.5243,
"step": 5150
},
{
"epoch": 0.6744487678339819,
"grad_norm": 77.71648406982422,
"learning_rate": 9.77518374405534e-06,
"loss": 1.5383,
"step": 5200
},
{
"epoch": 0.6809338521400778,
"grad_norm": 58.089149475097656,
"learning_rate": 9.773022049286641e-06,
"loss": 1.4545,
"step": 5250
},
{
"epoch": 0.6874189364461738,
"grad_norm": 37.549198150634766,
"learning_rate": 9.770860354517943e-06,
"loss": 1.454,
"step": 5300
},
{
"epoch": 0.6939040207522698,
"grad_norm": 105.52291107177734,
"learning_rate": 9.768698659749244e-06,
"loss": 1.4583,
"step": 5350
},
{
"epoch": 0.7003891050583657,
"grad_norm": 48.97454833984375,
"learning_rate": 9.766536964980546e-06,
"loss": 1.4752,
"step": 5400
},
{
"epoch": 0.7068741893644618,
"grad_norm": 46.371768951416016,
"learning_rate": 9.764375270211846e-06,
"loss": 1.4495,
"step": 5450
},
{
"epoch": 0.7133592736705577,
"grad_norm": 46.270530700683594,
"learning_rate": 9.762213575443147e-06,
"loss": 1.4777,
"step": 5500
},
{
"epoch": 0.7198443579766537,
"grad_norm": 74.16635131835938,
"learning_rate": 9.760051880674449e-06,
"loss": 1.4826,
"step": 5550
},
{
"epoch": 0.7263294422827496,
"grad_norm": 128.18382263183594,
"learning_rate": 9.75789018590575e-06,
"loss": 1.4844,
"step": 5600
},
{
"epoch": 0.7328145265888456,
"grad_norm": 69.79833984375,
"learning_rate": 9.755728491137052e-06,
"loss": 1.5204,
"step": 5650
},
{
"epoch": 0.7392996108949417,
"grad_norm": 60.08644104003906,
"learning_rate": 9.753566796368354e-06,
"loss": 1.4153,
"step": 5700
},
{
"epoch": 0.7457846952010376,
"grad_norm": 28.086040496826172,
"learning_rate": 9.751405101599655e-06,
"loss": 1.4365,
"step": 5750
},
{
"epoch": 0.7522697795071336,
"grad_norm": 123.76924133300781,
"learning_rate": 9.749243406830955e-06,
"loss": 1.4517,
"step": 5800
},
{
"epoch": 0.7587548638132295,
"grad_norm": 51.66524124145508,
"learning_rate": 9.747081712062257e-06,
"loss": 1.3986,
"step": 5850
},
{
"epoch": 0.7652399481193255,
"grad_norm": 108.23607635498047,
"learning_rate": 9.744920017293558e-06,
"loss": 1.4244,
"step": 5900
},
{
"epoch": 0.7717250324254216,
"grad_norm": 231.63455200195312,
"learning_rate": 9.74275832252486e-06,
"loss": 1.4064,
"step": 5950
},
{
"epoch": 0.7782101167315175,
"grad_norm": 81.5296630859375,
"learning_rate": 9.740596627756162e-06,
"loss": 1.4159,
"step": 6000
},
{
"epoch": 0.7846952010376135,
"grad_norm": 88.66846466064453,
"learning_rate": 9.738434932987463e-06,
"loss": 1.3972,
"step": 6050
},
{
"epoch": 0.7911802853437094,
"grad_norm": 51.40989303588867,
"learning_rate": 9.736273238218765e-06,
"loss": 1.3936,
"step": 6100
},
{
"epoch": 0.7976653696498055,
"grad_norm": 125.95860290527344,
"learning_rate": 9.734111543450065e-06,
"loss": 1.4171,
"step": 6150
},
{
"epoch": 0.8041504539559015,
"grad_norm": 106.81758117675781,
"learning_rate": 9.731949848681366e-06,
"loss": 1.3815,
"step": 6200
},
{
"epoch": 0.8106355382619974,
"grad_norm": 59.69066619873047,
"learning_rate": 9.729788153912668e-06,
"loss": 1.3268,
"step": 6250
},
{
"epoch": 0.8171206225680934,
"grad_norm": 58.66751480102539,
"learning_rate": 9.72762645914397e-06,
"loss": 1.3593,
"step": 6300
},
{
"epoch": 0.8236057068741893,
"grad_norm": 67.43231964111328,
"learning_rate": 9.725464764375271e-06,
"loss": 1.3639,
"step": 6350
},
{
"epoch": 0.8300907911802854,
"grad_norm": 42.44819259643555,
"learning_rate": 9.723303069606573e-06,
"loss": 1.3664,
"step": 6400
},
{
"epoch": 0.8365758754863813,
"grad_norm": 31.140090942382812,
"learning_rate": 9.721141374837874e-06,
"loss": 1.3689,
"step": 6450
},
{
"epoch": 0.8430609597924773,
"grad_norm": 295.80523681640625,
"learning_rate": 9.718979680069174e-06,
"loss": 1.3763,
"step": 6500
},
{
"epoch": 0.8495460440985733,
"grad_norm": 88.26421356201172,
"learning_rate": 9.716817985300476e-06,
"loss": 1.3537,
"step": 6550
},
{
"epoch": 0.8560311284046692,
"grad_norm": 193.05654907226562,
"learning_rate": 9.714656290531777e-06,
"loss": 1.3507,
"step": 6600
},
{
"epoch": 0.8625162127107653,
"grad_norm": 79.03955078125,
"learning_rate": 9.712494595763079e-06,
"loss": 1.3388,
"step": 6650
},
{
"epoch": 0.8690012970168612,
"grad_norm": 50.94293975830078,
"learning_rate": 9.71033290099438e-06,
"loss": 1.3568,
"step": 6700
},
{
"epoch": 0.8754863813229572,
"grad_norm": 39.64507293701172,
"learning_rate": 9.708171206225682e-06,
"loss": 1.3824,
"step": 6750
},
{
"epoch": 0.8819714656290532,
"grad_norm": 72.73592376708984,
"learning_rate": 9.706009511456984e-06,
"loss": 1.3592,
"step": 6800
},
{
"epoch": 0.8884565499351491,
"grad_norm": 25.956851959228516,
"learning_rate": 9.703847816688284e-06,
"loss": 1.3338,
"step": 6850
},
{
"epoch": 0.8949416342412452,
"grad_norm": 96.9691162109375,
"learning_rate": 9.701686121919585e-06,
"loss": 1.3522,
"step": 6900
},
{
"epoch": 0.9014267185473411,
"grad_norm": 24.184741973876953,
"learning_rate": 9.699524427150887e-06,
"loss": 1.3276,
"step": 6950
},
{
"epoch": 0.9079118028534371,
"grad_norm": 38.254638671875,
"learning_rate": 9.697362732382188e-06,
"loss": 1.3185,
"step": 7000
},
{
"epoch": 0.914396887159533,
"grad_norm": 160.06329345703125,
"learning_rate": 9.69520103761349e-06,
"loss": 1.3145,
"step": 7050
},
{
"epoch": 0.920881971465629,
"grad_norm": 194.62234497070312,
"learning_rate": 9.693039342844792e-06,
"loss": 1.3505,
"step": 7100
},
{
"epoch": 0.9273670557717251,
"grad_norm": 63.943546295166016,
"learning_rate": 9.690877648076093e-06,
"loss": 1.3353,
"step": 7150
},
{
"epoch": 0.933852140077821,
"grad_norm": 64.3890151977539,
"learning_rate": 9.688715953307393e-06,
"loss": 1.2994,
"step": 7200
},
{
"epoch": 0.940337224383917,
"grad_norm": 42.98583984375,
"learning_rate": 9.686554258538695e-06,
"loss": 1.3034,
"step": 7250
},
{
"epoch": 0.9468223086900129,
"grad_norm": 112.66468811035156,
"learning_rate": 9.684392563769996e-06,
"loss": 1.2642,
"step": 7300
},
{
"epoch": 0.953307392996109,
"grad_norm": 100.0568618774414,
"learning_rate": 9.682230869001298e-06,
"loss": 1.2919,
"step": 7350
},
{
"epoch": 0.959792477302205,
"grad_norm": 26.995040893554688,
"learning_rate": 9.6800691742326e-06,
"loss": 1.2775,
"step": 7400
},
{
"epoch": 0.9662775616083009,
"grad_norm": 26.033170700073242,
"learning_rate": 9.677907479463901e-06,
"loss": 1.2675,
"step": 7450
},
{
"epoch": 0.9727626459143969,
"grad_norm": 53.325523376464844,
"learning_rate": 9.675745784695201e-06,
"loss": 1.2947,
"step": 7500
},
{
"epoch": 0.9792477302204928,
"grad_norm": 71.00118255615234,
"learning_rate": 9.673584089926503e-06,
"loss": 1.3299,
"step": 7550
},
{
"epoch": 0.9857328145265889,
"grad_norm": 115.75421142578125,
"learning_rate": 9.671422395157804e-06,
"loss": 1.3157,
"step": 7600
},
{
"epoch": 0.9922178988326849,
"grad_norm": 40.338565826416016,
"learning_rate": 9.669260700389106e-06,
"loss": 1.3313,
"step": 7650
},
{
"epoch": 0.9987029831387808,
"grad_norm": 44.46931457519531,
"learning_rate": 9.667099005620407e-06,
"loss": 1.3073,
"step": 7700
},
{
"epoch": 1.0051880674448768,
"grad_norm": 38.489139556884766,
"learning_rate": 9.664937310851709e-06,
"loss": 1.2656,
"step": 7750
},
{
"epoch": 1.0116731517509727,
"grad_norm": 118.02386474609375,
"learning_rate": 9.66277561608301e-06,
"loss": 1.2827,
"step": 7800
},
{
"epoch": 1.0181582360570687,
"grad_norm": 73.46105194091797,
"learning_rate": 9.66061392131431e-06,
"loss": 1.3098,
"step": 7850
},
{
"epoch": 1.0246433203631646,
"grad_norm": 76.57545471191406,
"learning_rate": 9.658452226545612e-06,
"loss": 1.2673,
"step": 7900
},
{
"epoch": 1.0311284046692606,
"grad_norm": 34.5427360534668,
"learning_rate": 9.656290531776914e-06,
"loss": 1.2507,
"step": 7950
},
{
"epoch": 1.0376134889753568,
"grad_norm": 93.37530517578125,
"learning_rate": 9.654128837008215e-06,
"loss": 1.2523,
"step": 8000
},
{
"epoch": 1.0440985732814527,
"grad_norm": 104.25950622558594,
"learning_rate": 9.651967142239517e-06,
"loss": 1.2446,
"step": 8050
},
{
"epoch": 1.0505836575875487,
"grad_norm": 139.35931396484375,
"learning_rate": 9.649805447470818e-06,
"loss": 1.24,
"step": 8100
},
{
"epoch": 1.0570687418936446,
"grad_norm": 160.05386352539062,
"learning_rate": 9.64764375270212e-06,
"loss": 1.2476,
"step": 8150
},
{
"epoch": 1.0635538261997406,
"grad_norm": 28.035104751586914,
"learning_rate": 9.64548205793342e-06,
"loss": 1.265,
"step": 8200
},
{
"epoch": 1.0700389105058365,
"grad_norm": 37.27667236328125,
"learning_rate": 9.643320363164722e-06,
"loss": 1.206,
"step": 8250
},
{
"epoch": 1.0765239948119325,
"grad_norm": 35.973751068115234,
"learning_rate": 9.641158668396023e-06,
"loss": 1.2393,
"step": 8300
},
{
"epoch": 1.0830090791180285,
"grad_norm": 101.7255859375,
"learning_rate": 9.638996973627325e-06,
"loss": 1.2775,
"step": 8350
},
{
"epoch": 1.0894941634241244,
"grad_norm": 54.50769805908203,
"learning_rate": 9.636835278858626e-06,
"loss": 1.2302,
"step": 8400
},
{
"epoch": 1.0959792477302206,
"grad_norm": 68.53856658935547,
"learning_rate": 9.634673584089928e-06,
"loss": 1.2285,
"step": 8450
},
{
"epoch": 1.1024643320363166,
"grad_norm": 106.03568267822266,
"learning_rate": 9.63251188932123e-06,
"loss": 1.2285,
"step": 8500
},
{
"epoch": 1.1089494163424125,
"grad_norm": 110.32369232177734,
"learning_rate": 9.63035019455253e-06,
"loss": 1.2455,
"step": 8550
},
{
"epoch": 1.1154345006485085,
"grad_norm": 71.17969512939453,
"learning_rate": 9.628188499783831e-06,
"loss": 1.2908,
"step": 8600
},
{
"epoch": 1.1219195849546044,
"grad_norm": 100.68138885498047,
"learning_rate": 9.626026805015133e-06,
"loss": 1.2489,
"step": 8650
},
{
"epoch": 1.1284046692607004,
"grad_norm": 31.05600929260254,
"learning_rate": 9.623865110246434e-06,
"loss": 1.2033,
"step": 8700
},
{
"epoch": 1.1348897535667963,
"grad_norm": 56.18037033081055,
"learning_rate": 9.621703415477736e-06,
"loss": 1.2507,
"step": 8750
},
{
"epoch": 1.1413748378728923,
"grad_norm": 66.67138671875,
"learning_rate": 9.619541720709037e-06,
"loss": 1.2256,
"step": 8800
},
{
"epoch": 1.1478599221789882,
"grad_norm": 89.71563720703125,
"learning_rate": 9.617380025940339e-06,
"loss": 1.2045,
"step": 8850
},
{
"epoch": 1.1543450064850842,
"grad_norm": 83.06657409667969,
"learning_rate": 9.615218331171639e-06,
"loss": 1.1957,
"step": 8900
},
{
"epoch": 1.1608300907911804,
"grad_norm": 32.97713851928711,
"learning_rate": 9.61305663640294e-06,
"loss": 1.2178,
"step": 8950
},
{
"epoch": 1.1673151750972763,
"grad_norm": 40.30419158935547,
"learning_rate": 9.610894941634242e-06,
"loss": 1.2114,
"step": 9000
},
{
"epoch": 1.1738002594033723,
"grad_norm": 26.891326904296875,
"learning_rate": 9.608733246865544e-06,
"loss": 1.2049,
"step": 9050
},
{
"epoch": 1.1802853437094682,
"grad_norm": 95.95230102539062,
"learning_rate": 9.606571552096845e-06,
"loss": 1.1984,
"step": 9100
},
{
"epoch": 1.1867704280155642,
"grad_norm": 137.105712890625,
"learning_rate": 9.604409857328147e-06,
"loss": 1.1697,
"step": 9150
},
{
"epoch": 1.1932555123216602,
"grad_norm": 145.55722045898438,
"learning_rate": 9.602248162559448e-06,
"loss": 1.2145,
"step": 9200
},
{
"epoch": 1.1997405966277561,
"grad_norm": 200.1924591064453,
"learning_rate": 9.600086467790748e-06,
"loss": 1.1548,
"step": 9250
},
{
"epoch": 1.206225680933852,
"grad_norm": 119.41325378417969,
"learning_rate": 9.59792477302205e-06,
"loss": 1.1784,
"step": 9300
},
{
"epoch": 1.212710765239948,
"grad_norm": 33.50049591064453,
"learning_rate": 9.595763078253352e-06,
"loss": 1.2024,
"step": 9350
},
{
"epoch": 1.2191958495460442,
"grad_norm": 106.87812805175781,
"learning_rate": 9.593601383484653e-06,
"loss": 1.211,
"step": 9400
},
{
"epoch": 1.2256809338521402,
"grad_norm": 50.5958366394043,
"learning_rate": 9.591439688715955e-06,
"loss": 1.1891,
"step": 9450
},
{
"epoch": 1.2321660181582361,
"grad_norm": 66.71959686279297,
"learning_rate": 9.589277993947256e-06,
"loss": 1.1643,
"step": 9500
},
{
"epoch": 1.238651102464332,
"grad_norm": 74.7675552368164,
"learning_rate": 9.587116299178558e-06,
"loss": 1.1527,
"step": 9550
},
{
"epoch": 1.245136186770428,
"grad_norm": 48.216217041015625,
"learning_rate": 9.584954604409858e-06,
"loss": 1.1978,
"step": 9600
},
{
"epoch": 1.251621271076524,
"grad_norm": 49.2801628112793,
"learning_rate": 9.58279290964116e-06,
"loss": 1.1669,
"step": 9650
},
{
"epoch": 1.25810635538262,
"grad_norm": 31.61471939086914,
"learning_rate": 9.580631214872461e-06,
"loss": 1.1393,
"step": 9700
},
{
"epoch": 1.264591439688716,
"grad_norm": 84.38628387451172,
"learning_rate": 9.578469520103763e-06,
"loss": 1.146,
"step": 9750
},
{
"epoch": 1.2710765239948119,
"grad_norm": 64.64751434326172,
"learning_rate": 9.576307825335064e-06,
"loss": 1.178,
"step": 9800
},
{
"epoch": 1.2775616083009078,
"grad_norm": 38.89653396606445,
"learning_rate": 9.574146130566366e-06,
"loss": 1.174,
"step": 9850
},
{
"epoch": 1.2840466926070038,
"grad_norm": 154.65513610839844,
"learning_rate": 9.571984435797666e-06,
"loss": 1.1372,
"step": 9900
},
{
"epoch": 1.2905317769131,
"grad_norm": 44.31837844848633,
"learning_rate": 9.569822741028967e-06,
"loss": 1.1371,
"step": 9950
},
{
"epoch": 1.297016861219196,
"grad_norm": 66.79319763183594,
"learning_rate": 9.567661046260269e-06,
"loss": 1.1161,
"step": 10000
},
{
"epoch": 1.3035019455252919,
"grad_norm": 109.10542297363281,
"learning_rate": 9.56549935149157e-06,
"loss": 1.1321,
"step": 10050
},
{
"epoch": 1.3099870298313878,
"grad_norm": 52.56429672241211,
"learning_rate": 9.563337656722872e-06,
"loss": 1.1606,
"step": 10100
},
{
"epoch": 1.3164721141374838,
"grad_norm": 106.3527603149414,
"learning_rate": 9.561175961954174e-06,
"loss": 1.154,
"step": 10150
},
{
"epoch": 1.3229571984435797,
"grad_norm": 275.15216064453125,
"learning_rate": 9.559014267185475e-06,
"loss": 1.1477,
"step": 10200
},
{
"epoch": 1.3294422827496757,
"grad_norm": 146.71636962890625,
"learning_rate": 9.556852572416775e-06,
"loss": 1.154,
"step": 10250
},
{
"epoch": 1.3359273670557716,
"grad_norm": 91.92938995361328,
"learning_rate": 9.554690877648077e-06,
"loss": 1.1194,
"step": 10300
},
{
"epoch": 1.3424124513618678,
"grad_norm": 34.08713912963867,
"learning_rate": 9.552529182879378e-06,
"loss": 1.1176,
"step": 10350
},
{
"epoch": 1.3488975356679638,
"grad_norm": 234.9642791748047,
"learning_rate": 9.55036748811068e-06,
"loss": 1.1294,
"step": 10400
},
{
"epoch": 1.3553826199740597,
"grad_norm": 89.80880737304688,
"learning_rate": 9.548205793341982e-06,
"loss": 1.1138,
"step": 10450
},
{
"epoch": 1.3618677042801557,
"grad_norm": 54.69230651855469,
"learning_rate": 9.546044098573283e-06,
"loss": 1.1153,
"step": 10500
},
{
"epoch": 1.3683527885862516,
"grad_norm": 43.032466888427734,
"learning_rate": 9.543882403804585e-06,
"loss": 1.1041,
"step": 10550
},
{
"epoch": 1.3748378728923476,
"grad_norm": 110.12335205078125,
"learning_rate": 9.541720709035885e-06,
"loss": 1.1056,
"step": 10600
},
{
"epoch": 1.3813229571984436,
"grad_norm": 109.36446380615234,
"learning_rate": 9.539559014267186e-06,
"loss": 1.1288,
"step": 10650
},
{
"epoch": 1.3878080415045395,
"grad_norm": 200.33387756347656,
"learning_rate": 9.537397319498488e-06,
"loss": 1.101,
"step": 10700
},
{
"epoch": 1.3942931258106355,
"grad_norm": 31.898895263671875,
"learning_rate": 9.53523562472979e-06,
"loss": 1.0919,
"step": 10750
},
{
"epoch": 1.4007782101167314,
"grad_norm": 41.42584991455078,
"learning_rate": 9.533073929961091e-06,
"loss": 1.0914,
"step": 10800
},
{
"epoch": 1.4072632944228274,
"grad_norm": 55.4241828918457,
"learning_rate": 9.530912235192391e-06,
"loss": 1.0457,
"step": 10850
},
{
"epoch": 1.4137483787289233,
"grad_norm": 92.30503845214844,
"learning_rate": 9.528750540423693e-06,
"loss": 1.1102,
"step": 10900
},
{
"epoch": 1.4202334630350195,
"grad_norm": 183.83782958984375,
"learning_rate": 9.526588845654994e-06,
"loss": 1.0788,
"step": 10950
},
{
"epoch": 1.4267185473411155,
"grad_norm": 58.78367614746094,
"learning_rate": 9.524427150886296e-06,
"loss": 1.0883,
"step": 11000
},
{
"epoch": 1.4332036316472114,
"grad_norm": 122.31067657470703,
"learning_rate": 9.522265456117596e-06,
"loss": 1.0949,
"step": 11050
},
{
"epoch": 1.4396887159533074,
"grad_norm": 92.71478271484375,
"learning_rate": 9.520103761348897e-06,
"loss": 1.1128,
"step": 11100
},
{
"epoch": 1.4461738002594033,
"grad_norm": 63.70943832397461,
"learning_rate": 9.517942066580199e-06,
"loss": 1.1321,
"step": 11150
},
{
"epoch": 1.4526588845654993,
"grad_norm": 55.85942840576172,
"learning_rate": 9.5157803718115e-06,
"loss": 1.1075,
"step": 11200
},
{
"epoch": 1.4591439688715953,
"grad_norm": 141.53460693359375,
"learning_rate": 9.513618677042802e-06,
"loss": 1.0777,
"step": 11250
},
{
"epoch": 1.4656290531776914,
"grad_norm": 52.136348724365234,
"learning_rate": 9.511456982274104e-06,
"loss": 1.094,
"step": 11300
},
{
"epoch": 1.4721141374837874,
"grad_norm": 66.62019348144531,
"learning_rate": 9.509295287505405e-06,
"loss": 1.1122,
"step": 11350
},
{
"epoch": 1.4785992217898833,
"grad_norm": 37.23124313354492,
"learning_rate": 9.507133592736705e-06,
"loss": 1.1029,
"step": 11400
},
{
"epoch": 1.4850843060959793,
"grad_norm": 141.20053100585938,
"learning_rate": 9.504971897968007e-06,
"loss": 1.0891,
"step": 11450
},
{
"epoch": 1.4915693904020753,
"grad_norm": 70.93553924560547,
"learning_rate": 9.502810203199308e-06,
"loss": 1.0799,
"step": 11500
},
{
"epoch": 1.4980544747081712,
"grad_norm": 78.5645980834961,
"learning_rate": 9.50064850843061e-06,
"loss": 1.0771,
"step": 11550
},
{
"epoch": 1.5045395590142672,
"grad_norm": 110.03238677978516,
"learning_rate": 9.498486813661911e-06,
"loss": 1.052,
"step": 11600
},
{
"epoch": 1.5110246433203631,
"grad_norm": 94.38980865478516,
"learning_rate": 9.496325118893213e-06,
"loss": 1.0831,
"step": 11650
},
{
"epoch": 1.517509727626459,
"grad_norm": 72.01763153076172,
"learning_rate": 9.494163424124515e-06,
"loss": 1.0674,
"step": 11700
},
{
"epoch": 1.523994811932555,
"grad_norm": 51.9689826965332,
"learning_rate": 9.492001729355815e-06,
"loss": 1.0831,
"step": 11750
},
{
"epoch": 1.530479896238651,
"grad_norm": 107.07817840576172,
"learning_rate": 9.489840034587116e-06,
"loss": 1.0426,
"step": 11800
},
{
"epoch": 1.536964980544747,
"grad_norm": 47.38414764404297,
"learning_rate": 9.487678339818418e-06,
"loss": 1.0711,
"step": 11850
},
{
"epoch": 1.543450064850843,
"grad_norm": 45.52274703979492,
"learning_rate": 9.48551664504972e-06,
"loss": 1.0781,
"step": 11900
},
{
"epoch": 1.549935149156939,
"grad_norm": 26.50186538696289,
"learning_rate": 9.483354950281021e-06,
"loss": 1.0574,
"step": 11950
},
{
"epoch": 1.556420233463035,
"grad_norm": 41.2259521484375,
"learning_rate": 9.481193255512323e-06,
"loss": 1.0754,
"step": 12000
},
{
"epoch": 1.562905317769131,
"grad_norm": 35.60273361206055,
"learning_rate": 9.479031560743624e-06,
"loss": 1.0288,
"step": 12050
},
{
"epoch": 1.569390402075227,
"grad_norm": 41.92966842651367,
"learning_rate": 9.476869865974924e-06,
"loss": 1.0488,
"step": 12100
},
{
"epoch": 1.575875486381323,
"grad_norm": 18.675764083862305,
"learning_rate": 9.474708171206226e-06,
"loss": 1.0531,
"step": 12150
},
{
"epoch": 1.582360570687419,
"grad_norm": 108.12574005126953,
"learning_rate": 9.472546476437527e-06,
"loss": 1.0376,
"step": 12200
},
{
"epoch": 1.588845654993515,
"grad_norm": 112.02227020263672,
"learning_rate": 9.470384781668829e-06,
"loss": 1.0343,
"step": 12250
},
{
"epoch": 1.595330739299611,
"grad_norm": 98.89630126953125,
"learning_rate": 9.46822308690013e-06,
"loss": 1.0823,
"step": 12300
},
{
"epoch": 1.601815823605707,
"grad_norm": 118.65319061279297,
"learning_rate": 9.466061392131432e-06,
"loss": 1.0556,
"step": 12350
},
{
"epoch": 1.608300907911803,
"grad_norm": 129.68385314941406,
"learning_rate": 9.463899697362734e-06,
"loss": 1.0468,
"step": 12400
},
{
"epoch": 1.6147859922178989,
"grad_norm": 76.83685302734375,
"learning_rate": 9.461738002594033e-06,
"loss": 1.0861,
"step": 12450
},
{
"epoch": 1.6212710765239948,
"grad_norm": 157.33180236816406,
"learning_rate": 9.459576307825335e-06,
"loss": 1.0452,
"step": 12500
},
{
"epoch": 1.6277561608300908,
"grad_norm": 68.68656158447266,
"learning_rate": 9.457414613056637e-06,
"loss": 1.0309,
"step": 12550
},
{
"epoch": 1.6342412451361867,
"grad_norm": 168.48919677734375,
"learning_rate": 9.455252918287938e-06,
"loss": 1.0167,
"step": 12600
},
{
"epoch": 1.6407263294422827,
"grad_norm": 57.15532684326172,
"learning_rate": 9.45309122351924e-06,
"loss": 0.9928,
"step": 12650
},
{
"epoch": 1.6472114137483787,
"grad_norm": 108.53094482421875,
"learning_rate": 9.450929528750541e-06,
"loss": 0.9977,
"step": 12700
},
{
"epoch": 1.6536964980544746,
"grad_norm": 51.94723129272461,
"learning_rate": 9.448767833981843e-06,
"loss": 1.0463,
"step": 12750
},
{
"epoch": 1.6601815823605706,
"grad_norm": 190.38787841796875,
"learning_rate": 9.446606139213143e-06,
"loss": 1.0505,
"step": 12800
},
{
"epoch": 1.6666666666666665,
"grad_norm": 53.69584274291992,
"learning_rate": 9.444444444444445e-06,
"loss": 1.0064,
"step": 12850
},
{
"epoch": 1.6731517509727627,
"grad_norm": 116.31331634521484,
"learning_rate": 9.442282749675746e-06,
"loss": 1.0066,
"step": 12900
},
{
"epoch": 1.6796368352788587,
"grad_norm": 102.2055892944336,
"learning_rate": 9.440121054907048e-06,
"loss": 0.9889,
"step": 12950
},
{
"epoch": 1.6861219195849546,
"grad_norm": 78.99929809570312,
"learning_rate": 9.43795936013835e-06,
"loss": 1.0151,
"step": 13000
},
{
"epoch": 1.6926070038910506,
"grad_norm": 97.79879760742188,
"learning_rate": 9.435797665369651e-06,
"loss": 0.9862,
"step": 13050
},
{
"epoch": 1.6990920881971465,
"grad_norm": 30.27912139892578,
"learning_rate": 9.433635970600953e-06,
"loss": 1.0031,
"step": 13100
},
{
"epoch": 1.7055771725032427,
"grad_norm": 47.64608383178711,
"learning_rate": 9.431474275832252e-06,
"loss": 0.9915,
"step": 13150
},
{
"epoch": 1.7120622568093387,
"grad_norm": 73.29598236083984,
"learning_rate": 9.429312581063554e-06,
"loss": 0.9978,
"step": 13200
},
{
"epoch": 1.7185473411154346,
"grad_norm": 54.02968215942383,
"learning_rate": 9.427150886294856e-06,
"loss": 0.9901,
"step": 13250
},
{
"epoch": 1.7250324254215306,
"grad_norm": 100.88853454589844,
"learning_rate": 9.424989191526157e-06,
"loss": 1.0254,
"step": 13300
},
{
"epoch": 1.7315175097276265,
"grad_norm": 105.97042083740234,
"learning_rate": 9.422827496757459e-06,
"loss": 1.0053,
"step": 13350
},
{
"epoch": 1.7380025940337225,
"grad_norm": 110.1412582397461,
"learning_rate": 9.42066580198876e-06,
"loss": 0.996,
"step": 13400
},
{
"epoch": 1.7444876783398184,
"grad_norm": 102.8427505493164,
"learning_rate": 9.41850410722006e-06,
"loss": 1.0155,
"step": 13450
},
{
"epoch": 1.7509727626459144,
"grad_norm": 87.20895385742188,
"learning_rate": 9.416342412451362e-06,
"loss": 0.9852,
"step": 13500
},
{
"epoch": 1.7574578469520103,
"grad_norm": 77.43791198730469,
"learning_rate": 9.414180717682663e-06,
"loss": 0.9895,
"step": 13550
},
{
"epoch": 1.7639429312581063,
"grad_norm": 172.30885314941406,
"learning_rate": 9.412019022913965e-06,
"loss": 0.9743,
"step": 13600
},
{
"epoch": 1.7704280155642023,
"grad_norm": 70.97063446044922,
"learning_rate": 9.409857328145267e-06,
"loss": 0.9753,
"step": 13650
},
{
"epoch": 1.7769130998702982,
"grad_norm": 123.29631805419922,
"learning_rate": 9.407695633376568e-06,
"loss": 0.9374,
"step": 13700
},
{
"epoch": 1.7833981841763942,
"grad_norm": 169.43450927734375,
"learning_rate": 9.40553393860787e-06,
"loss": 0.9733,
"step": 13750
},
{
"epoch": 1.7898832684824901,
"grad_norm": 94.36160278320312,
"learning_rate": 9.40337224383917e-06,
"loss": 0.9741,
"step": 13800
},
{
"epoch": 1.796368352788586,
"grad_norm": 67.74256896972656,
"learning_rate": 9.401210549070471e-06,
"loss": 0.9556,
"step": 13850
},
{
"epoch": 1.8028534370946823,
"grad_norm": 171.77330017089844,
"learning_rate": 9.399048854301773e-06,
"loss": 0.9841,
"step": 13900
},
{
"epoch": 1.8093385214007782,
"grad_norm": 110.4674301147461,
"learning_rate": 9.396887159533075e-06,
"loss": 1.0008,
"step": 13950
},
{
"epoch": 1.8158236057068742,
"grad_norm": 171.76177978515625,
"learning_rate": 9.394725464764376e-06,
"loss": 0.9835,
"step": 14000
},
{
"epoch": 1.8223086900129701,
"grad_norm": 130.97406005859375,
"learning_rate": 9.392563769995678e-06,
"loss": 0.9498,
"step": 14050
},
{
"epoch": 1.8287937743190663,
"grad_norm": 74.91665649414062,
"learning_rate": 9.39040207522698e-06,
"loss": 0.9621,
"step": 14100
},
{
"epoch": 1.8352788586251623,
"grad_norm": 48.18241500854492,
"learning_rate": 9.38824038045828e-06,
"loss": 0.9802,
"step": 14150
},
{
"epoch": 1.8417639429312582,
"grad_norm": 133.60479736328125,
"learning_rate": 9.38607868568958e-06,
"loss": 0.9622,
"step": 14200
},
{
"epoch": 1.8482490272373542,
"grad_norm": 163.2623291015625,
"learning_rate": 9.383916990920882e-06,
"loss": 0.959,
"step": 14250
},
{
"epoch": 1.8547341115434501,
"grad_norm": 56.28314208984375,
"learning_rate": 9.381755296152184e-06,
"loss": 0.9607,
"step": 14300
},
{
"epoch": 1.861219195849546,
"grad_norm": 115.68190002441406,
"learning_rate": 9.379593601383486e-06,
"loss": 0.9643,
"step": 14350
},
{
"epoch": 1.867704280155642,
"grad_norm": 57.527828216552734,
"learning_rate": 9.377431906614787e-06,
"loss": 0.9458,
"step": 14400
},
{
"epoch": 1.874189364461738,
"grad_norm": 83.91288757324219,
"learning_rate": 9.375270211846089e-06,
"loss": 0.937,
"step": 14450
},
{
"epoch": 1.880674448767834,
"grad_norm": 205.3312530517578,
"learning_rate": 9.373108517077389e-06,
"loss": 0.9382,
"step": 14500
},
{
"epoch": 1.88715953307393,
"grad_norm": 84.1654281616211,
"learning_rate": 9.37094682230869e-06,
"loss": 0.9737,
"step": 14550
},
{
"epoch": 1.8936446173800259,
"grad_norm": 59.71659469604492,
"learning_rate": 9.368785127539992e-06,
"loss": 0.9766,
"step": 14600
},
{
"epoch": 1.9001297016861218,
"grad_norm": 62.78482437133789,
"learning_rate": 9.366623432771293e-06,
"loss": 0.9746,
"step": 14650
},
{
"epoch": 1.9066147859922178,
"grad_norm": 41.973777770996094,
"learning_rate": 9.364461738002595e-06,
"loss": 0.965,
"step": 14700
},
{
"epoch": 1.9130998702983137,
"grad_norm": 26.649688720703125,
"learning_rate": 9.362300043233897e-06,
"loss": 0.949,
"step": 14750
},
{
"epoch": 1.9195849546044097,
"grad_norm": 43.40812683105469,
"learning_rate": 9.360138348465198e-06,
"loss": 0.9204,
"step": 14800
},
{
"epoch": 1.9260700389105059,
"grad_norm": 72.37606811523438,
"learning_rate": 9.357976653696498e-06,
"loss": 0.9599,
"step": 14850
},
{
"epoch": 1.9325551232166018,
"grad_norm": 24.634532928466797,
"learning_rate": 9.3558149589278e-06,
"loss": 0.9309,
"step": 14900
},
{
"epoch": 1.9390402075226978,
"grad_norm": 74.19110870361328,
"learning_rate": 9.353653264159101e-06,
"loss": 0.9173,
"step": 14950
},
{
"epoch": 1.9455252918287937,
"grad_norm": 76.68376922607422,
"learning_rate": 9.351491569390403e-06,
"loss": 0.9305,
"step": 15000
},
{
"epoch": 1.9520103761348897,
"grad_norm": 80.31610107421875,
"learning_rate": 9.349329874621705e-06,
"loss": 0.9095,
"step": 15050
},
{
"epoch": 1.9584954604409859,
"grad_norm": 59.694969177246094,
"learning_rate": 9.347168179853006e-06,
"loss": 0.9157,
"step": 15100
},
{
"epoch": 1.9649805447470818,
"grad_norm": 47.985164642333984,
"learning_rate": 9.345006485084308e-06,
"loss": 0.9553,
"step": 15150
},
{
"epoch": 1.9714656290531778,
"grad_norm": 119.7039566040039,
"learning_rate": 9.342844790315608e-06,
"loss": 0.9479,
"step": 15200
},
{
"epoch": 1.9779507133592737,
"grad_norm": 84.06747436523438,
"learning_rate": 9.34068309554691e-06,
"loss": 0.9539,
"step": 15250
},
{
"epoch": 1.9844357976653697,
"grad_norm": 182.93211364746094,
"learning_rate": 9.33852140077821e-06,
"loss": 0.9279,
"step": 15300
},
{
"epoch": 1.9909208819714657,
"grad_norm": 33.33463668823242,
"learning_rate": 9.336359706009512e-06,
"loss": 0.9237,
"step": 15350
},
{
"epoch": 1.9974059662775616,
"grad_norm": 98.06361389160156,
"learning_rate": 9.334198011240814e-06,
"loss": 0.9449,
"step": 15400
},
{
"epoch": 2.0038910505836576,
"grad_norm": 40.220664978027344,
"learning_rate": 9.332036316472116e-06,
"loss": 0.9192,
"step": 15450
},
{
"epoch": 2.0103761348897535,
"grad_norm": 67.13005828857422,
"learning_rate": 9.329874621703417e-06,
"loss": 0.9487,
"step": 15500
},
{
"epoch": 2.0168612191958495,
"grad_norm": 163.42137145996094,
"learning_rate": 9.327712926934717e-06,
"loss": 0.9684,
"step": 15550
},
{
"epoch": 2.0233463035019454,
"grad_norm": 82.5510025024414,
"learning_rate": 9.325551232166019e-06,
"loss": 0.9344,
"step": 15600
},
{
"epoch": 2.0298313878080414,
"grad_norm": 203.52099609375,
"learning_rate": 9.32338953739732e-06,
"loss": 0.8956,
"step": 15650
},
{
"epoch": 2.0363164721141374,
"grad_norm": 72.38980865478516,
"learning_rate": 9.321227842628622e-06,
"loss": 0.9391,
"step": 15700
},
{
"epoch": 2.0428015564202333,
"grad_norm": 50.11948013305664,
"learning_rate": 9.319066147859923e-06,
"loss": 0.9348,
"step": 15750
},
{
"epoch": 2.0492866407263293,
"grad_norm": 122.09666442871094,
"learning_rate": 9.316904453091225e-06,
"loss": 0.8871,
"step": 15800
},
{
"epoch": 2.0557717250324252,
"grad_norm": 59.022274017333984,
"learning_rate": 9.314742758322527e-06,
"loss": 0.9101,
"step": 15850
},
{
"epoch": 2.062256809338521,
"grad_norm": 76.15840148925781,
"learning_rate": 9.312581063553827e-06,
"loss": 0.9273,
"step": 15900
},
{
"epoch": 2.0687418936446176,
"grad_norm": 47.89101791381836,
"learning_rate": 9.310419368785128e-06,
"loss": 0.9355,
"step": 15950
},
{
"epoch": 2.0752269779507135,
"grad_norm": 229.04345703125,
"learning_rate": 9.30825767401643e-06,
"loss": 0.9091,
"step": 16000
},
{
"epoch": 2.0817120622568095,
"grad_norm": 133.06822204589844,
"learning_rate": 9.306095979247731e-06,
"loss": 0.9448,
"step": 16050
},
{
"epoch": 2.0881971465629054,
"grad_norm": 58.23340606689453,
"learning_rate": 9.303934284479033e-06,
"loss": 0.9028,
"step": 16100
},
{
"epoch": 2.0946822308690014,
"grad_norm": 232.0340118408203,
"learning_rate": 9.301772589710335e-06,
"loss": 0.9424,
"step": 16150
},
{
"epoch": 2.1011673151750974,
"grad_norm": 22.04237937927246,
"learning_rate": 9.299610894941634e-06,
"loss": 0.9227,
"step": 16200
},
{
"epoch": 2.1076523994811933,
"grad_norm": 116.25421142578125,
"learning_rate": 9.297449200172936e-06,
"loss": 0.8914,
"step": 16250
},
{
"epoch": 2.1141374837872893,
"grad_norm": 69.3602066040039,
"learning_rate": 9.295287505404238e-06,
"loss": 0.8985,
"step": 16300
},
{
"epoch": 2.1206225680933852,
"grad_norm": 145.1238555908203,
"learning_rate": 9.29312581063554e-06,
"loss": 0.8987,
"step": 16350
},
{
"epoch": 2.127107652399481,
"grad_norm": 24.35103988647461,
"learning_rate": 9.29096411586684e-06,
"loss": 0.9213,
"step": 16400
},
{
"epoch": 2.133592736705577,
"grad_norm": 37.310787200927734,
"learning_rate": 9.288802421098142e-06,
"loss": 0.8847,
"step": 16450
},
{
"epoch": 2.140077821011673,
"grad_norm": 132.53892517089844,
"learning_rate": 9.286640726329444e-06,
"loss": 0.9068,
"step": 16500
},
{
"epoch": 2.146562905317769,
"grad_norm": 75.88333892822266,
"learning_rate": 9.284479031560744e-06,
"loss": 0.894,
"step": 16550
},
{
"epoch": 2.153047989623865,
"grad_norm": 251.23751831054688,
"learning_rate": 9.282317336792046e-06,
"loss": 0.9422,
"step": 16600
},
{
"epoch": 2.159533073929961,
"grad_norm": 32.46202850341797,
"learning_rate": 9.280155642023347e-06,
"loss": 0.9291,
"step": 16650
},
{
"epoch": 2.166018158236057,
"grad_norm": 53.387718200683594,
"learning_rate": 9.277993947254649e-06,
"loss": 0.8967,
"step": 16700
},
{
"epoch": 2.172503242542153,
"grad_norm": 209.8604278564453,
"learning_rate": 9.27583225248595e-06,
"loss": 0.9111,
"step": 16750
},
{
"epoch": 2.178988326848249,
"grad_norm": 96.47901153564453,
"learning_rate": 9.273670557717252e-06,
"loss": 0.9166,
"step": 16800
},
{
"epoch": 2.1854734111543452,
"grad_norm": 52.16880798339844,
"learning_rate": 9.271508862948553e-06,
"loss": 0.8909,
"step": 16850
},
{
"epoch": 2.191958495460441,
"grad_norm": 170.49676513671875,
"learning_rate": 9.269347168179853e-06,
"loss": 0.898,
"step": 16900
},
{
"epoch": 2.198443579766537,
"grad_norm": 55.0761604309082,
"learning_rate": 9.267185473411155e-06,
"loss": 0.9078,
"step": 16950
},
{
"epoch": 2.204928664072633,
"grad_norm": 124.61663055419922,
"learning_rate": 9.265023778642457e-06,
"loss": 0.928,
"step": 17000
},
{
"epoch": 2.211413748378729,
"grad_norm": 49.64213562011719,
"learning_rate": 9.262862083873758e-06,
"loss": 0.9018,
"step": 17050
},
{
"epoch": 2.217898832684825,
"grad_norm": 143.7904052734375,
"learning_rate": 9.26070038910506e-06,
"loss": 0.8655,
"step": 17100
},
{
"epoch": 2.224383916990921,
"grad_norm": 139.10025024414062,
"learning_rate": 9.258538694336361e-06,
"loss": 0.9088,
"step": 17150
},
{
"epoch": 2.230869001297017,
"grad_norm": 18.64621925354004,
"learning_rate": 9.256376999567663e-06,
"loss": 0.8923,
"step": 17200
},
{
"epoch": 2.237354085603113,
"grad_norm": 154.90325927734375,
"learning_rate": 9.254215304798963e-06,
"loss": 0.912,
"step": 17250
},
{
"epoch": 2.243839169909209,
"grad_norm": 87.64720916748047,
"learning_rate": 9.252053610030264e-06,
"loss": 0.8789,
"step": 17300
},
{
"epoch": 2.250324254215305,
"grad_norm": 56.62800216674805,
"learning_rate": 9.249891915261566e-06,
"loss": 0.8899,
"step": 17350
},
{
"epoch": 2.2568093385214008,
"grad_norm": 37.476234436035156,
"learning_rate": 9.247730220492868e-06,
"loss": 0.8846,
"step": 17400
},
{
"epoch": 2.2632944228274967,
"grad_norm": 60.178428649902344,
"learning_rate": 9.24556852572417e-06,
"loss": 0.9088,
"step": 17450
},
{
"epoch": 2.2697795071335927,
"grad_norm": 113.12017059326172,
"learning_rate": 9.24340683095547e-06,
"loss": 0.8523,
"step": 17500
},
{
"epoch": 2.2762645914396886,
"grad_norm": 70.21991729736328,
"learning_rate": 9.241245136186772e-06,
"loss": 0.8874,
"step": 17550
},
{
"epoch": 2.2827496757457846,
"grad_norm": 20.540199279785156,
"learning_rate": 9.239083441418072e-06,
"loss": 0.8262,
"step": 17600
},
{
"epoch": 2.2892347600518805,
"grad_norm": 32.57448959350586,
"learning_rate": 9.236921746649374e-06,
"loss": 0.8445,
"step": 17650
},
{
"epoch": 2.2957198443579765,
"grad_norm": 232.79153442382812,
"learning_rate": 9.234760051880676e-06,
"loss": 0.8666,
"step": 17700
},
{
"epoch": 2.3022049286640724,
"grad_norm": 52.618385314941406,
"learning_rate": 9.232598357111977e-06,
"loss": 0.8744,
"step": 17750
},
{
"epoch": 2.3086900129701684,
"grad_norm": 47.01662826538086,
"learning_rate": 9.230436662343279e-06,
"loss": 0.8673,
"step": 17800
},
{
"epoch": 2.3151750972762644,
"grad_norm": 52.647891998291016,
"learning_rate": 9.22827496757458e-06,
"loss": 0.8946,
"step": 17850
},
{
"epoch": 2.3216601815823608,
"grad_norm": 66.30323791503906,
"learning_rate": 9.226113272805882e-06,
"loss": 0.9222,
"step": 17900
},
{
"epoch": 2.3281452658884567,
"grad_norm": 78.40735626220703,
"learning_rate": 9.223951578037182e-06,
"loss": 0.8958,
"step": 17950
},
{
"epoch": 2.3346303501945527,
"grad_norm": 156.3478240966797,
"learning_rate": 9.221789883268483e-06,
"loss": 0.8631,
"step": 18000
},
{
"epoch": 2.3411154345006486,
"grad_norm": 46.133201599121094,
"learning_rate": 9.219628188499785e-06,
"loss": 0.8669,
"step": 18050
},
{
"epoch": 2.3476005188067446,
"grad_norm": 117.3602523803711,
"learning_rate": 9.217466493731085e-06,
"loss": 0.8592,
"step": 18100
},
{
"epoch": 2.3540856031128405,
"grad_norm": 99.78243255615234,
"learning_rate": 9.215304798962386e-06,
"loss": 0.8563,
"step": 18150
},
{
"epoch": 2.3605706874189365,
"grad_norm": 47.9234504699707,
"learning_rate": 9.213143104193688e-06,
"loss": 0.8678,
"step": 18200
},
{
"epoch": 2.3670557717250325,
"grad_norm": 83.74739837646484,
"learning_rate": 9.21098140942499e-06,
"loss": 0.8752,
"step": 18250
},
{
"epoch": 2.3735408560311284,
"grad_norm": 36.51896667480469,
"learning_rate": 9.208819714656291e-06,
"loss": 0.9021,
"step": 18300
},
{
"epoch": 2.3800259403372244,
"grad_norm": 31.101106643676758,
"learning_rate": 9.206658019887593e-06,
"loss": 0.8338,
"step": 18350
},
{
"epoch": 2.3865110246433203,
"grad_norm": 103.6131591796875,
"learning_rate": 9.204496325118893e-06,
"loss": 0.9075,
"step": 18400
},
{
"epoch": 2.3929961089494163,
"grad_norm": 36.490447998046875,
"learning_rate": 9.202334630350194e-06,
"loss": 0.8567,
"step": 18450
},
{
"epoch": 2.3994811932555122,
"grad_norm": 55.931556701660156,
"learning_rate": 9.200172935581496e-06,
"loss": 0.8783,
"step": 18500
},
{
"epoch": 2.405966277561608,
"grad_norm": 78.6571044921875,
"learning_rate": 9.198011240812798e-06,
"loss": 0.8902,
"step": 18550
},
{
"epoch": 2.412451361867704,
"grad_norm": 106.48160552978516,
"learning_rate": 9.195849546044099e-06,
"loss": 0.8735,
"step": 18600
},
{
"epoch": 2.4189364461738,
"grad_norm": 160.64849853515625,
"learning_rate": 9.1936878512754e-06,
"loss": 0.8662,
"step": 18650
},
{
"epoch": 2.425421530479896,
"grad_norm": 97.8504867553711,
"learning_rate": 9.191526156506702e-06,
"loss": 0.8682,
"step": 18700
},
{
"epoch": 2.4319066147859925,
"grad_norm": 70.43258666992188,
"learning_rate": 9.189364461738002e-06,
"loss": 0.8945,
"step": 18750
},
{
"epoch": 2.4383916990920884,
"grad_norm": 112.30128479003906,
"learning_rate": 9.187202766969304e-06,
"loss": 0.8751,
"step": 18800
},
{
"epoch": 2.4448767833981844,
"grad_norm": 112.90283203125,
"learning_rate": 9.185041072200605e-06,
"loss": 0.8573,
"step": 18850
},
{
"epoch": 2.4513618677042803,
"grad_norm": 36.05859375,
"learning_rate": 9.182879377431907e-06,
"loss": 0.8304,
"step": 18900
},
{
"epoch": 2.4578469520103763,
"grad_norm": 72.84355163574219,
"learning_rate": 9.180717682663209e-06,
"loss": 0.8208,
"step": 18950
},
{
"epoch": 2.4643320363164722,
"grad_norm": 125.35198974609375,
"learning_rate": 9.17855598789451e-06,
"loss": 0.8643,
"step": 19000
},
{
"epoch": 2.470817120622568,
"grad_norm": 93.8465805053711,
"learning_rate": 9.176394293125812e-06,
"loss": 0.8591,
"step": 19050
},
{
"epoch": 2.477302204928664,
"grad_norm": 114.83902740478516,
"learning_rate": 9.174232598357112e-06,
"loss": 0.836,
"step": 19100
},
{
"epoch": 2.48378728923476,
"grad_norm": 61.47188949584961,
"learning_rate": 9.172070903588413e-06,
"loss": 0.8594,
"step": 19150
},
{
"epoch": 2.490272373540856,
"grad_norm": 81.23229217529297,
"learning_rate": 9.169909208819715e-06,
"loss": 0.8223,
"step": 19200
},
{
"epoch": 2.496757457846952,
"grad_norm": 143.3751678466797,
"learning_rate": 9.167747514051016e-06,
"loss": 0.8492,
"step": 19250
},
{
"epoch": 2.503242542153048,
"grad_norm": 75.92655181884766,
"learning_rate": 9.165585819282318e-06,
"loss": 0.834,
"step": 19300
},
{
"epoch": 2.509727626459144,
"grad_norm": 67.34745788574219,
"learning_rate": 9.16342412451362e-06,
"loss": 0.8322,
"step": 19350
},
{
"epoch": 2.51621271076524,
"grad_norm": 125.6097640991211,
"learning_rate": 9.161262429744921e-06,
"loss": 0.8121,
"step": 19400
},
{
"epoch": 2.522697795071336,
"grad_norm": 104.31269836425781,
"learning_rate": 9.159100734976221e-06,
"loss": 0.8242,
"step": 19450
},
{
"epoch": 2.529182879377432,
"grad_norm": 88.86971282958984,
"learning_rate": 9.156939040207523e-06,
"loss": 0.8465,
"step": 19500
},
{
"epoch": 2.5356679636835278,
"grad_norm": 84.49606323242188,
"learning_rate": 9.154777345438824e-06,
"loss": 0.8228,
"step": 19550
},
{
"epoch": 2.5421530479896237,
"grad_norm": 72.06951904296875,
"learning_rate": 9.152615650670126e-06,
"loss": 0.8321,
"step": 19600
},
{
"epoch": 2.5486381322957197,
"grad_norm": 51.27252197265625,
"learning_rate": 9.150453955901428e-06,
"loss": 0.8377,
"step": 19650
},
{
"epoch": 2.5551232166018156,
"grad_norm": 82.98815155029297,
"learning_rate": 9.148292261132729e-06,
"loss": 0.8577,
"step": 19700
},
{
"epoch": 2.5616083009079116,
"grad_norm": 86.29476928710938,
"learning_rate": 9.146130566364029e-06,
"loss": 0.838,
"step": 19750
},
{
"epoch": 2.5680933852140075,
"grad_norm": 201.86570739746094,
"learning_rate": 9.14396887159533e-06,
"loss": 0.8341,
"step": 19800
},
{
"epoch": 2.5745784695201035,
"grad_norm": 48.80326461791992,
"learning_rate": 9.141807176826632e-06,
"loss": 0.8183,
"step": 19850
},
{
"epoch": 2.5810635538262,
"grad_norm": 123.20867156982422,
"learning_rate": 9.139645482057934e-06,
"loss": 0.8041,
"step": 19900
},
{
"epoch": 2.587548638132296,
"grad_norm": 77.76668548583984,
"learning_rate": 9.137483787289235e-06,
"loss": 0.8027,
"step": 19950
},
{
"epoch": 2.594033722438392,
"grad_norm": 60.8740119934082,
"learning_rate": 9.135322092520537e-06,
"loss": 0.8354,
"step": 20000
},
{
"epoch": 2.6005188067444878,
"grad_norm": 33.433929443359375,
"learning_rate": 9.133160397751839e-06,
"loss": 0.8297,
"step": 20050
},
{
"epoch": 2.6070038910505837,
"grad_norm": 101.6844253540039,
"learning_rate": 9.130998702983139e-06,
"loss": 0.8595,
"step": 20100
},
{
"epoch": 2.6134889753566797,
"grad_norm": 56.76240921020508,
"learning_rate": 9.12883700821444e-06,
"loss": 0.8318,
"step": 20150
},
{
"epoch": 2.6199740596627756,
"grad_norm": 77.91346740722656,
"learning_rate": 9.126675313445742e-06,
"loss": 0.8171,
"step": 20200
},
{
"epoch": 2.6264591439688716,
"grad_norm": 22.083127975463867,
"learning_rate": 9.124513618677043e-06,
"loss": 0.8436,
"step": 20250
},
{
"epoch": 2.6329442282749675,
"grad_norm": 32.83180618286133,
"learning_rate": 9.122351923908345e-06,
"loss": 0.8413,
"step": 20300
},
{
"epoch": 2.6394293125810635,
"grad_norm": 80.33685302734375,
"learning_rate": 9.120190229139646e-06,
"loss": 0.8307,
"step": 20350
},
{
"epoch": 2.6459143968871595,
"grad_norm": 106.72901916503906,
"learning_rate": 9.118028534370948e-06,
"loss": 0.8143,
"step": 20400
},
{
"epoch": 2.6523994811932554,
"grad_norm": 84.93223571777344,
"learning_rate": 9.115866839602248e-06,
"loss": 0.8212,
"step": 20450
},
{
"epoch": 2.6588845654993514,
"grad_norm": 100.1551513671875,
"learning_rate": 9.11370514483355e-06,
"loss": 0.8289,
"step": 20500
},
{
"epoch": 2.6653696498054473,
"grad_norm": 86.93508911132812,
"learning_rate": 9.111543450064851e-06,
"loss": 0.8047,
"step": 20550
},
{
"epoch": 2.6718547341115433,
"grad_norm": 43.016624450683594,
"learning_rate": 9.109381755296153e-06,
"loss": 0.7988,
"step": 20600
},
{
"epoch": 2.6783398184176397,
"grad_norm": 310.767822265625,
"learning_rate": 9.107220060527454e-06,
"loss": 0.8227,
"step": 20650
},
{
"epoch": 2.6848249027237356,
"grad_norm": 82.60010528564453,
"learning_rate": 9.105058365758756e-06,
"loss": 0.8148,
"step": 20700
},
{
"epoch": 2.6913099870298316,
"grad_norm": 76.9372329711914,
"learning_rate": 9.102896670990058e-06,
"loss": 0.7776,
"step": 20750
},
{
"epoch": 2.6977950713359276,
"grad_norm": 41.984886169433594,
"learning_rate": 9.100734976221357e-06,
"loss": 0.8093,
"step": 20800
},
{
"epoch": 2.7042801556420235,
"grad_norm": 58.13618850708008,
"learning_rate": 9.098573281452659e-06,
"loss": 0.8415,
"step": 20850
},
{
"epoch": 2.7107652399481195,
"grad_norm": 66.05621337890625,
"learning_rate": 9.09641158668396e-06,
"loss": 0.8096,
"step": 20900
},
{
"epoch": 2.7172503242542154,
"grad_norm": 63.902557373046875,
"learning_rate": 9.094249891915262e-06,
"loss": 0.7865,
"step": 20950
},
{
"epoch": 2.7237354085603114,
"grad_norm": 41.3662109375,
"learning_rate": 9.092088197146564e-06,
"loss": 0.8213,
"step": 21000
},
{
"epoch": 2.7302204928664073,
"grad_norm": 53.82701873779297,
"learning_rate": 9.089926502377865e-06,
"loss": 0.8267,
"step": 21050
},
{
"epoch": 2.7367055771725033,
"grad_norm": 76.71524047851562,
"learning_rate": 9.087764807609167e-06,
"loss": 0.8065,
"step": 21100
},
{
"epoch": 2.7431906614785992,
"grad_norm": 34.62066650390625,
"learning_rate": 9.085603112840467e-06,
"loss": 0.764,
"step": 21150
},
{
"epoch": 2.749675745784695,
"grad_norm": 165.2742462158203,
"learning_rate": 9.083441418071769e-06,
"loss": 0.7828,
"step": 21200
},
{
"epoch": 2.756160830090791,
"grad_norm": 82.91865539550781,
"learning_rate": 9.08127972330307e-06,
"loss": 0.8355,
"step": 21250
},
{
"epoch": 2.762645914396887,
"grad_norm": 60.068851470947266,
"learning_rate": 9.079118028534372e-06,
"loss": 0.8015,
"step": 21300
},
{
"epoch": 2.769130998702983,
"grad_norm": 194.20948791503906,
"learning_rate": 9.076956333765673e-06,
"loss": 0.8166,
"step": 21350
},
{
"epoch": 2.775616083009079,
"grad_norm": 49.6822509765625,
"learning_rate": 9.074794638996975e-06,
"loss": 0.7728,
"step": 21400
},
{
"epoch": 2.782101167315175,
"grad_norm": 73.5209732055664,
"learning_rate": 9.072632944228276e-06,
"loss": 0.7917,
"step": 21450
},
{
"epoch": 2.788586251621271,
"grad_norm": 156.21685791015625,
"learning_rate": 9.070471249459576e-06,
"loss": 0.7691,
"step": 21500
},
{
"epoch": 2.795071335927367,
"grad_norm": 85.61043548583984,
"learning_rate": 9.068309554690878e-06,
"loss": 0.7911,
"step": 21550
},
{
"epoch": 2.801556420233463,
"grad_norm": 144.1258087158203,
"learning_rate": 9.06614785992218e-06,
"loss": 0.7966,
"step": 21600
},
{
"epoch": 2.808041504539559,
"grad_norm": 45.8646125793457,
"learning_rate": 9.063986165153481e-06,
"loss": 0.8261,
"step": 21650
},
{
"epoch": 2.8145265888456548,
"grad_norm": 58.49191665649414,
"learning_rate": 9.061824470384783e-06,
"loss": 0.8226,
"step": 21700
},
{
"epoch": 2.8210116731517507,
"grad_norm": 105.04296112060547,
"learning_rate": 9.059662775616084e-06,
"loss": 0.7782,
"step": 21750
},
{
"epoch": 2.8274967574578467,
"grad_norm": 62.90886688232422,
"learning_rate": 9.057501080847386e-06,
"loss": 0.7693,
"step": 21800
},
{
"epoch": 2.833981841763943,
"grad_norm": 79.02916717529297,
"learning_rate": 9.055339386078686e-06,
"loss": 0.7863,
"step": 21850
},
{
"epoch": 2.840466926070039,
"grad_norm": 92.87028503417969,
"learning_rate": 9.053177691309987e-06,
"loss": 0.7804,
"step": 21900
},
{
"epoch": 2.846952010376135,
"grad_norm": 88.81787872314453,
"learning_rate": 9.051015996541289e-06,
"loss": 0.802,
"step": 21950
},
{
"epoch": 2.853437094682231,
"grad_norm": 140.72811889648438,
"learning_rate": 9.04885430177259e-06,
"loss": 0.801,
"step": 22000
},
{
"epoch": 2.859922178988327,
"grad_norm": 190.2725067138672,
"learning_rate": 9.046692607003892e-06,
"loss": 0.793,
"step": 22050
},
{
"epoch": 2.866407263294423,
"grad_norm": 122.08084869384766,
"learning_rate": 9.044530912235194e-06,
"loss": 0.7703,
"step": 22100
},
{
"epoch": 2.872892347600519,
"grad_norm": 217.95184326171875,
"learning_rate": 9.042369217466494e-06,
"loss": 0.8127,
"step": 22150
},
{
"epoch": 2.8793774319066148,
"grad_norm": 71.10440826416016,
"learning_rate": 9.040207522697795e-06,
"loss": 0.7741,
"step": 22200
},
{
"epoch": 2.8858625162127107,
"grad_norm": 101.68942260742188,
"learning_rate": 9.038045827929097e-06,
"loss": 0.7949,
"step": 22250
},
{
"epoch": 2.8923476005188067,
"grad_norm": 55.40034484863281,
"learning_rate": 9.035884133160399e-06,
"loss": 0.7572,
"step": 22300
},
{
"epoch": 2.8988326848249026,
"grad_norm": 33.14478302001953,
"learning_rate": 9.0337224383917e-06,
"loss": 0.7708,
"step": 22350
},
{
"epoch": 2.9053177691309986,
"grad_norm": 182.9443359375,
"learning_rate": 9.031560743623002e-06,
"loss": 0.7756,
"step": 22400
},
{
"epoch": 2.9118028534370946,
"grad_norm": 55.46072769165039,
"learning_rate": 9.029399048854303e-06,
"loss": 0.8147,
"step": 22450
},
{
"epoch": 2.9182879377431905,
"grad_norm": 122.65208435058594,
"learning_rate": 9.027237354085603e-06,
"loss": 0.7468,
"step": 22500
},
{
"epoch": 2.924773022049287,
"grad_norm": 127.9378662109375,
"learning_rate": 9.025075659316905e-06,
"loss": 0.7679,
"step": 22550
},
{
"epoch": 2.931258106355383,
"grad_norm": 105.78032684326172,
"learning_rate": 9.022913964548206e-06,
"loss": 0.7804,
"step": 22600
},
{
"epoch": 2.937743190661479,
"grad_norm": 24.228551864624023,
"learning_rate": 9.020752269779508e-06,
"loss": 0.7805,
"step": 22650
},
{
"epoch": 2.9442282749675748,
"grad_norm": 106.55142974853516,
"learning_rate": 9.01859057501081e-06,
"loss": 0.7818,
"step": 22700
},
{
"epoch": 2.9507133592736707,
"grad_norm": 188.49441528320312,
"learning_rate": 9.016428880242111e-06,
"loss": 0.7815,
"step": 22750
},
{
"epoch": 2.9571984435797667,
"grad_norm": 130.7115478515625,
"learning_rate": 9.014267185473413e-06,
"loss": 0.7834,
"step": 22800
},
{
"epoch": 2.9636835278858626,
"grad_norm": 46.354881286621094,
"learning_rate": 9.012105490704713e-06,
"loss": 0.7945,
"step": 22850
},
{
"epoch": 2.9701686121919586,
"grad_norm": 52.4910774230957,
"learning_rate": 9.009943795936014e-06,
"loss": 0.769,
"step": 22900
},
{
"epoch": 2.9766536964980546,
"grad_norm": 231.7021026611328,
"learning_rate": 9.007782101167316e-06,
"loss": 0.7773,
"step": 22950
},
{
"epoch": 2.9831387808041505,
"grad_norm": 68.80513763427734,
"learning_rate": 9.005620406398617e-06,
"loss": 0.7665,
"step": 23000
},
{
"epoch": 2.9896238651102465,
"grad_norm": 21.473207473754883,
"learning_rate": 9.003458711629919e-06,
"loss": 0.7801,
"step": 23050
},
{
"epoch": 2.9961089494163424,
"grad_norm": 120.36124420166016,
"learning_rate": 9.00129701686122e-06,
"loss": 0.7948,
"step": 23100
},
{
"epoch": 3.0025940337224384,
"grad_norm": 74.29264831542969,
"learning_rate": 8.999135322092522e-06,
"loss": 0.7434,
"step": 23150
},
{
"epoch": 3.0090791180285343,
"grad_norm": 93.22494506835938,
"learning_rate": 8.996973627323822e-06,
"loss": 0.7349,
"step": 23200
},
{
"epoch": 3.0155642023346303,
"grad_norm": 30.082307815551758,
"learning_rate": 8.994811932555124e-06,
"loss": 0.7339,
"step": 23250
},
{
"epoch": 3.0220492866407263,
"grad_norm": 31.523271560668945,
"learning_rate": 8.992650237786425e-06,
"loss": 0.7256,
"step": 23300
},
{
"epoch": 3.028534370946822,
"grad_norm": 43.012237548828125,
"learning_rate": 8.990488543017727e-06,
"loss": 0.7657,
"step": 23350
},
{
"epoch": 3.035019455252918,
"grad_norm": 55.685081481933594,
"learning_rate": 8.988326848249028e-06,
"loss": 0.7587,
"step": 23400
},
{
"epoch": 3.041504539559014,
"grad_norm": 51.72869110107422,
"learning_rate": 8.98616515348033e-06,
"loss": 0.7697,
"step": 23450
},
{
"epoch": 3.04798962386511,
"grad_norm": 49.6856689453125,
"learning_rate": 8.984003458711632e-06,
"loss": 0.7699,
"step": 23500
},
{
"epoch": 3.054474708171206,
"grad_norm": 62.46233367919922,
"learning_rate": 8.981841763942932e-06,
"loss": 0.7785,
"step": 23550
},
{
"epoch": 3.060959792477302,
"grad_norm": 129.84275817871094,
"learning_rate": 8.979680069174233e-06,
"loss": 0.7474,
"step": 23600
},
{
"epoch": 3.0674448767833984,
"grad_norm": 28.303911209106445,
"learning_rate": 8.977518374405535e-06,
"loss": 0.7629,
"step": 23650
},
{
"epoch": 3.0739299610894943,
"grad_norm": 74.46251678466797,
"learning_rate": 8.975356679636836e-06,
"loss": 0.7633,
"step": 23700
},
{
"epoch": 3.0804150453955903,
"grad_norm": 27.983522415161133,
"learning_rate": 8.973194984868138e-06,
"loss": 0.7769,
"step": 23750
},
{
"epoch": 3.0869001297016863,
"grad_norm": 71.08908081054688,
"learning_rate": 8.97103329009944e-06,
"loss": 0.7656,
"step": 23800
},
{
"epoch": 3.093385214007782,
"grad_norm": 100.88603210449219,
"learning_rate": 8.968871595330741e-06,
"loss": 0.7546,
"step": 23850
},
{
"epoch": 3.099870298313878,
"grad_norm": 159.69082641601562,
"learning_rate": 8.966709900562041e-06,
"loss": 0.7591,
"step": 23900
},
{
"epoch": 3.106355382619974,
"grad_norm": 28.74492073059082,
"learning_rate": 8.964548205793343e-06,
"loss": 0.7779,
"step": 23950
},
{
"epoch": 3.11284046692607,
"grad_norm": 86.59606170654297,
"learning_rate": 8.962386511024644e-06,
"loss": 0.7592,
"step": 24000
},
{
"epoch": 3.119325551232166,
"grad_norm": 77.73062133789062,
"learning_rate": 8.960224816255946e-06,
"loss": 0.7932,
"step": 24050
},
{
"epoch": 3.125810635538262,
"grad_norm": 82.81999969482422,
"learning_rate": 8.958063121487247e-06,
"loss": 0.7468,
"step": 24100
},
{
"epoch": 3.132295719844358,
"grad_norm": 106.86148834228516,
"learning_rate": 8.955901426718549e-06,
"loss": 0.7473,
"step": 24150
},
{
"epoch": 3.138780804150454,
"grad_norm": 73.26065063476562,
"learning_rate": 8.95373973194985e-06,
"loss": 0.7653,
"step": 24200
},
{
"epoch": 3.14526588845655,
"grad_norm": 154.48199462890625,
"learning_rate": 8.95157803718115e-06,
"loss": 0.7799,
"step": 24250
},
{
"epoch": 3.151750972762646,
"grad_norm": 165.397216796875,
"learning_rate": 8.949416342412452e-06,
"loss": 0.7668,
"step": 24300
},
{
"epoch": 3.1582360570687418,
"grad_norm": 54.25576400756836,
"learning_rate": 8.947254647643754e-06,
"loss": 0.7501,
"step": 24350
},
{
"epoch": 3.1647211413748377,
"grad_norm": 78.98974609375,
"learning_rate": 8.945092952875055e-06,
"loss": 0.7546,
"step": 24400
},
{
"epoch": 3.1712062256809337,
"grad_norm": 69.79071807861328,
"learning_rate": 8.942931258106357e-06,
"loss": 0.7455,
"step": 24450
},
{
"epoch": 3.1776913099870296,
"grad_norm": 99.46908569335938,
"learning_rate": 8.940769563337658e-06,
"loss": 0.7438,
"step": 24500
},
{
"epoch": 3.184176394293126,
"grad_norm": 87.56387329101562,
"learning_rate": 8.938607868568958e-06,
"loss": 0.7421,
"step": 24550
},
{
"epoch": 3.190661478599222,
"grad_norm": 53.633941650390625,
"learning_rate": 8.93644617380026e-06,
"loss": 0.7625,
"step": 24600
},
{
"epoch": 3.197146562905318,
"grad_norm": 108.66197967529297,
"learning_rate": 8.934284479031562e-06,
"loss": 0.7474,
"step": 24650
},
{
"epoch": 3.203631647211414,
"grad_norm": 62.14433670043945,
"learning_rate": 8.932122784262863e-06,
"loss": 0.7359,
"step": 24700
},
{
"epoch": 3.21011673151751,
"grad_norm": 110.50857543945312,
"learning_rate": 8.929961089494165e-06,
"loss": 0.7402,
"step": 24750
},
{
"epoch": 3.216601815823606,
"grad_norm": 36.320377349853516,
"learning_rate": 8.927799394725466e-06,
"loss": 0.7385,
"step": 24800
},
{
"epoch": 3.223086900129702,
"grad_norm": 119.52420043945312,
"learning_rate": 8.925637699956768e-06,
"loss": 0.755,
"step": 24850
},
{
"epoch": 3.2295719844357977,
"grad_norm": 229.50978088378906,
"learning_rate": 8.923476005188068e-06,
"loss": 0.7408,
"step": 24900
},
{
"epoch": 3.2360570687418937,
"grad_norm": 29.48551368713379,
"learning_rate": 8.92131431041937e-06,
"loss": 0.7422,
"step": 24950
},
{
"epoch": 3.2425421530479897,
"grad_norm": 77.79827880859375,
"learning_rate": 8.919152615650671e-06,
"loss": 0.748,
"step": 25000
},
{
"epoch": 3.2490272373540856,
"grad_norm": 58.29311752319336,
"learning_rate": 8.916990920881973e-06,
"loss": 0.7363,
"step": 25050
},
{
"epoch": 3.2555123216601816,
"grad_norm": 22.339330673217773,
"learning_rate": 8.914829226113274e-06,
"loss": 0.7558,
"step": 25100
},
{
"epoch": 3.2619974059662775,
"grad_norm": 154.0586700439453,
"learning_rate": 8.912667531344576e-06,
"loss": 0.7527,
"step": 25150
},
{
"epoch": 3.2684824902723735,
"grad_norm": 33.30474090576172,
"learning_rate": 8.910505836575877e-06,
"loss": 0.7338,
"step": 25200
},
{
"epoch": 3.2749675745784694,
"grad_norm": 70.1267318725586,
"learning_rate": 8.908344141807177e-06,
"loss": 0.7626,
"step": 25250
},
{
"epoch": 3.2814526588845654,
"grad_norm": 214.113525390625,
"learning_rate": 8.906182447038479e-06,
"loss": 0.7451,
"step": 25300
},
{
"epoch": 3.2879377431906613,
"grad_norm": 83.08194732666016,
"learning_rate": 8.90402075226978e-06,
"loss": 0.7545,
"step": 25350
},
{
"epoch": 3.2944228274967573,
"grad_norm": 100.41940307617188,
"learning_rate": 8.90185905750108e-06,
"loss": 0.7442,
"step": 25400
},
{
"epoch": 3.3009079118028533,
"grad_norm": 67.69851684570312,
"learning_rate": 8.899697362732382e-06,
"loss": 0.7333,
"step": 25450
},
{
"epoch": 3.307392996108949,
"grad_norm": 35.9471549987793,
"learning_rate": 8.897535667963684e-06,
"loss": 0.7444,
"step": 25500
},
{
"epoch": 3.313878080415045,
"grad_norm": 192.07264709472656,
"learning_rate": 8.895373973194985e-06,
"loss": 0.7427,
"step": 25550
},
{
"epoch": 3.3203631647211416,
"grad_norm": 71.07801055908203,
"learning_rate": 8.893212278426287e-06,
"loss": 0.762,
"step": 25600
},
{
"epoch": 3.3268482490272375,
"grad_norm": 94.97274780273438,
"learning_rate": 8.891050583657588e-06,
"loss": 0.7571,
"step": 25650
},
{
"epoch": 3.3333333333333335,
"grad_norm": 64.86588287353516,
"learning_rate": 8.888888888888888e-06,
"loss": 0.7267,
"step": 25700
},
{
"epoch": 3.3398184176394294,
"grad_norm": 46.446414947509766,
"learning_rate": 8.88672719412019e-06,
"loss": 0.7433,
"step": 25750
},
{
"epoch": 3.3463035019455254,
"grad_norm": 141.70608520507812,
"learning_rate": 8.884565499351491e-06,
"loss": 0.7115,
"step": 25800
},
{
"epoch": 3.3527885862516213,
"grad_norm": 131.68763732910156,
"learning_rate": 8.882403804582793e-06,
"loss": 0.7456,
"step": 25850
},
{
"epoch": 3.3592736705577173,
"grad_norm": 44.90886306762695,
"learning_rate": 8.880242109814095e-06,
"loss": 0.7671,
"step": 25900
},
{
"epoch": 3.3657587548638133,
"grad_norm": 76.7698974609375,
"learning_rate": 8.878080415045396e-06,
"loss": 0.7414,
"step": 25950
},
{
"epoch": 3.372243839169909,
"grad_norm": 73.65957641601562,
"learning_rate": 8.875918720276698e-06,
"loss": 0.7381,
"step": 26000
},
{
"epoch": 3.378728923476005,
"grad_norm": 98.279052734375,
"learning_rate": 8.873757025507998e-06,
"loss": 0.7289,
"step": 26050
},
{
"epoch": 3.385214007782101,
"grad_norm": 87.40727233886719,
"learning_rate": 8.8715953307393e-06,
"loss": 0.7374,
"step": 26100
},
{
"epoch": 3.391699092088197,
"grad_norm": 147.8469696044922,
"learning_rate": 8.869433635970601e-06,
"loss": 0.7193,
"step": 26150
},
{
"epoch": 3.398184176394293,
"grad_norm": 57.17820358276367,
"learning_rate": 8.867271941201903e-06,
"loss": 0.7522,
"step": 26200
},
{
"epoch": 3.404669260700389,
"grad_norm": 527.1165771484375,
"learning_rate": 8.865110246433204e-06,
"loss": 0.7249,
"step": 26250
},
{
"epoch": 3.411154345006485,
"grad_norm": 110.1869125366211,
"learning_rate": 8.862948551664506e-06,
"loss": 0.7226,
"step": 26300
},
{
"epoch": 3.417639429312581,
"grad_norm": 86.62249755859375,
"learning_rate": 8.860786856895807e-06,
"loss": 0.7605,
"step": 26350
},
{
"epoch": 3.424124513618677,
"grad_norm": 53.44112014770508,
"learning_rate": 8.858625162127107e-06,
"loss": 0.7259,
"step": 26400
},
{
"epoch": 3.4306095979247733,
"grad_norm": 53.45317840576172,
"learning_rate": 8.856463467358409e-06,
"loss": 0.7322,
"step": 26450
},
{
"epoch": 3.4370946822308692,
"grad_norm": 75.9814682006836,
"learning_rate": 8.85430177258971e-06,
"loss": 0.7389,
"step": 26500
},
{
"epoch": 3.443579766536965,
"grad_norm": 72.01563262939453,
"learning_rate": 8.852140077821012e-06,
"loss": 0.7388,
"step": 26550
},
{
"epoch": 3.450064850843061,
"grad_norm": 108.14093017578125,
"learning_rate": 8.849978383052314e-06,
"loss": 0.7414,
"step": 26600
},
{
"epoch": 3.456549935149157,
"grad_norm": 146.429443359375,
"learning_rate": 8.847816688283615e-06,
"loss": 0.7322,
"step": 26650
},
{
"epoch": 3.463035019455253,
"grad_norm": 254.16734313964844,
"learning_rate": 8.845654993514917e-06,
"loss": 0.7494,
"step": 26700
},
{
"epoch": 3.469520103761349,
"grad_norm": 186.4697265625,
"learning_rate": 8.843493298746217e-06,
"loss": 0.7497,
"step": 26750
},
{
"epoch": 3.476005188067445,
"grad_norm": 110.53705596923828,
"learning_rate": 8.841331603977518e-06,
"loss": 0.7513,
"step": 26800
},
{
"epoch": 3.482490272373541,
"grad_norm": 95.7660903930664,
"learning_rate": 8.83916990920882e-06,
"loss": 0.7256,
"step": 26850
},
{
"epoch": 3.488975356679637,
"grad_norm": 60.745643615722656,
"learning_rate": 8.837008214440121e-06,
"loss": 0.7224,
"step": 26900
},
{
"epoch": 3.495460440985733,
"grad_norm": 40.43708419799805,
"learning_rate": 8.834846519671423e-06,
"loss": 0.7239,
"step": 26950
},
{
"epoch": 3.501945525291829,
"grad_norm": 42.59388732910156,
"learning_rate": 8.832684824902725e-06,
"loss": 0.7199,
"step": 27000
},
{
"epoch": 3.5084306095979247,
"grad_norm": 71.25556945800781,
"learning_rate": 8.830523130134026e-06,
"loss": 0.7256,
"step": 27050
},
{
"epoch": 3.5149156939040207,
"grad_norm": 92.77458190917969,
"learning_rate": 8.828361435365326e-06,
"loss": 0.7285,
"step": 27100
},
{
"epoch": 3.5214007782101167,
"grad_norm": 52.927757263183594,
"learning_rate": 8.826199740596628e-06,
"loss": 0.7478,
"step": 27150
},
{
"epoch": 3.5278858625162126,
"grad_norm": 71.94493865966797,
"learning_rate": 8.82403804582793e-06,
"loss": 0.7345,
"step": 27200
},
{
"epoch": 3.5343709468223086,
"grad_norm": 58.30330276489258,
"learning_rate": 8.821876351059231e-06,
"loss": 0.6936,
"step": 27250
},
{
"epoch": 3.5408560311284045,
"grad_norm": 54.03791046142578,
"learning_rate": 8.819714656290533e-06,
"loss": 0.7284,
"step": 27300
},
{
"epoch": 3.5473411154345005,
"grad_norm": 86.29717254638672,
"learning_rate": 8.817552961521834e-06,
"loss": 0.7216,
"step": 27350
},
{
"epoch": 3.5538261997405964,
"grad_norm": 105.19668579101562,
"learning_rate": 8.815391266753136e-06,
"loss": 0.7469,
"step": 27400
},
{
"epoch": 3.5603112840466924,
"grad_norm": 174.84385681152344,
"learning_rate": 8.813229571984436e-06,
"loss": 0.7122,
"step": 27450
},
{
"epoch": 3.5667963683527883,
"grad_norm": 88.76931762695312,
"learning_rate": 8.811067877215737e-06,
"loss": 0.7361,
"step": 27500
},
{
"epoch": 3.5732814526588843,
"grad_norm": 238.61947631835938,
"learning_rate": 8.808906182447039e-06,
"loss": 0.7396,
"step": 27550
},
{
"epoch": 3.5797665369649807,
"grad_norm": 105.16651916503906,
"learning_rate": 8.80674448767834e-06,
"loss": 0.7316,
"step": 27600
},
{
"epoch": 3.5862516212710767,
"grad_norm": 219.0015869140625,
"learning_rate": 8.804582792909642e-06,
"loss": 0.7254,
"step": 27650
},
{
"epoch": 3.5927367055771726,
"grad_norm": 140.21543884277344,
"learning_rate": 8.802421098140944e-06,
"loss": 0.7555,
"step": 27700
},
{
"epoch": 3.5992217898832686,
"grad_norm": 89.52685546875,
"learning_rate": 8.800259403372245e-06,
"loss": 0.7362,
"step": 27750
},
{
"epoch": 3.6057068741893645,
"grad_norm": 59.332977294921875,
"learning_rate": 8.798097708603545e-06,
"loss": 0.7284,
"step": 27800
},
{
"epoch": 3.6121919584954605,
"grad_norm": 110.76482391357422,
"learning_rate": 8.795936013834847e-06,
"loss": 0.6894,
"step": 27850
},
{
"epoch": 3.6186770428015564,
"grad_norm": 38.199073791503906,
"learning_rate": 8.793774319066148e-06,
"loss": 0.6988,
"step": 27900
},
{
"epoch": 3.6251621271076524,
"grad_norm": 63.44047927856445,
"learning_rate": 8.79161262429745e-06,
"loss": 0.734,
"step": 27950
},
{
"epoch": 3.6316472114137484,
"grad_norm": 121.4446029663086,
"learning_rate": 8.789450929528751e-06,
"loss": 0.7129,
"step": 28000
},
{
"epoch": 3.6381322957198443,
"grad_norm": 70.12100982666016,
"learning_rate": 8.787289234760053e-06,
"loss": 0.6867,
"step": 28050
},
{
"epoch": 3.6446173800259403,
"grad_norm": 51.042972564697266,
"learning_rate": 8.785127539991353e-06,
"loss": 0.7204,
"step": 28100
},
{
"epoch": 3.6511024643320362,
"grad_norm": 43.0015869140625,
"learning_rate": 8.782965845222655e-06,
"loss": 0.7225,
"step": 28150
},
{
"epoch": 3.657587548638132,
"grad_norm": 59.59611129760742,
"learning_rate": 8.780804150453956e-06,
"loss": 0.7149,
"step": 28200
},
{
"epoch": 3.664072632944228,
"grad_norm": 25.105127334594727,
"learning_rate": 8.778642455685258e-06,
"loss": 0.6864,
"step": 28250
},
{
"epoch": 3.670557717250324,
"grad_norm": 62.92705154418945,
"learning_rate": 8.77648076091656e-06,
"loss": 0.7048,
"step": 28300
},
{
"epoch": 3.6770428015564205,
"grad_norm": 154.20318603515625,
"learning_rate": 8.774319066147861e-06,
"loss": 0.6617,
"step": 28350
},
{
"epoch": 3.6835278858625164,
"grad_norm": 212.035400390625,
"learning_rate": 8.772157371379163e-06,
"loss": 0.6981,
"step": 28400
},
{
"epoch": 3.6900129701686124,
"grad_norm": 98.92573547363281,
"learning_rate": 8.769995676610462e-06,
"loss": 0.7024,
"step": 28450
},
{
"epoch": 3.6964980544747084,
"grad_norm": 136.00390625,
"learning_rate": 8.767833981841764e-06,
"loss": 0.7532,
"step": 28500
},
{
"epoch": 3.7029831387808043,
"grad_norm": 249.03781127929688,
"learning_rate": 8.765672287073066e-06,
"loss": 0.6805,
"step": 28550
},
{
"epoch": 3.7094682230869003,
"grad_norm": 37.31251525878906,
"learning_rate": 8.763510592304367e-06,
"loss": 0.7054,
"step": 28600
},
{
"epoch": 3.7159533073929962,
"grad_norm": 75.17498779296875,
"learning_rate": 8.761348897535669e-06,
"loss": 0.7303,
"step": 28650
},
{
"epoch": 3.722438391699092,
"grad_norm": 146.90443420410156,
"learning_rate": 8.75918720276697e-06,
"loss": 0.7149,
"step": 28700
},
{
"epoch": 3.728923476005188,
"grad_norm": 37.123870849609375,
"learning_rate": 8.757025507998272e-06,
"loss": 0.7096,
"step": 28750
},
{
"epoch": 3.735408560311284,
"grad_norm": 54.98661422729492,
"learning_rate": 8.754863813229572e-06,
"loss": 0.7424,
"step": 28800
},
{
"epoch": 3.74189364461738,
"grad_norm": 135.15431213378906,
"learning_rate": 8.752702118460874e-06,
"loss": 0.7179,
"step": 28850
},
{
"epoch": 3.748378728923476,
"grad_norm": 159.3280792236328,
"learning_rate": 8.750540423692175e-06,
"loss": 0.7466,
"step": 28900
},
{
"epoch": 3.754863813229572,
"grad_norm": 111.12368774414062,
"learning_rate": 8.748378728923477e-06,
"loss": 0.71,
"step": 28950
},
{
"epoch": 3.761348897535668,
"grad_norm": 95.70431518554688,
"learning_rate": 8.746217034154778e-06,
"loss": 0.6719,
"step": 29000
},
{
"epoch": 3.767833981841764,
"grad_norm": 116.32410430908203,
"learning_rate": 8.74405533938608e-06,
"loss": 0.6959,
"step": 29050
},
{
"epoch": 3.77431906614786,
"grad_norm": 48.57170867919922,
"learning_rate": 8.741893644617381e-06,
"loss": 0.7021,
"step": 29100
},
{
"epoch": 3.780804150453956,
"grad_norm": 145.74124145507812,
"learning_rate": 8.739731949848681e-06,
"loss": 0.6982,
"step": 29150
},
{
"epoch": 3.7872892347600517,
"grad_norm": 110.97146606445312,
"learning_rate": 8.737570255079983e-06,
"loss": 0.6707,
"step": 29200
},
{
"epoch": 3.7937743190661477,
"grad_norm": 80.89407348632812,
"learning_rate": 8.735408560311285e-06,
"loss": 0.704,
"step": 29250
},
{
"epoch": 3.8002594033722437,
"grad_norm": 117.62003326416016,
"learning_rate": 8.733246865542586e-06,
"loss": 0.737,
"step": 29300
},
{
"epoch": 3.8067444876783396,
"grad_norm": 236.39186096191406,
"learning_rate": 8.731085170773888e-06,
"loss": 0.6954,
"step": 29350
},
{
"epoch": 3.8132295719844356,
"grad_norm": 204.6386260986328,
"learning_rate": 8.72892347600519e-06,
"loss": 0.7258,
"step": 29400
},
{
"epoch": 3.8197146562905315,
"grad_norm": 175.2502899169922,
"learning_rate": 8.726761781236491e-06,
"loss": 0.7116,
"step": 29450
},
{
"epoch": 3.8261997405966275,
"grad_norm": 64.19542694091797,
"learning_rate": 8.724600086467791e-06,
"loss": 0.7335,
"step": 29500
},
{
"epoch": 3.832684824902724,
"grad_norm": 67.48596954345703,
"learning_rate": 8.722438391699092e-06,
"loss": 0.6889,
"step": 29550
},
{
"epoch": 3.83916990920882,
"grad_norm": 87.38389587402344,
"learning_rate": 8.720276696930394e-06,
"loss": 0.6961,
"step": 29600
},
{
"epoch": 3.845654993514916,
"grad_norm": 42.56321334838867,
"learning_rate": 8.718115002161696e-06,
"loss": 0.7061,
"step": 29650
},
{
"epoch": 3.8521400778210118,
"grad_norm": 104.84762573242188,
"learning_rate": 8.715953307392997e-06,
"loss": 0.6809,
"step": 29700
},
{
"epoch": 3.8586251621271077,
"grad_norm": 84.26802062988281,
"learning_rate": 8.713791612624299e-06,
"loss": 0.6955,
"step": 29750
},
{
"epoch": 3.8651102464332037,
"grad_norm": 76.20053100585938,
"learning_rate": 8.7116299178556e-06,
"loss": 0.6869,
"step": 29800
},
{
"epoch": 3.8715953307392996,
"grad_norm": 52.06394958496094,
"learning_rate": 8.7094682230869e-06,
"loss": 0.6838,
"step": 29850
},
{
"epoch": 3.8780804150453956,
"grad_norm": 31.091880798339844,
"learning_rate": 8.707306528318202e-06,
"loss": 0.6914,
"step": 29900
},
{
"epoch": 3.8845654993514915,
"grad_norm": 154.26475524902344,
"learning_rate": 8.705144833549504e-06,
"loss": 0.7303,
"step": 29950
},
{
"epoch": 3.8910505836575875,
"grad_norm": 70.6423568725586,
"learning_rate": 8.702983138780805e-06,
"loss": 0.6856,
"step": 30000
},
{
"epoch": 3.8975356679636834,
"grad_norm": 70.91290283203125,
"learning_rate": 8.700821444012107e-06,
"loss": 0.6972,
"step": 30050
},
{
"epoch": 3.9040207522697794,
"grad_norm": 87.57475280761719,
"learning_rate": 8.698659749243408e-06,
"loss": 0.6854,
"step": 30100
},
{
"epoch": 3.9105058365758754,
"grad_norm": 63.372528076171875,
"learning_rate": 8.69649805447471e-06,
"loss": 0.6723,
"step": 30150
},
{
"epoch": 3.9169909208819713,
"grad_norm": 108.40290069580078,
"learning_rate": 8.69433635970601e-06,
"loss": 0.686,
"step": 30200
},
{
"epoch": 3.9234760051880677,
"grad_norm": 134.44715881347656,
"learning_rate": 8.692174664937311e-06,
"loss": 0.6884,
"step": 30250
},
{
"epoch": 3.9299610894941637,
"grad_norm": 45.21245574951172,
"learning_rate": 8.690012970168613e-06,
"loss": 0.6974,
"step": 30300
},
{
"epoch": 3.9364461738002596,
"grad_norm": 64.46482849121094,
"learning_rate": 8.687851275399915e-06,
"loss": 0.6928,
"step": 30350
},
{
"epoch": 3.9429312581063556,
"grad_norm": 113.75922393798828,
"learning_rate": 8.685689580631216e-06,
"loss": 0.7125,
"step": 30400
},
{
"epoch": 3.9494163424124515,
"grad_norm": 106.91778564453125,
"learning_rate": 8.683527885862518e-06,
"loss": 0.6689,
"step": 30450
},
{
"epoch": 3.9559014267185475,
"grad_norm": 145.61880493164062,
"learning_rate": 8.681366191093818e-06,
"loss": 0.7005,
"step": 30500
},
{
"epoch": 3.9623865110246435,
"grad_norm": 147.24017333984375,
"learning_rate": 8.67920449632512e-06,
"loss": 0.6987,
"step": 30550
},
{
"epoch": 3.9688715953307394,
"grad_norm": 86.30076599121094,
"learning_rate": 8.677042801556421e-06,
"loss": 0.7059,
"step": 30600
},
{
"epoch": 3.9753566796368354,
"grad_norm": 118.67623138427734,
"learning_rate": 8.674881106787722e-06,
"loss": 0.6777,
"step": 30650
},
{
"epoch": 3.9818417639429313,
"grad_norm": 48.108436584472656,
"learning_rate": 8.672719412019024e-06,
"loss": 0.6791,
"step": 30700
},
{
"epoch": 3.9883268482490273,
"grad_norm": 81.96046447753906,
"learning_rate": 8.670557717250326e-06,
"loss": 0.6913,
"step": 30750
},
{
"epoch": 3.9948119325551232,
"grad_norm": 197.388916015625,
"learning_rate": 8.668396022481627e-06,
"loss": 0.7043,
"step": 30800
},
{
"epoch": 4.001297016861219,
"grad_norm": 35.324703216552734,
"learning_rate": 8.666234327712927e-06,
"loss": 0.7125,
"step": 30850
},
{
"epoch": 4.007782101167315,
"grad_norm": 105.50518035888672,
"learning_rate": 8.664072632944229e-06,
"loss": 0.707,
"step": 30900
},
{
"epoch": 4.014267185473411,
"grad_norm": 23.028858184814453,
"learning_rate": 8.66191093817553e-06,
"loss": 0.6371,
"step": 30950
},
{
"epoch": 4.020752269779507,
"grad_norm": 72.48033142089844,
"learning_rate": 8.659749243406832e-06,
"loss": 0.6719,
"step": 31000
},
{
"epoch": 4.027237354085603,
"grad_norm": 186.94964599609375,
"learning_rate": 8.657587548638134e-06,
"loss": 0.657,
"step": 31050
},
{
"epoch": 4.033722438391699,
"grad_norm": 77.0679702758789,
"learning_rate": 8.655425853869435e-06,
"loss": 0.6947,
"step": 31100
},
{
"epoch": 4.040207522697795,
"grad_norm": 44.03890609741211,
"learning_rate": 8.653264159100737e-06,
"loss": 0.6778,
"step": 31150
},
{
"epoch": 4.046692607003891,
"grad_norm": 71.50305938720703,
"learning_rate": 8.651102464332037e-06,
"loss": 0.6779,
"step": 31200
},
{
"epoch": 4.053177691309987,
"grad_norm": 81.2274398803711,
"learning_rate": 8.648940769563338e-06,
"loss": 0.6696,
"step": 31250
},
{
"epoch": 4.059662775616083,
"grad_norm": 169.8217315673828,
"learning_rate": 8.64677907479464e-06,
"loss": 0.6846,
"step": 31300
},
{
"epoch": 4.066147859922179,
"grad_norm": 87.5166244506836,
"learning_rate": 8.644617380025941e-06,
"loss": 0.6826,
"step": 31350
},
{
"epoch": 4.072632944228275,
"grad_norm": 189.89439392089844,
"learning_rate": 8.642455685257243e-06,
"loss": 0.6891,
"step": 31400
},
{
"epoch": 4.079118028534371,
"grad_norm": 34.17830276489258,
"learning_rate": 8.640293990488545e-06,
"loss": 0.7317,
"step": 31450
},
{
"epoch": 4.085603112840467,
"grad_norm": 135.60418701171875,
"learning_rate": 8.638132295719846e-06,
"loss": 0.6752,
"step": 31500
},
{
"epoch": 4.092088197146563,
"grad_norm": 81.31814575195312,
"learning_rate": 8.635970600951146e-06,
"loss": 0.7003,
"step": 31550
},
{
"epoch": 4.0985732814526585,
"grad_norm": 302.2116394042969,
"learning_rate": 8.633808906182448e-06,
"loss": 0.6662,
"step": 31600
},
{
"epoch": 4.1050583657587545,
"grad_norm": 36.41209030151367,
"learning_rate": 8.63164721141375e-06,
"loss": 0.6726,
"step": 31650
},
{
"epoch": 4.1115434500648504,
"grad_norm": 164.67007446289062,
"learning_rate": 8.629485516645051e-06,
"loss": 0.7068,
"step": 31700
},
{
"epoch": 4.118028534370946,
"grad_norm": 43.526405334472656,
"learning_rate": 8.627323821876352e-06,
"loss": 0.6729,
"step": 31750
},
{
"epoch": 4.124513618677042,
"grad_norm": 110.07795715332031,
"learning_rate": 8.625162127107654e-06,
"loss": 0.683,
"step": 31800
},
{
"epoch": 4.130998702983139,
"grad_norm": 89.71601867675781,
"learning_rate": 8.623000432338956e-06,
"loss": 0.6792,
"step": 31850
},
{
"epoch": 4.137483787289235,
"grad_norm": 101.19843292236328,
"learning_rate": 8.620838737570256e-06,
"loss": 0.6505,
"step": 31900
},
{
"epoch": 4.143968871595331,
"grad_norm": 230.81871032714844,
"learning_rate": 8.618677042801557e-06,
"loss": 0.6748,
"step": 31950
},
{
"epoch": 4.150453955901427,
"grad_norm": 114.94778442382812,
"learning_rate": 8.616515348032859e-06,
"loss": 0.6755,
"step": 32000
},
{
"epoch": 4.156939040207523,
"grad_norm": 34.266761779785156,
"learning_rate": 8.61435365326416e-06,
"loss": 0.6572,
"step": 32050
},
{
"epoch": 4.163424124513619,
"grad_norm": 80.04161071777344,
"learning_rate": 8.612191958495462e-06,
"loss": 0.6804,
"step": 32100
},
{
"epoch": 4.169909208819715,
"grad_norm": 350.19573974609375,
"learning_rate": 8.610030263726764e-06,
"loss": 0.6955,
"step": 32150
},
{
"epoch": 4.176394293125811,
"grad_norm": 83.74986267089844,
"learning_rate": 8.607868568958065e-06,
"loss": 0.6856,
"step": 32200
},
{
"epoch": 4.182879377431907,
"grad_norm": 137.72669982910156,
"learning_rate": 8.605706874189365e-06,
"loss": 0.6541,
"step": 32250
},
{
"epoch": 4.189364461738003,
"grad_norm": 45.202903747558594,
"learning_rate": 8.603545179420667e-06,
"loss": 0.679,
"step": 32300
},
{
"epoch": 4.195849546044099,
"grad_norm": 191.8456268310547,
"learning_rate": 8.601383484651968e-06,
"loss": 0.6594,
"step": 32350
},
{
"epoch": 4.202334630350195,
"grad_norm": 152.2100830078125,
"learning_rate": 8.59922178988327e-06,
"loss": 0.6553,
"step": 32400
},
{
"epoch": 4.208819714656291,
"grad_norm": 56.737754821777344,
"learning_rate": 8.597060095114571e-06,
"loss": 0.6915,
"step": 32450
},
{
"epoch": 4.215304798962387,
"grad_norm": 49.64228057861328,
"learning_rate": 8.594898400345873e-06,
"loss": 0.6773,
"step": 32500
},
{
"epoch": 4.221789883268483,
"grad_norm": 57.6026496887207,
"learning_rate": 8.592736705577173e-06,
"loss": 0.7089,
"step": 32550
},
{
"epoch": 4.2282749675745785,
"grad_norm": 58.62641143798828,
"learning_rate": 8.590575010808474e-06,
"loss": 0.6433,
"step": 32600
},
{
"epoch": 4.2347600518806745,
"grad_norm": 101.03966522216797,
"learning_rate": 8.588413316039776e-06,
"loss": 0.6496,
"step": 32650
},
{
"epoch": 4.2412451361867705,
"grad_norm": 198.1434326171875,
"learning_rate": 8.586251621271076e-06,
"loss": 0.6757,
"step": 32700
},
{
"epoch": 4.247730220492866,
"grad_norm": 78.59976196289062,
"learning_rate": 8.584089926502378e-06,
"loss": 0.6913,
"step": 32750
},
{
"epoch": 4.254215304798962,
"grad_norm": 94.35735321044922,
"learning_rate": 8.58192823173368e-06,
"loss": 0.6982,
"step": 32800
},
{
"epoch": 4.260700389105058,
"grad_norm": 140.77769470214844,
"learning_rate": 8.57976653696498e-06,
"loss": 0.6753,
"step": 32850
},
{
"epoch": 4.267185473411154,
"grad_norm": 59.85847091674805,
"learning_rate": 8.577604842196282e-06,
"loss": 0.644,
"step": 32900
},
{
"epoch": 4.27367055771725,
"grad_norm": 49.44724655151367,
"learning_rate": 8.575443147427584e-06,
"loss": 0.676,
"step": 32950
},
{
"epoch": 4.280155642023346,
"grad_norm": 58.50251007080078,
"learning_rate": 8.573281452658886e-06,
"loss": 0.6919,
"step": 33000
},
{
"epoch": 4.286640726329442,
"grad_norm": 52.0682258605957,
"learning_rate": 8.571119757890185e-06,
"loss": 0.6517,
"step": 33050
},
{
"epoch": 4.293125810635538,
"grad_norm": 100.81246948242188,
"learning_rate": 8.568958063121487e-06,
"loss": 0.7115,
"step": 33100
},
{
"epoch": 4.299610894941634,
"grad_norm": 126.4149398803711,
"learning_rate": 8.566796368352789e-06,
"loss": 0.6621,
"step": 33150
},
{
"epoch": 4.30609597924773,
"grad_norm": 82.7846908569336,
"learning_rate": 8.56463467358409e-06,
"loss": 0.6653,
"step": 33200
},
{
"epoch": 4.312581063553826,
"grad_norm": 111.23580932617188,
"learning_rate": 8.562472978815392e-06,
"loss": 0.6779,
"step": 33250
},
{
"epoch": 4.319066147859922,
"grad_norm": 88.04605102539062,
"learning_rate": 8.560311284046693e-06,
"loss": 0.6984,
"step": 33300
},
{
"epoch": 4.325551232166018,
"grad_norm": 34.93830871582031,
"learning_rate": 8.558149589277995e-06,
"loss": 0.6901,
"step": 33350
},
{
"epoch": 4.332036316472114,
"grad_norm": 86.28446197509766,
"learning_rate": 8.555987894509295e-06,
"loss": 0.6808,
"step": 33400
},
{
"epoch": 4.33852140077821,
"grad_norm": 52.025169372558594,
"learning_rate": 8.553826199740596e-06,
"loss": 0.6769,
"step": 33450
},
{
"epoch": 4.345006485084306,
"grad_norm": 103.3537826538086,
"learning_rate": 8.551664504971898e-06,
"loss": 0.674,
"step": 33500
},
{
"epoch": 4.351491569390402,
"grad_norm": 131.0025634765625,
"learning_rate": 8.5495028102032e-06,
"loss": 0.6634,
"step": 33550
},
{
"epoch": 4.357976653696498,
"grad_norm": 36.6743049621582,
"learning_rate": 8.547341115434501e-06,
"loss": 0.6771,
"step": 33600
},
{
"epoch": 4.364461738002594,
"grad_norm": 91.82353210449219,
"learning_rate": 8.545179420665803e-06,
"loss": 0.6709,
"step": 33650
},
{
"epoch": 4.3709468223086905,
"grad_norm": 67.49322509765625,
"learning_rate": 8.543017725897104e-06,
"loss": 0.6381,
"step": 33700
},
{
"epoch": 4.377431906614786,
"grad_norm": 53.42247772216797,
"learning_rate": 8.540856031128404e-06,
"loss": 0.6886,
"step": 33750
},
{
"epoch": 4.383916990920882,
"grad_norm": 221.73178100585938,
"learning_rate": 8.538694336359706e-06,
"loss": 0.6354,
"step": 33800
},
{
"epoch": 4.390402075226978,
"grad_norm": 103.88397216796875,
"learning_rate": 8.536532641591008e-06,
"loss": 0.6807,
"step": 33850
},
{
"epoch": 4.396887159533074,
"grad_norm": 45.40660858154297,
"learning_rate": 8.53437094682231e-06,
"loss": 0.6404,
"step": 33900
},
{
"epoch": 4.40337224383917,
"grad_norm": 65.8223876953125,
"learning_rate": 8.53220925205361e-06,
"loss": 0.6567,
"step": 33950
},
{
"epoch": 4.409857328145266,
"grad_norm": 245.63230895996094,
"learning_rate": 8.530047557284912e-06,
"loss": 0.6437,
"step": 34000
},
{
"epoch": 4.416342412451362,
"grad_norm": 125.60919952392578,
"learning_rate": 8.527885862516212e-06,
"loss": 0.647,
"step": 34050
},
{
"epoch": 4.422827496757458,
"grad_norm": 147.76620483398438,
"learning_rate": 8.525724167747514e-06,
"loss": 0.6771,
"step": 34100
},
{
"epoch": 4.429312581063554,
"grad_norm": 118.33441925048828,
"learning_rate": 8.523562472978815e-06,
"loss": 0.6826,
"step": 34150
},
{
"epoch": 4.43579766536965,
"grad_norm": 289.2904052734375,
"learning_rate": 8.521400778210117e-06,
"loss": 0.6607,
"step": 34200
},
{
"epoch": 4.442282749675746,
"grad_norm": 53.50255584716797,
"learning_rate": 8.519239083441419e-06,
"loss": 0.6409,
"step": 34250
},
{
"epoch": 4.448767833981842,
"grad_norm": 133.48831176757812,
"learning_rate": 8.51707738867272e-06,
"loss": 0.6691,
"step": 34300
},
{
"epoch": 4.455252918287938,
"grad_norm": 119.14691925048828,
"learning_rate": 8.514915693904022e-06,
"loss": 0.6595,
"step": 34350
},
{
"epoch": 4.461738002594034,
"grad_norm": 323.05889892578125,
"learning_rate": 8.512753999135322e-06,
"loss": 0.6648,
"step": 34400
},
{
"epoch": 4.46822308690013,
"grad_norm": 193.7076873779297,
"learning_rate": 8.510592304366623e-06,
"loss": 0.6661,
"step": 34450
},
{
"epoch": 4.474708171206226,
"grad_norm": 85.69574737548828,
"learning_rate": 8.508430609597925e-06,
"loss": 0.6683,
"step": 34500
},
{
"epoch": 4.481193255512322,
"grad_norm": 23.649465560913086,
"learning_rate": 8.506268914829226e-06,
"loss": 0.6423,
"step": 34550
},
{
"epoch": 4.487678339818418,
"grad_norm": 109.6485366821289,
"learning_rate": 8.504107220060528e-06,
"loss": 0.6924,
"step": 34600
},
{
"epoch": 4.494163424124514,
"grad_norm": 76.71481323242188,
"learning_rate": 8.50194552529183e-06,
"loss": 0.6041,
"step": 34650
},
{
"epoch": 4.50064850843061,
"grad_norm": 33.22921371459961,
"learning_rate": 8.499783830523131e-06,
"loss": 0.6208,
"step": 34700
},
{
"epoch": 4.5071335927367056,
"grad_norm": 47.12236022949219,
"learning_rate": 8.497622135754431e-06,
"loss": 0.6477,
"step": 34750
},
{
"epoch": 4.5136186770428015,
"grad_norm": 51.28311538696289,
"learning_rate": 8.495460440985733e-06,
"loss": 0.628,
"step": 34800
},
{
"epoch": 4.5201037613488975,
"grad_norm": 25.358299255371094,
"learning_rate": 8.493298746217034e-06,
"loss": 0.6582,
"step": 34850
},
{
"epoch": 4.526588845654993,
"grad_norm": 97.27490997314453,
"learning_rate": 8.491137051448336e-06,
"loss": 0.6382,
"step": 34900
},
{
"epoch": 4.533073929961089,
"grad_norm": 176.92462158203125,
"learning_rate": 8.488975356679638e-06,
"loss": 0.6489,
"step": 34950
},
{
"epoch": 4.539559014267185,
"grad_norm": 46.83137130737305,
"learning_rate": 8.486813661910939e-06,
"loss": 0.6509,
"step": 35000
},
{
"epoch": 4.546044098573281,
"grad_norm": 27.511350631713867,
"learning_rate": 8.48465196714224e-06,
"loss": 0.6425,
"step": 35050
},
{
"epoch": 4.552529182879377,
"grad_norm": 85.7640609741211,
"learning_rate": 8.48249027237354e-06,
"loss": 0.6727,
"step": 35100
},
{
"epoch": 4.559014267185473,
"grad_norm": 246.8522491455078,
"learning_rate": 8.480328577604842e-06,
"loss": 0.6551,
"step": 35150
},
{
"epoch": 4.565499351491569,
"grad_norm": 145.3149871826172,
"learning_rate": 8.478166882836144e-06,
"loss": 0.655,
"step": 35200
},
{
"epoch": 4.571984435797665,
"grad_norm": 98.9753189086914,
"learning_rate": 8.476005188067445e-06,
"loss": 0.6396,
"step": 35250
},
{
"epoch": 4.578469520103761,
"grad_norm": 103.30072021484375,
"learning_rate": 8.473843493298747e-06,
"loss": 0.6491,
"step": 35300
},
{
"epoch": 4.584954604409857,
"grad_norm": 46.122684478759766,
"learning_rate": 8.471681798530049e-06,
"loss": 0.6505,
"step": 35350
},
{
"epoch": 4.591439688715953,
"grad_norm": 183.2648468017578,
"learning_rate": 8.46952010376135e-06,
"loss": 0.6504,
"step": 35400
},
{
"epoch": 4.597924773022049,
"grad_norm": 37.44175338745117,
"learning_rate": 8.46735840899265e-06,
"loss": 0.6747,
"step": 35450
},
{
"epoch": 4.604409857328145,
"grad_norm": 42.08739471435547,
"learning_rate": 8.465196714223952e-06,
"loss": 0.6365,
"step": 35500
},
{
"epoch": 4.610894941634241,
"grad_norm": 86.90052032470703,
"learning_rate": 8.463035019455253e-06,
"loss": 0.6565,
"step": 35550
},
{
"epoch": 4.617380025940337,
"grad_norm": 134.0282440185547,
"learning_rate": 8.460873324686555e-06,
"loss": 0.6702,
"step": 35600
},
{
"epoch": 4.623865110246433,
"grad_norm": 47.65680694580078,
"learning_rate": 8.458711629917856e-06,
"loss": 0.6805,
"step": 35650
},
{
"epoch": 4.630350194552529,
"grad_norm": 74.25086212158203,
"learning_rate": 8.456549935149158e-06,
"loss": 0.697,
"step": 35700
},
{
"epoch": 4.636835278858625,
"grad_norm": 245.19024658203125,
"learning_rate": 8.45438824038046e-06,
"loss": 0.6648,
"step": 35750
},
{
"epoch": 4.6433203631647215,
"grad_norm": 59.609580993652344,
"learning_rate": 8.45222654561176e-06,
"loss": 0.6457,
"step": 35800
},
{
"epoch": 4.6498054474708175,
"grad_norm": 68.63418579101562,
"learning_rate": 8.450064850843061e-06,
"loss": 0.6434,
"step": 35850
},
{
"epoch": 4.656290531776913,
"grad_norm": 153.90467834472656,
"learning_rate": 8.447903156074363e-06,
"loss": 0.6385,
"step": 35900
},
{
"epoch": 4.662775616083009,
"grad_norm": 68.64386749267578,
"learning_rate": 8.445741461305664e-06,
"loss": 0.6758,
"step": 35950
},
{
"epoch": 4.669260700389105,
"grad_norm": 103.12224578857422,
"learning_rate": 8.443579766536966e-06,
"loss": 0.6526,
"step": 36000
},
{
"epoch": 4.675745784695201,
"grad_norm": 35.21643829345703,
"learning_rate": 8.441418071768268e-06,
"loss": 0.6469,
"step": 36050
},
{
"epoch": 4.682230869001297,
"grad_norm": 48.4489631652832,
"learning_rate": 8.439256376999569e-06,
"loss": 0.6483,
"step": 36100
},
{
"epoch": 4.688715953307393,
"grad_norm": 181.4416046142578,
"learning_rate": 8.437094682230869e-06,
"loss": 0.6434,
"step": 36150
},
{
"epoch": 4.695201037613489,
"grad_norm": 153.2976837158203,
"learning_rate": 8.43493298746217e-06,
"loss": 0.6435,
"step": 36200
},
{
"epoch": 4.701686121919585,
"grad_norm": 80.14440155029297,
"learning_rate": 8.432771292693472e-06,
"loss": 0.6385,
"step": 36250
},
{
"epoch": 4.708171206225681,
"grad_norm": 30.52111053466797,
"learning_rate": 8.430609597924774e-06,
"loss": 0.6292,
"step": 36300
},
{
"epoch": 4.714656290531777,
"grad_norm": 192.3052520751953,
"learning_rate": 8.428447903156075e-06,
"loss": 0.6395,
"step": 36350
},
{
"epoch": 4.721141374837873,
"grad_norm": 105.61079406738281,
"learning_rate": 8.426286208387377e-06,
"loss": 0.6241,
"step": 36400
},
{
"epoch": 4.727626459143969,
"grad_norm": 111.08782196044922,
"learning_rate": 8.424124513618679e-06,
"loss": 0.6408,
"step": 36450
},
{
"epoch": 4.734111543450065,
"grad_norm": 140.2386932373047,
"learning_rate": 8.421962818849979e-06,
"loss": 0.63,
"step": 36500
},
{
"epoch": 4.740596627756161,
"grad_norm": 104.75723266601562,
"learning_rate": 8.41980112408128e-06,
"loss": 0.651,
"step": 36550
},
{
"epoch": 4.747081712062257,
"grad_norm": 38.98159408569336,
"learning_rate": 8.417639429312582e-06,
"loss": 0.6435,
"step": 36600
},
{
"epoch": 4.753566796368353,
"grad_norm": 250.14450073242188,
"learning_rate": 8.415477734543883e-06,
"loss": 0.6344,
"step": 36650
},
{
"epoch": 4.760051880674449,
"grad_norm": 50.4091796875,
"learning_rate": 8.413316039775185e-06,
"loss": 0.6409,
"step": 36700
},
{
"epoch": 4.766536964980545,
"grad_norm": 146.32968139648438,
"learning_rate": 8.411154345006486e-06,
"loss": 0.6509,
"step": 36750
},
{
"epoch": 4.773022049286641,
"grad_norm": 95.01649475097656,
"learning_rate": 8.408992650237786e-06,
"loss": 0.6637,
"step": 36800
},
{
"epoch": 4.779507133592737,
"grad_norm": 52.520076751708984,
"learning_rate": 8.406830955469088e-06,
"loss": 0.6525,
"step": 36850
},
{
"epoch": 4.785992217898833,
"grad_norm": 106.26171112060547,
"learning_rate": 8.40466926070039e-06,
"loss": 0.6759,
"step": 36900
},
{
"epoch": 4.7924773022049285,
"grad_norm": 82.12842559814453,
"learning_rate": 8.402507565931691e-06,
"loss": 0.6305,
"step": 36950
},
{
"epoch": 4.7989623865110245,
"grad_norm": 110.25924682617188,
"learning_rate": 8.400345871162993e-06,
"loss": 0.6231,
"step": 37000
},
{
"epoch": 4.80544747081712,
"grad_norm": 61.60184860229492,
"learning_rate": 8.398184176394294e-06,
"loss": 0.636,
"step": 37050
},
{
"epoch": 4.811932555123216,
"grad_norm": 106.20768737792969,
"learning_rate": 8.396022481625596e-06,
"loss": 0.663,
"step": 37100
},
{
"epoch": 4.818417639429312,
"grad_norm": 24.003427505493164,
"learning_rate": 8.393860786856896e-06,
"loss": 0.6515,
"step": 37150
},
{
"epoch": 4.824902723735408,
"grad_norm": 162.2716522216797,
"learning_rate": 8.391699092088197e-06,
"loss": 0.6714,
"step": 37200
},
{
"epoch": 4.831387808041504,
"grad_norm": 246.6392059326172,
"learning_rate": 8.389537397319499e-06,
"loss": 0.6578,
"step": 37250
},
{
"epoch": 4.8378728923476,
"grad_norm": 122.14068603515625,
"learning_rate": 8.3873757025508e-06,
"loss": 0.6262,
"step": 37300
},
{
"epoch": 4.844357976653696,
"grad_norm": 33.177120208740234,
"learning_rate": 8.385214007782102e-06,
"loss": 0.6666,
"step": 37350
},
{
"epoch": 4.850843060959792,
"grad_norm": 106.25698852539062,
"learning_rate": 8.383052313013404e-06,
"loss": 0.6421,
"step": 37400
},
{
"epoch": 4.857328145265888,
"grad_norm": 119.15618133544922,
"learning_rate": 8.380890618244705e-06,
"loss": 0.6409,
"step": 37450
},
{
"epoch": 4.863813229571985,
"grad_norm": 82.73539733886719,
"learning_rate": 8.378728923476005e-06,
"loss": 0.6363,
"step": 37500
},
{
"epoch": 4.870298313878081,
"grad_norm": 37.429141998291016,
"learning_rate": 8.376567228707307e-06,
"loss": 0.6361,
"step": 37550
},
{
"epoch": 4.876783398184177,
"grad_norm": 152.13327026367188,
"learning_rate": 8.374405533938609e-06,
"loss": 0.6386,
"step": 37600
},
{
"epoch": 4.883268482490273,
"grad_norm": 57.97270584106445,
"learning_rate": 8.37224383916991e-06,
"loss": 0.6475,
"step": 37650
},
{
"epoch": 4.889753566796369,
"grad_norm": 87.77135467529297,
"learning_rate": 8.370082144401212e-06,
"loss": 0.6235,
"step": 37700
},
{
"epoch": 4.896238651102465,
"grad_norm": 206.52565002441406,
"learning_rate": 8.367920449632513e-06,
"loss": 0.6211,
"step": 37750
},
{
"epoch": 4.902723735408561,
"grad_norm": 115.79866027832031,
"learning_rate": 8.365758754863815e-06,
"loss": 0.6338,
"step": 37800
},
{
"epoch": 4.909208819714657,
"grad_norm": 147.11058044433594,
"learning_rate": 8.363597060095115e-06,
"loss": 0.6615,
"step": 37850
},
{
"epoch": 4.915693904020753,
"grad_norm": 107.96685028076172,
"learning_rate": 8.361435365326416e-06,
"loss": 0.6437,
"step": 37900
},
{
"epoch": 4.9221789883268485,
"grad_norm": 75.72913360595703,
"learning_rate": 8.359273670557718e-06,
"loss": 0.6257,
"step": 37950
},
{
"epoch": 4.9286640726329445,
"grad_norm": 217.0497283935547,
"learning_rate": 8.35711197578902e-06,
"loss": 0.6603,
"step": 38000
},
{
"epoch": 4.93514915693904,
"grad_norm": 40.60713577270508,
"learning_rate": 8.354950281020321e-06,
"loss": 0.6293,
"step": 38050
},
{
"epoch": 4.941634241245136,
"grad_norm": 71.73409271240234,
"learning_rate": 8.352788586251623e-06,
"loss": 0.6409,
"step": 38100
},
{
"epoch": 4.948119325551232,
"grad_norm": 91.70991516113281,
"learning_rate": 8.350626891482924e-06,
"loss": 0.6299,
"step": 38150
},
{
"epoch": 4.954604409857328,
"grad_norm": 117.5611572265625,
"learning_rate": 8.348465196714224e-06,
"loss": 0.6322,
"step": 38200
},
{
"epoch": 4.961089494163424,
"grad_norm": 119.25588989257812,
"learning_rate": 8.346303501945526e-06,
"loss": 0.6419,
"step": 38250
},
{
"epoch": 4.96757457846952,
"grad_norm": 215.7095184326172,
"learning_rate": 8.344141807176827e-06,
"loss": 0.6331,
"step": 38300
},
{
"epoch": 4.974059662775616,
"grad_norm": 63.63528060913086,
"learning_rate": 8.341980112408129e-06,
"loss": 0.6632,
"step": 38350
},
{
"epoch": 4.980544747081712,
"grad_norm": 44.25017547607422,
"learning_rate": 8.33981841763943e-06,
"loss": 0.6519,
"step": 38400
},
{
"epoch": 4.987029831387808,
"grad_norm": 68.59965515136719,
"learning_rate": 8.337656722870732e-06,
"loss": 0.637,
"step": 38450
},
{
"epoch": 4.993514915693904,
"grad_norm": 136.69644165039062,
"learning_rate": 8.335495028102034e-06,
"loss": 0.6145,
"step": 38500
}
],
"logging_steps": 50,
"max_steps": 231300,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 38548,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.4735422916204544e+20,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}