{
  "best_metric": 0.89,
  "best_model_checkpoint": "models/evacun-lemmatization/checkpoint-349372",
  "epoch": 24.0,
  "eval_steps": 500,
  "global_step": 441312,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.027191646726125736,
      "grad_norm": 3.1259007453918457,
      "learning_rate": 9.994561670654776e-06,
      "loss": 4.085,
      "step": 500
    },
    {
      "epoch": 0.05438329345225147,
      "grad_norm": 3.907444953918457,
      "learning_rate": 9.98912334130955e-06,
      "loss": 1.9495,
      "step": 1000
    },
    {
      "epoch": 0.0815749401783772,
      "grad_norm": 2.732598066329956,
      "learning_rate": 9.983685011964326e-06,
      "loss": 1.4816,
      "step": 1500
    },
    {
      "epoch": 0.10876658690450294,
      "grad_norm": 5.2942376136779785,
      "learning_rate": 9.9782466826191e-06,
      "loss": 1.2103,
      "step": 2000
    },
    {
      "epoch": 0.13595823363062867,
      "grad_norm": 4.758411884307861,
      "learning_rate": 9.972808353273875e-06,
      "loss": 1.0154,
      "step": 2500
    },
    {
      "epoch": 0.1631498803567544,
      "grad_norm": 6.896116733551025,
      "learning_rate": 9.96737002392865e-06,
      "loss": 0.9023,
      "step": 3000
    },
    {
      "epoch": 0.19034152708288013,
      "grad_norm": 5.587175369262695,
      "learning_rate": 9.961931694583424e-06,
      "loss": 0.7953,
      "step": 3500
    },
    {
      "epoch": 0.2175331738090059,
      "grad_norm": 5.7429704666137695,
      "learning_rate": 9.9564933652382e-06,
      "loss": 0.7148,
      "step": 4000
    },
    {
      "epoch": 0.24472482053513162,
      "grad_norm": 4.422295093536377,
      "learning_rate": 9.951055035892974e-06,
      "loss": 0.6566,
      "step": 4500
    },
    {
      "epoch": 0.27191646726125734,
      "grad_norm": 3.8143515586853027,
      "learning_rate": 9.94561670654775e-06,
      "loss": 0.6051,
      "step": 5000
    },
    {
      "epoch": 0.2991081139873831,
      "grad_norm": 3.3009655475616455,
      "learning_rate": 9.940178377202525e-06,
      "loss": 0.5562,
      "step": 5500
    },
    {
      "epoch": 0.3262997607135088,
      "grad_norm": 4.799742221832275,
      "learning_rate": 9.934740047857298e-06,
      "loss": 0.5165,
      "step": 6000
    },
    {
      "epoch": 0.35349140743963453,
      "grad_norm": 4.289649486541748,
      "learning_rate": 9.929301718512075e-06,
      "loss": 0.4773,
      "step": 6500
    },
    {
      "epoch": 0.38068305416576026,
      "grad_norm": 5.090242385864258,
      "learning_rate": 9.923863389166849e-06,
      "loss": 0.4651,
      "step": 7000
    },
    {
      "epoch": 0.407874700891886,
      "grad_norm": 3.0283756256103516,
      "learning_rate": 9.918425059821624e-06,
      "loss": 0.4524,
      "step": 7500
    },
    {
      "epoch": 0.4350663476180118,
      "grad_norm": 3.258117198944092,
      "learning_rate": 9.912986730476399e-06,
      "loss": 0.4214,
      "step": 8000
    },
    {
      "epoch": 0.4622579943441375,
      "grad_norm": 2.3887009620666504,
      "learning_rate": 9.907548401131173e-06,
      "loss": 0.393,
      "step": 8500
    },
    {
      "epoch": 0.48944964107026323,
      "grad_norm": 4.268918037414551,
      "learning_rate": 9.90211007178595e-06,
      "loss": 0.3776,
      "step": 9000
    },
    {
      "epoch": 0.516641287796389,
      "grad_norm": 3.5440762042999268,
      "learning_rate": 9.896671742440723e-06,
      "loss": 0.3549,
      "step": 9500
    },
    {
      "epoch": 0.5438329345225147,
      "grad_norm": 3.123277187347412,
      "learning_rate": 9.891233413095498e-06,
      "loss": 0.3558,
      "step": 10000
    },
    {
      "epoch": 0.5710245812486404,
      "grad_norm": 2.6814281940460205,
      "learning_rate": 9.885795083750273e-06,
      "loss": 0.3392,
      "step": 10500
    },
    {
      "epoch": 0.5982162279747661,
      "grad_norm": 3.1762001514434814,
      "learning_rate": 9.880356754405047e-06,
      "loss": 0.3232,
      "step": 11000
    },
    {
      "epoch": 0.6254078747008919,
      "grad_norm": 2.568861722946167,
      "learning_rate": 9.874918425059824e-06,
      "loss": 0.334,
      "step": 11500
    },
    {
      "epoch": 0.6525995214270176,
      "grad_norm": 3.985795736312866,
      "learning_rate": 9.869480095714597e-06,
      "loss": 0.3126,
      "step": 12000
    },
    {
      "epoch": 0.6797911681531433,
      "grad_norm": 3.3112289905548096,
      "learning_rate": 9.864041766369372e-06,
      "loss": 0.3023,
      "step": 12500
    },
    {
      "epoch": 0.7069828148792691,
      "grad_norm": 5.755519390106201,
      "learning_rate": 9.858603437024146e-06,
      "loss": 0.2828,
      "step": 13000
    },
    {
      "epoch": 0.7341744616053948,
      "grad_norm": 2.3035266399383545,
      "learning_rate": 9.853165107678921e-06,
      "loss": 0.2752,
      "step": 13500
    },
    {
      "epoch": 0.7613661083315205,
      "grad_norm": 4.862213611602783,
      "learning_rate": 9.847726778333696e-06,
      "loss": 0.2518,
      "step": 14000
    },
    {
      "epoch": 0.7885577550576462,
      "grad_norm": 5.991926193237305,
      "learning_rate": 9.842288448988472e-06,
      "loss": 0.2571,
      "step": 14500
    },
    {
      "epoch": 0.815749401783772,
      "grad_norm": 5.180552959442139,
      "learning_rate": 9.836850119643247e-06,
      "loss": 0.2655,
      "step": 15000
    },
    {
      "epoch": 0.8429410485098977,
      "grad_norm": 1.392749309539795,
      "learning_rate": 9.83141179029802e-06,
      "loss": 0.2618,
      "step": 15500
    },
    {
      "epoch": 0.8701326952360235,
      "grad_norm": 2.431339740753174,
      "learning_rate": 9.825973460952795e-06,
      "loss": 0.2577,
      "step": 16000
    },
    {
      "epoch": 0.8973243419621493,
      "grad_norm": 2.325584888458252,
      "learning_rate": 9.82053513160757e-06,
      "loss": 0.2389,
      "step": 16500
    },
    {
      "epoch": 0.924515988688275,
      "grad_norm": 2.193328619003296,
      "learning_rate": 9.815096802262346e-06,
      "loss": 0.2404,
      "step": 17000
    },
    {
      "epoch": 0.9517076354144007,
      "grad_norm": 2.3462648391723633,
      "learning_rate": 9.809658472917121e-06,
      "loss": 0.2299,
      "step": 17500
    },
    {
      "epoch": 0.9788992821405265,
      "grad_norm": 1.676985502243042,
      "learning_rate": 9.804220143571895e-06,
      "loss": 0.2351,
      "step": 18000
    },
    {
      "epoch": 1.0,
      "eval_exact_match": 0.7948,
      "eval_loss": 0.2527632415294647,
      "eval_runtime": 1021.1942,
      "eval_samples_per_second": 11.141,
      "eval_steps_per_second": 0.697,
      "step": 18388
    },
    {
      "epoch": 1.0060909288666522,
      "grad_norm": 2.935654640197754,
      "learning_rate": 9.79878181422667e-06,
      "loss": 0.2187,
      "step": 18500
    },
    {
      "epoch": 1.033282575592778,
      "grad_norm": 4.042050838470459,
      "learning_rate": 9.793343484881445e-06,
      "loss": 0.184,
      "step": 19000
    },
    {
      "epoch": 1.0604742223189036,
      "grad_norm": 2.6612393856048584,
      "learning_rate": 9.78790515553622e-06,
      "loss": 0.1627,
      "step": 19500
    },
    {
      "epoch": 1.0876658690450294,
      "grad_norm": 0.839908242225647,
      "learning_rate": 9.782466826190995e-06,
      "loss": 0.1761,
      "step": 20000
    },
    {
      "epoch": 1.114857515771155,
      "grad_norm": 3.877523899078369,
      "learning_rate": 9.777028496845769e-06,
      "loss": 0.1675,
      "step": 20500
    },
    {
      "epoch": 1.1420491624972808,
      "grad_norm": 2.287436008453369,
      "learning_rate": 9.771590167500544e-06,
      "loss": 0.1713,
      "step": 21000
    },
    {
      "epoch": 1.1692408092234066,
      "grad_norm": 2.2683210372924805,
      "learning_rate": 9.76615183815532e-06,
      "loss": 0.1671,
      "step": 21500
    },
    {
      "epoch": 1.1964324559495323,
      "grad_norm": 0.6974703669548035,
      "learning_rate": 9.760713508810095e-06,
      "loss": 0.167,
      "step": 22000
    },
    {
      "epoch": 1.223624102675658,
      "grad_norm": 0.8777428865432739,
      "learning_rate": 9.75527517946487e-06,
      "loss": 0.1569,
      "step": 22500
    },
    {
      "epoch": 1.2508157494017837,
      "grad_norm": 2.61319637298584,
      "learning_rate": 9.749836850119643e-06,
      "loss": 0.1648,
      "step": 23000
    },
    {
      "epoch": 1.2780073961279095,
      "grad_norm": 3.361828565597534,
      "learning_rate": 9.744398520774418e-06,
      "loss": 0.1529,
      "step": 23500
    },
    {
      "epoch": 1.3051990428540352,
      "grad_norm": 2.4574973583221436,
      "learning_rate": 9.738960191429194e-06,
      "loss": 0.1587,
      "step": 24000
    },
    {
      "epoch": 1.332390689580161,
      "grad_norm": 2.845959424972534,
      "learning_rate": 9.733521862083969e-06,
      "loss": 0.146,
      "step": 24500
    },
    {
      "epoch": 1.3595823363062867,
      "grad_norm": 2.5402307510375977,
      "learning_rate": 9.728083532738744e-06,
      "loss": 0.1494,
      "step": 25000
    },
    {
      "epoch": 1.3867739830324124,
      "grad_norm": 2.56087327003479,
      "learning_rate": 9.722645203393518e-06,
      "loss": 0.1533,
      "step": 25500
    },
    {
      "epoch": 1.4139656297585381,
      "grad_norm": 2.302635431289673,
      "learning_rate": 9.717206874048293e-06,
      "loss": 0.1557,
      "step": 26000
    },
    {
      "epoch": 1.4411572764846639,
      "grad_norm": 5.063803672790527,
      "learning_rate": 9.711768544703068e-06,
      "loss": 0.1614,
      "step": 26500
    },
    {
      "epoch": 1.4683489232107896,
      "grad_norm": 4.056577682495117,
      "learning_rate": 9.706330215357843e-06,
      "loss": 0.1529,
      "step": 27000
    },
    {
      "epoch": 1.4955405699369153,
      "grad_norm": 4.56748104095459,
      "learning_rate": 9.700891886012618e-06,
      "loss": 0.1458,
      "step": 27500
    },
    {
      "epoch": 1.5227322166630413,
      "grad_norm": 3.91300892829895,
      "learning_rate": 9.695453556667392e-06,
      "loss": 0.1399,
      "step": 28000
    },
    {
      "epoch": 1.549923863389167,
      "grad_norm": 2.8584766387939453,
      "learning_rate": 9.690015227322167e-06,
      "loss": 0.1372,
      "step": 28500
    },
    {
      "epoch": 1.5771155101152927,
      "grad_norm": 1.0653077363967896,
      "learning_rate": 9.684576897976942e-06,
      "loss": 0.1505,
      "step": 29000
    },
    {
      "epoch": 1.6043071568414184,
      "grad_norm": 2.1718199253082275,
      "learning_rate": 9.679138568631718e-06,
      "loss": 0.132,
      "step": 29500
    },
    {
      "epoch": 1.6314988035675442,
      "grad_norm": 2.4078354835510254,
      "learning_rate": 9.673700239286493e-06,
      "loss": 0.1354,
      "step": 30000
    },
    {
      "epoch": 1.65869045029367,
      "grad_norm": 2.3602287769317627,
      "learning_rate": 9.668261909941266e-06,
      "loss": 0.1478,
      "step": 30500
    },
    {
      "epoch": 1.6858820970197956,
      "grad_norm": 5.312971591949463,
      "learning_rate": 9.662823580596041e-06,
      "loss": 0.1443,
      "step": 31000
    },
    {
      "epoch": 1.7130737437459214,
      "grad_norm": 1.5108168125152588,
      "learning_rate": 9.657385251250817e-06,
      "loss": 0.1442,
      "step": 31500
    },
    {
      "epoch": 1.740265390472047,
      "grad_norm": 2.7200069427490234,
      "learning_rate": 9.651946921905592e-06,
      "loss": 0.138,
      "step": 32000
    },
    {
      "epoch": 1.7674570371981728,
      "grad_norm": 1.6983907222747803,
      "learning_rate": 9.646508592560367e-06,
      "loss": 0.1472,
      "step": 32500
    },
    {
      "epoch": 1.7946486839242985,
      "grad_norm": 4.2195024490356445,
      "learning_rate": 9.64107026321514e-06,
      "loss": 0.1327,
      "step": 33000
    },
    {
      "epoch": 1.8218403306504243,
      "grad_norm": 2.506478786468506,
      "learning_rate": 9.635631933869916e-06,
      "loss": 0.1382,
      "step": 33500
    },
    {
      "epoch": 1.84903197737655,
      "grad_norm": 5.781156539916992,
      "learning_rate": 9.630193604524691e-06,
      "loss": 0.1301,
      "step": 34000
    },
    {
      "epoch": 1.8762236241026757,
      "grad_norm": 1.8185195922851562,
      "learning_rate": 9.624755275179466e-06,
      "loss": 0.1294,
      "step": 34500
    },
    {
      "epoch": 1.9034152708288015,
      "grad_norm": 4.116232872009277,
      "learning_rate": 9.619316945834241e-06,
      "loss": 0.1285,
      "step": 35000
    },
    {
      "epoch": 1.9306069175549272,
      "grad_norm": 2.8177270889282227,
      "learning_rate": 9.613878616489015e-06,
      "loss": 0.1282,
      "step": 35500
    },
    {
      "epoch": 1.957798564281053,
      "grad_norm": 0.8758026361465454,
      "learning_rate": 9.60844028714379e-06,
      "loss": 0.1327,
      "step": 36000
    },
    {
      "epoch": 1.9849902110071787,
      "grad_norm": 1.9684972763061523,
      "learning_rate": 9.603001957798565e-06,
      "loss": 0.1237,
      "step": 36500
    },
    {
      "epoch": 2.0,
      "eval_exact_match": 0.8592,
      "eval_loss": 0.1779375970363617,
      "eval_runtime": 1020.8338,
      "eval_samples_per_second": 11.145,
      "eval_steps_per_second": 0.697,
      "step": 36776
    },
    {
      "epoch": 2.0121818577333044,
      "grad_norm": 2.499526262283325,
      "learning_rate": 9.59756362845334e-06,
      "loss": 0.1125,
      "step": 37000
    },
    {
      "epoch": 2.03937350445943,
      "grad_norm": 0.8748095631599426,
      "learning_rate": 9.592125299108114e-06,
      "loss": 0.0825,
      "step": 37500
    },
    {
      "epoch": 2.066565151185556,
      "grad_norm": 3.8996646404266357,
      "learning_rate": 9.58668696976289e-06,
      "loss": 0.0833,
      "step": 38000
    },
    {
      "epoch": 2.0937567979116816,
      "grad_norm": 4.08225154876709,
      "learning_rate": 9.581248640417664e-06,
      "loss": 0.0848,
      "step": 38500
    },
    {
      "epoch": 2.1209484446378073,
      "grad_norm": 2.4132580757141113,
      "learning_rate": 9.57581031107244e-06,
      "loss": 0.0816,
      "step": 39000
    },
    {
      "epoch": 2.148140091363933,
      "grad_norm": 3.604099988937378,
      "learning_rate": 9.570371981727215e-06,
      "loss": 0.0871,
      "step": 39500
    },
    {
      "epoch": 2.1753317380900588,
      "grad_norm": 0.1320401430130005,
      "learning_rate": 9.564933652381988e-06,
      "loss": 0.0885,
      "step": 40000
    },
    {
      "epoch": 2.2025233848161845,
      "grad_norm": 0.4408586621284485,
      "learning_rate": 9.559495323036764e-06,
      "loss": 0.0873,
      "step": 40500
    },
    {
      "epoch": 2.22971503154231,
      "grad_norm": 1.62918221950531,
      "learning_rate": 9.554056993691539e-06,
      "loss": 0.0911,
      "step": 41000
    },
    {
      "epoch": 2.256906678268436,
      "grad_norm": 1.7283786535263062,
      "learning_rate": 9.548618664346314e-06,
      "loss": 0.0851,
      "step": 41500
    },
    {
      "epoch": 2.2840983249945617,
      "grad_norm": 3.522033452987671,
      "learning_rate": 9.54318033500109e-06,
      "loss": 0.0806,
      "step": 42000
    },
    {
      "epoch": 2.3112899717206874,
      "grad_norm": 1.8525676727294922,
      "learning_rate": 9.537742005655863e-06,
      "loss": 0.0776,
      "step": 42500
    },
    {
      "epoch": 2.338481618446813,
      "grad_norm": 2.7800660133361816,
      "learning_rate": 9.532303676310638e-06,
      "loss": 0.0875,
      "step": 43000
    },
    {
      "epoch": 2.365673265172939,
      "grad_norm": 0.9835543632507324,
      "learning_rate": 9.526865346965413e-06,
      "loss": 0.0797,
      "step": 43500
    },
    {
      "epoch": 2.3928649118990646,
      "grad_norm": 4.02990198135376,
      "learning_rate": 9.521427017620188e-06,
      "loss": 0.0822,
      "step": 44000
    },
    {
      "epoch": 2.4200565586251903,
      "grad_norm": 3.2835583686828613,
      "learning_rate": 9.515988688274963e-06,
      "loss": 0.0896,
      "step": 44500
    },
    {
      "epoch": 2.447248205351316,
      "grad_norm": 2.090576171875,
      "learning_rate": 9.510550358929737e-06,
      "loss": 0.0797,
      "step": 45000
    },
    {
      "epoch": 2.4744398520774418,
      "grad_norm": 1.9766199588775635,
      "learning_rate": 9.505112029584512e-06,
      "loss": 0.0853,
      "step": 45500
    },
    {
      "epoch": 2.5016314988035675,
      "grad_norm": 4.452338695526123,
      "learning_rate": 9.499673700239287e-06,
      "loss": 0.0862,
      "step": 46000
    },
    {
      "epoch": 2.5288231455296932,
      "grad_norm": 0.5408188104629517,
      "learning_rate": 9.494235370894063e-06,
      "loss": 0.0831,
      "step": 46500
    },
    {
      "epoch": 2.556014792255819,
      "grad_norm": 1.3575879335403442,
      "learning_rate": 9.488797041548838e-06,
      "loss": 0.0872,
      "step": 47000
    },
    {
      "epoch": 2.5832064389819447,
      "grad_norm": 1.4951727390289307,
      "learning_rate": 9.483358712203611e-06,
      "loss": 0.0851,
      "step": 47500
    },
    {
      "epoch": 2.6103980857080704,
      "grad_norm": 2.675262212753296,
      "learning_rate": 9.477920382858387e-06,
      "loss": 0.0823,
      "step": 48000
    },
    {
      "epoch": 2.637589732434196,
      "grad_norm": 2.4334521293640137,
      "learning_rate": 9.472482053513162e-06,
      "loss": 0.0793,
      "step": 48500
    },
    {
      "epoch": 2.664781379160322,
      "grad_norm": 3.254221200942993,
      "learning_rate": 9.467043724167937e-06,
      "loss": 0.0814,
      "step": 49000
    },
    {
      "epoch": 2.6919730258864476,
      "grad_norm": 2.9325039386749268,
      "learning_rate": 9.461605394822712e-06,
      "loss": 0.084,
      "step": 49500
    },
    {
      "epoch": 2.7191646726125733,
      "grad_norm": 1.1457610130310059,
      "learning_rate": 9.456167065477486e-06,
      "loss": 0.0834,
      "step": 50000
    },
    {
      "epoch": 2.746356319338699,
      "grad_norm": 3.7050232887268066,
      "learning_rate": 9.450728736132261e-06,
      "loss": 0.0866,
      "step": 50500
    },
    {
      "epoch": 2.773547966064825,
      "grad_norm": 0.22456876933574677,
      "learning_rate": 9.445290406787036e-06,
      "loss": 0.0872,
      "step": 51000
    },
    {
      "epoch": 2.8007396127909505,
      "grad_norm": 0.23606906831264496,
      "learning_rate": 9.439852077441811e-06,
      "loss": 0.0764,
      "step": 51500
    },
    {
      "epoch": 2.8279312595170762,
      "grad_norm": 3.726656436920166,
      "learning_rate": 9.434413748096586e-06,
      "loss": 0.0808,
      "step": 52000
    },
    {
      "epoch": 2.855122906243202,
      "grad_norm": 0.7011487483978271,
      "learning_rate": 9.42897541875136e-06,
      "loss": 0.0843,
      "step": 52500
    },
    {
      "epoch": 2.8823145529693277,
      "grad_norm": 4.318293571472168,
      "learning_rate": 9.423537089406135e-06,
      "loss": 0.0743,
      "step": 53000
    },
    {
      "epoch": 2.9095061996954534,
      "grad_norm": 1.4616190195083618,
      "learning_rate": 9.41809876006091e-06,
      "loss": 0.0765,
      "step": 53500
    },
    {
      "epoch": 2.936697846421579,
      "grad_norm": 0.5177611112594604,
      "learning_rate": 9.412660430715684e-06,
      "loss": 0.0768,
      "step": 54000
    },
    {
      "epoch": 2.963889493147705,
      "grad_norm": 1.2543549537658691,
      "learning_rate": 9.40722210137046e-06,
      "loss": 0.078,
      "step": 54500
    },
    {
      "epoch": 2.9910811398738306,
      "grad_norm": 3.5229008197784424,
      "learning_rate": 9.401783772025234e-06,
      "loss": 0.0861,
      "step": 55000
    },
    {
      "epoch": 3.0,
      "eval_exact_match": 0.8693,
      "eval_loss": 0.16594479978084564,
      "eval_runtime": 1021.2082,
      "eval_samples_per_second": 11.141,
      "eval_steps_per_second": 0.697,
      "step": 55164
    },
    {
      "epoch": 3.0182727865999563,
      "grad_norm": 0.3636282980442047,
      "learning_rate": 9.39634544268001e-06,
      "loss": 0.057,
      "step": 55500
    },
    {
      "epoch": 3.045464433326082,
      "grad_norm": 1.1032425165176392,
      "learning_rate": 9.390907113334785e-06,
      "loss": 0.0468,
      "step": 56000
    },
    {
      "epoch": 3.072656080052208,
      "grad_norm": 1.8635987043380737,
      "learning_rate": 9.385468783989558e-06,
      "loss": 0.0458,
      "step": 56500
    },
    {
      "epoch": 3.0998477267783335,
      "grad_norm": 0.11481478065252304,
      "learning_rate": 9.380030454644335e-06,
      "loss": 0.0487,
      "step": 57000
    },
    {
      "epoch": 3.1270393735044593,
      "grad_norm": 1.2612336874008179,
      "learning_rate": 9.374592125299109e-06,
      "loss": 0.0472,
      "step": 57500
    },
    {
      "epoch": 3.154231020230585,
      "grad_norm": 3.589947462081909,
      "learning_rate": 9.369153795953884e-06,
      "loss": 0.0512,
      "step": 58000
    },
    {
      "epoch": 3.1814226669567107,
      "grad_norm": 2.791079521179199,
      "learning_rate": 9.363715466608659e-06,
      "loss": 0.0499,
      "step": 58500
    },
    {
      "epoch": 3.2086143136828364,
      "grad_norm": 2.4952220916748047,
      "learning_rate": 9.358277137263433e-06,
      "loss": 0.0477,
      "step": 59000
    },
    {
      "epoch": 3.235805960408962,
      "grad_norm": 1.147648572921753,
      "learning_rate": 9.35283880791821e-06,
      "loss": 0.0484,
      "step": 59500
    },
    {
      "epoch": 3.262997607135088,
      "grad_norm": 0.35628893971443176,
      "learning_rate": 9.347400478572983e-06,
      "loss": 0.0504,
      "step": 60000
    },
    {
      "epoch": 3.2901892538612136,
      "grad_norm": 1.9759888648986816,
      "learning_rate": 9.341962149227758e-06,
      "loss": 0.0515,
      "step": 60500
    },
    {
      "epoch": 3.31738090058734,
      "grad_norm": 0.2888725697994232,
      "learning_rate": 9.336523819882533e-06,
      "loss": 0.0523,
      "step": 61000
    },
    {
      "epoch": 3.344572547313465,
      "grad_norm": 3.626575469970703,
      "learning_rate": 9.331085490537307e-06,
      "loss": 0.0499,
      "step": 61500
    },
    {
      "epoch": 3.3717641940395913,
      "grad_norm": 1.1794458627700806,
      "learning_rate": 9.325647161192082e-06,
      "loss": 0.0482,
      "step": 62000
    },
    {
      "epoch": 3.3989558407657166,
      "grad_norm": 1.3318313360214233,
      "learning_rate": 9.320208831846857e-06,
      "loss": 0.0558,
      "step": 62500
    },
    {
      "epoch": 3.4261474874918427,
      "grad_norm": 0.2946118712425232,
      "learning_rate": 9.314770502501632e-06,
      "loss": 0.0524,
      "step": 63000
    },
    {
      "epoch": 3.453339134217968,
      "grad_norm": 0.9527666568756104,
      "learning_rate": 9.309332173156406e-06,
      "loss": 0.0523,
      "step": 63500
    },
    {
      "epoch": 3.480530780944094,
      "grad_norm": 1.9586501121520996,
      "learning_rate": 9.303893843811181e-06,
      "loss": 0.0496,
      "step": 64000
    },
    {
      "epoch": 3.5077224276702195,
      "grad_norm": 3.5742673873901367,
      "learning_rate": 9.298455514465956e-06,
      "loss": 0.0562,
      "step": 64500
    },
    {
      "epoch": 3.5349140743963456,
      "grad_norm": 1.3943268060684204,
      "learning_rate": 9.293017185120732e-06,
      "loss": 0.0513,
      "step": 65000
    },
    {
      "epoch": 3.562105721122471,
      "grad_norm": 3.8523316383361816,
      "learning_rate": 9.287578855775507e-06,
      "loss": 0.0515,
      "step": 65500
    },
    {
      "epoch": 3.589297367848597,
      "grad_norm": 1.121957540512085,
      "learning_rate": 9.28214052643028e-06,
      "loss": 0.0547,
      "step": 66000
    },
    {
      "epoch": 3.6164890145747224,
      "grad_norm": 0.9070321321487427,
      "learning_rate": 9.276702197085056e-06,
      "loss": 0.0482,
      "step": 66500
    },
    {
      "epoch": 3.6436806613008486,
      "grad_norm": 0.5214864611625671,
      "learning_rate": 9.27126386773983e-06,
      "loss": 0.0505,
      "step": 67000
    },
    {
      "epoch": 3.670872308026974,
      "grad_norm": 3.1231439113616943,
      "learning_rate": 9.265825538394606e-06,
      "loss": 0.0497,
      "step": 67500
    },
    {
      "epoch": 3.6980639547531,
      "grad_norm": 2.436281204223633,
      "learning_rate": 9.260387209049381e-06,
      "loss": 0.0444,
      "step": 68000
    },
    {
      "epoch": 3.7252556014792257,
      "grad_norm": 1.7644033432006836,
      "learning_rate": 9.254948879704155e-06,
      "loss": 0.0493,
      "step": 68500
    },
    {
      "epoch": 3.7524472482053515,
      "grad_norm": 0.840013861656189,
      "learning_rate": 9.24951055035893e-06,
      "loss": 0.0536,
      "step": 69000
    },
    {
      "epoch": 3.779638894931477,
      "grad_norm": 2.0115058422088623,
      "learning_rate": 9.244072221013705e-06,
      "loss": 0.0522,
      "step": 69500
    },
    {
      "epoch": 3.806830541657603,
      "grad_norm": 2.1235430240631104,
      "learning_rate": 9.23863389166848e-06,
      "loss": 0.0554,
      "step": 70000
    },
    {
      "epoch": 3.8340221883837287,
      "grad_norm": 0.39138633012771606,
      "learning_rate": 9.233195562323255e-06,
      "loss": 0.0477,
      "step": 70500
    },
    {
      "epoch": 3.8612138351098544,
      "grad_norm": 0.49036768078804016,
      "learning_rate": 9.227757232978029e-06,
      "loss": 0.0543,
      "step": 71000
    },
    {
      "epoch": 3.88840548183598,
      "grad_norm": 2.911491632461548,
      "learning_rate": 9.222318903632804e-06,
      "loss": 0.0513,
      "step": 71500
    },
    {
      "epoch": 3.915597128562106,
      "grad_norm": 5.786170959472656,
      "learning_rate": 9.21688057428758e-06,
      "loss": 0.0531,
      "step": 72000
    },
    {
      "epoch": 3.9427887752882316,
      "grad_norm": 0.6657633185386658,
      "learning_rate": 9.211442244942355e-06,
      "loss": 0.0545,
      "step": 72500
    },
    {
      "epoch": 3.9699804220143573,
      "grad_norm": 2.4814870357513428,
      "learning_rate": 9.20600391559713e-06,
      "loss": 0.0523,
      "step": 73000
    },
    {
      "epoch": 3.997172068740483,
      "grad_norm": 0.967647135257721,
      "learning_rate": 9.200565586251903e-06,
      "loss": 0.0497,
      "step": 73500
    },
    {
      "epoch": 4.0,
      "eval_exact_match": 0.8795,
      "eval_loss": 0.175943061709404,
      "eval_runtime": 1022.0597,
      "eval_samples_per_second": 11.131,
      "eval_steps_per_second": 0.697,
      "step": 73552
    },
    {
      "epoch": 4.024363715466609,
      "grad_norm": 4.134894847869873,
      "learning_rate": 9.195127256906679e-06,
      "loss": 0.0281,
      "step": 74000
    },
    {
      "epoch": 4.051555362192734,
      "grad_norm": 1.442826509475708,
      "learning_rate": 9.189688927561454e-06,
      "loss": 0.0287,
      "step": 74500
    },
    {
      "epoch": 4.07874700891886,
      "grad_norm": 0.3482317626476288,
      "learning_rate": 9.184250598216229e-06,
      "loss": 0.0309,
      "step": 75000
    },
    {
      "epoch": 4.1059386556449855,
      "grad_norm": 0.5604238510131836,
      "learning_rate": 9.178812268871004e-06,
      "loss": 0.0281,
      "step": 75500
    },
    {
      "epoch": 4.133130302371112,
      "grad_norm": 2.2239296436309814,
      "learning_rate": 9.173373939525778e-06,
      "loss": 0.0259,
      "step": 76000
    },
    {
      "epoch": 4.160321949097237,
      "grad_norm": 0.34009259939193726,
      "learning_rate": 9.167935610180553e-06,
      "loss": 0.0321,
      "step": 76500
    },
    {
      "epoch": 4.187513595823363,
      "grad_norm": 3.0141053199768066,
      "learning_rate": 9.162497280835328e-06,
      "loss": 0.0317,
      "step": 77000
    },
    {
      "epoch": 4.214705242549488,
      "grad_norm": 0.01643652655184269,
      "learning_rate": 9.157058951490103e-06,
      "loss": 0.029,
      "step": 77500
    },
    {
      "epoch": 4.241896889275615,
      "grad_norm": 0.03787761181592941,
      "learning_rate": 9.151620622144878e-06,
      "loss": 0.0321,
      "step": 78000
    },
    {
      "epoch": 4.26908853600174,
      "grad_norm": 0.7121431231498718,
      "learning_rate": 9.146182292799652e-06,
      "loss": 0.0312,
      "step": 78500
    },
    {
      "epoch": 4.296280182727866,
      "grad_norm": 0.01658172532916069,
      "learning_rate": 9.140743963454427e-06,
      "loss": 0.0327,
      "step": 79000
    },
    {
      "epoch": 4.323471829453991,
      "grad_norm": 0.21374182403087616,
      "learning_rate": 9.135305634109202e-06,
      "loss": 0.0318,
      "step": 79500
    },
    {
      "epoch": 4.3506634761801175,
      "grad_norm": 3.6706273555755615,
      "learning_rate": 9.129867304763978e-06,
      "loss": 0.0311,
      "step": 80000
    },
    {
      "epoch": 4.377855122906243,
      "grad_norm": 2.5517737865448,
      "learning_rate": 9.124428975418753e-06,
      "loss": 0.0299,
      "step": 80500
    },
    {
      "epoch": 4.405046769632369,
      "grad_norm": 1.2063195705413818,
      "learning_rate": 9.118990646073526e-06,
      "loss": 0.0323,
      "step": 81000
    },
    {
      "epoch": 4.432238416358494,
      "grad_norm": 0.2457069456577301,
      "learning_rate": 9.113552316728301e-06,
      "loss": 0.0309,
      "step": 81500
    },
    {
      "epoch": 4.45943006308462,
      "grad_norm": 0.020821336656808853,
      "learning_rate": 9.108113987383077e-06,
      "loss": 0.0351,
      "step": 82000
    },
    {
      "epoch": 4.486621709810747,
      "grad_norm": 2.251249313354492,
      "learning_rate": 9.102675658037852e-06,
      "loss": 0.0345,
      "step": 82500
    },
    {
      "epoch": 4.513813356536872,
      "grad_norm": 0.6691648364067078,
      "learning_rate": 9.097237328692627e-06,
      "loss": 0.0311,
      "step": 83000
    },
    {
      "epoch": 4.541005003262997,
      "grad_norm": 0.05074188485741615,
      "learning_rate": 9.0917989993474e-06,
      "loss": 0.03,
      "step": 83500
    },
    {
      "epoch": 4.568196649989123,
      "grad_norm": 0.03867918998003006,
      "learning_rate": 9.086360670002176e-06,
      "loss": 0.0317,
      "step": 84000
    },
    {
      "epoch": 4.5953882967152495,
      "grad_norm": 2.578198194503784,
      "learning_rate": 9.080922340656951e-06,
      "loss": 0.0332,
      "step": 84500
    },
    {
      "epoch": 4.622579943441375,
      "grad_norm": 0.6527734994888306,
      "learning_rate": 9.075484011311726e-06,
      "loss": 0.0306,
      "step": 85000
    },
    {
      "epoch": 4.6497715901675,
      "grad_norm": 3.3111846446990967,
      "learning_rate": 9.070045681966501e-06,
      "loss": 0.0352,
      "step": 85500
    },
    {
      "epoch": 4.676963236893626,
      "grad_norm": 4.766884803771973,
      "learning_rate": 9.064607352621275e-06,
      "loss": 0.0331,
      "step": 86000
    },
    {
      "epoch": 4.704154883619752,
      "grad_norm": 3.993748903274536,
      "learning_rate": 9.05916902327605e-06,
      "loss": 0.0343,
      "step": 86500
    },
    {
      "epoch": 4.731346530345878,
      "grad_norm": 6.515500068664551,
      "learning_rate": 9.053730693930825e-06,
      "loss": 0.0322,
      "step": 87000
    },
    {
      "epoch": 4.758538177072003,
      "grad_norm": 1.858112096786499,
      "learning_rate": 9.0482923645856e-06,
      "loss": 0.034,
      "step": 87500
    },
    {
      "epoch": 4.785729823798129,
      "grad_norm": 8.057866096496582,
      "learning_rate": 9.042854035240374e-06,
      "loss": 0.0326,
      "step": 88000
    },
    {
      "epoch": 4.812921470524255,
      "grad_norm": 2.783409833908081,
      "learning_rate": 9.03741570589515e-06,
      "loss": 0.0349,
      "step": 88500
    },
    {
      "epoch": 4.840113117250381,
      "grad_norm": 0.008890635333955288,
      "learning_rate": 9.031977376549924e-06,
      "loss": 0.0324,
      "step": 89000
    },
    {
      "epoch": 4.867304763976507,
      "grad_norm": 2.3815135955810547,
      "learning_rate": 9.0265390472047e-06,
      "loss": 0.0332,
      "step": 89500
    },
    {
      "epoch": 4.894496410702632,
      "grad_norm": 1.191375732421875,
      "learning_rate": 9.021100717859475e-06,
      "loss": 0.0336,
      "step": 90000
    },
    {
      "epoch": 4.921688057428758,
      "grad_norm": 2.7159788608551025,
      "learning_rate": 9.015662388514248e-06,
      "loss": 0.0359,
      "step": 90500
    },
    {
      "epoch": 4.9488797041548835,
      "grad_norm": 4.028094291687012,
      "learning_rate": 9.010224059169024e-06,
      "loss": 0.032,
      "step": 91000
    },
    {
      "epoch": 4.97607135088101,
      "grad_norm": 1.4064428806304932,
      "learning_rate": 9.004785729823799e-06,
      "loss": 0.0338,
      "step": 91500
    },
    {
      "epoch": 5.0,
      "eval_exact_match": 0.8753,
      "eval_loss": 0.20461878180503845,
      "eval_runtime": 1007.4294,
      "eval_samples_per_second": 11.293,
      "eval_steps_per_second": 0.707,
      "step": 91940
    },
    {
      "epoch": 5.003262997607135,
      "grad_norm": 2.7804439067840576,
      "learning_rate": 8.999347400478574e-06,
      "loss": 0.0301,
      "step": 92000
    },
    {
      "epoch": 5.030454644333261,
      "grad_norm": 0.11794668436050415,
      "learning_rate": 8.99390907113335e-06,
      "loss": 0.0179,
      "step": 92500
    },
    {
      "epoch": 5.0576462910593865,
      "grad_norm": 0.5474434494972229,
      "learning_rate": 8.988470741788123e-06,
      "loss": 0.0198,
      "step": 93000
    },
    {
      "epoch": 5.084837937785513,
      "grad_norm": 3.92921781539917,
      "learning_rate": 8.983032412442898e-06,
      "loss": 0.0193,
      "step": 93500
    },
    {
      "epoch": 5.112029584511638,
      "grad_norm": 2.111978054046631,
      "learning_rate": 8.977594083097673e-06,
      "loss": 0.0187,
      "step": 94000
    },
    {
      "epoch": 5.139221231237764,
      "grad_norm": 0.17302042245864868,
      "learning_rate": 8.972155753752448e-06,
      "loss": 0.016,
      "step": 94500
    },
    {
      "epoch": 5.166412877963889,
      "grad_norm": 1.5338056087493896,
      "learning_rate": 8.966717424407224e-06,
      "loss": 0.0215,
      "step": 95000
    },
    {
      "epoch": 5.1936045246900155,
      "grad_norm": 0.14234082400798798,
      "learning_rate": 8.961279095061997e-06,
      "loss": 0.0214,
      "step": 95500
    },
    {
      "epoch": 5.220796171416141,
      "grad_norm": 0.10225632041692734,
      "learning_rate": 8.955840765716772e-06,
      "loss": 0.0194,
      "step": 96000
    },
    {
      "epoch": 5.247987818142267,
      "grad_norm": 3.5910024642944336,
      "learning_rate": 8.950402436371547e-06,
      "loss": 0.0204,
      "step": 96500
    },
    {
      "epoch": 5.275179464868392,
      "grad_norm": 4.878687381744385,
      "learning_rate": 8.944964107026323e-06,
      "loss": 0.0234,
      "step": 97000
    },
    {
      "epoch": 5.3023711115945185,
      "grad_norm": 0.8146882057189941,
      "learning_rate": 8.939525777681098e-06,
      "loss": 0.0207,
      "step": 97500
    },
    {
      "epoch": 5.329562758320644,
      "grad_norm": 0.23543909192085266,
      "learning_rate": 8.934087448335871e-06,
      "loss": 0.0242,
      "step": 98000
    },
    {
      "epoch": 5.35675440504677,
      "grad_norm": 0.07411856204271317,
      "learning_rate": 8.928649118990647e-06,
      "loss": 0.0233,
      "step": 98500
    },
    {
      "epoch": 5.383946051772895,
      "grad_norm": 0.7384315729141235,
      "learning_rate": 8.923210789645422e-06,
      "loss": 0.0205,
      "step": 99000
    },
    {
      "epoch": 5.411137698499021,
      "grad_norm": 0.6251228451728821,
      "learning_rate": 8.917772460300197e-06,
      "loss": 0.023,
      "step": 99500
    },
    {
      "epoch": 5.438329345225147,
      "grad_norm": 0.5921465754508972,
      "learning_rate": 8.912334130954972e-06,
      "loss": 0.0218,
      "step": 100000
    },
    {
      "epoch": 5.465520991951273,
      "grad_norm": 0.01474306546151638,
      "learning_rate": 8.906895801609746e-06,
      "loss": 0.0219,
      "step": 100500
    },
    {
      "epoch": 5.492712638677398,
      "grad_norm": 3.527553081512451,
      "learning_rate": 8.901457472264521e-06,
      "loss": 0.0213,
      "step": 101000
    },
    {
      "epoch": 5.519904285403524,
      "grad_norm": 3.0029749870300293,
      "learning_rate": 8.896019142919296e-06,
      "loss": 0.0205,
      "step": 101500
    },
    {
      "epoch": 5.54709593212965,
      "grad_norm": 0.3347836434841156,
      "learning_rate": 8.890580813574071e-06,
      "loss": 0.0213,
      "step": 102000
    },
    {
      "epoch": 5.574287578855776,
      "grad_norm": 2.0022785663604736,
      "learning_rate": 8.885142484228847e-06,
      "loss": 0.0236,
      "step": 102500
    },
    {
      "epoch": 5.601479225581901,
      "grad_norm": 0.6466526389122009,
      "learning_rate": 8.87970415488362e-06,
      "loss": 0.0229,
      "step": 103000
    },
    {
      "epoch": 5.628670872308027,
      "grad_norm": 0.013481836766004562,
      "learning_rate": 8.874265825538395e-06,
      "loss": 0.022,
      "step": 103500
    },
    {
      "epoch": 5.6558625190341525,
      "grad_norm": 0.4195241630077362,
      "learning_rate": 8.86882749619317e-06,
      "loss": 0.0241,
      "step": 104000
    },
    {
      "epoch": 5.683054165760279,
      "grad_norm": 1.216953158378601,
      "learning_rate": 8.863389166847946e-06,
      "loss": 0.0224,
      "step": 104500
    },
    {
      "epoch": 5.710245812486404,
      "grad_norm": 0.682259738445282,
      "learning_rate": 8.85795083750272e-06,
      "loss": 0.0191,
      "step": 105000
    },
    {
      "epoch": 5.73743745921253,
      "grad_norm": 0.42561373114585876,
      "learning_rate": 8.852512508157494e-06,
      "loss": 0.0237,
      "step": 105500
    },
    {
      "epoch": 5.764629105938655,
      "grad_norm": 6.53951358795166,
      "learning_rate": 8.84707417881227e-06,
      "loss": 0.021,
      "step": 106000
    },
    {
      "epoch": 5.791820752664782,
      "grad_norm": 2.535867214202881,
      "learning_rate": 8.841635849467045e-06,
      "loss": 0.0236,
      "step": 106500
    },
    {
      "epoch": 5.819012399390907,
      "grad_norm": 0.7644603848457336,
      "learning_rate": 8.83619752012182e-06,
      "loss": 0.0219,
      "step": 107000
    },
    {
      "epoch": 5.846204046117033,
      "grad_norm": 7.7599616050720215,
      "learning_rate": 8.830759190776595e-06,
      "loss": 0.0206,
      "step": 107500
    },
    {
      "epoch": 5.873395692843158,
      "grad_norm": 1.106614589691162,
      "learning_rate": 8.825320861431369e-06,
      "loss": 0.0214,
      "step": 108000
    },
    {
      "epoch": 5.9005873395692845,
      "grad_norm": 0.4505751430988312,
      "learning_rate": 8.819882532086144e-06,
      "loss": 0.0229,
      "step": 108500
    },
    {
      "epoch": 5.92777898629541,
      "grad_norm": 0.23297396302223206,
      "learning_rate": 8.814444202740919e-06,
      "loss": 0.0234,
      "step": 109000
    },
    {
      "epoch": 5.954970633021536,
      "grad_norm": 1.527669072151184,
      "learning_rate": 8.809005873395694e-06,
      "loss": 0.0214,
      "step": 109500
    },
    {
      "epoch": 5.982162279747661,
      "grad_norm": 0.012977199628949165,
      "learning_rate": 8.80356754405047e-06,
      "loss": 0.0228,
      "step": 110000
    },
    {
      "epoch": 6.0,
      "eval_exact_match": 0.8794,
      "eval_loss": 0.2122552990913391,
      "eval_runtime": 1020.184,
      "eval_samples_per_second": 11.152,
      "eval_steps_per_second": 0.698,
      "step": 110328
    },
    {
      "epoch": 6.009353926473787,
      "grad_norm": 0.0063011981546878815,
      "learning_rate": 8.798129214705243e-06,
      "loss": 0.0183,
      "step": 110500
    },
    {
      "epoch": 6.036545573199913,
      "grad_norm": 0.042253538966178894,
      "learning_rate": 8.792690885360018e-06,
      "loss": 0.0136,
      "step": 111000
    },
    {
      "epoch": 6.063737219926039,
      "grad_norm": 3.0220460891723633,
      "learning_rate": 8.787252556014792e-06,
      "loss": 0.0142,
      "step": 111500
    },
    {
      "epoch": 6.090928866652164,
      "grad_norm": 1.7237880229949951,
      "learning_rate": 8.781814226669569e-06,
      "loss": 0.0121,
      "step": 112000
    },
    {
      "epoch": 6.11812051337829,
      "grad_norm": 2.3117618560791016,
      "learning_rate": 8.776375897324342e-06,
      "loss": 0.0151,
      "step": 112500
    },
    {
      "epoch": 6.145312160104416,
      "grad_norm": 0.2513481080532074,
      "learning_rate": 8.770937567979117e-06,
      "loss": 0.0122,
      "step": 113000
    },
    {
      "epoch": 6.172503806830542,
      "grad_norm": 0.22196491062641144,
      "learning_rate": 8.765499238633893e-06,
      "loss": 0.0136,
      "step": 113500
    },
    {
      "epoch": 6.199695453556667,
      "grad_norm": 0.3039638102054596,
      "learning_rate": 8.760060909288666e-06,
      "loss": 0.0154,
      "step": 114000
    },
    {
      "epoch": 6.226887100282793,
      "grad_norm": 1.2839832305908203,
      "learning_rate": 8.754622579943443e-06,
      "loss": 0.015,
      "step": 114500
    },
    {
      "epoch": 6.2540787470089185,
      "grad_norm": 1.728105902671814,
      "learning_rate": 8.749184250598216e-06,
      "loss": 0.0154,
      "step": 115000
    },
    {
      "epoch": 6.281270393735045,
      "grad_norm": 1.7439731359481812,
      "learning_rate": 8.743745921252992e-06,
      "loss": 0.013,
      "step": 115500
    },
    {
      "epoch": 6.30846204046117,
      "grad_norm": 0.7588323354721069,
      "learning_rate": 8.738307591907767e-06,
      "loss": 0.013,
      "step": 116000
    },
    {
      "epoch": 6.335653687187296,
      "grad_norm": 0.3581075668334961,
      "learning_rate": 8.73286926256254e-06,
      "loss": 0.0147,
      "step": 116500
    },
    {
      "epoch": 6.362845333913421,
      "grad_norm": 0.5312409996986389,
      "learning_rate": 8.727430933217317e-06,
      "loss": 0.0159,
      "step": 117000
    },
    {
      "epoch": 6.390036980639548,
      "grad_norm": 1.5447969436645508,
      "learning_rate": 8.72199260387209e-06,
      "loss": 0.0163,
      "step": 117500
    },
    {
      "epoch": 6.417228627365673,
      "grad_norm": 0.41506335139274597,
      "learning_rate": 8.716554274526866e-06,
      "loss": 0.016,
      "step": 118000
    },
    {
      "epoch": 6.444420274091799,
      "grad_norm": 0.17262350022792816,
      "learning_rate": 8.711115945181641e-06,
      "loss": 0.0143,
      "step": 118500
    },
    {
      "epoch": 6.471611920817924,
      "grad_norm": 1.5750232934951782,
      "learning_rate": 8.705677615836415e-06,
      "loss": 0.0143,
      "step": 119000
    },
    {
      "epoch": 6.4988035675440505,
      "grad_norm": 2.806853771209717,
      "learning_rate": 8.700239286491192e-06,
      "loss": 0.0164,
      "step": 119500
    },
    {
      "epoch": 6.525995214270176,
      "grad_norm": 0.2280786782503128,
      "learning_rate": 8.694800957145965e-06,
      "loss": 0.0138,
      "step": 120000
    },
    {
      "epoch": 6.553186860996302,
      "grad_norm": 0.6690570712089539,
      "learning_rate": 8.68936262780074e-06,
      "loss": 0.0157,
      "step": 120500
    },
    {
      "epoch": 6.580378507722427,
      "grad_norm": 0.04652916640043259,
      "learning_rate": 8.683924298455516e-06,
      "loss": 0.0172,
      "step": 121000
    },
    {
      "epoch": 6.6075701544485534,
      "grad_norm": 1.504131555557251,
      "learning_rate": 8.678485969110289e-06,
      "loss": 0.0175,
      "step": 121500
    },
    {
      "epoch": 6.63476180117468,
      "grad_norm": 1.5482994318008423,
      "learning_rate": 8.673047639765066e-06,
      "loss": 0.0177,
      "step": 122000
    },
    {
      "epoch": 6.661953447900805,
      "grad_norm": 1.784773349761963,
      "learning_rate": 8.66760931041984e-06,
      "loss": 0.0153,
      "step": 122500
    },
    {
      "epoch": 6.68914509462693,
      "grad_norm": 0.1438634991645813,
      "learning_rate": 8.662170981074615e-06,
      "loss": 0.016,
      "step": 123000
    },
    {
      "epoch": 6.716336741353056,
      "grad_norm": 1.1094955205917358,
      "learning_rate": 8.65673265172939e-06,
      "loss": 0.0158,
      "step": 123500
    },
    {
      "epoch": 6.7435283880791825,
      "grad_norm": 3.1225857734680176,
      "learning_rate": 8.651294322384163e-06,
      "loss": 0.0145,
      "step": 124000
    },
    {
      "epoch": 6.770720034805308,
      "grad_norm": 0.4727814793586731,
      "learning_rate": 8.64585599303894e-06,
      "loss": 0.0178,
      "step": 124500
    },
    {
      "epoch": 6.797911681531433,
      "grad_norm": 0.9324865937232971,
      "learning_rate": 8.640417663693714e-06,
      "loss": 0.0165,
      "step": 125000
    },
    {
      "epoch": 6.825103328257559,
      "grad_norm": 1.3977622985839844,
      "learning_rate": 8.634979334348489e-06,
      "loss": 0.0148,
      "step": 125500
    },
    {
      "epoch": 6.8522949749836854,
      "grad_norm": 0.694773256778717,
      "learning_rate": 8.629541005003264e-06,
      "loss": 0.0173,
      "step": 126000
    },
    {
      "epoch": 6.879486621709811,
      "grad_norm": 4.397082328796387,
      "learning_rate": 8.624102675658038e-06,
      "loss": 0.0164,
      "step": 126500
    },
    {
      "epoch": 6.906678268435936,
      "grad_norm": 0.09322671592235565,
      "learning_rate": 8.618664346312815e-06,
      "loss": 0.0162,
      "step": 127000
    },
    {
      "epoch": 6.933869915162062,
      "grad_norm": 3.736959218978882,
      "learning_rate": 8.613226016967588e-06,
      "loss": 0.0161,
      "step": 127500
    },
    {
      "epoch": 6.961061561888188,
      "grad_norm": 0.21108615398406982,
      "learning_rate": 8.607787687622363e-06,
      "loss": 0.0154,
      "step": 128000
    },
    {
      "epoch": 6.988253208614314,
      "grad_norm": 0.029453817754983902,
      "learning_rate": 8.602349358277138e-06,
      "loss": 0.017,
      "step": 128500
    },
    {
      "epoch": 7.0,
      "eval_exact_match": 0.8825,
      "eval_loss": 0.24493291974067688,
      "eval_runtime": 1023.3771,
      "eval_samples_per_second": 11.117,
      "eval_steps_per_second": 0.696,
      "step": 128716
    },
    {
      "epoch": 7.01544485534044,
      "grad_norm": 6.605143070220947,
      "learning_rate": 8.596911028931912e-06,
      "loss": 0.0124,
      "step": 129000
    },
    {
      "epoch": 7.042636502066565,
      "grad_norm": 0.22909535467624664,
      "learning_rate": 8.591472699586689e-06,
      "loss": 0.009,
      "step": 129500
    },
    {
      "epoch": 7.069828148792691,
      "grad_norm": 0.08523764461278915,
      "learning_rate": 8.586034370241462e-06,
      "loss": 0.0085,
      "step": 130000
    },
    {
      "epoch": 7.097019795518817,
      "grad_norm": 6.189642906188965,
      "learning_rate": 8.580596040896238e-06,
      "loss": 0.0093,
      "step": 130500
    },
    {
      "epoch": 7.124211442244943,
      "grad_norm": 0.7962560057640076,
      "learning_rate": 8.575157711551013e-06,
      "loss": 0.0116,
      "step": 131000
    },
    {
      "epoch": 7.151403088971068,
      "grad_norm": 0.14231279492378235,
      "learning_rate": 8.569719382205786e-06,
      "loss": 0.0122,
      "step": 131500
    },
    {
      "epoch": 7.178594735697194,
      "grad_norm": 2.1064910888671875,
      "learning_rate": 8.564281052860563e-06,
      "loss": 0.013,
      "step": 132000
    },
    {
      "epoch": 7.2057863824233195,
      "grad_norm": 0.02576456405222416,
      "learning_rate": 8.558842723515337e-06,
      "loss": 0.0098,
      "step": 132500
    },
    {
      "epoch": 7.232978029149446,
      "grad_norm": 0.05112173408269882,
      "learning_rate": 8.553404394170112e-06,
      "loss": 0.0101,
      "step": 133000
    },
    {
      "epoch": 7.260169675875571,
      "grad_norm": 0.7245155572891235,
      "learning_rate": 8.547966064824887e-06,
      "loss": 0.012,
      "step": 133500
    },
    {
      "epoch": 7.287361322601697,
      "grad_norm": 0.014532721601426601,
      "learning_rate": 8.54252773547966e-06,
      "loss": 0.0088,
      "step": 134000
    },
    {
      "epoch": 7.314552969327822,
      "grad_norm": 0.1423533707857132,
      "learning_rate": 8.537089406134438e-06,
      "loss": 0.0124,
      "step": 134500
    },
    {
      "epoch": 7.341744616053949,
      "grad_norm": 0.33425888419151306,
      "learning_rate": 8.531651076789211e-06,
      "loss": 0.0103,
      "step": 135000
    },
    {
      "epoch": 7.368936262780074,
      "grad_norm": 0.27307161688804626,
      "learning_rate": 8.526212747443986e-06,
      "loss": 0.0103,
      "step": 135500
    },
    {
      "epoch": 7.3961279095062,
      "grad_norm": 0.559861958026886,
      "learning_rate": 8.52077441809876e-06,
      "loss": 0.0124,
      "step": 136000
    },
    {
      "epoch": 7.423319556232325,
      "grad_norm": 0.1392046958208084,
      "learning_rate": 8.515336088753535e-06,
      "loss": 0.0105,
      "step": 136500
    },
    {
      "epoch": 7.4505112029584515,
      "grad_norm": 0.013254035264253616,
      "learning_rate": 8.50989775940831e-06,
      "loss": 0.011,
      "step": 137000
    },
    {
      "epoch": 7.477702849684577,
      "grad_norm": 5.330664157867432,
      "learning_rate": 8.504459430063085e-06,
      "loss": 0.0108,
      "step": 137500
    },
    {
      "epoch": 7.504894496410703,
      "grad_norm": 3.8794960975646973,
      "learning_rate": 8.49902110071786e-06,
      "loss": 0.0117,
      "step": 138000
    },
    {
      "epoch": 7.532086143136828,
      "grad_norm": 0.7144114375114441,
      "learning_rate": 8.493582771372634e-06,
      "loss": 0.0126,
      "step": 138500
    },
    {
      "epoch": 7.559277789862954,
      "grad_norm": 0.022297067567706108,
      "learning_rate": 8.48814444202741e-06,
      "loss": 0.0125,
      "step": 139000
    },
    {
      "epoch": 7.58646943658908,
      "grad_norm": 0.6900652050971985,
      "learning_rate": 8.482706112682185e-06,
      "loss": 0.0113,
      "step": 139500
    },
    {
      "epoch": 7.613661083315206,
      "grad_norm": 0.21370786428451538,
      "learning_rate": 8.47726778333696e-06,
      "loss": 0.0111,
      "step": 140000
    },
    {
      "epoch": 7.640852730041331,
      "grad_norm": 1.9464149475097656,
      "learning_rate": 8.471829453991735e-06,
      "loss": 0.0118,
      "step": 140500
    },
    {
      "epoch": 7.668044376767457,
      "grad_norm": 0.18144677579402924,
      "learning_rate": 8.466391124646508e-06,
      "loss": 0.0145,
      "step": 141000
    },
    {
      "epoch": 7.695236023493583,
      "grad_norm": 0.3602216839790344,
      "learning_rate": 8.460952795301284e-06,
      "loss": 0.0122,
      "step": 141500
    },
    {
      "epoch": 7.722427670219709,
      "grad_norm": 0.3419041335582733,
      "learning_rate": 8.455514465956059e-06,
      "loss": 0.0133,
      "step": 142000
    },
    {
      "epoch": 7.749619316945834,
      "grad_norm": 0.10839635133743286,
      "learning_rate": 8.450076136610834e-06,
      "loss": 0.0105,
      "step": 142500
    },
    {
      "epoch": 7.77681096367196,
      "grad_norm": 3.2976644039154053,
      "learning_rate": 8.44463780726561e-06,
      "loss": 0.0106,
      "step": 143000
    },
    {
      "epoch": 7.8040026103980855,
      "grad_norm": 1.0369518995285034,
      "learning_rate": 8.439199477920383e-06,
      "loss": 0.0137,
      "step": 143500
    },
    {
      "epoch": 7.831194257124212,
      "grad_norm": 1.0955511331558228,
      "learning_rate": 8.433761148575158e-06,
      "loss": 0.0111,
      "step": 144000
    },
    {
      "epoch": 7.858385903850337,
      "grad_norm": 1.4048563241958618,
      "learning_rate": 8.428322819229933e-06,
      "loss": 0.0108,
      "step": 144500
    },
    {
      "epoch": 7.885577550576463,
      "grad_norm": 1.3276982307434082,
      "learning_rate": 8.422884489884708e-06,
      "loss": 0.0135,
      "step": 145000
    },
    {
      "epoch": 7.912769197302588,
      "grad_norm": 0.062070589512586594,
      "learning_rate": 8.417446160539484e-06,
      "loss": 0.0128,
      "step": 145500
    },
    {
      "epoch": 7.939960844028715,
      "grad_norm": 0.06153295561671257,
      "learning_rate": 8.412007831194257e-06,
      "loss": 0.0122,
      "step": 146000
    },
    {
      "epoch": 7.96715249075484,
      "grad_norm": 0.3033904731273651,
      "learning_rate": 8.406569501849032e-06,
      "loss": 0.0121,
      "step": 146500
    },
    {
      "epoch": 7.994344137480966,
      "grad_norm": 0.010566359385848045,
      "learning_rate": 8.401131172503807e-06,
      "loss": 0.0109,
      "step": 147000
    },
    {
      "epoch": 8.0,
      "eval_exact_match": 0.8825,
      "eval_loss": 0.27669957280158997,
      "eval_runtime": 1019.6911,
      "eval_samples_per_second": 11.157,
      "eval_steps_per_second": 0.698,
      "step": 147104
    },
    {
      "epoch": 8.021535784207092,
      "grad_norm": 0.4247625172138214,
      "learning_rate": 8.395692843158583e-06,
      "loss": 0.0086,
      "step": 147500
    },
    {
      "epoch": 8.048727430933218,
      "grad_norm": 0.1236380860209465,
      "learning_rate": 8.390254513813358e-06,
      "loss": 0.0071,
      "step": 148000
    },
    {
      "epoch": 8.075919077659343,
      "grad_norm": 8.52878475189209,
      "learning_rate": 8.384816184468131e-06,
      "loss": 0.0081,
      "step": 148500
    },
    {
      "epoch": 8.103110724385468,
      "grad_norm": 1.577666997909546,
      "learning_rate": 8.379377855122907e-06,
      "loss": 0.008,
      "step": 149000
    },
    {
      "epoch": 8.130302371111595,
      "grad_norm": 0.29314127564430237,
      "learning_rate": 8.373939525777682e-06,
      "loss": 0.0066,
      "step": 149500
    },
    {
      "epoch": 8.15749401783772,
      "grad_norm": 0.7720054984092712,
      "learning_rate": 8.368501196432457e-06,
      "loss": 0.0064,
      "step": 150000
    },
    {
      "epoch": 8.184685664563846,
      "grad_norm": 0.21347130835056305,
      "learning_rate": 8.363062867087232e-06,
      "loss": 0.0067,
      "step": 150500
    },
    {
      "epoch": 8.211877311289971,
      "grad_norm": 0.004629666917026043,
      "learning_rate": 8.357624537742006e-06,
      "loss": 0.0073,
      "step": 151000
    },
    {
      "epoch": 8.239068958016098,
      "grad_norm": 0.07173150777816772,
      "learning_rate": 8.352186208396781e-06,
      "loss": 0.0088,
      "step": 151500
    },
    {
      "epoch": 8.266260604742223,
      "grad_norm": 0.0343378446996212,
      "learning_rate": 8.346747879051556e-06,
      "loss": 0.0085,
      "step": 152000
    },
    {
      "epoch": 8.293452251468349,
      "grad_norm": 0.2848726809024811,
      "learning_rate": 8.341309549706331e-06,
      "loss": 0.0076,
      "step": 152500
    },
    {
      "epoch": 8.320643898194474,
      "grad_norm": 0.08385962247848511,
      "learning_rate": 8.335871220361107e-06,
      "loss": 0.0089,
      "step": 153000
    },
    {
      "epoch": 8.347835544920601,
      "grad_norm": 0.2588665783405304,
      "learning_rate": 8.33043289101588e-06,
      "loss": 0.0086,
      "step": 153500
    },
    {
      "epoch": 8.375027191646726,
      "grad_norm": 0.5390540361404419,
      "learning_rate": 8.324994561670655e-06,
      "loss": 0.0082,
      "step": 154000
    },
    {
      "epoch": 8.402218838372852,
      "grad_norm": 0.09838502109050751,
      "learning_rate": 8.31955623232543e-06,
      "loss": 0.0095,
      "step": 154500
    },
    {
      "epoch": 8.429410485098977,
      "grad_norm": 5.106756687164307,
      "learning_rate": 8.314117902980206e-06,
      "loss": 0.01,
      "step": 155000
    },
    {
      "epoch": 8.456602131825104,
      "grad_norm": 0.08692660927772522,
      "learning_rate": 8.308679573634981e-06,
      "loss": 0.0097,
      "step": 155500
    },
    {
      "epoch": 8.48379377855123,
      "grad_norm": 0.013324704952538013,
      "learning_rate": 8.303241244289754e-06,
      "loss": 0.0092,
      "step": 156000
    },
    {
      "epoch": 8.510985425277354,
      "grad_norm": 0.008706462569534779,
      "learning_rate": 8.29780291494453e-06,
      "loss": 0.0097,
      "step": 156500
    },
    {
      "epoch": 8.53817707200348,
      "grad_norm": 5.618961334228516,
      "learning_rate": 8.292364585599305e-06,
      "loss": 0.0103,
      "step": 157000
    },
    {
      "epoch": 8.565368718729607,
      "grad_norm": 0.17830610275268555,
      "learning_rate": 8.28692625625408e-06,
      "loss": 0.0091,
      "step": 157500
    },
    {
      "epoch": 8.592560365455732,
      "grad_norm": 0.15179601311683655,
      "learning_rate": 8.281487926908855e-06,
      "loss": 0.0097,
      "step": 158000
    },
    {
      "epoch": 8.619752012181857,
      "grad_norm": 0.004950134549289942,
      "learning_rate": 8.276049597563629e-06,
      "loss": 0.0077,
      "step": 158500
    },
    {
      "epoch": 8.646943658907983,
      "grad_norm": 1.6617698669433594,
      "learning_rate": 8.270611268218404e-06,
      "loss": 0.0112,
      "step": 159000
    },
    {
      "epoch": 8.67413530563411,
      "grad_norm": 1.129233479499817,
      "learning_rate": 8.265172938873179e-06,
      "loss": 0.0084,
      "step": 159500
    },
    {
      "epoch": 8.701326952360235,
      "grad_norm": 0.3634829521179199,
      "learning_rate": 8.259734609527954e-06,
      "loss": 0.0088,
      "step": 160000
    },
    {
      "epoch": 8.72851859908636,
      "grad_norm": 0.0007674964144825935,
      "learning_rate": 8.254296280182728e-06,
      "loss": 0.0091,
      "step": 160500
    },
    {
      "epoch": 8.755710245812486,
      "grad_norm": 0.840733528137207,
      "learning_rate": 8.248857950837503e-06,
      "loss": 0.0096,
      "step": 161000
    },
    {
      "epoch": 8.782901892538613,
      "grad_norm": 0.002596140606328845,
      "learning_rate": 8.243419621492278e-06,
      "loss": 0.0089,
      "step": 161500
    },
    {
      "epoch": 8.810093539264738,
      "grad_norm": 0.011948781087994576,
      "learning_rate": 8.237981292147052e-06,
      "loss": 0.0079,
      "step": 162000
    },
    {
      "epoch": 8.837285185990863,
      "grad_norm": 0.4065161645412445,
      "learning_rate": 8.232542962801829e-06,
      "loss": 0.0083,
      "step": 162500
    },
    {
      "epoch": 8.864476832716988,
      "grad_norm": 0.001233687624335289,
      "learning_rate": 8.227104633456602e-06,
      "loss": 0.0101,
      "step": 163000
    },
    {
      "epoch": 8.891668479443116,
      "grad_norm": 1.7294543981552124,
      "learning_rate": 8.221666304111377e-06,
      "loss": 0.0097,
      "step": 163500
    },
    {
      "epoch": 8.91886012616924,
      "grad_norm": 0.001020897296257317,
      "learning_rate": 8.216227974766153e-06,
      "loss": 0.0101,
      "step": 164000
    },
    {
      "epoch": 8.946051772895366,
      "grad_norm": 0.01642615906894207,
      "learning_rate": 8.210789645420926e-06,
      "loss": 0.0103,
      "step": 164500
    },
    {
      "epoch": 8.973243419621493,
      "grad_norm": 0.7197975516319275,
      "learning_rate": 8.205351316075703e-06,
      "loss": 0.0097,
      "step": 165000
    },
    {
      "epoch": 9.0,
      "eval_exact_match": 0.8803,
      "eval_loss": 0.29959383606910706,
      "eval_runtime": 1017.9862,
      "eval_samples_per_second": 11.176,
      "eval_steps_per_second": 0.699,
      "step": 165492
    },
    {
      "epoch": 9.000435066347618,
      "grad_norm": 0.19451579451560974,
      "learning_rate": 8.199912986730476e-06,
      "loss": 0.007,
      "step": 165500
    },
    {
      "epoch": 9.027626713073744,
      "grad_norm": 0.10793378204107285,
      "learning_rate": 8.194474657385252e-06,
      "loss": 0.0054,
      "step": 166000
    },
    {
      "epoch": 9.054818359799869,
      "grad_norm": 0.012452369555830956,
      "learning_rate": 8.189036328040027e-06,
      "loss": 0.004,
      "step": 166500
    },
    {
      "epoch": 9.082010006525994,
      "grad_norm": 0.30145037174224854,
      "learning_rate": 8.1835979986948e-06,
      "loss": 0.0059,
      "step": 167000
    },
    {
      "epoch": 9.109201653252121,
      "grad_norm": 0.0008534679072909057,
      "learning_rate": 8.178159669349577e-06,
      "loss": 0.0062,
      "step": 167500
    },
    {
      "epoch": 9.136393299978247,
      "grad_norm": 0.03561757877469063,
      "learning_rate": 8.17272134000435e-06,
      "loss": 0.0049,
      "step": 168000
    },
    {
      "epoch": 9.163584946704372,
      "grad_norm": 0.9257168173789978,
      "learning_rate": 8.167283010659126e-06,
      "loss": 0.0055,
      "step": 168500
    },
    {
      "epoch": 9.190776593430499,
      "grad_norm": 0.4846535325050354,
      "learning_rate": 8.161844681313901e-06,
      "loss": 0.0068,
      "step": 169000
    },
    {
      "epoch": 9.217968240156624,
      "grad_norm": 0.2089615762233734,
      "learning_rate": 8.156406351968675e-06,
      "loss": 0.0063,
      "step": 169500
    },
    {
      "epoch": 9.24515988688275,
      "grad_norm": 0.14049288630485535,
      "learning_rate": 8.150968022623452e-06,
      "loss": 0.0072,
      "step": 170000
    },
    {
      "epoch": 9.272351533608875,
      "grad_norm": 0.26103538274765015,
      "learning_rate": 8.145529693278225e-06,
      "loss": 0.006,
      "step": 170500
    },
    {
      "epoch": 9.299543180335002,
      "grad_norm": 0.1939212679862976,
      "learning_rate": 8.140091363933e-06,
      "loss": 0.0069,
      "step": 171000
    },
    {
      "epoch": 9.326734827061127,
      "grad_norm": 0.004148316103965044,
      "learning_rate": 8.134653034587776e-06,
      "loss": 0.0063,
      "step": 171500
    },
    {
      "epoch": 9.353926473787253,
      "grad_norm": 0.36702999472618103,
      "learning_rate": 8.129214705242549e-06,
      "loss": 0.0064,
      "step": 172000
    },
    {
      "epoch": 9.381118120513378,
      "grad_norm": 0.0005734359147027135,
      "learning_rate": 8.123776375897326e-06,
      "loss": 0.007,
      "step": 172500
    },
    {
      "epoch": 9.408309767239505,
      "grad_norm": 9.195197105407715,
      "learning_rate": 8.1183380465521e-06,
      "loss": 0.0087,
      "step": 173000
    },
    {
      "epoch": 9.43550141396563,
      "grad_norm": 0.4702955186367035,
      "learning_rate": 8.112899717206875e-06,
      "loss": 0.0077,
      "step": 173500
    },
    {
      "epoch": 9.462693060691755,
      "grad_norm": 0.8873838782310486,
      "learning_rate": 8.10746138786165e-06,
      "loss": 0.0073,
      "step": 174000
    },
    {
      "epoch": 9.48988470741788,
      "grad_norm": 0.028335994109511375,
      "learning_rate": 8.102023058516423e-06,
      "loss": 0.0068,
      "step": 174500
    },
    {
      "epoch": 9.517076354144008,
      "grad_norm": 0.2030220478773117,
      "learning_rate": 8.0965847291712e-06,
      "loss": 0.008,
      "step": 175000
    },
    {
      "epoch": 9.544268000870133,
      "grad_norm": 0.015208634547889233,
      "learning_rate": 8.091146399825974e-06,
      "loss": 0.0086,
      "step": 175500
    },
    {
      "epoch": 9.571459647596258,
      "grad_norm": 0.004770020954310894,
      "learning_rate": 8.085708070480749e-06,
      "loss": 0.0075,
      "step": 176000
    },
    {
      "epoch": 9.598651294322384,
      "grad_norm": 7.568017959594727,
      "learning_rate": 8.080269741135524e-06,
      "loss": 0.0066,
      "step": 176500
    },
    {
      "epoch": 9.62584294104851,
      "grad_norm": 0.08423513919115067,
      "learning_rate": 8.074831411790298e-06,
      "loss": 0.0064,
      "step": 177000
    },
    {
      "epoch": 9.653034587774636,
      "grad_norm": 0.979928195476532,
      "learning_rate": 8.069393082445075e-06,
      "loss": 0.0072,
      "step": 177500
    },
    {
      "epoch": 9.680226234500761,
      "grad_norm": 0.05285876616835594,
      "learning_rate": 8.063954753099848e-06,
      "loss": 0.0075,
      "step": 178000
    },
    {
      "epoch": 9.707417881226887,
      "grad_norm": 0.3576393723487854,
      "learning_rate": 8.058516423754623e-06,
      "loss": 0.0087,
      "step": 178500
    },
    {
      "epoch": 9.734609527953014,
      "grad_norm": 0.027542833238840103,
      "learning_rate": 8.053078094409399e-06,
      "loss": 0.0081,
      "step": 179000
    },
    {
      "epoch": 9.761801174679139,
      "grad_norm": 0.01537884958088398,
      "learning_rate": 8.047639765064172e-06,
      "loss": 0.0059,
      "step": 179500
    },
    {
      "epoch": 9.788992821405264,
      "grad_norm": 0.8428720235824585,
      "learning_rate": 8.042201435718949e-06,
      "loss": 0.0083,
      "step": 180000
    },
    {
      "epoch": 9.81618446813139,
      "grad_norm": 0.0019140657968819141,
      "learning_rate": 8.036763106373722e-06,
      "loss": 0.0072,
      "step": 180500
    },
    {
      "epoch": 9.843376114857517,
      "grad_norm": 0.03006519190967083,
      "learning_rate": 8.031324777028498e-06,
      "loss": 0.0091,
      "step": 181000
    },
    {
      "epoch": 9.870567761583642,
      "grad_norm": 1.1168920993804932,
      "learning_rate": 8.025886447683273e-06,
      "loss": 0.0069,
      "step": 181500
    },
    {
      "epoch": 9.897759408309767,
      "grad_norm": 0.0870746374130249,
      "learning_rate": 8.020448118338046e-06,
      "loss": 0.0069,
      "step": 182000
    },
    {
      "epoch": 9.924951055035892,
      "grad_norm": 0.2507496774196625,
      "learning_rate": 8.015009788992823e-06,
      "loss": 0.007,
      "step": 182500
    },
    {
      "epoch": 9.95214270176202,
      "grad_norm": 4.196346759796143,
      "learning_rate": 8.009571459647597e-06,
      "loss": 0.0083,
      "step": 183000
    },
    {
      "epoch": 9.979334348488145,
      "grad_norm": 0.009614282287657261,
      "learning_rate": 8.004133130302372e-06,
      "loss": 0.0084,
      "step": 183500
    },
    {
      "epoch": 10.0,
      "eval_exact_match": 0.882,
      "eval_loss": 0.3081795871257782,
      "eval_runtime": 1024.0537,
      "eval_samples_per_second": 11.11,
      "eval_steps_per_second": 0.695,
      "step": 183880
    },
    {
      "epoch": 10.00652599521427,
      "grad_norm": 0.01503839809447527,
      "learning_rate": 7.998694800957147e-06,
      "loss": 0.0067,
      "step": 184000
    },
    {
      "epoch": 10.033717641940395,
      "grad_norm": 1.0102128982543945,
      "learning_rate": 7.99325647161192e-06,
      "loss": 0.0046,
      "step": 184500
    },
    {
      "epoch": 10.060909288666522,
      "grad_norm": 1.3546432256698608,
      "learning_rate": 7.987818142266696e-06,
      "loss": 0.0055,
      "step": 185000
    },
    {
      "epoch": 10.088100935392648,
      "grad_norm": 0.001982804387807846,
      "learning_rate": 7.982379812921471e-06,
      "loss": 0.0054,
      "step": 185500
    },
    {
      "epoch": 10.115292582118773,
      "grad_norm": 0.011872241273522377,
      "learning_rate": 7.976941483576246e-06,
      "loss": 0.0059,
      "step": 186000
    },
    {
      "epoch": 10.142484228844898,
      "grad_norm": 0.06217168644070625,
      "learning_rate": 7.97150315423102e-06,
      "loss": 0.0051,
      "step": 186500
    },
    {
      "epoch": 10.169675875571025,
      "grad_norm": 0.5569573044776917,
      "learning_rate": 7.966064824885795e-06,
      "loss": 0.0052,
      "step": 187000
    },
    {
      "epoch": 10.19686752229715,
      "grad_norm": 0.1213633194565773,
      "learning_rate": 7.96062649554057e-06,
      "loss": 0.0041,
      "step": 187500
    },
    {
      "epoch": 10.224059169023276,
      "grad_norm": 0.020384617149829865,
      "learning_rate": 7.955188166195345e-06,
      "loss": 0.0059,
      "step": 188000
    },
    {
      "epoch": 10.251250815749401,
      "grad_norm": 0.012423365376889706,
      "learning_rate": 7.94974983685012e-06,
      "loss": 0.0064,
      "step": 188500
    },
    {
      "epoch": 10.278442462475528,
      "grad_norm": 0.0038297956343740225,
      "learning_rate": 7.944311507504894e-06,
      "loss": 0.0039,
      "step": 189000
    },
    {
      "epoch": 10.305634109201653,
      "grad_norm": 0.6402150988578796,
      "learning_rate": 7.93887317815967e-06,
      "loss": 0.0066,
      "step": 189500
    },
    {
      "epoch": 10.332825755927779,
      "grad_norm": 0.0031200500670820475,
      "learning_rate": 7.933434848814445e-06,
      "loss": 0.0061,
      "step": 190000
    },
    {
      "epoch": 10.360017402653904,
      "grad_norm": 0.6107866168022156,
      "learning_rate": 7.92799651946922e-06,
      "loss": 0.0054,
      "step": 190500
    },
    {
      "epoch": 10.387209049380031,
      "grad_norm": 2.908076047897339,
      "learning_rate": 7.922558190123995e-06,
      "loss": 0.0066,
      "step": 191000
    },
    {
      "epoch": 10.414400696106156,
      "grad_norm": 0.5853959321975708,
      "learning_rate": 7.917119860778768e-06,
      "loss": 0.0065,
      "step": 191500
    },
    {
      "epoch": 10.441592342832282,
      "grad_norm": 0.005342130083590746,
      "learning_rate": 7.911681531433544e-06,
      "loss": 0.0079,
      "step": 192000
    },
    {
      "epoch": 10.468783989558407,
      "grad_norm": 10.593901634216309,
      "learning_rate": 7.906243202088319e-06,
      "loss": 0.0064,
      "step": 192500
    },
    {
      "epoch": 10.495975636284534,
      "grad_norm": 0.007571155205368996,
      "learning_rate": 7.900804872743094e-06,
      "loss": 0.0061,
      "step": 193000
    },
    {
      "epoch": 10.52316728301066,
      "grad_norm": 0.08925803750753403,
      "learning_rate": 7.89536654339787e-06,
      "loss": 0.0051,
      "step": 193500
    },
    {
      "epoch": 10.550358929736785,
      "grad_norm": 0.13003523647785187,
      "learning_rate": 7.889928214052643e-06,
      "loss": 0.0053,
      "step": 194000
    },
    {
      "epoch": 10.57755057646291,
      "grad_norm": 0.051926884800195694,
      "learning_rate": 7.884489884707418e-06,
      "loss": 0.0081,
      "step": 194500
    },
    {
      "epoch": 10.604742223189037,
      "grad_norm": 0.0002671371621545404,
      "learning_rate": 7.879051555362193e-06,
      "loss": 0.006,
      "step": 195000
    },
    {
      "epoch": 10.631933869915162,
      "grad_norm": 0.0013647901359945536,
      "learning_rate": 7.873613226016968e-06,
      "loss": 0.0055,
      "step": 195500
    },
    {
      "epoch": 10.659125516641287,
      "grad_norm": 0.495665043592453,
      "learning_rate": 7.868174896671744e-06,
      "loss": 0.006,
      "step": 196000
    },
    {
      "epoch": 10.686317163367413,
      "grad_norm": 3.4194962978363037,
      "learning_rate": 7.862736567326517e-06,
      "loss": 0.006,
      "step": 196500
    },
    {
      "epoch": 10.71350881009354,
      "grad_norm": 0.5971339344978333,
      "learning_rate": 7.857298237981292e-06,
      "loss": 0.0065,
      "step": 197000
    },
    {
      "epoch": 10.740700456819665,
      "grad_norm": 0.0033764122053980827,
      "learning_rate": 7.851859908636068e-06,
      "loss": 0.0069,
      "step": 197500
    },
    {
      "epoch": 10.76789210354579,
      "grad_norm": 0.15896004438400269,
      "learning_rate": 7.846421579290843e-06,
      "loss": 0.0065,
      "step": 198000
    },
    {
      "epoch": 10.795083750271916,
      "grad_norm": 0.012025897391140461,
      "learning_rate": 7.840983249945618e-06,
      "loss": 0.0054,
      "step": 198500
    },
    {
      "epoch": 10.822275396998043,
      "grad_norm": 0.05475957691669464,
      "learning_rate": 7.835544920600391e-06,
      "loss": 0.0054,
      "step": 199000
    },
    {
      "epoch": 10.849467043724168,
      "grad_norm": 0.003789114998653531,
      "learning_rate": 7.830106591255167e-06,
      "loss": 0.0068,
      "step": 199500
    },
    {
      "epoch": 10.876658690450293,
      "grad_norm": 0.0021468698978424072,
      "learning_rate": 7.824668261909942e-06,
      "loss": 0.0064,
      "step": 200000
    },
    {
      "epoch": 10.903850337176419,
      "grad_norm": 6.396396636962891,
      "learning_rate": 7.819229932564717e-06,
      "loss": 0.0074,
      "step": 200500
    },
    {
      "epoch": 10.931041983902546,
      "grad_norm": 0.023530324921011925,
      "learning_rate": 7.813791603219492e-06,
      "loss": 0.0061,
      "step": 201000
    },
    {
      "epoch": 10.958233630628671,
      "grad_norm": 0.019590675830841064,
      "learning_rate": 7.808353273874266e-06,
      "loss": 0.0054,
      "step": 201500
    },
    {
      "epoch": 10.985425277354796,
      "grad_norm": 0.0019979814533144236,
      "learning_rate": 7.802914944529041e-06,
      "loss": 0.0062,
      "step": 202000
    },
    {
      "epoch": 11.0,
      "eval_exact_match": 0.8818,
      "eval_loss": 0.31789034605026245,
      "eval_runtime": 1019.6612,
      "eval_samples_per_second": 11.158,
      "eval_steps_per_second": 0.698,
      "step": 202268
    },
    {
      "epoch": 11.012616924080922,
      "grad_norm": 0.024939043447375298,
      "learning_rate": 7.797476615183816e-06,
      "loss": 0.0058,
      "step": 202500
    },
    {
      "epoch": 11.039808570807049,
      "grad_norm": 0.00953602697700262,
      "learning_rate": 7.792038285838591e-06,
      "loss": 0.0063,
      "step": 203000
    },
    {
      "epoch": 11.067000217533174,
      "grad_norm": 2.4622678756713867,
      "learning_rate": 7.786599956493367e-06,
      "loss": 0.003,
      "step": 203500
    },
    {
      "epoch": 11.0941918642593,
      "grad_norm": 0.002521548420190811,
      "learning_rate": 7.78116162714814e-06,
      "loss": 0.0037,
      "step": 204000
    },
    {
      "epoch": 11.121383510985424,
      "grad_norm": 0.017454292625188828,
      "learning_rate": 7.775723297802915e-06,
      "loss": 0.0038,
      "step": 204500
    },
    {
      "epoch": 11.148575157711551,
      "grad_norm": 0.01883101277053356,
      "learning_rate": 7.77028496845769e-06,
      "loss": 0.0035,
      "step": 205000
    },
    {
      "epoch": 11.175766804437677,
      "grad_norm": 0.004060177132487297,
      "learning_rate": 7.764846639112466e-06,
      "loss": 0.0049,
      "step": 205500
    },
    {
      "epoch": 11.202958451163802,
      "grad_norm": 11.019586563110352,
      "learning_rate": 7.759408309767241e-06,
      "loss": 0.0059,
      "step": 206000
    },
    {
      "epoch": 11.230150097889927,
      "grad_norm": 0.0053796349093317986,
      "learning_rate": 7.753969980422014e-06,
      "loss": 0.0052,
      "step": 206500
    },
    {
      "epoch": 11.257341744616054,
      "grad_norm": 0.03368353471159935,
      "learning_rate": 7.74853165107679e-06,
      "loss": 0.0048,
      "step": 207000
    },
    {
      "epoch": 11.28453339134218,
      "grad_norm": 1.5319031476974487,
      "learning_rate": 7.743093321731565e-06,
      "loss": 0.005,
      "step": 207500
    },
    {
      "epoch": 11.311725038068305,
      "grad_norm": 0.18133412301540375,
      "learning_rate": 7.73765499238634e-06,
      "loss": 0.0043,
      "step": 208000
    },
    {
      "epoch": 11.33891668479443,
      "grad_norm": 0.0025546839460730553,
      "learning_rate": 7.732216663041115e-06,
      "loss": 0.0041,
      "step": 208500
    },
    {
      "epoch": 11.366108331520557,
      "grad_norm": 0.0636424720287323,
      "learning_rate": 7.726778333695889e-06,
      "loss": 0.0054,
      "step": 209000
    },
    {
      "epoch": 11.393299978246683,
      "grad_norm": 0.0026644619647413492,
      "learning_rate": 7.721340004350664e-06,
      "loss": 0.0036,
      "step": 209500
    },
    {
      "epoch": 11.420491624972808,
      "grad_norm": 0.014393744058907032,
      "learning_rate": 7.71590167500544e-06,
      "loss": 0.0045,
      "step": 210000
    },
    {
      "epoch": 11.447683271698935,
      "grad_norm": 0.00033763342071324587,
      "learning_rate": 7.710463345660214e-06,
      "loss": 0.0052,
      "step": 210500
    },
    {
      "epoch": 11.47487491842506,
      "grad_norm": 0.0011537778191268444,
      "learning_rate": 7.705025016314988e-06,
      "loss": 0.0032,
      "step": 211000
    },
    {
      "epoch": 11.502066565151186,
      "grad_norm": 0.007493032608181238,
      "learning_rate": 7.699586686969763e-06,
      "loss": 0.0048,
      "step": 211500
    },
    {
      "epoch": 11.52925821187731,
      "grad_norm": 0.05346640944480896,
      "learning_rate": 7.694148357624538e-06,
      "loss": 0.0055,
      "step": 212000
    },
    {
      "epoch": 11.556449858603436,
      "grad_norm": 0.06508354097604752,
      "learning_rate": 7.688710028279313e-06,
      "loss": 0.0056,
      "step": 212500
    },
    {
      "epoch": 11.583641505329563,
      "grad_norm": 0.00885526929050684,
      "learning_rate": 7.683271698934089e-06,
      "loss": 0.0055,
      "step": 213000
    },
    {
      "epoch": 11.610833152055688,
      "grad_norm": 0.3413640558719635,
      "learning_rate": 7.677833369588862e-06,
      "loss": 0.0049,
      "step": 213500
    },
    {
      "epoch": 11.638024798781814,
      "grad_norm": 0.4742060601711273,
      "learning_rate": 7.672395040243637e-06,
      "loss": 0.0042,
      "step": 214000
    },
    {
      "epoch": 11.66521644550794,
      "grad_norm": 0.003855757648125291,
      "learning_rate": 7.666956710898413e-06,
      "loss": 0.0052,
      "step": 214500
    },
    {
      "epoch": 11.692408092234066,
      "grad_norm": 0.0012399395927786827,
      "learning_rate": 7.661518381553188e-06,
      "loss": 0.005,
      "step": 215000
    },
    {
      "epoch": 11.719599738960191,
      "grad_norm": 0.6118332743644714,
      "learning_rate": 7.656080052207963e-06,
      "loss": 0.0043,
      "step": 215500
    },
    {
      "epoch": 11.746791385686317,
      "grad_norm": 1.3236043453216553,
      "learning_rate": 7.650641722862737e-06,
      "loss": 0.0041,
      "step": 216000
    },
    {
      "epoch": 11.773983032412444,
      "grad_norm": 0.2502232491970062,
      "learning_rate": 7.645203393517512e-06,
      "loss": 0.0053,
      "step": 216500
    },
    {
      "epoch": 11.801174679138569,
      "grad_norm": 0.3646252155303955,
      "learning_rate": 7.639765064172287e-06,
      "loss": 0.0054,
      "step": 217000
    },
    {
      "epoch": 11.828366325864694,
      "grad_norm": 0.00609625643119216,
      "learning_rate": 7.634326734827062e-06,
      "loss": 0.0067,
      "step": 217500
    },
    {
      "epoch": 11.85555797259082,
      "grad_norm": 0.000278811261523515,
      "learning_rate": 7.6288884054818365e-06,
      "loss": 0.0068,
      "step": 218000
    },
    {
      "epoch": 11.882749619316947,
      "grad_norm": 0.03974534198641777,
      "learning_rate": 7.623450076136612e-06,
      "loss": 0.0059,
      "step": 218500
    },
    {
      "epoch": 11.909941266043072,
      "grad_norm": 0.001021494623273611,
      "learning_rate": 7.618011746791386e-06,
      "loss": 0.0054,
      "step": 219000
    },
    {
      "epoch": 11.937132912769197,
      "grad_norm": 3.7211320400238037,
      "learning_rate": 7.61257341744616e-06,
      "loss": 0.0039,
      "step": 219500
    },
    {
      "epoch": 11.964324559495322,
      "grad_norm": 0.25480392575263977,
      "learning_rate": 7.6071350881009365e-06,
      "loss": 0.0063,
      "step": 220000
    },
    {
      "epoch": 11.99151620622145,
      "grad_norm": 0.09184807538986206,
      "learning_rate": 7.601696758755711e-06,
      "loss": 0.0056,
      "step": 220500
    },
    {
      "epoch": 12.0,
      "eval_exact_match": 0.8848,
      "eval_loss": 0.32719093561172485,
      "eval_runtime": 1021.1809,
      "eval_samples_per_second": 11.141,
      "eval_steps_per_second": 0.697,
      "step": 220656
    },
    {
      "epoch": 12.018707852947575,
      "grad_norm": 0.0011609598295763135,
      "learning_rate": 7.596258429410486e-06,
      "loss": 0.0038,
      "step": 221000
    },
    {
      "epoch": 12.0458994996737,
      "grad_norm": 0.006982658989727497,
      "learning_rate": 7.59082010006526e-06,
      "loss": 0.0022,
      "step": 221500
    },
    {
      "epoch": 12.073091146399825,
      "grad_norm": 0.16167816519737244,
      "learning_rate": 7.585381770720035e-06,
      "loss": 0.0036,
      "step": 222000
    },
    {
      "epoch": 12.100282793125952,
      "grad_norm": 0.00900455191731453,
      "learning_rate": 7.579943441374811e-06,
      "loss": 0.0032,
      "step": 222500
    },
    {
      "epoch": 12.127474439852078,
      "grad_norm": 0.0007139613153412938,
      "learning_rate": 7.574505112029585e-06,
      "loss": 0.0042,
      "step": 223000
    },
    {
      "epoch": 12.154666086578203,
      "grad_norm": 0.2929919362068176,
      "learning_rate": 7.56906678268436e-06,
      "loss": 0.004,
      "step": 223500
    },
    {
      "epoch": 12.181857733304328,
      "grad_norm": 0.1538592129945755,
      "learning_rate": 7.563628453339135e-06,
      "loss": 0.0029,
      "step": 224000
    },
    {
      "epoch": 12.209049380030455,
      "grad_norm": 0.05683496594429016,
      "learning_rate": 7.558190123993909e-06,
      "loss": 0.0039,
      "step": 224500
    },
    {
      "epoch": 12.23624102675658,
      "grad_norm": 0.002470650477334857,
      "learning_rate": 7.552751794648685e-06,
      "loss": 0.0045,
      "step": 225000
    },
    {
      "epoch": 12.263432673482706,
      "grad_norm": 11.708252906799316,
      "learning_rate": 7.5473134653034595e-06,
      "loss": 0.0053,
      "step": 225500
    },
    {
      "epoch": 12.290624320208831,
      "grad_norm": 0.005214590113610029,
      "learning_rate": 7.541875135958235e-06,
      "loss": 0.0036,
      "step": 226000
    },
    {
      "epoch": 12.317815966934958,
      "grad_norm": 0.16717997193336487,
      "learning_rate": 7.536436806613009e-06,
      "loss": 0.0037,
      "step": 226500
    },
    {
      "epoch": 12.345007613661084,
      "grad_norm": 0.4088883101940155,
      "learning_rate": 7.530998477267783e-06,
      "loss": 0.0067,
      "step": 227000
    },
    {
      "epoch": 12.372199260387209,
      "grad_norm": 7.834692001342773,
      "learning_rate": 7.5255601479225594e-06,
      "loss": 0.0042,
      "step": 227500
    },
    {
      "epoch": 12.399390907113334,
      "grad_norm": 0.0002414975460851565,
      "learning_rate": 7.520121818577334e-06,
      "loss": 0.0033,
      "step": 228000
    },
    {
      "epoch": 12.426582553839461,
      "grad_norm": 0.005257593933492899,
      "learning_rate": 7.514683489232108e-06,
      "loss": 0.0034,
      "step": 228500
    },
    {
      "epoch": 12.453774200565586,
      "grad_norm": 0.0003224584797862917,
      "learning_rate": 7.509245159886883e-06,
      "loss": 0.0047,
      "step": 229000
    },
    {
      "epoch": 12.480965847291712,
      "grad_norm": 0.013201883994042873,
      "learning_rate": 7.503806830541658e-06,
      "loss": 0.0057,
      "step": 229500
    },
    {
      "epoch": 12.508157494017837,
      "grad_norm": 0.6929981112480164,
      "learning_rate": 7.498368501196434e-06,
      "loss": 0.007,
      "step": 230000
    },
    {
      "epoch": 12.535349140743964,
      "grad_norm": 0.01077383290976286,
      "learning_rate": 7.492930171851208e-06,
      "loss": 0.0048,
      "step": 230500
    },
    {
      "epoch": 12.56254078747009,
      "grad_norm": 0.00016784864419605583,
      "learning_rate": 7.4874918425059825e-06,
      "loss": 0.0052,
      "step": 231000
    },
    {
      "epoch": 12.589732434196215,
      "grad_norm": 0.02103503979742527,
      "learning_rate": 7.482053513160758e-06,
      "loss": 0.0042,
      "step": 231500
    },
    {
      "epoch": 12.61692408092234,
      "grad_norm": 0.16059672832489014,
      "learning_rate": 7.476615183815532e-06,
      "loss": 0.0034,
      "step": 232000
    },
    {
      "epoch": 12.644115727648467,
      "grad_norm": 0.002428087405860424,
      "learning_rate": 7.471176854470308e-06,
      "loss": 0.0045,
      "step": 232500
    },
    {
      "epoch": 12.671307374374592,
      "grad_norm": 0.002322005107998848,
      "learning_rate": 7.4657385251250825e-06,
      "loss": 0.0053,
      "step": 233000
    },
    {
      "epoch": 12.698499021100718,
      "grad_norm": 0.001284032710827887,
      "learning_rate": 7.460300195779857e-06,
      "loss": 0.0036,
      "step": 233500
    },
    {
      "epoch": 12.725690667826843,
      "grad_norm": 0.011062849313020706,
      "learning_rate": 7.454861866434632e-06,
      "loss": 0.0051,
      "step": 234000
    },
    {
      "epoch": 12.75288231455297,
      "grad_norm": 0.007350505795329809,
      "learning_rate": 7.449423537089406e-06,
      "loss": 0.0056,
      "step": 234500
    },
    {
      "epoch": 12.780073961279095,
      "grad_norm": 0.033329423516988754,
      "learning_rate": 7.443985207744182e-06,
      "loss": 0.0042,
      "step": 235000
    },
    {
      "epoch": 12.80726560800522,
      "grad_norm": 0.06402397900819778,
      "learning_rate": 7.438546878398957e-06,
      "loss": 0.0046,
      "step": 235500
    },
    {
      "epoch": 12.834457254731346,
      "grad_norm": 0.1854117214679718,
      "learning_rate": 7.433108549053731e-06,
      "loss": 0.0043,
      "step": 236000
    },
    {
      "epoch": 12.861648901457473,
      "grad_norm": 4.086294174194336,
      "learning_rate": 7.427670219708506e-06,
      "loss": 0.0043,
      "step": 236500
    },
    {
      "epoch": 12.888840548183598,
      "grad_norm": 0.0060177757404744625,
      "learning_rate": 7.422231890363281e-06,
      "loss": 0.0049,
      "step": 237000
    },
    {
      "epoch": 12.916032194909723,
      "grad_norm": 0.0001580445095896721,
      "learning_rate": 7.416793561018057e-06,
      "loss": 0.0051,
      "step": 237500
    },
    {
      "epoch": 12.943223841635849,
      "grad_norm": 0.08792165666818619,
      "learning_rate": 7.411355231672831e-06,
      "loss": 0.0043,
      "step": 238000
    },
    {
      "epoch": 12.970415488361976,
      "grad_norm": 0.10381147265434265,
      "learning_rate": 7.4059169023276055e-06,
      "loss": 0.0059,
      "step": 238500
    },
    {
      "epoch": 12.997607135088101,
      "grad_norm": 0.06624840945005417,
      "learning_rate": 7.400478572982381e-06,
      "loss": 0.0053,
      "step": 239000
    },
    {
      "epoch": 13.0,
      "eval_exact_match": 0.8813,
      "eval_loss": 0.3372015357017517,
      "eval_runtime": 1023.493,
      "eval_samples_per_second": 11.116,
      "eval_steps_per_second": 0.696,
      "step": 239044
    },
    {
      "epoch": 13.024798781814226,
      "grad_norm": 0.021261321380734444,
      "learning_rate": 7.395040243637155e-06,
      "loss": 0.0036,
      "step": 239500
    },
    {
      "epoch": 13.051990428540352,
      "grad_norm": 0.0019554144237190485,
      "learning_rate": 7.38960191429193e-06,
      "loss": 0.002,
      "step": 240000
    },
    {
      "epoch": 13.079182075266479,
      "grad_norm": 0.01153012365102768,
      "learning_rate": 7.3841635849467054e-06,
      "loss": 0.0027,
      "step": 240500
    },
    {
      "epoch": 13.106373721992604,
      "grad_norm": 0.0053418660536408424,
      "learning_rate": 7.37872525560148e-06,
      "loss": 0.0027,
      "step": 241000
    },
    {
      "epoch": 13.13356536871873,
      "grad_norm": 7.908132101874799e-05,
      "learning_rate": 7.373286926256254e-06,
      "loss": 0.0044,
      "step": 241500
    },
    {
      "epoch": 13.160757015444855,
      "grad_norm": 0.0015456726541742682,
      "learning_rate": 7.367848596911029e-06,
      "loss": 0.0048,
      "step": 242000
    },
    {
      "epoch": 13.187948662170982,
      "grad_norm": 0.009918862022459507,
      "learning_rate": 7.3624102675658046e-06,
      "loss": 0.0024,
      "step": 242500
    },
    {
      "epoch": 13.215140308897107,
      "grad_norm": 1.1204112768173218,
      "learning_rate": 7.35697193822058e-06,
      "loss": 0.0034,
      "step": 243000
    },
    {
      "epoch": 13.242331955623232,
      "grad_norm": 0.015402857214212418,
      "learning_rate": 7.351533608875354e-06,
      "loss": 0.0042,
      "step": 243500
    },
    {
      "epoch": 13.269523602349357,
      "grad_norm": 0.13369937241077423,
      "learning_rate": 7.3460952795301285e-06,
      "loss": 0.0038,
      "step": 244000
    },
    {
      "epoch": 13.296715249075485,
      "grad_norm": 0.0002845363924279809,
      "learning_rate": 7.340656950184904e-06,
      "loss": 0.0025,
      "step": 244500
    },
    {
      "epoch": 13.32390689580161,
      "grad_norm": 0.05165963992476463,
      "learning_rate": 7.335218620839679e-06,
      "loss": 0.0037,
      "step": 245000
    },
    {
      "epoch": 13.351098542527735,
      "grad_norm": 0.0008344284142367542,
      "learning_rate": 7.329780291494454e-06,
      "loss": 0.0036,
      "step": 245500
    },
    {
      "epoch": 13.37829018925386,
      "grad_norm": 0.46657341718673706,
      "learning_rate": 7.3243419621492284e-06,
      "loss": 0.0046,
      "step": 246000
    },
    {
      "epoch": 13.405481835979987,
      "grad_norm": 0.6373306512832642,
      "learning_rate": 7.318903632804003e-06,
      "loss": 0.0039,
      "step": 246500
    },
    {
      "epoch": 13.432673482706113,
      "grad_norm": 0.5203019976615906,
      "learning_rate": 7.313465303458778e-06,
      "loss": 0.0038,
      "step": 247000
    },
    {
      "epoch": 13.459865129432238,
      "grad_norm": 0.3291895389556885,
      "learning_rate": 7.308026974113553e-06,
      "loss": 0.0048,
      "step": 247500
    },
    {
      "epoch": 13.487056776158363,
      "grad_norm": 0.10239601135253906,
      "learning_rate": 7.302588644768328e-06,
      "loss": 0.004,
      "step": 248000
    },
    {
      "epoch": 13.51424842288449,
      "grad_norm": 6.02880859375,
      "learning_rate": 7.297150315423103e-06,
      "loss": 0.0044,
      "step": 248500
    },
    {
      "epoch": 13.541440069610616,
      "grad_norm": 0.0003724268462974578,
      "learning_rate": 7.291711986077877e-06,
      "loss": 0.003,
      "step": 249000
    },
    {
      "epoch": 13.568631716336741,
      "grad_norm": 0.008930696174502373,
      "learning_rate": 7.286273656732652e-06,
      "loss": 0.0063,
      "step": 249500
    },
    {
      "epoch": 13.595823363062866,
      "grad_norm": 0.08336593955755234,
      "learning_rate": 7.2808353273874275e-06,
      "loss": 0.0034,
      "step": 250000
    },
    {
      "epoch": 13.623015009788993,
      "grad_norm": 0.00440165214240551,
      "learning_rate": 7.275396998042203e-06,
      "loss": 0.0052,
      "step": 250500
    },
    {
      "epoch": 13.650206656515119,
      "grad_norm": 0.02176918275654316,
      "learning_rate": 7.269958668696977e-06,
      "loss": 0.0027,
      "step": 251000
    },
    {
      "epoch": 13.677398303241244,
      "grad_norm": 0.8251722455024719,
      "learning_rate": 7.2645203393517515e-06,
      "loss": 0.0042,
      "step": 251500
    },
    {
      "epoch": 13.704589949967371,
      "grad_norm": 0.0017955650109797716,
      "learning_rate": 7.259082010006527e-06,
      "loss": 0.0058,
      "step": 252000
    },
    {
      "epoch": 13.731781596693496,
      "grad_norm": 0.00015411581262014806,
      "learning_rate": 7.253643680661302e-06,
      "loss": 0.0043,
      "step": 252500
    },
    {
      "epoch": 13.758973243419621,
      "grad_norm": 0.0008522845455445349,
      "learning_rate": 7.248205351316076e-06,
      "loss": 0.0036,
      "step": 253000
    },
    {
      "epoch": 13.786164890145747,
      "grad_norm": 0.16430547833442688,
      "learning_rate": 7.242767021970851e-06,
      "loss": 0.0037,
      "step": 253500
    },
    {
      "epoch": 13.813356536871872,
      "grad_norm": 0.004896281752735376,
      "learning_rate": 7.237328692625626e-06,
      "loss": 0.0033,
      "step": 254000
    },
    {
      "epoch": 13.840548183597999,
      "grad_norm": 0.011140445247292519,
      "learning_rate": 7.2318903632804e-06,
      "loss": 0.0038,
      "step": 254500
    },
    {
      "epoch": 13.867739830324124,
      "grad_norm": 0.4586171805858612,
      "learning_rate": 7.226452033935176e-06,
      "loss": 0.004,
      "step": 255000
    },
    {
      "epoch": 13.89493147705025,
      "grad_norm": 0.30230623483657837,
      "learning_rate": 7.2210137045899505e-06,
      "loss": 0.005,
      "step": 255500
    },
    {
      "epoch": 13.922123123776377,
      "grad_norm": 0.0007046368555165827,
      "learning_rate": 7.215575375244726e-06,
      "loss": 0.0047,
      "step": 256000
    },
    {
      "epoch": 13.949314770502502,
      "grad_norm": 0.014722813852131367,
      "learning_rate": 7.2101370458995e-06,
      "loss": 0.0048,
      "step": 256500
    },
    {
      "epoch": 13.976506417228627,
      "grad_norm": 0.00015357104712165892,
      "learning_rate": 7.2046987165542745e-06,
      "loss": 0.0029,
      "step": 257000
    },
    {
      "epoch": 14.0,
      "eval_exact_match": 0.8867,
      "eval_loss": 0.34934452176094055,
      "eval_runtime": 1020.9159,
      "eval_samples_per_second": 11.144,
      "eval_steps_per_second": 0.697,
      "step": 257432
    },
    {
      "epoch": 14.003698063954753,
      "grad_norm": 0.042392514646053314,
      "learning_rate": 7.1992603872090505e-06,
      "loss": 0.0044,
      "step": 257500
    },
    {
      "epoch": 14.03088971068088,
      "grad_norm": 0.00043550218106247485,
      "learning_rate": 7.193822057863825e-06,
      "loss": 0.0022,
      "step": 258000
    },
    {
      "epoch": 14.058081357407005,
      "grad_norm": 0.010852435603737831,
      "learning_rate": 7.1883837285186e-06,
      "loss": 0.0038,
      "step": 258500
    },
    {
      "epoch": 14.08527300413313,
      "grad_norm": 1.2875603437423706,
      "learning_rate": 7.1829453991733744e-06,
      "loss": 0.0022,
      "step": 259000
    },
    {
      "epoch": 14.112464650859255,
      "grad_norm": 0.7273116707801819,
      "learning_rate": 7.177507069828149e-06,
      "loss": 0.0019,
      "step": 259500
    },
    {
      "epoch": 14.139656297585383,
      "grad_norm": 0.029256224632263184,
      "learning_rate": 7.172068740482925e-06,
      "loss": 0.0035,
      "step": 260000
    },
    {
      "epoch": 14.166847944311508,
      "grad_norm": 0.6167672276496887,
      "learning_rate": 7.166630411137699e-06,
      "loss": 0.0024,
      "step": 260500
    },
    {
      "epoch": 14.194039591037633,
      "grad_norm": 0.0019446202786639333,
      "learning_rate": 7.161192081792474e-06,
      "loss": 0.0025,
      "step": 261000
    },
    {
      "epoch": 14.221231237763758,
      "grad_norm": 0.007389050908386707,
      "learning_rate": 7.155753752447249e-06,
      "loss": 0.0033,
      "step": 261500
    },
    {
      "epoch": 14.248422884489885,
      "grad_norm": 1.1310490369796753,
      "learning_rate": 7.150315423102023e-06,
      "loss": 0.0025,
      "step": 262000
    },
    {
      "epoch": 14.27561453121601,
      "grad_norm": 0.011108157224953175,
      "learning_rate": 7.144877093756798e-06,
      "loss": 0.0019,
      "step": 262500
    },
    {
      "epoch": 14.302806177942136,
      "grad_norm": 0.0007718420820310712,
      "learning_rate": 7.1394387644115735e-06,
      "loss": 0.0048,
      "step": 263000
    },
    {
      "epoch": 14.329997824668261,
      "grad_norm": 0.00021103527978993952,
      "learning_rate": 7.134000435066349e-06,
      "loss": 0.0027,
      "step": 263500
    },
    {
      "epoch": 14.357189471394388,
      "grad_norm": 0.2015926092863083,
      "learning_rate": 7.128562105721123e-06,
      "loss": 0.0045,
      "step": 264000
    },
    {
      "epoch": 14.384381118120514,
      "grad_norm": 0.008299685083329678,
      "learning_rate": 7.1231237763758974e-06,
      "loss": 0.0033,
      "step": 264500
    },
    {
      "epoch": 14.411572764846639,
      "grad_norm": 2.5773375034332275,
      "learning_rate": 7.117685447030672e-06,
      "loss": 0.004,
      "step": 265000
    },
    {
      "epoch": 14.438764411572764,
      "grad_norm": 0.0018001631833612919,
      "learning_rate": 7.112247117685448e-06,
      "loss": 0.0036,
      "step": 265500
    },
    {
      "epoch": 14.465956058298891,
      "grad_norm": 0.0002188493381254375,
      "learning_rate": 7.106808788340222e-06,
      "loss": 0.0044,
      "step": 266000
    },
    {
      "epoch": 14.493147705025017,
      "grad_norm": 6.366543769836426,
      "learning_rate": 7.101370458994997e-06,
      "loss": 0.0045,
      "step": 266500
    },
    {
      "epoch": 14.520339351751142,
      "grad_norm": 1.5890947906882502e-05,
      "learning_rate": 7.095932129649772e-06,
      "loss": 0.0042,
      "step": 267000
    },
    {
      "epoch": 14.547530998477267,
      "grad_norm": 0.0019672750495374203,
      "learning_rate": 7.090493800304546e-06,
      "loss": 0.0041,
      "step": 267500
    },
    {
      "epoch": 14.574722645203394,
      "grad_norm": 3.3259029388427734,
      "learning_rate": 7.085055470959322e-06,
      "loss": 0.004,
      "step": 268000
    },
    {
      "epoch": 14.60191429192952,
      "grad_norm": 0.051898933947086334,
      "learning_rate": 7.0796171416140965e-06,
      "loss": 0.0038,
      "step": 268500
    },
    {
      "epoch": 14.629105938655645,
      "grad_norm": 0.0006499428418464959,
      "learning_rate": 7.074178812268872e-06,
      "loss": 0.0036,
      "step": 269000
    },
    {
      "epoch": 14.65629758538177,
      "grad_norm": 0.00038398781907744706,
      "learning_rate": 7.068740482923646e-06,
      "loss": 0.0048,
      "step": 269500
    },
    {
      "epoch": 14.683489232107897,
      "grad_norm": 0.4993869960308075,
      "learning_rate": 7.0633021535784204e-06,
      "loss": 0.0035,
      "step": 270000
    },
    {
      "epoch": 14.710680878834022,
      "grad_norm": 2.1485577235580422e-05,
      "learning_rate": 7.0578638242331965e-06,
      "loss": 0.0047,
      "step": 270500
    },
    {
      "epoch": 14.737872525560148,
      "grad_norm": 0.0015798599924892187,
      "learning_rate": 7.052425494887971e-06,
      "loss": 0.0042,
      "step": 271000
    },
    {
      "epoch": 14.765064172286273,
      "grad_norm": 0.0002877725928556174,
      "learning_rate": 7.046987165542746e-06,
      "loss": 0.0042,
      "step": 271500
    },
    {
      "epoch": 14.7922558190124,
      "grad_norm": 0.0006782010896131396,
      "learning_rate": 7.04154883619752e-06,
      "loss": 0.0029,
      "step": 272000
    },
    {
      "epoch": 14.819447465738525,
      "grad_norm": 3.1857094764709473,
      "learning_rate": 7.036110506852295e-06,
      "loss": 0.0034,
      "step": 272500
    },
    {
      "epoch": 14.84663911246465,
      "grad_norm": 0.07301226258277893,
      "learning_rate": 7.030672177507071e-06,
      "loss": 0.003,
      "step": 273000
    },
    {
      "epoch": 14.873830759190776,
      "grad_norm": 6.0143280029296875,
      "learning_rate": 7.025233848161845e-06,
      "loss": 0.003,
      "step": 273500
    },
    {
      "epoch": 14.901022405916903,
      "grad_norm": 0.02105681598186493,
      "learning_rate": 7.01979551881662e-06,
      "loss": 0.0041,
      "step": 274000
    },
    {
      "epoch": 14.928214052643028,
      "grad_norm": 0.08257223665714264,
      "learning_rate": 7.014357189471395e-06,
      "loss": 0.0051,
      "step": 274500
    },
    {
      "epoch": 14.955405699369154,
      "grad_norm": 0.013296125456690788,
      "learning_rate": 7.008918860126169e-06,
      "loss": 0.0044,
      "step": 275000
    },
    {
      "epoch": 14.982597346095279,
      "grad_norm": 0.16596081852912903,
      "learning_rate": 7.003480530780945e-06,
      "loss": 0.0039,
      "step": 275500
    },
    {
      "epoch": 15.0,
      "eval_exact_match": 0.8834,
      "eval_loss": 0.3267911374568939,
      "eval_runtime": 1018.9931,
      "eval_samples_per_second": 11.165,
      "eval_steps_per_second": 0.699,
      "step": 275820
    },
    {
      "epoch": 15.009788992821406,
      "grad_norm": 0.004664579872041941,
      "learning_rate": 6.9980422014357195e-06,
      "loss": 0.004,
      "step": 276000
    },
    {
      "epoch": 15.036980639547531,
      "grad_norm": 0.004305088426917791,
      "learning_rate": 6.992603872090495e-06,
      "loss": 0.0022,
      "step": 276500
    },
    {
      "epoch": 15.064172286273656,
      "grad_norm": 0.0011663463665172458,
      "learning_rate": 6.987165542745269e-06,
      "loss": 0.0026,
      "step": 277000
    },
    {
      "epoch": 15.091363932999782,
      "grad_norm": 0.0547974593937397,
      "learning_rate": 6.9817272134000434e-06,
      "loss": 0.002,
      "step": 277500
    },
    {
      "epoch": 15.118555579725909,
      "grad_norm": 0.0003876920964103192,
      "learning_rate": 6.9762888840548195e-06,
      "loss": 0.0033,
      "step": 278000
    },
    {
      "epoch": 15.145747226452034,
      "grad_norm": 0.0008074783254414797,
      "learning_rate": 6.970850554709594e-06,
      "loss": 0.0034,
      "step": 278500
    },
    {
      "epoch": 15.17293887317816,
      "grad_norm": 2.947611093521118,
      "learning_rate": 6.965412225364368e-06,
      "loss": 0.0033,
      "step": 279000
    },
    {
      "epoch": 15.200130519904285,
      "grad_norm": 0.034830063581466675,
      "learning_rate": 6.959973896019143e-06,
      "loss": 0.0039,
      "step": 279500
    },
    {
      "epoch": 15.227322166630412,
      "grad_norm": 0.0010544485412538052,
      "learning_rate": 6.954535566673918e-06,
      "loss": 0.0017,
      "step": 280000
    },
    {
      "epoch": 15.254513813356537,
      "grad_norm": 0.5181836485862732,
      "learning_rate": 6.949097237328694e-06,
      "loss": 0.002,
      "step": 280500
    },
    {
      "epoch": 15.281705460082662,
      "grad_norm": 2.1243185997009277,
      "learning_rate": 6.943658907983468e-06,
      "loss": 0.0033,
      "step": 281000
    },
    {
      "epoch": 15.308897106808788,
      "grad_norm": 0.2341310679912567,
      "learning_rate": 6.9382205786382425e-06,
      "loss": 0.003,
      "step": 281500
    },
    {
      "epoch": 15.336088753534915,
      "grad_norm": 0.9371480941772461,
      "learning_rate": 6.932782249293018e-06,
      "loss": 0.003,
      "step": 282000
    },
    {
      "epoch": 15.36328040026104,
      "grad_norm": 0.0012509943917393684,
      "learning_rate": 6.927343919947792e-06,
      "loss": 0.0031,
      "step": 282500
    },
    {
      "epoch": 15.390472046987165,
      "grad_norm": 0.03419405594468117,
      "learning_rate": 6.921905590602568e-06,
      "loss": 0.0041,
      "step": 283000
    },
    {
      "epoch": 15.41766369371329,
      "grad_norm": 0.003866742830723524,
      "learning_rate": 6.9164672612573425e-06,
      "loss": 0.0037,
      "step": 283500
    },
    {
      "epoch": 15.444855340439418,
      "grad_norm": 3.6404711863724515e-05,
      "learning_rate": 6.911028931912117e-06,
      "loss": 0.0037,
      "step": 284000
    },
    {
      "epoch": 15.472046987165543,
      "grad_norm": 0.006914507132023573,
      "learning_rate": 6.905590602566892e-06,
      "loss": 0.0031,
      "step": 284500
    },
    {
      "epoch": 15.499238633891668,
      "grad_norm": 0.0347830168902874,
      "learning_rate": 6.900152273221666e-06,
      "loss": 0.0022,
      "step": 285000
    },
    {
      "epoch": 15.526430280617793,
      "grad_norm": 0.022012189030647278,
      "learning_rate": 6.8947139438764425e-06,
      "loss": 0.0035,
      "step": 285500
    },
    {
      "epoch": 15.55362192734392,
      "grad_norm": 0.000470901868538931,
      "learning_rate": 6.889275614531217e-06,
      "loss": 0.0046,
      "step": 286000
    },
    {
      "epoch": 15.580813574070046,
      "grad_norm": 0.16011016070842743,
      "learning_rate": 6.883837285185991e-06,
      "loss": 0.0041,
      "step": 286500
    },
    {
      "epoch": 15.608005220796171,
      "grad_norm": 0.008064665831625462,
      "learning_rate": 6.878398955840766e-06,
      "loss": 0.0028,
      "step": 287000
    },
    {
      "epoch": 15.635196867522296,
      "grad_norm": 1.2888329029083252,
      "learning_rate": 6.872960626495541e-06,
      "loss": 0.0035,
      "step": 287500
    },
    {
      "epoch": 15.662388514248423,
      "grad_norm": 0.01231129840016365,
      "learning_rate": 6.867522297150317e-06,
      "loss": 0.0037,
      "step": 288000
    },
    {
      "epoch": 15.689580160974549,
      "grad_norm": 0.0002686446823645383,
      "learning_rate": 6.862083967805091e-06,
      "loss": 0.0033,
      "step": 288500
    },
    {
      "epoch": 15.716771807700674,
      "grad_norm": 1.6821107864379883,
      "learning_rate": 6.8566456384598655e-06,
      "loss": 0.0036,
      "step": 289000
    },
    {
      "epoch": 15.743963454426801,
      "grad_norm": 0.003827363019809127,
      "learning_rate": 6.85120730911464e-06,
      "loss": 0.0043,
      "step": 289500
    },
    {
      "epoch": 15.771155101152926,
      "grad_norm": 0.03282500430941582,
      "learning_rate": 6.845768979769415e-06,
      "loss": 0.0035,
      "step": 290000
    },
    {
      "epoch": 15.798346747879052,
      "grad_norm": 0.020095176994800568,
      "learning_rate": 6.84033065042419e-06,
      "loss": 0.0036,
      "step": 290500
    },
    {
      "epoch": 15.825538394605177,
      "grad_norm": 0.0032423606608062983,
      "learning_rate": 6.8348923210789655e-06,
      "loss": 0.0049,
      "step": 291000
    },
    {
      "epoch": 15.852730041331302,
      "grad_norm": 0.02819984033703804,
      "learning_rate": 6.82945399173374e-06,
      "loss": 0.0028,
      "step": 291500
    },
    {
      "epoch": 15.87992168805743,
      "grad_norm": 0.0026651720982044935,
      "learning_rate": 6.824015662388514e-06,
      "loss": 0.0032,
      "step": 292000
    },
    {
      "epoch": 15.907113334783554,
      "grad_norm": 0.1840675175189972,
      "learning_rate": 6.818577333043289e-06,
      "loss": 0.0026,
      "step": 292500
    },
    {
      "epoch": 15.93430498150968,
      "grad_norm": 0.0006855327519588172,
      "learning_rate": 6.813139003698065e-06,
      "loss": 0.0028,
      "step": 293000
    },
    {
      "epoch": 15.961496628235807,
      "grad_norm": 0.3514760732650757,
      "learning_rate": 6.80770067435284e-06,
      "loss": 0.0048,
      "step": 293500
    },
    {
      "epoch": 15.988688274961932,
      "grad_norm": 0.10397683829069138,
      "learning_rate": 6.802262345007614e-06,
      "loss": 0.002,
      "step": 294000
    },
    {
      "epoch": 16.0,
      "eval_exact_match": 0.8854,
      "eval_loss": 0.3592107594013214,
      "eval_runtime": 1020.086,
      "eval_samples_per_second": 11.153,
      "eval_steps_per_second": 0.698,
      "step": 294208
    },
    {
      "epoch": 16.015879921688057,
      "grad_norm": 0.0021444354206323624,
      "learning_rate": 6.7968240156623885e-06,
      "loss": 0.0022,
      "step": 294500
    },
    {
      "epoch": 16.043071568414184,
      "grad_norm": 0.3667925000190735,
      "learning_rate": 6.791385686317164e-06,
      "loss": 0.0022,
      "step": 295000
    },
    {
      "epoch": 16.070263215140308,
      "grad_norm": 0.05687262490391731,
      "learning_rate": 6.785947356971939e-06,
      "loss": 0.0018,
      "step": 295500
    },
    {
      "epoch": 16.097454861866435,
      "grad_norm": 0.017906086519360542,
      "learning_rate": 6.780509027626714e-06,
      "loss": 0.0021,
      "step": 296000
    },
    {
      "epoch": 16.124646508592562,
      "grad_norm": 0.38542675971984863,
      "learning_rate": 6.7750706982814885e-06,
      "loss": 0.0033,
      "step": 296500
    },
    {
      "epoch": 16.151838155318686,
      "grad_norm": 0.0025707464665174484,
      "learning_rate": 6.769632368936263e-06,
      "loss": 0.0019,
      "step": 297000
    },
    {
      "epoch": 16.179029802044813,
      "grad_norm": 0.017234420403838158,
      "learning_rate": 6.764194039591038e-06,
      "loss": 0.0019,
      "step": 297500
    },
    {
      "epoch": 16.206221448770936,
      "grad_norm": 0.02613496407866478,
      "learning_rate": 6.758755710245813e-06,
      "loss": 0.0029,
      "step": 298000
    },
    {
      "epoch": 16.233413095497063,
      "grad_norm": 0.1518625020980835,
      "learning_rate": 6.7533173809005884e-06,
      "loss": 0.002,
      "step": 298500
    },
    {
      "epoch": 16.26060474222319,
      "grad_norm": 0.017356229946017265,
      "learning_rate": 6.747879051555363e-06,
      "loss": 0.0015,
      "step": 299000
    },
    {
      "epoch": 16.287796388949314,
      "grad_norm": 0.02871873788535595,
      "learning_rate": 6.742440722210137e-06,
      "loss": 0.003,
      "step": 299500
    },
    {
      "epoch": 16.31498803567544,
      "grad_norm": 0.0028448388911783695,
      "learning_rate": 6.737002392864912e-06,
      "loss": 0.0016,
      "step": 300000
    },
    {
      "epoch": 16.342179682401568,
      "grad_norm": 0.03786061704158783,
      "learning_rate": 6.7315640635196876e-06,
      "loss": 0.0024,
      "step": 300500
    },
    {
      "epoch": 16.36937132912769,
      "grad_norm": 0.17070689797401428,
      "learning_rate": 6.726125734174462e-06,
      "loss": 0.0038,
      "step": 301000
    },
    {
      "epoch": 16.39656297585382,
      "grad_norm": 0.04375317320227623,
      "learning_rate": 6.720687404829237e-06,
      "loss": 0.0027,
      "step": 301500
    },
    {
      "epoch": 16.423754622579942,
      "grad_norm": 0.0004980552475899458,
      "learning_rate": 6.7152490754840115e-06,
      "loss": 0.0027,
      "step": 302000
    },
    {
      "epoch": 16.45094626930607,
      "grad_norm": 0.00025424736668355763,
      "learning_rate": 6.709810746138786e-06,
      "loss": 0.0027,
      "step": 302500
    },
    {
      "epoch": 16.478137916032196,
      "grad_norm": 0.0015307448338717222,
      "learning_rate": 6.704372416793562e-06,
      "loss": 0.0031,
      "step": 303000
    },
    {
      "epoch": 16.50532956275832,
      "grad_norm": 0.03277304396033287,
      "learning_rate": 6.698934087448336e-06,
      "loss": 0.0039,
      "step": 303500
    },
    {
      "epoch": 16.532521209484447,
      "grad_norm": 0.0010686165187507868,
      "learning_rate": 6.6934957581031115e-06,
      "loss": 0.0031,
      "step": 304000
    },
    {
      "epoch": 16.559712856210574,
      "grad_norm": 0.014178049750626087,
      "learning_rate": 6.688057428757886e-06,
      "loss": 0.0027,
      "step": 304500
    },
    {
      "epoch": 16.586904502936697,
      "grad_norm": 0.9002902507781982,
      "learning_rate": 6.68261909941266e-06,
      "loss": 0.0033,
      "step": 305000
    },
    {
      "epoch": 16.614096149662824,
      "grad_norm": 0.06729450821876526,
      "learning_rate": 6.677180770067436e-06,
      "loss": 0.0028,
      "step": 305500
    },
    {
      "epoch": 16.641287796388948,
      "grad_norm": 0.3082190752029419,
      "learning_rate": 6.671742440722211e-06,
      "loss": 0.0024,
      "step": 306000
    },
    {
      "epoch": 16.668479443115075,
      "grad_norm": 0.0023857878986746073,
      "learning_rate": 6.666304111376986e-06,
      "loss": 0.002,
      "step": 306500
    },
    {
      "epoch": 16.695671089841202,
      "grad_norm": 0.2701680064201355,
      "learning_rate": 6.66086578203176e-06,
      "loss": 0.0034,
      "step": 307000
    },
    {
      "epoch": 16.722862736567325,
      "grad_norm": 0.0009946366772055626,
      "learning_rate": 6.6554274526865345e-06,
      "loss": 0.0028,
      "step": 307500
    },
    {
      "epoch": 16.750054383293453,
      "grad_norm": 0.006399666890501976,
      "learning_rate": 6.6499891233413106e-06,
      "loss": 0.0028,
      "step": 308000
    },
    {
      "epoch": 16.77724603001958,
      "grad_norm": 1.565460443496704,
      "learning_rate": 6.644550793996085e-06,
      "loss": 0.0026,
      "step": 308500
    },
    {
      "epoch": 16.804437676745703,
      "grad_norm": 0.40049976110458374,
      "learning_rate": 6.63911246465086e-06,
      "loss": 0.0029,
      "step": 309000
    },
    {
      "epoch": 16.83162932347183,
      "grad_norm": 0.30652865767478943,
      "learning_rate": 6.6336741353056345e-06,
      "loss": 0.0032,
      "step": 309500
    },
    {
      "epoch": 16.858820970197954,
      "grad_norm": 0.2172711193561554,
      "learning_rate": 6.628235805960409e-06,
      "loss": 0.0031,
      "step": 310000
    },
    {
      "epoch": 16.88601261692408,
      "grad_norm": 0.010080419480800629,
      "learning_rate": 6.622797476615185e-06,
      "loss": 0.0028,
      "step": 310500
    },
    {
      "epoch": 16.913204263650208,
      "grad_norm": 0.03477318584918976,
      "learning_rate": 6.617359147269959e-06,
      "loss": 0.0037,
      "step": 311000
    },
    {
      "epoch": 16.94039591037633,
      "grad_norm": 0.0014530881308019161,
      "learning_rate": 6.6119208179247344e-06,
      "loss": 0.003,
      "step": 311500
    },
    {
      "epoch": 16.96758755710246,
      "grad_norm": 0.008372816257178783,
      "learning_rate": 6.606482488579509e-06,
      "loss": 0.0023,
      "step": 312000
    },
    {
      "epoch": 16.994779203828585,
      "grad_norm": 0.00021387383458204567,
      "learning_rate": 6.601044159234283e-06,
      "loss": 0.0024,
      "step": 312500
    },
    {
      "epoch": 17.0,
      "eval_exact_match": 0.887,
      "eval_loss": 0.3400750756263733,
      "eval_runtime": 1019.1709,
      "eval_samples_per_second": 11.163,
      "eval_steps_per_second": 0.699,
      "step": 312596
    },
    {
      "epoch": 17.02197085055471,
      "grad_norm": 0.0042693424038589,
      "learning_rate": 6.595605829889059e-06,
      "loss": 0.0027,
      "step": 313000
    },
    {
      "epoch": 17.049162497280836,
      "grad_norm": 0.005676996428519487,
      "learning_rate": 6.5901675005438336e-06,
      "loss": 0.0024,
      "step": 313500
    },
    {
      "epoch": 17.07635414400696,
      "grad_norm": 0.006243901327252388,
      "learning_rate": 6.584729171198608e-06,
      "loss": 0.0019,
      "step": 314000
    },
    {
      "epoch": 17.103545790733087,
      "grad_norm": 0.0003304806014057249,
      "learning_rate": 6.579290841853383e-06,
      "loss": 0.0023,
      "step": 314500
    },
    {
      "epoch": 17.130737437459214,
      "grad_norm": 0.001669013872742653,
      "learning_rate": 6.5738525125081575e-06,
      "loss": 0.0022,
      "step": 315000
    },
    {
      "epoch": 17.157929084185337,
      "grad_norm": 0.07920686900615692,
      "learning_rate": 6.5684141831629335e-06,
      "loss": 0.0011,
      "step": 315500
    },
    {
      "epoch": 17.185120730911464,
      "grad_norm": 0.32899802923202515,
      "learning_rate": 6.562975853817708e-06,
      "loss": 0.0027,
      "step": 316000
    },
    {
      "epoch": 17.21231237763759,
      "grad_norm": 0.0661708191037178,
      "learning_rate": 6.557537524472482e-06,
      "loss": 0.003,
      "step": 316500
    },
    {
      "epoch": 17.239504024363715,
      "grad_norm": 0.002496266271919012,
      "learning_rate": 6.5520991951272574e-06,
      "loss": 0.0033,
      "step": 317000
    },
    {
      "epoch": 17.266695671089842,
      "grad_norm": 0.028615722432732582,
      "learning_rate": 6.546660865782032e-06,
      "loss": 0.0022,
      "step": 317500
    },
    {
      "epoch": 17.293887317815965,
      "grad_norm": 0.002526836236938834,
      "learning_rate": 6.541222536436808e-06,
      "loss": 0.0028,
      "step": 318000
    },
    {
      "epoch": 17.321078964542092,
      "grad_norm": 0.0003039772855117917,
      "learning_rate": 6.535784207091582e-06,
      "loss": 0.002,
      "step": 318500
    },
    {
      "epoch": 17.34827061126822,
      "grad_norm": 0.004081173334270716,
      "learning_rate": 6.5303458777463566e-06,
      "loss": 0.0012,
      "step": 319000
    },
    {
      "epoch": 17.375462257994343,
      "grad_norm": 0.09041595458984375,
      "learning_rate": 6.524907548401132e-06,
      "loss": 0.0023,
      "step": 319500
    },
    {
      "epoch": 17.40265390472047,
      "grad_norm": 0.0002773249871097505,
      "learning_rate": 6.519469219055906e-06,
      "loss": 0.002,
      "step": 320000
    },
    {
      "epoch": 17.429845551446597,
      "grad_norm": 0.0009189367992803454,
      "learning_rate": 6.514030889710682e-06,
      "loss": 0.0029,
      "step": 320500
    },
    {
      "epoch": 17.45703719817272,
      "grad_norm": 7.472214929293841e-05,
      "learning_rate": 6.5085925603654565e-06,
      "loss": 0.0034,
      "step": 321000
    },
    {
      "epoch": 17.484228844898848,
      "grad_norm": 0.011273865588009357,
      "learning_rate": 6.503154231020231e-06,
      "loss": 0.0021,
      "step": 321500
    },
    {
      "epoch": 17.51142049162497,
      "grad_norm": 0.008078432641923428,
      "learning_rate": 6.497715901675006e-06,
      "loss": 0.0018,
      "step": 322000
    },
    {
      "epoch": 17.5386121383511,
      "grad_norm": 0.0031718432437628508,
      "learning_rate": 6.4922775723297805e-06,
      "loss": 0.0029,
      "step": 322500
    },
    {
      "epoch": 17.565803785077225,
      "grad_norm": 0.10418205708265305,
      "learning_rate": 6.4868392429845565e-06,
      "loss": 0.0025,
      "step": 323000
    },
    {
      "epoch": 17.59299543180335,
      "grad_norm": 4.8601868911646307e-05,
      "learning_rate": 6.481400913639331e-06,
      "loss": 0.0024,
      "step": 323500
    },
    {
      "epoch": 17.620187078529476,
      "grad_norm": 0.0067114997655153275,
      "learning_rate": 6.475962584294105e-06,
      "loss": 0.0019,
      "step": 324000
    },
    {
      "epoch": 17.647378725255603,
      "grad_norm": 0.0027572689577937126,
      "learning_rate": 6.4705242549488804e-06,
      "loss": 0.0018,
      "step": 324500
    },
    {
      "epoch": 17.674570371981726,
      "grad_norm": 0.0052411542274057865,
      "learning_rate": 6.465085925603655e-06,
      "loss": 0.0017,
      "step": 325000
    },
    {
      "epoch": 17.701762018707853,
      "grad_norm": 0.048968348652124405,
      "learning_rate": 6.45964759625843e-06,
      "loss": 0.0041,
      "step": 325500
    },
    {
      "epoch": 17.728953665433977,
      "grad_norm": 0.0033183780033141375,
      "learning_rate": 6.454209266913205e-06,
      "loss": 0.0027,
      "step": 326000
    },
    {
      "epoch": 17.756145312160104,
      "grad_norm": 0.002152485540136695,
      "learning_rate": 6.4487709375679795e-06,
      "loss": 0.0029,
      "step": 326500
    },
    {
      "epoch": 17.78333695888623,
      "grad_norm": 0.1940182000398636,
      "learning_rate": 6.443332608222754e-06,
      "loss": 0.005,
      "step": 327000
    },
    {
      "epoch": 17.810528605612355,
      "grad_norm": 0.00849226675927639,
      "learning_rate": 6.437894278877529e-06,
      "loss": 0.0027,
      "step": 327500
    },
    {
      "epoch": 17.83772025233848,
      "grad_norm": 0.003068720456212759,
      "learning_rate": 6.432455949532304e-06,
      "loss": 0.0023,
      "step": 328000
    },
    {
      "epoch": 17.86491189906461,
      "grad_norm": 0.8710117936134338,
      "learning_rate": 6.4270176201870795e-06,
      "loss": 0.0027,
      "step": 328500
    },
    {
      "epoch": 17.892103545790732,
      "grad_norm": 0.0016409781528636813,
      "learning_rate": 6.421579290841854e-06,
      "loss": 0.0025,
      "step": 329000
    },
    {
      "epoch": 17.91929519251686,
      "grad_norm": 0.01407603919506073,
      "learning_rate": 6.416140961496628e-06,
      "loss": 0.0026,
      "step": 329500
    },
    {
      "epoch": 17.946486839242986,
      "grad_norm": 0.02472531422972679,
      "learning_rate": 6.4107026321514034e-06,
      "loss": 0.0043,
      "step": 330000
    },
    {
      "epoch": 17.97367848596911,
      "grad_norm": 0.0002928628819063306,
      "learning_rate": 6.405264302806179e-06,
      "loss": 0.003,
      "step": 330500
    },
    {
      "epoch": 18.0,
      "eval_exact_match": 0.8866,
      "eval_loss": 0.3563012182712555,
      "eval_runtime": 1021.8314,
      "eval_samples_per_second": 11.134,
      "eval_steps_per_second": 0.697,
      "step": 330984
    },
    {
      "epoch": 18.000870132695237,
      "grad_norm": 0.0025135932955890894,
      "learning_rate": 6.399825973460954e-06,
      "loss": 0.0043,
      "step": 331000
    },
    {
      "epoch": 18.02806177942136,
      "grad_norm": 0.0022032412234693766,
      "learning_rate": 6.394387644115728e-06,
      "loss": 0.0017,
      "step": 331500
    },
    {
      "epoch": 18.055253426147488,
      "grad_norm": 0.00628103269264102,
      "learning_rate": 6.3889493147705026e-06,
      "loss": 0.0011,
      "step": 332000
    },
    {
      "epoch": 18.082445072873615,
      "grad_norm": 0.0016064423834905028,
      "learning_rate": 6.383510985425278e-06,
      "loss": 0.0015,
      "step": 332500
    },
    {
      "epoch": 18.109636719599738,
      "grad_norm": 0.036574505269527435,
      "learning_rate": 6.378072656080053e-06,
      "loss": 0.0017,
      "step": 333000
    },
    {
      "epoch": 18.136828366325865,
      "grad_norm": 0.017870482057332993,
      "learning_rate": 6.372634326734828e-06,
      "loss": 0.001,
      "step": 333500
    },
    {
      "epoch": 18.16402001305199,
      "grad_norm": 0.013820737600326538,
      "learning_rate": 6.3671959973896025e-06,
      "loss": 0.0017,
      "step": 334000
    },
    {
      "epoch": 18.191211659778116,
      "grad_norm": 0.008671647869050503,
      "learning_rate": 6.361757668044377e-06,
      "loss": 0.0031,
      "step": 334500
    },
    {
      "epoch": 18.218403306504243,
      "grad_norm": 0.0009647606639191508,
      "learning_rate": 6.356319338699152e-06,
      "loss": 0.0028,
      "step": 335000
    },
    {
      "epoch": 18.245594953230366,
      "grad_norm": 0.316081166267395,
      "learning_rate": 6.350881009353927e-06,
      "loss": 0.0023,
      "step": 335500
    },
    {
      "epoch": 18.272786599956493,
      "grad_norm": 0.006871197838336229,
      "learning_rate": 6.3454426800087025e-06,
      "loss": 0.0026,
      "step": 336000
    },
    {
      "epoch": 18.29997824668262,
      "grad_norm": 0.004635128192603588,
      "learning_rate": 6.340004350663477e-06,
      "loss": 0.0023,
      "step": 336500
    },
    {
      "epoch": 18.327169893408744,
      "grad_norm": 0.77015620470047,
      "learning_rate": 6.334566021318251e-06,
      "loss": 0.0017,
      "step": 337000
    },
    {
      "epoch": 18.35436154013487,
      "grad_norm": 5.524133666767739e-05,
      "learning_rate": 6.329127691973026e-06,
      "loss": 0.0016,
      "step": 337500
    },
    {
      "epoch": 18.381553186860998,
      "grad_norm": 0.042669035494327545,
      "learning_rate": 6.323689362627802e-06,
      "loss": 0.0014,
      "step": 338000
    },
    {
      "epoch": 18.40874483358712,
      "grad_norm": 0.004605181515216827,
      "learning_rate": 6.318251033282576e-06,
      "loss": 0.0021,
      "step": 338500
    },
    {
      "epoch": 18.43593648031325,
      "grad_norm": 0.0011207705829292536,
      "learning_rate": 6.312812703937351e-06,
      "loss": 0.0022,
      "step": 339000
    },
    {
      "epoch": 18.463128127039372,
      "grad_norm": 0.0714796632528305,
      "learning_rate": 6.3073743745921255e-06,
      "loss": 0.0023,
      "step": 339500
    },
    {
      "epoch": 18.4903197737655,
      "grad_norm": 0.2758328318595886,
      "learning_rate": 6.3019360452469e-06,
      "loss": 0.0029,
      "step": 340000
    },
    {
      "epoch": 18.517511420491626,
      "grad_norm": 0.0030946761835366488,
      "learning_rate": 6.296497715901676e-06,
      "loss": 0.0025,
      "step": 340500
    },
    {
      "epoch": 18.54470306721775,
      "grad_norm": 0.001809292589314282,
      "learning_rate": 6.29105938655645e-06,
      "loss": 0.003,
      "step": 341000
    },
    {
      "epoch": 18.571894713943877,
      "grad_norm": 1.4280059337615967,
      "learning_rate": 6.2856210572112255e-06,
      "loss": 0.0022,
      "step": 341500
    },
    {
      "epoch": 18.599086360670004,
      "grad_norm": 0.003949850331991911,
      "learning_rate": 6.280182727866e-06,
      "loss": 0.0022,
      "step": 342000
    },
    {
      "epoch": 18.626278007396127,
      "grad_norm": 0.008224201388657093,
      "learning_rate": 6.274744398520774e-06,
      "loss": 0.0024,
      "step": 342500
    },
    {
      "epoch": 18.653469654122254,
      "grad_norm": 0.0052871499210596085,
      "learning_rate": 6.26930606917555e-06,
      "loss": 0.0026,
      "step": 343000
    },
    {
      "epoch": 18.680661300848378,
      "grad_norm": 0.0848020538687706,
      "learning_rate": 6.263867739830325e-06,
      "loss": 0.0023,
      "step": 343500
    },
    {
      "epoch": 18.707852947574505,
      "grad_norm": 0.00029155545053072274,
      "learning_rate": 6.2584294104851e-06,
      "loss": 0.0024,
      "step": 344000
    },
    {
      "epoch": 18.735044594300632,
      "grad_norm": 0.00019738083938136697,
      "learning_rate": 6.252991081139874e-06,
      "loss": 0.0021,
      "step": 344500
    },
    {
      "epoch": 18.762236241026756,
      "grad_norm": 0.009960692375898361,
      "learning_rate": 6.2475527517946485e-06,
      "loss": 0.0032,
      "step": 345000
    },
    {
      "epoch": 18.789427887752883,
      "grad_norm": 0.03927024081349373,
      "learning_rate": 6.242114422449425e-06,
      "loss": 0.0027,
      "step": 345500
    },
    {
      "epoch": 18.81661953447901,
      "grad_norm": 1.062012791633606,
      "learning_rate": 6.236676093104199e-06,
      "loss": 0.002,
      "step": 346000
    },
    {
      "epoch": 18.843811181205133,
      "grad_norm": 7.133814506232738e-05,
      "learning_rate": 6.231237763758974e-06,
      "loss": 0.0022,
      "step": 346500
    },
    {
      "epoch": 18.87100282793126,
      "grad_norm": 0.00028946486418135464,
      "learning_rate": 6.2257994344137485e-06,
      "loss": 0.0036,
      "step": 347000
    },
    {
      "epoch": 18.898194474657384,
      "grad_norm": 0.0008498073439113796,
      "learning_rate": 6.220361105068523e-06,
      "loss": 0.002,
      "step": 347500
    },
    {
      "epoch": 18.92538612138351,
      "grad_norm": 0.009209150448441505,
      "learning_rate": 6.214922775723299e-06,
      "loss": 0.0022,
      "step": 348000
    },
    {
      "epoch": 18.952577768109638,
      "grad_norm": 0.0022132894955575466,
      "learning_rate": 6.209484446378073e-06,
      "loss": 0.0019,
      "step": 348500
    },
    {
      "epoch": 18.97976941483576,
      "grad_norm": 0.09578939527273178,
      "learning_rate": 6.2040461170328485e-06,
      "loss": 0.0033,
      "step": 349000
    },
    {
      "epoch": 19.0,
      "eval_exact_match": 0.89,
      "eval_loss": 0.36704039573669434,
      "eval_runtime": 1019.123,
      "eval_samples_per_second": 11.164,
      "eval_steps_per_second": 0.699,
      "step": 349372
    },
    {
      "epoch": 19.00696106156189,
      "grad_norm": 0.017655082046985626,
      "learning_rate": 6.198607787687623e-06,
      "loss": 0.0025,
      "step": 349500
    },
    {
      "epoch": 19.034152708288016,
      "grad_norm": 0.15102127194404602,
      "learning_rate": 6.193169458342397e-06,
      "loss": 0.0013,
      "step": 350000
    },
    {
      "epoch": 19.06134435501414,
      "grad_norm": 0.0015626910608261824,
      "learning_rate": 6.187731128997173e-06,
      "loss": 0.0012,
      "step": 350500
    },
    {
      "epoch": 19.088536001740266,
      "grad_norm": 0.002951191971078515,
      "learning_rate": 6.182292799651948e-06,
      "loss": 0.0028,
      "step": 351000
    },
    {
      "epoch": 19.11572764846639,
      "grad_norm": 4.054812431335449,
      "learning_rate": 6.176854470306722e-06,
      "loss": 0.0027,
      "step": 351500
    },
    {
      "epoch": 19.142919295192517,
      "grad_norm": 0.000679698190651834,
      "learning_rate": 6.171416140961497e-06,
      "loss": 0.0017,
      "step": 352000
    },
    {
      "epoch": 19.170110941918644,
      "grad_norm": 0.0018216141033917665,
      "learning_rate": 6.1659778116162715e-06,
      "loss": 0.0019,
      "step": 352500
    },
    {
      "epoch": 19.197302588644767,
      "grad_norm": 9.838391304016113,
      "learning_rate": 6.160539482271048e-06,
      "loss": 0.0029,
      "step": 353000
    },
    {
      "epoch": 19.224494235370894,
      "grad_norm": 0.00637414213269949,
      "learning_rate": 6.155101152925822e-06,
      "loss": 0.0024,
      "step": 353500
    },
    {
      "epoch": 19.25168588209702,
      "grad_norm": 0.004731600638478994,
      "learning_rate": 6.149662823580596e-06,
      "loss": 0.0018,
      "step": 354000
    },
    {
      "epoch": 19.278877528823145,
      "grad_norm": 0.0009082912583835423,
      "learning_rate": 6.1442244942353715e-06,
      "loss": 0.0019,
      "step": 354500
    },
    {
      "epoch": 19.306069175549272,
      "grad_norm": 0.04166721552610397,
      "learning_rate": 6.138786164890146e-06,
      "loss": 0.003,
      "step": 355000
    },
    {
      "epoch": 19.333260822275395,
      "grad_norm": 0.002723533660173416,
      "learning_rate": 6.133347835544922e-06,
      "loss": 0.0022,
      "step": 355500
    },
    {
      "epoch": 19.360452469001523,
      "grad_norm": 0.05888589471578598,
      "learning_rate": 6.127909506199696e-06,
      "loss": 0.0016,
      "step": 356000
    },
    {
      "epoch": 19.38764411572765,
      "grad_norm": 0.17512783408164978,
      "learning_rate": 6.122471176854471e-06,
      "loss": 0.0014,
      "step": 356500
    },
    {
      "epoch": 19.414835762453773,
      "grad_norm": 0.021413423120975494,
      "learning_rate": 6.117032847509246e-06,
      "loss": 0.0015,
      "step": 357000
    },
    {
      "epoch": 19.4420274091799,
      "grad_norm": 0.5573779940605164,
      "learning_rate": 6.11159451816402e-06,
      "loss": 0.0022,
      "step": 357500
    },
    {
      "epoch": 19.469219055906027,
      "grad_norm": 0.003655732609331608,
      "learning_rate": 6.106156188818796e-06,
      "loss": 0.0015,
      "step": 358000
    },
    {
      "epoch": 19.49641070263215,
      "grad_norm": 0.01244965847581625,
      "learning_rate": 6.100717859473571e-06,
      "loss": 0.0022,
      "step": 358500
    },
    {
      "epoch": 19.523602349358278,
      "grad_norm": 0.0007435994339175522,
      "learning_rate": 6.095279530128345e-06,
      "loss": 0.0013,
      "step": 359000
    },
    {
      "epoch": 19.5507939960844,
      "grad_norm": 0.00016936496831476688,
      "learning_rate": 6.08984120078312e-06,
      "loss": 0.0029,
      "step": 359500
    },
    {
      "epoch": 19.57798564281053,
      "grad_norm": 0.0325549952685833,
      "learning_rate": 6.0844028714378945e-06,
      "loss": 0.0029,
      "step": 360000
    },
    {
      "epoch": 19.605177289536655,
      "grad_norm": 2.8864591121673584,
      "learning_rate": 6.0789645420926706e-06,
      "loss": 0.0023,
      "step": 360500
    },
    {
      "epoch": 19.63236893626278,
      "grad_norm": 4.946041584014893,
      "learning_rate": 6.073526212747445e-06,
      "loss": 0.0021,
      "step": 361000
    },
    {
      "epoch": 19.659560582988906,
      "grad_norm": 0.006130901165306568,
      "learning_rate": 6.068087883402219e-06,
      "loss": 0.0023,
      "step": 361500
    },
    {
      "epoch": 19.686752229715033,
      "grad_norm": 0.004118889570236206,
      "learning_rate": 6.0626495540569945e-06,
      "loss": 0.003,
      "step": 362000
    },
    {
      "epoch": 19.713943876441157,
      "grad_norm": 0.002466418081894517,
      "learning_rate": 6.057211224711769e-06,
      "loss": 0.0019,
      "step": 362500
    },
    {
      "epoch": 19.741135523167284,
      "grad_norm": 0.00014884216943755746,
      "learning_rate": 6.051772895366544e-06,
      "loss": 0.0016,
      "step": 363000
    },
    {
      "epoch": 19.768327169893407,
      "grad_norm": 8.588576019974425e-05,
      "learning_rate": 6.046334566021319e-06,
      "loss": 0.0022,
      "step": 363500
    },
    {
      "epoch": 19.795518816619534,
      "grad_norm": 0.0002782981318887323,
      "learning_rate": 6.040896236676094e-06,
      "loss": 0.0018,
      "step": 364000
    },
    {
      "epoch": 19.82271046334566,
      "grad_norm": 0.011060014367103577,
      "learning_rate": 6.035457907330868e-06,
      "loss": 0.0024,
      "step": 364500
    },
    {
      "epoch": 19.849902110071785,
      "grad_norm": 0.010638149455189705,
      "learning_rate": 6.030019577985643e-06,
      "loss": 0.0016,
      "step": 365000
    },
    {
      "epoch": 19.877093756797912,
      "grad_norm": 8.910888573154807e-05,
      "learning_rate": 6.024581248640418e-06,
      "loss": 0.0024,
      "step": 365500
    },
    {
      "epoch": 19.90428540352404,
      "grad_norm": 0.0009621421922929585,
      "learning_rate": 6.0191429192951936e-06,
      "loss": 0.002,
      "step": 366000
    },
    {
      "epoch": 19.931477050250162,
      "grad_norm": 3.6746921978192404e-05,
      "learning_rate": 6.013704589949968e-06,
      "loss": 0.0028,
      "step": 366500
    },
    {
      "epoch": 19.95866869697629,
      "grad_norm": 0.10308805108070374,
      "learning_rate": 6.008266260604742e-06,
      "loss": 0.002,
      "step": 367000
    },
    {
      "epoch": 19.985860343702413,
      "grad_norm": 0.002866877242922783,
      "learning_rate": 6.0028279312595175e-06,
      "loss": 0.0022,
      "step": 367500
    },
    {
      "epoch": 20.0,
      "eval_exact_match": 0.8862,
      "eval_loss": 0.3718360364437103,
      "eval_runtime": 1022.1019,
      "eval_samples_per_second": 11.131,
      "eval_steps_per_second": 0.697,
      "step": 367760
    },
    {
      "epoch": 20.01305199042854,
      "grad_norm": 0.0013934267917647958,
      "learning_rate": 5.997389601914292e-06,
      "loss": 0.0014,
      "step": 368000
    },
    {
      "epoch": 20.040243637154667,
      "grad_norm": 0.026680724695324898,
      "learning_rate": 5.991951272569068e-06,
      "loss": 0.0011,
      "step": 368500
    },
    {
      "epoch": 20.06743528388079,
      "grad_norm": 3.3085291385650635,
      "learning_rate": 5.986512943223842e-06,
      "loss": 0.0012,
      "step": 369000
    },
    {
      "epoch": 20.094626930606918,
      "grad_norm": 0.001910144928842783,
      "learning_rate": 5.981074613878617e-06,
      "loss": 0.001,
      "step": 369500
    },
    {
      "epoch": 20.121818577333045,
      "grad_norm": 0.2561395764350891,
      "learning_rate": 5.975636284533392e-06,
      "loss": 0.0007,
      "step": 370000
    },
    {
      "epoch": 20.149010224059168,
      "grad_norm": 0.02133098430931568,
      "learning_rate": 5.970197955188166e-06,
      "loss": 0.0013,
      "step": 370500
    },
    {
      "epoch": 20.176201870785295,
      "grad_norm": 0.001692277379333973,
      "learning_rate": 5.964759625842942e-06,
      "loss": 0.0022,
      "step": 371000
    },
    {
      "epoch": 20.20339351751142,
      "grad_norm": 0.12766559422016144,
      "learning_rate": 5.959321296497717e-06,
      "loss": 0.0018,
      "step": 371500
    },
    {
      "epoch": 20.230585164237546,
      "grad_norm": 0.007082671392709017,
      "learning_rate": 5.953882967152491e-06,
      "loss": 0.0035,
      "step": 372000
    },
    {
      "epoch": 20.257776810963673,
      "grad_norm": 0.0010481767822057009,
      "learning_rate": 5.948444637807266e-06,
      "loss": 0.0017,
      "step": 372500
    },
    {
      "epoch": 20.284968457689796,
      "grad_norm": 0.00016043047071434557,
      "learning_rate": 5.9430063084620405e-06,
      "loss": 0.0017,
      "step": 373000
    },
    {
      "epoch": 20.312160104415923,
      "grad_norm": 0.6118445992469788,
      "learning_rate": 5.9375679791168165e-06,
      "loss": 0.0021,
      "step": 373500
    },
    {
      "epoch": 20.33935175114205,
      "grad_norm": 0.0007704813033342361,
      "learning_rate": 5.932129649771591e-06,
      "loss": 0.0019,
      "step": 374000
    },
    {
      "epoch": 20.366543397868174,
      "grad_norm": 0.0017797828186303377,
      "learning_rate": 5.926691320426365e-06,
      "loss": 0.002,
      "step": 374500
    },
    {
      "epoch": 20.3937350445943,
      "grad_norm": 0.002132853027433157,
      "learning_rate": 5.9212529910811405e-06,
      "loss": 0.0017,
      "step": 375000
    },
    {
      "epoch": 20.420926691320425,
      "grad_norm": 0.004612368531525135,
      "learning_rate": 5.915814661735915e-06,
      "loss": 0.0016,
      "step": 375500
    },
    {
      "epoch": 20.44811833804655,
      "grad_norm": 0.011427465826272964,
      "learning_rate": 5.91037633239069e-06,
      "loss": 0.0023,
      "step": 376000
    },
    {
      "epoch": 20.47530998477268,
      "grad_norm": 0.001849128631874919,
      "learning_rate": 5.904938003045465e-06,
      "loss": 0.002,
      "step": 376500
    },
    {
      "epoch": 20.502501631498802,
      "grad_norm": 0.005862530320882797,
      "learning_rate": 5.89949967370024e-06,
      "loss": 0.0021,
      "step": 377000
    },
    {
      "epoch": 20.52969327822493,
      "grad_norm": 0.010311821475625038,
      "learning_rate": 5.894061344355014e-06,
      "loss": 0.0022,
      "step": 377500
    },
    {
      "epoch": 20.556884924951056,
      "grad_norm": 1.0294238563801628e-05,
      "learning_rate": 5.888623015009789e-06,
      "loss": 0.0018,
      "step": 378000
    },
    {
      "epoch": 20.58407657167718,
      "grad_norm": 0.0071517303586006165,
      "learning_rate": 5.883184685664564e-06,
      "loss": 0.0022,
      "step": 378500
    },
    {
      "epoch": 20.611268218403307,
      "grad_norm": 0.010942152701318264,
      "learning_rate": 5.8777463563193396e-06,
      "loss": 0.0017,
      "step": 379000
    },
    {
      "epoch": 20.638459865129434,
      "grad_norm": 0.10945701599121094,
      "learning_rate": 5.872308026974114e-06,
      "loss": 0.0019,
      "step": 379500
    },
    {
      "epoch": 20.665651511855557,
      "grad_norm": 0.00033361915848217905,
      "learning_rate": 5.866869697628888e-06,
      "loss": 0.0026,
      "step": 380000
    },
    {
      "epoch": 20.692843158581685,
      "grad_norm": 0.326998770236969,
      "learning_rate": 5.8614313682836635e-06,
      "loss": 0.0011,
      "step": 380500
    },
    {
      "epoch": 20.720034805307808,
      "grad_norm": 4.231648921966553,
      "learning_rate": 5.855993038938439e-06,
      "loss": 0.0033,
      "step": 381000
    },
    {
      "epoch": 20.747226452033935,
      "grad_norm": 0.006209293380379677,
      "learning_rate": 5.850554709593214e-06,
      "loss": 0.0027,
      "step": 381500
    },
    {
      "epoch": 20.774418098760062,
      "grad_norm": 0.0005513517535291612,
      "learning_rate": 5.845116380247988e-06,
      "loss": 0.0023,
      "step": 382000
    },
    {
      "epoch": 20.801609745486186,
      "grad_norm": 0.004190502222627401,
      "learning_rate": 5.839678050902763e-06,
      "loss": 0.0019,
      "step": 382500
    },
    {
      "epoch": 20.828801392212313,
      "grad_norm": 0.001985291251912713,
      "learning_rate": 5.834239721557538e-06,
      "loss": 0.0022,
      "step": 383000
    },
    {
      "epoch": 20.85599303893844,
      "grad_norm": 0.025443822145462036,
      "learning_rate": 5.828801392212313e-06,
      "loss": 0.0019,
      "step": 383500
    },
    {
      "epoch": 20.883184685664563,
      "grad_norm": 0.8501449823379517,
      "learning_rate": 5.823363062867088e-06,
      "loss": 0.0027,
      "step": 384000
    },
    {
      "epoch": 20.91037633239069,
      "grad_norm": 0.03940390795469284,
      "learning_rate": 5.8179247335218626e-06,
      "loss": 0.0015,
      "step": 384500
    },
    {
      "epoch": 20.937567979116814,
      "grad_norm": 0.004881420638412237,
      "learning_rate": 5.812486404176637e-06,
      "loss": 0.0021,
      "step": 385000
    },
    {
      "epoch": 20.96475962584294,
      "grad_norm": 2.895326852798462,
      "learning_rate": 5.807048074831412e-06,
      "loss": 0.0042,
      "step": 385500
    },
    {
      "epoch": 20.991951272569068,
      "grad_norm": 3.779355756705627e-05,
      "learning_rate": 5.801609745486187e-06,
      "loss": 0.0019,
      "step": 386000
    },
    {
      "epoch": 21.0,
      "eval_exact_match": 0.8866,
      "eval_loss": 0.3600582182407379,
      "eval_runtime": 1020.6405,
      "eval_samples_per_second": 11.147,
      "eval_steps_per_second": 0.698,
      "step": 386148
    },
    {
      "epoch": 21.01914291929519,
      "grad_norm": 0.002794648054987192,
      "learning_rate": 5.7961714161409625e-06,
      "loss": 0.0025,
      "step": 386500
    },
    {
      "epoch": 21.04633456602132,
      "grad_norm": 0.026449628174304962,
      "learning_rate": 5.790733086795737e-06,
      "loss": 0.0013,
      "step": 387000
    },
    {
      "epoch": 21.073526212747446,
      "grad_norm": 2.097803417200339e-06,
      "learning_rate": 5.785294757450511e-06,
      "loss": 0.0013,
      "step": 387500
    },
    {
      "epoch": 21.10071785947357,
      "grad_norm": 0.008700955659151077,
      "learning_rate": 5.779856428105286e-06,
      "loss": 0.0013,
      "step": 388000
    },
    {
      "epoch": 21.127909506199696,
      "grad_norm": 8.057923696469516e-05,
      "learning_rate": 5.774418098760062e-06,
      "loss": 0.0015,
      "step": 388500
    },
    {
      "epoch": 21.15510115292582,
      "grad_norm": 0.0005343501688912511,
      "learning_rate": 5.768979769414836e-06,
      "loss": 0.0017,
      "step": 389000
    },
    {
      "epoch": 21.182292799651947,
      "grad_norm": 0.20113813877105713,
      "learning_rate": 5.763541440069611e-06,
      "loss": 0.0015,
      "step": 389500
    },
    {
      "epoch": 21.209484446378074,
      "grad_norm": 0.03034699521958828,
      "learning_rate": 5.758103110724386e-06,
      "loss": 0.0016,
      "step": 390000
    },
    {
      "epoch": 21.236676093104197,
      "grad_norm": 0.14051289856433868,
      "learning_rate": 5.75266478137916e-06,
      "loss": 0.0026,
      "step": 390500
    },
    {
      "epoch": 21.263867739830324,
      "grad_norm": 0.04996541514992714,
      "learning_rate": 5.747226452033936e-06,
      "loss": 0.0027,
      "step": 391000
    },
    {
      "epoch": 21.29105938655645,
      "grad_norm": 0.0018328025471419096,
      "learning_rate": 5.74178812268871e-06,
      "loss": 0.0022,
      "step": 391500
    },
    {
      "epoch": 21.318251033282575,
      "grad_norm": 0.004908634349703789,
      "learning_rate": 5.7363497933434855e-06,
      "loss": 0.0019,
      "step": 392000
    },
    {
      "epoch": 21.345442680008702,
      "grad_norm": 0.0052336654625833035,
      "learning_rate": 5.73091146399826e-06,
      "loss": 0.001,
      "step": 392500
    },
    {
      "epoch": 21.372634326734826,
      "grad_norm": 0.0001200469268951565,
      "learning_rate": 5.725473134653034e-06,
      "loss": 0.0006,
      "step": 393000
    },
    {
      "epoch": 21.399825973460953,
      "grad_norm": 0.01052246242761612,
      "learning_rate": 5.72003480530781e-06,
      "loss": 0.0021,
      "step": 393500
    },
    {
      "epoch": 21.42701762018708,
      "grad_norm": 0.7549381256103516,
      "learning_rate": 5.714596475962585e-06,
      "loss": 0.0029,
      "step": 394000
    },
    {
      "epoch": 21.454209266913203,
      "grad_norm": 0.005689762532711029,
      "learning_rate": 5.70915814661736e-06,
      "loss": 0.0018,
      "step": 394500
    },
    {
      "epoch": 21.48140091363933,
      "grad_norm": 0.0006846439791843295,
      "learning_rate": 5.703719817272134e-06,
      "loss": 0.0019,
      "step": 395000
    },
    {
      "epoch": 21.508592560365457,
      "grad_norm": 0.008784202858805656,
      "learning_rate": 5.698281487926909e-06,
      "loss": 0.0011,
      "step": 395500
    },
    {
      "epoch": 21.53578420709158,
      "grad_norm": 0.7648417353630066,
      "learning_rate": 5.692843158581685e-06,
      "loss": 0.0023,
      "step": 396000
    },
    {
      "epoch": 21.562975853817708,
      "grad_norm": 1.8578131857793778e-05,
      "learning_rate": 5.687404829236459e-06,
      "loss": 0.0022,
      "step": 396500
    },
    {
      "epoch": 21.59016750054383,
      "grad_norm": 0.0013881041668355465,
      "learning_rate": 5.681966499891234e-06,
      "loss": 0.0019,
      "step": 397000
    },
    {
      "epoch": 21.61735914726996,
      "grad_norm": 0.09924587607383728,
      "learning_rate": 5.6765281705460086e-06,
      "loss": 0.0012,
      "step": 397500
    },
    {
      "epoch": 21.644550793996086,
      "grad_norm": 0.0016242824494838715,
      "learning_rate": 5.671089841200783e-06,
      "loss": 0.0022,
      "step": 398000
    },
    {
      "epoch": 21.67174244072221,
      "grad_norm": 0.41596370935440063,
      "learning_rate": 5.665651511855559e-06,
      "loss": 0.0016,
      "step": 398500
    },
    {
      "epoch": 21.698934087448336,
      "grad_norm": 0.0024071908555924892,
      "learning_rate": 5.660213182510333e-06,
      "loss": 0.0023,
      "step": 399000
    },
    {
      "epoch": 21.726125734174463,
      "grad_norm": 4.969572910340503e-05,
      "learning_rate": 5.6547748531651085e-06,
      "loss": 0.0032,
      "step": 399500
    },
    {
      "epoch": 21.753317380900587,
      "grad_norm": 0.0003244426043238491,
      "learning_rate": 5.649336523819883e-06,
      "loss": 0.0022,
      "step": 400000
    },
    {
      "epoch": 21.780509027626714,
      "grad_norm": 0.0028825532644987106,
      "learning_rate": 5.643898194474657e-06,
      "loss": 0.0015,
      "step": 400500
    },
    {
      "epoch": 21.807700674352837,
      "grad_norm": 0.0016660008113831282,
      "learning_rate": 5.638459865129433e-06,
      "loss": 0.0011,
      "step": 401000
    },
    {
      "epoch": 21.834892321078964,
      "grad_norm": 0.00013348861830309033,
      "learning_rate": 5.633021535784208e-06,
      "loss": 0.0018,
      "step": 401500
    },
    {
      "epoch": 21.86208396780509,
      "grad_norm": 0.0353393591940403,
      "learning_rate": 5.627583206438982e-06,
      "loss": 0.001,
      "step": 402000
    },
    {
      "epoch": 21.889275614531215,
      "grad_norm": 0.3004685640335083,
      "learning_rate": 5.622144877093757e-06,
      "loss": 0.002,
      "step": 402500
    },
    {
      "epoch": 21.916467261257342,
      "grad_norm": 0.19285576045513153,
      "learning_rate": 5.6167065477485316e-06,
      "loss": 0.0012,
      "step": 403000
    },
    {
      "epoch": 21.94365890798347,
      "grad_norm": 0.0028728207107633352,
      "learning_rate": 5.611268218403308e-06,
      "loss": 0.0018,
      "step": 403500
    },
    {
      "epoch": 21.970850554709592,
      "grad_norm": 0.004820770584046841,
      "learning_rate": 5.605829889058082e-06,
      "loss": 0.0028,
      "step": 404000
    },
    {
      "epoch": 21.99804220143572,
      "grad_norm": 0.0008347927941940725,
      "learning_rate": 5.600391559712856e-06,
      "loss": 0.0027,
      "step": 404500
    },
    {
      "epoch": 22.0,
      "eval_exact_match": 0.8878,
      "eval_loss": 0.36756497621536255,
      "eval_runtime": 1023.6211,
      "eval_samples_per_second": 11.114,
      "eval_steps_per_second": 0.696,
      "step": 404536
    },
    {
      "epoch": 22.025233848161843,
      "grad_norm": 0.0023675875272601843,
      "learning_rate": 5.5949532303676315e-06,
      "loss": 0.0006,
      "step": 405000
    },
    {
      "epoch": 22.05242549488797,
      "grad_norm": 0.03398232161998749,
      "learning_rate": 5.589514901022406e-06,
      "loss": 0.0015,
      "step": 405500
    },
    {
      "epoch": 22.079617141614097,
      "grad_norm": 0.000473001942737028,
      "learning_rate": 5.584076571677182e-06,
      "loss": 0.0014,
      "step": 406000
    },
    {
      "epoch": 22.10680878834022,
      "grad_norm": 0.0022323033772408962,
      "learning_rate": 5.578638242331956e-06,
      "loss": 0.0009,
      "step": 406500
    },
    {
      "epoch": 22.134000435066348,
      "grad_norm": 0.2420387715101242,
      "learning_rate": 5.573199912986731e-06,
      "loss": 0.0012,
      "step": 407000
    },
    {
      "epoch": 22.161192081792475,
      "grad_norm": 5.044465433456935e-05,
      "learning_rate": 5.567761583641506e-06,
      "loss": 0.0025,
      "step": 407500
    },
    {
      "epoch": 22.1883837285186,
      "grad_norm": 0.001589941792190075,
      "learning_rate": 5.56232325429628e-06,
      "loss": 0.0029,
      "step": 408000
    },
    {
      "epoch": 22.215575375244725,
      "grad_norm": 0.009327336214482784,
      "learning_rate": 5.556884924951056e-06,
      "loss": 0.0017,
      "step": 408500
    },
    {
      "epoch": 22.24276702197085,
      "grad_norm": 6.940307140350342,
      "learning_rate": 5.551446595605831e-06,
      "loss": 0.0025,
      "step": 409000
    },
    {
      "epoch": 22.269958668696976,
      "grad_norm": 0.0006782662239857018,
      "learning_rate": 5.546008266260605e-06,
      "loss": 0.001,
      "step": 409500
    },
    {
      "epoch": 22.297150315423103,
      "grad_norm": 0.008007903583347797,
      "learning_rate": 5.54056993691538e-06,
      "loss": 0.0009,
      "step": 410000
    },
    {
      "epoch": 22.324341962149227,
      "grad_norm": 0.028185179457068443,
      "learning_rate": 5.5351316075701545e-06,
      "loss": 0.002,
      "step": 410500
    },
    {
      "epoch": 22.351533608875354,
      "grad_norm": 0.00018136065045837313,
      "learning_rate": 5.529693278224931e-06,
      "loss": 0.0014,
      "step": 411000
    },
    {
      "epoch": 22.37872525560148,
      "grad_norm": 0.05492561310529709,
      "learning_rate": 5.524254948879705e-06,
      "loss": 0.0013,
      "step": 411500
    },
    {
      "epoch": 22.405916902327604,
      "grad_norm": 0.0018440353451296687,
      "learning_rate": 5.518816619534479e-06,
      "loss": 0.0009,
      "step": 412000
    },
    {
      "epoch": 22.43310854905373,
      "grad_norm": 0.0005936457309871912,
      "learning_rate": 5.513378290189254e-06,
      "loss": 0.0013,
      "step": 412500
    },
    {
      "epoch": 22.460300195779855,
      "grad_norm": 0.0012526239734143019,
      "learning_rate": 5.507939960844029e-06,
      "loss": 0.001,
      "step": 413000
    },
    {
      "epoch": 22.48749184250598,
      "grad_norm": 1.82191002368927,
      "learning_rate": 5.502501631498804e-06,
      "loss": 0.0026,
      "step": 413500
    },
    {
      "epoch": 22.51468348923211,
      "grad_norm": 0.0002850414894055575,
      "learning_rate": 5.497063302153579e-06,
      "loss": 0.0014,
      "step": 414000
    },
    {
      "epoch": 22.541875135958232,
      "grad_norm": 0.00402511702850461,
      "learning_rate": 5.491624972808354e-06,
      "loss": 0.0015,
      "step": 414500
    },
    {
      "epoch": 22.56906678268436,
      "grad_norm": 4.754391193273477e-05,
      "learning_rate": 5.486186643463128e-06,
      "loss": 0.0014,
      "step": 415000
    },
    {
      "epoch": 22.596258429410486,
      "grad_norm": 0.019172951579093933,
      "learning_rate": 5.480748314117903e-06,
      "loss": 0.002,
      "step": 415500
    },
    {
      "epoch": 22.62345007613661,
      "grad_norm": 0.001128927106037736,
      "learning_rate": 5.475309984772678e-06,
      "loss": 0.001,
      "step": 416000
    },
    {
      "epoch": 22.650641722862737,
      "grad_norm": 0.0911518856883049,
      "learning_rate": 5.469871655427454e-06,
      "loss": 0.0015,
      "step": 416500
    },
    {
      "epoch": 22.67783336958886,
      "grad_norm": 0.00012101631000405177,
      "learning_rate": 5.464433326082228e-06,
      "loss": 0.0016,
      "step": 417000
    },
    {
      "epoch": 22.705025016314988,
      "grad_norm": 3.2218173146247864e-05,
      "learning_rate": 5.458994996737002e-06,
      "loss": 0.0016,
      "step": 417500
    },
    {
      "epoch": 22.732216663041115,
      "grad_norm": 0.5471950173377991,
      "learning_rate": 5.4535566673917775e-06,
      "loss": 0.0023,
      "step": 418000
    },
    {
      "epoch": 22.759408309767238,
      "grad_norm": 0.036910202354192734,
      "learning_rate": 5.448118338046553e-06,
      "loss": 0.002,
      "step": 418500
    },
    {
      "epoch": 22.786599956493365,
      "grad_norm": 0.019610675051808357,
      "learning_rate": 5.442680008701328e-06,
      "loss": 0.0015,
      "step": 419000
    },
    {
      "epoch": 22.813791603219492,
      "grad_norm": 0.002965037478134036,
      "learning_rate": 5.437241679356102e-06,
      "loss": 0.0023,
      "step": 419500
    },
    {
      "epoch": 22.840983249945616,
      "grad_norm": 0.00817885622382164,
      "learning_rate": 5.431803350010877e-06,
      "loss": 0.002,
      "step": 420000
    },
    {
      "epoch": 22.868174896671743,
      "grad_norm": 0.0004293011443223804,
      "learning_rate": 5.426365020665652e-06,
      "loss": 0.0017,
      "step": 420500
    },
    {
      "epoch": 22.89536654339787,
      "grad_norm": 8.256949513452128e-05,
      "learning_rate": 5.420926691320427e-06,
      "loss": 0.0014,
      "step": 421000
    },
    {
      "epoch": 22.922558190123993,
      "grad_norm": 0.0028953582514077425,
      "learning_rate": 5.415488361975202e-06,
      "loss": 0.0021,
      "step": 421500
    },
    {
      "epoch": 22.94974983685012,
      "grad_norm": 0.00032379833282902837,
      "learning_rate": 5.410050032629977e-06,
      "loss": 0.0016,
      "step": 422000
    },
    {
      "epoch": 22.976941483576244,
      "grad_norm": 0.00023437933123204857,
      "learning_rate": 5.404611703284751e-06,
      "loss": 0.0019,
      "step": 422500
    },
    {
      "epoch": 23.0,
      "eval_exact_match": 0.8898,
      "eval_loss": 0.3633713126182556,
      "eval_runtime": 1017.8762,
      "eval_samples_per_second": 11.177,
      "eval_steps_per_second": 0.699,
      "step": 422924
    },
    {
      "epoch": 23.00413313030237,
      "grad_norm": 0.0025748254265636206,
      "learning_rate": 5.399173373939526e-06,
      "loss": 0.0016,
      "step": 423000
    },
    {
      "epoch": 23.031324777028498,
      "grad_norm": 0.0002550457138568163,
      "learning_rate": 5.393735044594301e-06,
      "loss": 0.001,
      "step": 423500
    },
    {
      "epoch": 23.05851642375462,
      "grad_norm": 0.00150071841198951,
      "learning_rate": 5.388296715249076e-06,
      "loss": 0.0009,
      "step": 424000
    },
    {
      "epoch": 23.08570807048075,
      "grad_norm": 0.011348661966621876,
      "learning_rate": 5.382858385903851e-06,
      "loss": 0.0011,
      "step": 424500
    },
    {
      "epoch": 23.112899717206876,
      "grad_norm": 0.008206741884350777,
      "learning_rate": 5.377420056558625e-06,
      "loss": 0.0013,
      "step": 425000
    },
    {
      "epoch": 23.140091363933,
      "grad_norm": 0.03831135109066963,
      "learning_rate": 5.3719817272134e-06,
      "loss": 0.0012,
      "step": 425500
    },
    {
      "epoch": 23.167283010659126,
      "grad_norm": 13.365209579467773,
      "learning_rate": 5.366543397868176e-06,
      "loss": 0.0014,
      "step": 426000
    },
    {
      "epoch": 23.19447465738525,
      "grad_norm": 0.002089498797431588,
      "learning_rate": 5.36110506852295e-06,
      "loss": 0.0011,
      "step": 426500
    },
    {
      "epoch": 23.221666304111377,
      "grad_norm": 0.0165526382625103,
      "learning_rate": 5.355666739177725e-06,
      "loss": 0.0016,
      "step": 427000
    },
    {
      "epoch": 23.248857950837504,
      "grad_norm": 9.401202260050923e-05,
      "learning_rate": 5.3502284098325e-06,
      "loss": 0.0014,
      "step": 427500
    },
    {
      "epoch": 23.276049597563627,
      "grad_norm": 7.19096040725708,
      "learning_rate": 5.344790080487274e-06,
      "loss": 0.0008,
      "step": 428000
    },
    {
      "epoch": 23.303241244289755,
      "grad_norm": 0.005182476248592138,
      "learning_rate": 5.33935175114205e-06,
      "loss": 0.003,
      "step": 428500
    },
    {
      "epoch": 23.33043289101588,
      "grad_norm": 1.577128750795964e-05,
      "learning_rate": 5.333913421796824e-06,
      "loss": 0.0008,
      "step": 429000
    },
    {
      "epoch": 23.357624537742005,
      "grad_norm": 0.6371750235557556,
      "learning_rate": 5.3284750924516e-06,
      "loss": 0.0025,
      "step": 429500
    },
    {
      "epoch": 23.384816184468132,
      "grad_norm": 0.1437244713306427,
      "learning_rate": 5.323036763106374e-06,
      "loss": 0.002,
      "step": 430000
    },
    {
      "epoch": 23.412007831194256,
      "grad_norm": 0.00017740836483426392,
      "learning_rate": 5.317598433761148e-06,
      "loss": 0.0011,
      "step": 430500
    },
    {
      "epoch": 23.439199477920383,
      "grad_norm": 0.010751834139227867,
      "learning_rate": 5.312160104415924e-06,
      "loss": 0.0021,
      "step": 431000
    },
    {
      "epoch": 23.46639112464651,
      "grad_norm": 0.011687032878398895,
      "learning_rate": 5.306721775070699e-06,
      "loss": 0.0011,
      "step": 431500
    },
    {
      "epoch": 23.493582771372633,
      "grad_norm": 0.028768140822649002,
      "learning_rate": 5.301283445725474e-06,
      "loss": 0.0016,
      "step": 432000
    },
    {
      "epoch": 23.52077441809876,
      "grad_norm": 0.0011045371647924185,
      "learning_rate": 5.295845116380248e-06,
      "loss": 0.002,
      "step": 432500
    },
    {
      "epoch": 23.547966064824887,
      "grad_norm": 0.004923527594655752,
      "learning_rate": 5.290406787035023e-06,
      "loss": 0.0018,
      "step": 433000
    },
    {
      "epoch": 23.57515771155101,
      "grad_norm": 0.00198232545517385,
      "learning_rate": 5.284968457689799e-06,
      "loss": 0.002,
      "step": 433500
    },
    {
      "epoch": 23.602349358277138,
      "grad_norm": 0.24075612425804138,
      "learning_rate": 5.279530128344573e-06,
      "loss": 0.0015,
      "step": 434000
    },
    {
      "epoch": 23.62954100500326,
      "grad_norm": 0.13289451599121094,
      "learning_rate": 5.274091798999348e-06,
      "loss": 0.0014,
      "step": 434500
    },
    {
      "epoch": 23.65673265172939,
      "grad_norm": 0.0009639549534767866,
      "learning_rate": 5.268653469654123e-06,
      "loss": 0.0021,
      "step": 435000
    },
    {
      "epoch": 23.683924298455516,
      "grad_norm": 0.003956354223191738,
      "learning_rate": 5.263215140308897e-06,
      "loss": 0.0023,
      "step": 435500
    },
    {
      "epoch": 23.71111594518164,
      "grad_norm": 0.0001058599809766747,
      "learning_rate": 5.257776810963673e-06,
      "loss": 0.0012,
      "step": 436000
    },
    {
      "epoch": 23.738307591907766,
      "grad_norm": 0.0002513038634788245,
      "learning_rate": 5.252338481618447e-06,
      "loss": 0.0019,
      "step": 436500
    },
    {
      "epoch": 23.765499238633893,
      "grad_norm": 9.151042468147352e-05,
      "learning_rate": 5.246900152273222e-06,
      "loss": 0.0015,
      "step": 437000
    },
    {
      "epoch": 23.792690885360017,
      "grad_norm": 0.07705456763505936,
      "learning_rate": 5.241461822927997e-06,
      "loss": 0.0016,
      "step": 437500
    },
    {
      "epoch": 23.819882532086144,
      "grad_norm": 0.00012023092131130397,
      "learning_rate": 5.236023493582771e-06,
      "loss": 0.0016,
      "step": 438000
    },
    {
      "epoch": 23.847074178812267,
      "grad_norm": 0.002552904188632965,
      "learning_rate": 5.230585164237547e-06,
      "loss": 0.002,
      "step": 438500
    },
    {
      "epoch": 23.874265825538394,
      "grad_norm": 0.00027760403463616967,
      "learning_rate": 5.225146834892322e-06,
      "loss": 0.0013,
      "step": 439000
    },
    {
      "epoch": 23.90145747226452,
      "grad_norm": 0.5065758228302002,
      "learning_rate": 5.219708505547096e-06,
      "loss": 0.0017,
      "step": 439500
    },
    {
      "epoch": 23.928649118990645,
      "grad_norm": 0.002194088650867343,
      "learning_rate": 5.214270176201871e-06,
      "loss": 0.0023,
      "step": 440000
    },
    {
      "epoch": 23.955840765716772,
      "grad_norm": 0.3404185473918915,
      "learning_rate": 5.208831846856646e-06,
      "loss": 0.0021,
      "step": 440500
    },
    {
      "epoch": 23.9830324124429,
      "grad_norm": 0.0021974656265228987,
      "learning_rate": 5.203393517511422e-06,
      "loss": 0.0017,
      "step": 441000
    },
    {
      "epoch": 24.0,
      "eval_exact_match": 0.8878,
      "eval_loss": 0.3819943964481354,
      "eval_runtime": 1022.1021,
      "eval_samples_per_second": 11.131,
      "eval_steps_per_second": 0.697,
      "step": 441312
    }
  ],
  "logging_steps": 500,
  "max_steps": 919400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 5
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5992705382273843e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}