XLM-T-full-xnli / trainer_state.json
morit's picture
upload model
9d583db
{
"best_metric": 0.7710575635876841,
"best_model_checkpoint": "models/full_xnli_3/checkpoint-552051",
"epoch": 3.0,
"global_step": 552051,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9981885731571902e-05,
"loss": 1.026,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 1.9963771463143806e-05,
"loss": 0.8874,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 1.9945657194715707e-05,
"loss": 0.845,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 1.9927542926287608e-05,
"loss": 0.8122,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 1.9909428657859512e-05,
"loss": 0.7991,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 1.9891314389431413e-05,
"loss": 0.7869,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 1.9873200121003317e-05,
"loss": 0.7801,
"step": 3500
},
{
"epoch": 0.02,
"learning_rate": 1.9855085852575217e-05,
"loss": 0.7703,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 1.9836971584147118e-05,
"loss": 0.7661,
"step": 4500
},
{
"epoch": 0.03,
"learning_rate": 1.981885731571902e-05,
"loss": 0.7498,
"step": 5000
},
{
"epoch": 0.03,
"learning_rate": 1.9800743047290923e-05,
"loss": 0.7383,
"step": 5500
},
{
"epoch": 0.03,
"learning_rate": 1.9782628778862823e-05,
"loss": 0.7452,
"step": 6000
},
{
"epoch": 0.04,
"learning_rate": 1.9764514510434728e-05,
"loss": 0.7325,
"step": 6500
},
{
"epoch": 0.04,
"learning_rate": 1.9746400242006628e-05,
"loss": 0.7407,
"step": 7000
},
{
"epoch": 0.04,
"learning_rate": 1.972828597357853e-05,
"loss": 0.7248,
"step": 7500
},
{
"epoch": 0.04,
"learning_rate": 1.9710171705150433e-05,
"loss": 0.7216,
"step": 8000
},
{
"epoch": 0.05,
"learning_rate": 1.9692057436722334e-05,
"loss": 0.7197,
"step": 8500
},
{
"epoch": 0.05,
"learning_rate": 1.9673943168294238e-05,
"loss": 0.7168,
"step": 9000
},
{
"epoch": 0.05,
"learning_rate": 1.9655828899866135e-05,
"loss": 0.7146,
"step": 9500
},
{
"epoch": 0.05,
"learning_rate": 1.963771463143804e-05,
"loss": 0.708,
"step": 10000
},
{
"epoch": 0.06,
"learning_rate": 1.961960036300994e-05,
"loss": 0.7139,
"step": 10500
},
{
"epoch": 0.06,
"learning_rate": 1.9601486094581844e-05,
"loss": 0.7092,
"step": 11000
},
{
"epoch": 0.06,
"learning_rate": 1.9583371826153745e-05,
"loss": 0.6994,
"step": 11500
},
{
"epoch": 0.07,
"learning_rate": 1.956525755772565e-05,
"loss": 0.6958,
"step": 12000
},
{
"epoch": 0.07,
"learning_rate": 1.954714328929755e-05,
"loss": 0.6949,
"step": 12500
},
{
"epoch": 0.07,
"learning_rate": 1.952902902086945e-05,
"loss": 0.698,
"step": 13000
},
{
"epoch": 0.07,
"learning_rate": 1.951091475244135e-05,
"loss": 0.6803,
"step": 13500
},
{
"epoch": 0.08,
"learning_rate": 1.9492800484013255e-05,
"loss": 0.6865,
"step": 14000
},
{
"epoch": 0.08,
"learning_rate": 1.9474686215585155e-05,
"loss": 0.7004,
"step": 14500
},
{
"epoch": 0.08,
"learning_rate": 1.9456571947157056e-05,
"loss": 0.681,
"step": 15000
},
{
"epoch": 0.08,
"learning_rate": 1.943845767872896e-05,
"loss": 0.6864,
"step": 15500
},
{
"epoch": 0.09,
"learning_rate": 1.942034341030086e-05,
"loss": 0.6934,
"step": 16000
},
{
"epoch": 0.09,
"learning_rate": 1.9402229141872765e-05,
"loss": 0.6798,
"step": 16500
},
{
"epoch": 0.09,
"learning_rate": 1.9384114873444666e-05,
"loss": 0.6855,
"step": 17000
},
{
"epoch": 0.1,
"learning_rate": 1.9366000605016566e-05,
"loss": 0.6749,
"step": 17500
},
{
"epoch": 0.1,
"learning_rate": 1.9347886336588467e-05,
"loss": 0.6704,
"step": 18000
},
{
"epoch": 0.1,
"learning_rate": 1.932977206816037e-05,
"loss": 0.676,
"step": 18500
},
{
"epoch": 0.1,
"learning_rate": 1.9311657799732272e-05,
"loss": 0.6703,
"step": 19000
},
{
"epoch": 0.11,
"learning_rate": 1.9293543531304176e-05,
"loss": 0.6646,
"step": 19500
},
{
"epoch": 0.11,
"learning_rate": 1.9275429262876077e-05,
"loss": 0.669,
"step": 20000
},
{
"epoch": 0.11,
"learning_rate": 1.9257314994447977e-05,
"loss": 0.6619,
"step": 20500
},
{
"epoch": 0.11,
"learning_rate": 1.923920072601988e-05,
"loss": 0.6603,
"step": 21000
},
{
"epoch": 0.12,
"learning_rate": 1.9221086457591782e-05,
"loss": 0.6603,
"step": 21500
},
{
"epoch": 0.12,
"learning_rate": 1.9202972189163683e-05,
"loss": 0.6694,
"step": 22000
},
{
"epoch": 0.12,
"learning_rate": 1.9184857920735587e-05,
"loss": 0.658,
"step": 22500
},
{
"epoch": 0.12,
"learning_rate": 1.9166743652307488e-05,
"loss": 0.6506,
"step": 23000
},
{
"epoch": 0.13,
"learning_rate": 1.9148629383879388e-05,
"loss": 0.6673,
"step": 23500
},
{
"epoch": 0.13,
"learning_rate": 1.9130515115451292e-05,
"loss": 0.6613,
"step": 24000
},
{
"epoch": 0.13,
"learning_rate": 1.9112400847023193e-05,
"loss": 0.6597,
"step": 24500
},
{
"epoch": 0.14,
"learning_rate": 1.9094286578595097e-05,
"loss": 0.6432,
"step": 25000
},
{
"epoch": 0.14,
"learning_rate": 1.9076172310166994e-05,
"loss": 0.6436,
"step": 25500
},
{
"epoch": 0.14,
"learning_rate": 1.90580580417389e-05,
"loss": 0.6566,
"step": 26000
},
{
"epoch": 0.14,
"learning_rate": 1.90399437733108e-05,
"loss": 0.6467,
"step": 26500
},
{
"epoch": 0.15,
"learning_rate": 1.9021829504882703e-05,
"loss": 0.656,
"step": 27000
},
{
"epoch": 0.15,
"learning_rate": 1.9003715236454604e-05,
"loss": 0.6633,
"step": 27500
},
{
"epoch": 0.15,
"learning_rate": 1.8985600968026508e-05,
"loss": 0.6409,
"step": 28000
},
{
"epoch": 0.15,
"learning_rate": 1.896748669959841e-05,
"loss": 0.6562,
"step": 28500
},
{
"epoch": 0.16,
"learning_rate": 1.894937243117031e-05,
"loss": 0.6516,
"step": 29000
},
{
"epoch": 0.16,
"learning_rate": 1.8931258162742213e-05,
"loss": 0.6502,
"step": 29500
},
{
"epoch": 0.16,
"learning_rate": 1.8913143894314114e-05,
"loss": 0.6437,
"step": 30000
},
{
"epoch": 0.17,
"learning_rate": 1.8895029625886018e-05,
"loss": 0.6485,
"step": 30500
},
{
"epoch": 0.17,
"learning_rate": 1.8876915357457916e-05,
"loss": 0.6357,
"step": 31000
},
{
"epoch": 0.17,
"learning_rate": 1.885880108902982e-05,
"loss": 0.6438,
"step": 31500
},
{
"epoch": 0.17,
"learning_rate": 1.884068682060172e-05,
"loss": 0.644,
"step": 32000
},
{
"epoch": 0.18,
"learning_rate": 1.8822572552173624e-05,
"loss": 0.6493,
"step": 32500
},
{
"epoch": 0.18,
"learning_rate": 1.8804458283745525e-05,
"loss": 0.6418,
"step": 33000
},
{
"epoch": 0.18,
"learning_rate": 1.8786344015317426e-05,
"loss": 0.6448,
"step": 33500
},
{
"epoch": 0.18,
"learning_rate": 1.876822974688933e-05,
"loss": 0.6353,
"step": 34000
},
{
"epoch": 0.19,
"learning_rate": 1.875011547846123e-05,
"loss": 0.6332,
"step": 34500
},
{
"epoch": 0.19,
"learning_rate": 1.873200121003313e-05,
"loss": 0.6393,
"step": 35000
},
{
"epoch": 0.19,
"learning_rate": 1.8713886941605035e-05,
"loss": 0.6309,
"step": 35500
},
{
"epoch": 0.2,
"learning_rate": 1.8695772673176936e-05,
"loss": 0.6378,
"step": 36000
},
{
"epoch": 0.2,
"learning_rate": 1.8677658404748837e-05,
"loss": 0.6412,
"step": 36500
},
{
"epoch": 0.2,
"learning_rate": 1.865954413632074e-05,
"loss": 0.637,
"step": 37000
},
{
"epoch": 0.2,
"learning_rate": 1.864142986789264e-05,
"loss": 0.6293,
"step": 37500
},
{
"epoch": 0.21,
"learning_rate": 1.8623315599464545e-05,
"loss": 0.6273,
"step": 38000
},
{
"epoch": 0.21,
"learning_rate": 1.8605201331036446e-05,
"loss": 0.6205,
"step": 38500
},
{
"epoch": 0.21,
"learning_rate": 1.8587087062608347e-05,
"loss": 0.6221,
"step": 39000
},
{
"epoch": 0.21,
"learning_rate": 1.8568972794180248e-05,
"loss": 0.6236,
"step": 39500
},
{
"epoch": 0.22,
"learning_rate": 1.855085852575215e-05,
"loss": 0.6221,
"step": 40000
},
{
"epoch": 0.22,
"learning_rate": 1.8532744257324052e-05,
"loss": 0.618,
"step": 40500
},
{
"epoch": 0.22,
"learning_rate": 1.8514629988895956e-05,
"loss": 0.628,
"step": 41000
},
{
"epoch": 0.23,
"learning_rate": 1.8496515720467857e-05,
"loss": 0.6069,
"step": 41500
},
{
"epoch": 0.23,
"learning_rate": 1.8478401452039758e-05,
"loss": 0.6337,
"step": 42000
},
{
"epoch": 0.23,
"learning_rate": 1.8460287183611662e-05,
"loss": 0.6205,
"step": 42500
},
{
"epoch": 0.23,
"learning_rate": 1.8442172915183563e-05,
"loss": 0.6287,
"step": 43000
},
{
"epoch": 0.24,
"learning_rate": 1.8424058646755467e-05,
"loss": 0.6186,
"step": 43500
},
{
"epoch": 0.24,
"learning_rate": 1.8405944378327367e-05,
"loss": 0.6225,
"step": 44000
},
{
"epoch": 0.24,
"learning_rate": 1.8387830109899268e-05,
"loss": 0.6171,
"step": 44500
},
{
"epoch": 0.24,
"learning_rate": 1.836971584147117e-05,
"loss": 0.6147,
"step": 45000
},
{
"epoch": 0.25,
"learning_rate": 1.8351601573043073e-05,
"loss": 0.6293,
"step": 45500
},
{
"epoch": 0.25,
"learning_rate": 1.8333487304614973e-05,
"loss": 0.6098,
"step": 46000
},
{
"epoch": 0.25,
"learning_rate": 1.8315373036186878e-05,
"loss": 0.6162,
"step": 46500
},
{
"epoch": 0.26,
"learning_rate": 1.8297258767758778e-05,
"loss": 0.6195,
"step": 47000
},
{
"epoch": 0.26,
"learning_rate": 1.827914449933068e-05,
"loss": 0.6265,
"step": 47500
},
{
"epoch": 0.26,
"learning_rate": 1.826103023090258e-05,
"loss": 0.6166,
"step": 48000
},
{
"epoch": 0.26,
"learning_rate": 1.8242915962474484e-05,
"loss": 0.6064,
"step": 48500
},
{
"epoch": 0.27,
"learning_rate": 1.8224801694046384e-05,
"loss": 0.6251,
"step": 49000
},
{
"epoch": 0.27,
"learning_rate": 1.8206687425618285e-05,
"loss": 0.6102,
"step": 49500
},
{
"epoch": 0.27,
"learning_rate": 1.818857315719019e-05,
"loss": 0.6213,
"step": 50000
},
{
"epoch": 0.27,
"learning_rate": 1.817045888876209e-05,
"loss": 0.6138,
"step": 50500
},
{
"epoch": 0.28,
"learning_rate": 1.8152344620333994e-05,
"loss": 0.617,
"step": 51000
},
{
"epoch": 0.28,
"learning_rate": 1.8134230351905895e-05,
"loss": 0.6199,
"step": 51500
},
{
"epoch": 0.28,
"learning_rate": 1.81161160834778e-05,
"loss": 0.6011,
"step": 52000
},
{
"epoch": 0.29,
"learning_rate": 1.8098001815049696e-05,
"loss": 0.6098,
"step": 52500
},
{
"epoch": 0.29,
"learning_rate": 1.80798875466216e-05,
"loss": 0.6109,
"step": 53000
},
{
"epoch": 0.29,
"learning_rate": 1.80617732781935e-05,
"loss": 0.608,
"step": 53500
},
{
"epoch": 0.29,
"learning_rate": 1.8043659009765405e-05,
"loss": 0.6062,
"step": 54000
},
{
"epoch": 0.3,
"learning_rate": 1.8025544741337305e-05,
"loss": 0.6072,
"step": 54500
},
{
"epoch": 0.3,
"learning_rate": 1.8007430472909206e-05,
"loss": 0.615,
"step": 55000
},
{
"epoch": 0.3,
"learning_rate": 1.798931620448111e-05,
"loss": 0.6143,
"step": 55500
},
{
"epoch": 0.3,
"learning_rate": 1.797120193605301e-05,
"loss": 0.6119,
"step": 56000
},
{
"epoch": 0.31,
"learning_rate": 1.795308766762491e-05,
"loss": 0.6047,
"step": 56500
},
{
"epoch": 0.31,
"learning_rate": 1.7934973399196816e-05,
"loss": 0.5979,
"step": 57000
},
{
"epoch": 0.31,
"learning_rate": 1.7916859130768716e-05,
"loss": 0.6007,
"step": 57500
},
{
"epoch": 0.32,
"learning_rate": 1.7898744862340617e-05,
"loss": 0.5974,
"step": 58000
},
{
"epoch": 0.32,
"learning_rate": 1.788063059391252e-05,
"loss": 0.6116,
"step": 58500
},
{
"epoch": 0.32,
"learning_rate": 1.7862516325484422e-05,
"loss": 0.6025,
"step": 59000
},
{
"epoch": 0.32,
"learning_rate": 1.7844402057056326e-05,
"loss": 0.6033,
"step": 59500
},
{
"epoch": 0.33,
"learning_rate": 1.7826287788628227e-05,
"loss": 0.5954,
"step": 60000
},
{
"epoch": 0.33,
"learning_rate": 1.7808173520200127e-05,
"loss": 0.594,
"step": 60500
},
{
"epoch": 0.33,
"learning_rate": 1.7790059251772028e-05,
"loss": 0.6008,
"step": 61000
},
{
"epoch": 0.33,
"learning_rate": 1.7771944983343932e-05,
"loss": 0.5965,
"step": 61500
},
{
"epoch": 0.34,
"learning_rate": 1.7753830714915833e-05,
"loss": 0.6063,
"step": 62000
},
{
"epoch": 0.34,
"learning_rate": 1.7735716446487737e-05,
"loss": 0.6056,
"step": 62500
},
{
"epoch": 0.34,
"learning_rate": 1.7717602178059638e-05,
"loss": 0.5975,
"step": 63000
},
{
"epoch": 0.35,
"learning_rate": 1.7699487909631538e-05,
"loss": 0.6032,
"step": 63500
},
{
"epoch": 0.35,
"learning_rate": 1.7681373641203442e-05,
"loss": 0.6035,
"step": 64000
},
{
"epoch": 0.35,
"learning_rate": 1.7663259372775343e-05,
"loss": 0.5951,
"step": 64500
},
{
"epoch": 0.35,
"learning_rate": 1.7645145104347247e-05,
"loss": 0.596,
"step": 65000
},
{
"epoch": 0.36,
"learning_rate": 1.7627030835919144e-05,
"loss": 0.6031,
"step": 65500
},
{
"epoch": 0.36,
"learning_rate": 1.760891656749105e-05,
"loss": 0.5984,
"step": 66000
},
{
"epoch": 0.36,
"learning_rate": 1.759080229906295e-05,
"loss": 0.5867,
"step": 66500
},
{
"epoch": 0.36,
"learning_rate": 1.7572688030634853e-05,
"loss": 0.5925,
"step": 67000
},
{
"epoch": 0.37,
"learning_rate": 1.7554573762206754e-05,
"loss": 0.6039,
"step": 67500
},
{
"epoch": 0.37,
"learning_rate": 1.7536459493778658e-05,
"loss": 0.5921,
"step": 68000
},
{
"epoch": 0.37,
"learning_rate": 1.751834522535056e-05,
"loss": 0.5858,
"step": 68500
},
{
"epoch": 0.37,
"learning_rate": 1.750023095692246e-05,
"loss": 0.59,
"step": 69000
},
{
"epoch": 0.38,
"learning_rate": 1.748211668849436e-05,
"loss": 0.5949,
"step": 69500
},
{
"epoch": 0.38,
"learning_rate": 1.7464002420066264e-05,
"loss": 0.583,
"step": 70000
},
{
"epoch": 0.38,
"learning_rate": 1.7445888151638165e-05,
"loss": 0.5884,
"step": 70500
},
{
"epoch": 0.39,
"learning_rate": 1.7427773883210065e-05,
"loss": 0.58,
"step": 71000
},
{
"epoch": 0.39,
"learning_rate": 1.740965961478197e-05,
"loss": 0.5867,
"step": 71500
},
{
"epoch": 0.39,
"learning_rate": 1.739154534635387e-05,
"loss": 0.598,
"step": 72000
},
{
"epoch": 0.39,
"learning_rate": 1.7373431077925774e-05,
"loss": 0.5783,
"step": 72500
},
{
"epoch": 0.4,
"learning_rate": 1.7355316809497675e-05,
"loss": 0.5925,
"step": 73000
},
{
"epoch": 0.4,
"learning_rate": 1.7337202541069576e-05,
"loss": 0.584,
"step": 73500
},
{
"epoch": 0.4,
"learning_rate": 1.7319088272641476e-05,
"loss": 0.5863,
"step": 74000
},
{
"epoch": 0.4,
"learning_rate": 1.730097400421338e-05,
"loss": 0.5757,
"step": 74500
},
{
"epoch": 0.41,
"learning_rate": 1.728285973578528e-05,
"loss": 0.5879,
"step": 75000
},
{
"epoch": 0.41,
"learning_rate": 1.7264745467357185e-05,
"loss": 0.5988,
"step": 75500
},
{
"epoch": 0.41,
"learning_rate": 1.7246631198929086e-05,
"loss": 0.5807,
"step": 76000
},
{
"epoch": 0.42,
"learning_rate": 1.7228516930500987e-05,
"loss": 0.5872,
"step": 76500
},
{
"epoch": 0.42,
"learning_rate": 1.721040266207289e-05,
"loss": 0.5918,
"step": 77000
},
{
"epoch": 0.42,
"learning_rate": 1.719228839364479e-05,
"loss": 0.5845,
"step": 77500
},
{
"epoch": 0.42,
"learning_rate": 1.7174174125216695e-05,
"loss": 0.5726,
"step": 78000
},
{
"epoch": 0.43,
"learning_rate": 1.7156059856788596e-05,
"loss": 0.5772,
"step": 78500
},
{
"epoch": 0.43,
"learning_rate": 1.7137945588360497e-05,
"loss": 0.5966,
"step": 79000
},
{
"epoch": 0.43,
"learning_rate": 1.7119831319932398e-05,
"loss": 0.5734,
"step": 79500
},
{
"epoch": 0.43,
"learning_rate": 1.71017170515043e-05,
"loss": 0.5851,
"step": 80000
},
{
"epoch": 0.44,
"learning_rate": 1.7083602783076202e-05,
"loss": 0.5822,
"step": 80500
},
{
"epoch": 0.44,
"learning_rate": 1.7065488514648106e-05,
"loss": 0.5811,
"step": 81000
},
{
"epoch": 0.44,
"learning_rate": 1.7047374246220004e-05,
"loss": 0.5805,
"step": 81500
},
{
"epoch": 0.45,
"learning_rate": 1.7029259977791908e-05,
"loss": 0.579,
"step": 82000
},
{
"epoch": 0.45,
"learning_rate": 1.701114570936381e-05,
"loss": 0.5827,
"step": 82500
},
{
"epoch": 0.45,
"learning_rate": 1.6993031440935713e-05,
"loss": 0.5799,
"step": 83000
},
{
"epoch": 0.45,
"learning_rate": 1.6974917172507613e-05,
"loss": 0.5742,
"step": 83500
},
{
"epoch": 0.46,
"learning_rate": 1.6956802904079517e-05,
"loss": 0.5762,
"step": 84000
},
{
"epoch": 0.46,
"learning_rate": 1.6938688635651418e-05,
"loss": 0.57,
"step": 84500
},
{
"epoch": 0.46,
"learning_rate": 1.692057436722332e-05,
"loss": 0.5929,
"step": 85000
},
{
"epoch": 0.46,
"learning_rate": 1.6902460098795223e-05,
"loss": 0.5797,
"step": 85500
},
{
"epoch": 0.47,
"learning_rate": 1.6884345830367123e-05,
"loss": 0.5695,
"step": 86000
},
{
"epoch": 0.47,
"learning_rate": 1.6866231561939028e-05,
"loss": 0.5821,
"step": 86500
},
{
"epoch": 0.47,
"learning_rate": 1.6848117293510925e-05,
"loss": 0.5718,
"step": 87000
},
{
"epoch": 0.48,
"learning_rate": 1.683000302508283e-05,
"loss": 0.5738,
"step": 87500
},
{
"epoch": 0.48,
"learning_rate": 1.681188875665473e-05,
"loss": 0.5774,
"step": 88000
},
{
"epoch": 0.48,
"learning_rate": 1.6793774488226634e-05,
"loss": 0.5711,
"step": 88500
},
{
"epoch": 0.48,
"learning_rate": 1.6775660219798534e-05,
"loss": 0.5854,
"step": 89000
},
{
"epoch": 0.49,
"learning_rate": 1.6757545951370435e-05,
"loss": 0.5801,
"step": 89500
},
{
"epoch": 0.49,
"learning_rate": 1.673943168294234e-05,
"loss": 0.5647,
"step": 90000
},
{
"epoch": 0.49,
"learning_rate": 1.672131741451424e-05,
"loss": 0.5734,
"step": 90500
},
{
"epoch": 0.49,
"learning_rate": 1.670320314608614e-05,
"loss": 0.5764,
"step": 91000
},
{
"epoch": 0.5,
"learning_rate": 1.6685088877658045e-05,
"loss": 0.5748,
"step": 91500
},
{
"epoch": 0.5,
"learning_rate": 1.6666974609229945e-05,
"loss": 0.5715,
"step": 92000
},
{
"epoch": 0.5,
"learning_rate": 1.6648860340801846e-05,
"loss": 0.5686,
"step": 92500
},
{
"epoch": 0.51,
"learning_rate": 1.663074607237375e-05,
"loss": 0.5705,
"step": 93000
},
{
"epoch": 0.51,
"learning_rate": 1.661263180394565e-05,
"loss": 0.5726,
"step": 93500
},
{
"epoch": 0.51,
"learning_rate": 1.6594517535517555e-05,
"loss": 0.5719,
"step": 94000
},
{
"epoch": 0.51,
"learning_rate": 1.6576403267089455e-05,
"loss": 0.5644,
"step": 94500
},
{
"epoch": 0.52,
"learning_rate": 1.6558288998661356e-05,
"loss": 0.5622,
"step": 95000
},
{
"epoch": 0.52,
"learning_rate": 1.6540174730233257e-05,
"loss": 0.5754,
"step": 95500
},
{
"epoch": 0.52,
"learning_rate": 1.652206046180516e-05,
"loss": 0.5763,
"step": 96000
},
{
"epoch": 0.52,
"learning_rate": 1.650394619337706e-05,
"loss": 0.5664,
"step": 96500
},
{
"epoch": 0.53,
"learning_rate": 1.6485831924948966e-05,
"loss": 0.5798,
"step": 97000
},
{
"epoch": 0.53,
"learning_rate": 1.6467717656520866e-05,
"loss": 0.5722,
"step": 97500
},
{
"epoch": 0.53,
"learning_rate": 1.6449603388092767e-05,
"loss": 0.555,
"step": 98000
},
{
"epoch": 0.54,
"learning_rate": 1.643148911966467e-05,
"loss": 0.556,
"step": 98500
},
{
"epoch": 0.54,
"learning_rate": 1.6413374851236572e-05,
"loss": 0.5571,
"step": 99000
},
{
"epoch": 0.54,
"learning_rate": 1.6395260582808476e-05,
"loss": 0.5606,
"step": 99500
},
{
"epoch": 0.54,
"learning_rate": 1.6377146314380377e-05,
"loss": 0.5641,
"step": 100000
},
{
"epoch": 0.55,
"learning_rate": 1.6359032045952277e-05,
"loss": 0.5617,
"step": 100500
},
{
"epoch": 0.55,
"learning_rate": 1.6340917777524178e-05,
"loss": 0.5668,
"step": 101000
},
{
"epoch": 0.55,
"learning_rate": 1.6322803509096082e-05,
"loss": 0.566,
"step": 101500
},
{
"epoch": 0.55,
"learning_rate": 1.6304689240667983e-05,
"loss": 0.5661,
"step": 102000
},
{
"epoch": 0.56,
"learning_rate": 1.6286574972239887e-05,
"loss": 0.5644,
"step": 102500
},
{
"epoch": 0.56,
"learning_rate": 1.6268460703811788e-05,
"loss": 0.572,
"step": 103000
},
{
"epoch": 0.56,
"learning_rate": 1.6250346435383688e-05,
"loss": 0.5619,
"step": 103500
},
{
"epoch": 0.57,
"learning_rate": 1.623223216695559e-05,
"loss": 0.5546,
"step": 104000
},
{
"epoch": 0.57,
"learning_rate": 1.6214117898527493e-05,
"loss": 0.5557,
"step": 104500
},
{
"epoch": 0.57,
"learning_rate": 1.6196003630099394e-05,
"loss": 0.55,
"step": 105000
},
{
"epoch": 0.57,
"learning_rate": 1.6177889361671294e-05,
"loss": 0.5543,
"step": 105500
},
{
"epoch": 0.58,
"learning_rate": 1.61597750932432e-05,
"loss": 0.5496,
"step": 106000
},
{
"epoch": 0.58,
"learning_rate": 1.61416608248151e-05,
"loss": 0.5631,
"step": 106500
},
{
"epoch": 0.58,
"learning_rate": 1.6123546556387003e-05,
"loss": 0.557,
"step": 107000
},
{
"epoch": 0.58,
"learning_rate": 1.6105432287958904e-05,
"loss": 0.5533,
"step": 107500
},
{
"epoch": 0.59,
"learning_rate": 1.6087318019530808e-05,
"loss": 0.5501,
"step": 108000
},
{
"epoch": 0.59,
"learning_rate": 1.6069203751102705e-05,
"loss": 0.5644,
"step": 108500
},
{
"epoch": 0.59,
"learning_rate": 1.605108948267461e-05,
"loss": 0.5528,
"step": 109000
},
{
"epoch": 0.6,
"learning_rate": 1.603297521424651e-05,
"loss": 0.552,
"step": 109500
},
{
"epoch": 0.6,
"learning_rate": 1.6014860945818414e-05,
"loss": 0.5606,
"step": 110000
},
{
"epoch": 0.6,
"learning_rate": 1.5996746677390315e-05,
"loss": 0.5507,
"step": 110500
},
{
"epoch": 0.6,
"learning_rate": 1.5978632408962215e-05,
"loss": 0.5495,
"step": 111000
},
{
"epoch": 0.61,
"learning_rate": 1.596051814053412e-05,
"loss": 0.559,
"step": 111500
},
{
"epoch": 0.61,
"learning_rate": 1.594240387210602e-05,
"loss": 0.5575,
"step": 112000
},
{
"epoch": 0.61,
"learning_rate": 1.5924289603677924e-05,
"loss": 0.5528,
"step": 112500
},
{
"epoch": 0.61,
"learning_rate": 1.5906175335249825e-05,
"loss": 0.5497,
"step": 113000
},
{
"epoch": 0.62,
"learning_rate": 1.5888061066821726e-05,
"loss": 0.5522,
"step": 113500
},
{
"epoch": 0.62,
"learning_rate": 1.5869946798393626e-05,
"loss": 0.5509,
"step": 114000
},
{
"epoch": 0.62,
"learning_rate": 1.585183252996553e-05,
"loss": 0.5534,
"step": 114500
},
{
"epoch": 0.62,
"learning_rate": 1.583371826153743e-05,
"loss": 0.5544,
"step": 115000
},
{
"epoch": 0.63,
"learning_rate": 1.5815603993109335e-05,
"loss": 0.5469,
"step": 115500
},
{
"epoch": 0.63,
"learning_rate": 1.5797489724681236e-05,
"loss": 0.5584,
"step": 116000
},
{
"epoch": 0.63,
"learning_rate": 1.5779375456253137e-05,
"loss": 0.554,
"step": 116500
},
{
"epoch": 0.64,
"learning_rate": 1.5761261187825037e-05,
"loss": 0.556,
"step": 117000
},
{
"epoch": 0.64,
"learning_rate": 1.574314691939694e-05,
"loss": 0.5483,
"step": 117500
},
{
"epoch": 0.64,
"learning_rate": 1.5725032650968842e-05,
"loss": 0.5458,
"step": 118000
},
{
"epoch": 0.64,
"learning_rate": 1.5706918382540746e-05,
"loss": 0.5445,
"step": 118500
},
{
"epoch": 0.65,
"learning_rate": 1.5688804114112647e-05,
"loss": 0.55,
"step": 119000
},
{
"epoch": 0.65,
"learning_rate": 1.5670689845684548e-05,
"loss": 0.5505,
"step": 119500
},
{
"epoch": 0.65,
"learning_rate": 1.565257557725645e-05,
"loss": 0.5498,
"step": 120000
},
{
"epoch": 0.65,
"learning_rate": 1.5634461308828352e-05,
"loss": 0.5477,
"step": 120500
},
{
"epoch": 0.66,
"learning_rate": 1.5616347040400256e-05,
"loss": 0.551,
"step": 121000
},
{
"epoch": 0.66,
"learning_rate": 1.5598232771972157e-05,
"loss": 0.5459,
"step": 121500
},
{
"epoch": 0.66,
"learning_rate": 1.5580118503544058e-05,
"loss": 0.5384,
"step": 122000
},
{
"epoch": 0.67,
"learning_rate": 1.556200423511596e-05,
"loss": 0.546,
"step": 122500
},
{
"epoch": 0.67,
"learning_rate": 1.5543889966687863e-05,
"loss": 0.5465,
"step": 123000
},
{
"epoch": 0.67,
"learning_rate": 1.5525775698259763e-05,
"loss": 0.5476,
"step": 123500
},
{
"epoch": 0.67,
"learning_rate": 1.5507661429831667e-05,
"loss": 0.5464,
"step": 124000
},
{
"epoch": 0.68,
"learning_rate": 1.5489547161403568e-05,
"loss": 0.5559,
"step": 124500
},
{
"epoch": 0.68,
"learning_rate": 1.547143289297547e-05,
"loss": 0.5457,
"step": 125000
},
{
"epoch": 0.68,
"learning_rate": 1.545331862454737e-05,
"loss": 0.5378,
"step": 125500
},
{
"epoch": 0.68,
"learning_rate": 1.5435204356119273e-05,
"loss": 0.5366,
"step": 126000
},
{
"epoch": 0.69,
"learning_rate": 1.5417090087691174e-05,
"loss": 0.5378,
"step": 126500
},
{
"epoch": 0.69,
"learning_rate": 1.5398975819263075e-05,
"loss": 0.5389,
"step": 127000
},
{
"epoch": 0.69,
"learning_rate": 1.538086155083498e-05,
"loss": 0.5346,
"step": 127500
},
{
"epoch": 0.7,
"learning_rate": 1.536274728240688e-05,
"loss": 0.5433,
"step": 128000
},
{
"epoch": 0.7,
"learning_rate": 1.5344633013978784e-05,
"loss": 0.5419,
"step": 128500
},
{
"epoch": 0.7,
"learning_rate": 1.5326518745550684e-05,
"loss": 0.5466,
"step": 129000
},
{
"epoch": 0.7,
"learning_rate": 1.530840447712259e-05,
"loss": 0.5411,
"step": 129500
},
{
"epoch": 0.71,
"learning_rate": 1.5290290208694486e-05,
"loss": 0.5319,
"step": 130000
},
{
"epoch": 0.71,
"learning_rate": 1.527217594026639e-05,
"loss": 0.5394,
"step": 130500
},
{
"epoch": 0.71,
"learning_rate": 1.5254061671838292e-05,
"loss": 0.5399,
"step": 131000
},
{
"epoch": 0.71,
"learning_rate": 1.5235947403410195e-05,
"loss": 0.5412,
"step": 131500
},
{
"epoch": 0.72,
"learning_rate": 1.5217833134982097e-05,
"loss": 0.5391,
"step": 132000
},
{
"epoch": 0.72,
"learning_rate": 1.5199718866553996e-05,
"loss": 0.5384,
"step": 132500
},
{
"epoch": 0.72,
"learning_rate": 1.5181604598125898e-05,
"loss": 0.5361,
"step": 133000
},
{
"epoch": 0.73,
"learning_rate": 1.51634903296978e-05,
"loss": 0.5432,
"step": 133500
},
{
"epoch": 0.73,
"learning_rate": 1.5145376061269703e-05,
"loss": 0.5459,
"step": 134000
},
{
"epoch": 0.73,
"learning_rate": 1.5127261792841605e-05,
"loss": 0.5244,
"step": 134500
},
{
"epoch": 0.73,
"learning_rate": 1.5109147524413506e-05,
"loss": 0.5311,
"step": 135000
},
{
"epoch": 0.74,
"learning_rate": 1.5091033255985409e-05,
"loss": 0.5387,
"step": 135500
},
{
"epoch": 0.74,
"learning_rate": 1.5072918987557311e-05,
"loss": 0.5246,
"step": 136000
},
{
"epoch": 0.74,
"learning_rate": 1.5054804719129212e-05,
"loss": 0.5395,
"step": 136500
},
{
"epoch": 0.74,
"learning_rate": 1.5036690450701114e-05,
"loss": 0.5313,
"step": 137000
},
{
"epoch": 0.75,
"learning_rate": 1.5018576182273016e-05,
"loss": 0.5359,
"step": 137500
},
{
"epoch": 0.75,
"learning_rate": 1.5000461913844917e-05,
"loss": 0.5185,
"step": 138000
},
{
"epoch": 0.75,
"learning_rate": 1.498234764541682e-05,
"loss": 0.529,
"step": 138500
},
{
"epoch": 0.76,
"learning_rate": 1.4964233376988722e-05,
"loss": 0.5356,
"step": 139000
},
{
"epoch": 0.76,
"learning_rate": 1.4946119108560624e-05,
"loss": 0.537,
"step": 139500
},
{
"epoch": 0.76,
"learning_rate": 1.4928004840132527e-05,
"loss": 0.537,
"step": 140000
},
{
"epoch": 0.76,
"learning_rate": 1.4909890571704426e-05,
"loss": 0.5379,
"step": 140500
},
{
"epoch": 0.77,
"learning_rate": 1.4891776303276328e-05,
"loss": 0.5389,
"step": 141000
},
{
"epoch": 0.77,
"learning_rate": 1.487366203484823e-05,
"loss": 0.531,
"step": 141500
},
{
"epoch": 0.77,
"learning_rate": 1.4855547766420133e-05,
"loss": 0.5342,
"step": 142000
},
{
"epoch": 0.77,
"learning_rate": 1.4837433497992035e-05,
"loss": 0.5316,
"step": 142500
},
{
"epoch": 0.78,
"learning_rate": 1.4819319229563936e-05,
"loss": 0.5342,
"step": 143000
},
{
"epoch": 0.78,
"learning_rate": 1.4801204961135838e-05,
"loss": 0.5291,
"step": 143500
},
{
"epoch": 0.78,
"learning_rate": 1.478309069270774e-05,
"loss": 0.5218,
"step": 144000
},
{
"epoch": 0.79,
"learning_rate": 1.4764976424279643e-05,
"loss": 0.5331,
"step": 144500
},
{
"epoch": 0.79,
"learning_rate": 1.4746862155851545e-05,
"loss": 0.5213,
"step": 145000
},
{
"epoch": 0.79,
"learning_rate": 1.4728747887423446e-05,
"loss": 0.5269,
"step": 145500
},
{
"epoch": 0.79,
"learning_rate": 1.4710633618995347e-05,
"loss": 0.526,
"step": 146000
},
{
"epoch": 0.8,
"learning_rate": 1.4692519350567249e-05,
"loss": 0.5268,
"step": 146500
},
{
"epoch": 0.8,
"learning_rate": 1.4674405082139151e-05,
"loss": 0.5219,
"step": 147000
},
{
"epoch": 0.8,
"learning_rate": 1.4656290813711054e-05,
"loss": 0.5286,
"step": 147500
},
{
"epoch": 0.8,
"learning_rate": 1.4638176545282956e-05,
"loss": 0.5271,
"step": 148000
},
{
"epoch": 0.81,
"learning_rate": 1.4620062276854857e-05,
"loss": 0.5165,
"step": 148500
},
{
"epoch": 0.81,
"learning_rate": 1.4601948008426758e-05,
"loss": 0.5255,
"step": 149000
},
{
"epoch": 0.81,
"learning_rate": 1.458383373999866e-05,
"loss": 0.5288,
"step": 149500
},
{
"epoch": 0.82,
"learning_rate": 1.4565719471570562e-05,
"loss": 0.5226,
"step": 150000
},
{
"epoch": 0.82,
"learning_rate": 1.4547605203142465e-05,
"loss": 0.5142,
"step": 150500
},
{
"epoch": 0.82,
"learning_rate": 1.4529490934714365e-05,
"loss": 0.52,
"step": 151000
},
{
"epoch": 0.82,
"learning_rate": 1.4511376666286268e-05,
"loss": 0.5175,
"step": 151500
},
{
"epoch": 0.83,
"learning_rate": 1.449326239785817e-05,
"loss": 0.5263,
"step": 152000
},
{
"epoch": 0.83,
"learning_rate": 1.4475148129430073e-05,
"loss": 0.5174,
"step": 152500
},
{
"epoch": 0.83,
"learning_rate": 1.4457033861001975e-05,
"loss": 0.5239,
"step": 153000
},
{
"epoch": 0.83,
"learning_rate": 1.4438919592573877e-05,
"loss": 0.5187,
"step": 153500
},
{
"epoch": 0.84,
"learning_rate": 1.4420805324145776e-05,
"loss": 0.5319,
"step": 154000
},
{
"epoch": 0.84,
"learning_rate": 1.4402691055717679e-05,
"loss": 0.5241,
"step": 154500
},
{
"epoch": 0.84,
"learning_rate": 1.4384576787289581e-05,
"loss": 0.528,
"step": 155000
},
{
"epoch": 0.85,
"learning_rate": 1.4366462518861484e-05,
"loss": 0.5241,
"step": 155500
},
{
"epoch": 0.85,
"learning_rate": 1.4348348250433386e-05,
"loss": 0.5145,
"step": 156000
},
{
"epoch": 0.85,
"learning_rate": 1.4330233982005287e-05,
"loss": 0.5182,
"step": 156500
},
{
"epoch": 0.85,
"learning_rate": 1.4312119713577189e-05,
"loss": 0.5195,
"step": 157000
},
{
"epoch": 0.86,
"learning_rate": 1.4294005445149091e-05,
"loss": 0.5316,
"step": 157500
},
{
"epoch": 0.86,
"learning_rate": 1.4275891176720992e-05,
"loss": 0.5158,
"step": 158000
},
{
"epoch": 0.86,
"learning_rate": 1.4257776908292894e-05,
"loss": 0.5183,
"step": 158500
},
{
"epoch": 0.86,
"learning_rate": 1.4239662639864795e-05,
"loss": 0.5197,
"step": 159000
},
{
"epoch": 0.87,
"learning_rate": 1.4221548371436698e-05,
"loss": 0.5204,
"step": 159500
},
{
"epoch": 0.87,
"learning_rate": 1.42034341030086e-05,
"loss": 0.517,
"step": 160000
},
{
"epoch": 0.87,
"learning_rate": 1.4185319834580502e-05,
"loss": 0.502,
"step": 160500
},
{
"epoch": 0.87,
"learning_rate": 1.4167205566152405e-05,
"loss": 0.5137,
"step": 161000
},
{
"epoch": 0.88,
"learning_rate": 1.4149091297724307e-05,
"loss": 0.5213,
"step": 161500
},
{
"epoch": 0.88,
"learning_rate": 1.4130977029296206e-05,
"loss": 0.5108,
"step": 162000
},
{
"epoch": 0.88,
"learning_rate": 1.4112862760868108e-05,
"loss": 0.527,
"step": 162500
},
{
"epoch": 0.89,
"learning_rate": 1.409474849244001e-05,
"loss": 0.517,
"step": 163000
},
{
"epoch": 0.89,
"learning_rate": 1.4076634224011913e-05,
"loss": 0.5206,
"step": 163500
},
{
"epoch": 0.89,
"learning_rate": 1.4058519955583816e-05,
"loss": 0.5184,
"step": 164000
},
{
"epoch": 0.89,
"learning_rate": 1.4040405687155716e-05,
"loss": 0.5033,
"step": 164500
},
{
"epoch": 0.9,
"learning_rate": 1.4022291418727619e-05,
"loss": 0.5171,
"step": 165000
},
{
"epoch": 0.9,
"learning_rate": 1.4004177150299521e-05,
"loss": 0.5046,
"step": 165500
},
{
"epoch": 0.9,
"learning_rate": 1.3986062881871423e-05,
"loss": 0.5158,
"step": 166000
},
{
"epoch": 0.9,
"learning_rate": 1.3967948613443326e-05,
"loss": 0.5076,
"step": 166500
},
{
"epoch": 0.91,
"learning_rate": 1.3949834345015225e-05,
"loss": 0.5028,
"step": 167000
},
{
"epoch": 0.91,
"learning_rate": 1.3931720076587127e-05,
"loss": 0.5171,
"step": 167500
},
{
"epoch": 0.91,
"learning_rate": 1.391360580815903e-05,
"loss": 0.5167,
"step": 168000
},
{
"epoch": 0.92,
"learning_rate": 1.3895491539730932e-05,
"loss": 0.5133,
"step": 168500
},
{
"epoch": 0.92,
"learning_rate": 1.3877377271302834e-05,
"loss": 0.5075,
"step": 169000
},
{
"epoch": 0.92,
"learning_rate": 1.3859263002874737e-05,
"loss": 0.5098,
"step": 169500
},
{
"epoch": 0.92,
"learning_rate": 1.3841148734446637e-05,
"loss": 0.5153,
"step": 170000
},
{
"epoch": 0.93,
"learning_rate": 1.382303446601854e-05,
"loss": 0.5136,
"step": 170500
},
{
"epoch": 0.93,
"learning_rate": 1.380492019759044e-05,
"loss": 0.5199,
"step": 171000
},
{
"epoch": 0.93,
"learning_rate": 1.3786805929162343e-05,
"loss": 0.514,
"step": 171500
},
{
"epoch": 0.93,
"learning_rate": 1.3768691660734245e-05,
"loss": 0.5127,
"step": 172000
},
{
"epoch": 0.94,
"learning_rate": 1.3750577392306146e-05,
"loss": 0.5134,
"step": 172500
},
{
"epoch": 0.94,
"learning_rate": 1.3732463123878048e-05,
"loss": 0.5131,
"step": 173000
},
{
"epoch": 0.94,
"learning_rate": 1.371434885544995e-05,
"loss": 0.5159,
"step": 173500
},
{
"epoch": 0.95,
"learning_rate": 1.3696234587021853e-05,
"loss": 0.501,
"step": 174000
},
{
"epoch": 0.95,
"learning_rate": 1.3678120318593755e-05,
"loss": 0.5102,
"step": 174500
},
{
"epoch": 0.95,
"learning_rate": 1.3660006050165654e-05,
"loss": 0.5086,
"step": 175000
},
{
"epoch": 0.95,
"learning_rate": 1.3641891781737557e-05,
"loss": 0.5087,
"step": 175500
},
{
"epoch": 0.96,
"learning_rate": 1.362377751330946e-05,
"loss": 0.505,
"step": 176000
},
{
"epoch": 0.96,
"learning_rate": 1.3605663244881362e-05,
"loss": 0.5039,
"step": 176500
},
{
"epoch": 0.96,
"learning_rate": 1.3587548976453264e-05,
"loss": 0.5048,
"step": 177000
},
{
"epoch": 0.96,
"learning_rate": 1.3569434708025166e-05,
"loss": 0.5114,
"step": 177500
},
{
"epoch": 0.97,
"learning_rate": 1.3551320439597067e-05,
"loss": 0.5129,
"step": 178000
},
{
"epoch": 0.97,
"learning_rate": 1.353320617116897e-05,
"loss": 0.5156,
"step": 178500
},
{
"epoch": 0.97,
"learning_rate": 1.3515091902740872e-05,
"loss": 0.5098,
"step": 179000
},
{
"epoch": 0.98,
"learning_rate": 1.3496977634312774e-05,
"loss": 0.5019,
"step": 179500
},
{
"epoch": 0.98,
"learning_rate": 1.3478863365884675e-05,
"loss": 0.5057,
"step": 180000
},
{
"epoch": 0.98,
"learning_rate": 1.3460749097456576e-05,
"loss": 0.5076,
"step": 180500
},
{
"epoch": 0.98,
"learning_rate": 1.3442634829028478e-05,
"loss": 0.4996,
"step": 181000
},
{
"epoch": 0.99,
"learning_rate": 1.342452056060038e-05,
"loss": 0.4991,
"step": 181500
},
{
"epoch": 0.99,
"learning_rate": 1.3406406292172283e-05,
"loss": 0.5052,
"step": 182000
},
{
"epoch": 0.99,
"learning_rate": 1.3388292023744185e-05,
"loss": 0.5078,
"step": 182500
},
{
"epoch": 0.99,
"learning_rate": 1.3370177755316086e-05,
"loss": 0.5016,
"step": 183000
},
{
"epoch": 1.0,
"learning_rate": 1.3352063486887986e-05,
"loss": 0.4934,
"step": 183500
},
{
"epoch": 1.0,
"learning_rate": 1.3333949218459889e-05,
"loss": 0.4921,
"step": 184000
},
{
"epoch": 1.0,
"eval_accuracy": 0.7691834002677376,
"eval_loss": 0.608788013458252,
"eval_runtime": 73.5075,
"eval_samples_per_second": 508.111,
"eval_steps_per_second": 63.517,
"step": 184017
},
{
"epoch": 1.0,
"learning_rate": 1.3315834950031791e-05,
"loss": 0.4487,
"step": 184500
},
{
"epoch": 1.01,
"learning_rate": 1.3297720681603694e-05,
"loss": 0.4546,
"step": 185000
},
{
"epoch": 1.01,
"learning_rate": 1.3279606413175596e-05,
"loss": 0.4561,
"step": 185500
},
{
"epoch": 1.01,
"learning_rate": 1.3261492144747497e-05,
"loss": 0.4585,
"step": 186000
},
{
"epoch": 1.01,
"learning_rate": 1.3243377876319399e-05,
"loss": 0.4593,
"step": 186500
},
{
"epoch": 1.02,
"learning_rate": 1.3225263607891301e-05,
"loss": 0.4664,
"step": 187000
},
{
"epoch": 1.02,
"learning_rate": 1.3207149339463204e-05,
"loss": 0.445,
"step": 187500
},
{
"epoch": 1.02,
"learning_rate": 1.3189035071035106e-05,
"loss": 0.4663,
"step": 188000
},
{
"epoch": 1.02,
"learning_rate": 1.3170920802607005e-05,
"loss": 0.4495,
"step": 188500
},
{
"epoch": 1.03,
"learning_rate": 1.3152806534178908e-05,
"loss": 0.4525,
"step": 189000
},
{
"epoch": 1.03,
"learning_rate": 1.313469226575081e-05,
"loss": 0.4503,
"step": 189500
},
{
"epoch": 1.03,
"learning_rate": 1.3116577997322712e-05,
"loss": 0.4409,
"step": 190000
},
{
"epoch": 1.04,
"learning_rate": 1.3098463728894615e-05,
"loss": 0.4526,
"step": 190500
},
{
"epoch": 1.04,
"learning_rate": 1.3080349460466515e-05,
"loss": 0.4601,
"step": 191000
},
{
"epoch": 1.04,
"learning_rate": 1.3062235192038418e-05,
"loss": 0.4621,
"step": 191500
},
{
"epoch": 1.04,
"learning_rate": 1.304412092361032e-05,
"loss": 0.4522,
"step": 192000
},
{
"epoch": 1.05,
"learning_rate": 1.3026006655182221e-05,
"loss": 0.458,
"step": 192500
},
{
"epoch": 1.05,
"learning_rate": 1.3007892386754123e-05,
"loss": 0.4541,
"step": 193000
},
{
"epoch": 1.05,
"learning_rate": 1.2989778118326026e-05,
"loss": 0.453,
"step": 193500
},
{
"epoch": 1.05,
"learning_rate": 1.2971663849897926e-05,
"loss": 0.4488,
"step": 194000
},
{
"epoch": 1.06,
"learning_rate": 1.2953549581469829e-05,
"loss": 0.4524,
"step": 194500
},
{
"epoch": 1.06,
"learning_rate": 1.2935435313041731e-05,
"loss": 0.4448,
"step": 195000
},
{
"epoch": 1.06,
"learning_rate": 1.2917321044613634e-05,
"loss": 0.451,
"step": 195500
},
{
"epoch": 1.07,
"learning_rate": 1.2899206776185536e-05,
"loss": 0.4493,
"step": 196000
},
{
"epoch": 1.07,
"learning_rate": 1.2881092507757435e-05,
"loss": 0.4441,
"step": 196500
},
{
"epoch": 1.07,
"learning_rate": 1.2862978239329337e-05,
"loss": 0.4514,
"step": 197000
},
{
"epoch": 1.07,
"learning_rate": 1.284486397090124e-05,
"loss": 0.4497,
"step": 197500
},
{
"epoch": 1.08,
"learning_rate": 1.2826749702473142e-05,
"loss": 0.4515,
"step": 198000
},
{
"epoch": 1.08,
"learning_rate": 1.2808635434045044e-05,
"loss": 0.4563,
"step": 198500
},
{
"epoch": 1.08,
"learning_rate": 1.2790521165616945e-05,
"loss": 0.4573,
"step": 199000
},
{
"epoch": 1.08,
"learning_rate": 1.2772406897188848e-05,
"loss": 0.4535,
"step": 199500
},
{
"epoch": 1.09,
"learning_rate": 1.275429262876075e-05,
"loss": 0.446,
"step": 200000
},
{
"epoch": 1.09,
"learning_rate": 1.2736178360332652e-05,
"loss": 0.4543,
"step": 200500
},
{
"epoch": 1.09,
"learning_rate": 1.2718064091904555e-05,
"loss": 0.4575,
"step": 201000
},
{
"epoch": 1.1,
"learning_rate": 1.2699949823476457e-05,
"loss": 0.4424,
"step": 201500
},
{
"epoch": 1.1,
"learning_rate": 1.2681835555048356e-05,
"loss": 0.4448,
"step": 202000
},
{
"epoch": 1.1,
"learning_rate": 1.2663721286620258e-05,
"loss": 0.4517,
"step": 202500
},
{
"epoch": 1.1,
"learning_rate": 1.264560701819216e-05,
"loss": 0.4565,
"step": 203000
},
{
"epoch": 1.11,
"learning_rate": 1.2627492749764063e-05,
"loss": 0.4557,
"step": 203500
},
{
"epoch": 1.11,
"learning_rate": 1.2609378481335966e-05,
"loss": 0.4407,
"step": 204000
},
{
"epoch": 1.11,
"learning_rate": 1.2591264212907866e-05,
"loss": 0.4484,
"step": 204500
},
{
"epoch": 1.11,
"learning_rate": 1.2573149944479767e-05,
"loss": 0.4494,
"step": 205000
},
{
"epoch": 1.12,
"learning_rate": 1.255503567605167e-05,
"loss": 0.4567,
"step": 205500
},
{
"epoch": 1.12,
"learning_rate": 1.2536921407623572e-05,
"loss": 0.4476,
"step": 206000
},
{
"epoch": 1.12,
"learning_rate": 1.2518807139195474e-05,
"loss": 0.4602,
"step": 206500
},
{
"epoch": 1.12,
"learning_rate": 1.2500692870767375e-05,
"loss": 0.4417,
"step": 207000
},
{
"epoch": 1.13,
"learning_rate": 1.2482578602339277e-05,
"loss": 0.45,
"step": 207500
},
{
"epoch": 1.13,
"learning_rate": 1.246446433391118e-05,
"loss": 0.4425,
"step": 208000
},
{
"epoch": 1.13,
"learning_rate": 1.2446350065483082e-05,
"loss": 0.4494,
"step": 208500
},
{
"epoch": 1.14,
"learning_rate": 1.2428235797054984e-05,
"loss": 0.4524,
"step": 209000
},
{
"epoch": 1.14,
"learning_rate": 1.2410121528626887e-05,
"loss": 0.4437,
"step": 209500
},
{
"epoch": 1.14,
"learning_rate": 1.2392007260198786e-05,
"loss": 0.4477,
"step": 210000
},
{
"epoch": 1.14,
"learning_rate": 1.2373892991770688e-05,
"loss": 0.4369,
"step": 210500
},
{
"epoch": 1.15,
"learning_rate": 1.235577872334259e-05,
"loss": 0.4425,
"step": 211000
},
{
"epoch": 1.15,
"learning_rate": 1.2337664454914493e-05,
"loss": 0.4428,
"step": 211500
},
{
"epoch": 1.15,
"learning_rate": 1.2319550186486395e-05,
"loss": 0.4608,
"step": 212000
},
{
"epoch": 1.15,
"learning_rate": 1.2301435918058296e-05,
"loss": 0.4512,
"step": 212500
},
{
"epoch": 1.16,
"learning_rate": 1.2283321649630198e-05,
"loss": 0.4416,
"step": 213000
},
{
"epoch": 1.16,
"learning_rate": 1.22652073812021e-05,
"loss": 0.4542,
"step": 213500
},
{
"epoch": 1.16,
"learning_rate": 1.2247093112774003e-05,
"loss": 0.4418,
"step": 214000
},
{
"epoch": 1.17,
"learning_rate": 1.2228978844345904e-05,
"loss": 0.4437,
"step": 214500
},
{
"epoch": 1.17,
"learning_rate": 1.2210864575917804e-05,
"loss": 0.454,
"step": 215000
},
{
"epoch": 1.17,
"learning_rate": 1.2192750307489707e-05,
"loss": 0.4392,
"step": 215500
},
{
"epoch": 1.17,
"learning_rate": 1.217463603906161e-05,
"loss": 0.4426,
"step": 216000
},
{
"epoch": 1.18,
"learning_rate": 1.2156521770633512e-05,
"loss": 0.455,
"step": 216500
},
{
"epoch": 1.18,
"learning_rate": 1.2138407502205414e-05,
"loss": 0.4429,
"step": 217000
},
{
"epoch": 1.18,
"learning_rate": 1.2120293233777316e-05,
"loss": 0.4441,
"step": 217500
},
{
"epoch": 1.18,
"learning_rate": 1.2102178965349215e-05,
"loss": 0.448,
"step": 218000
},
{
"epoch": 1.19,
"learning_rate": 1.2084064696921118e-05,
"loss": 0.4568,
"step": 218500
},
{
"epoch": 1.19,
"learning_rate": 1.206595042849302e-05,
"loss": 0.4458,
"step": 219000
},
{
"epoch": 1.19,
"learning_rate": 1.2047836160064922e-05,
"loss": 0.4448,
"step": 219500
},
{
"epoch": 1.2,
"learning_rate": 1.2029721891636825e-05,
"loss": 0.4444,
"step": 220000
},
{
"epoch": 1.2,
"learning_rate": 1.2011607623208726e-05,
"loss": 0.4425,
"step": 220500
},
{
"epoch": 1.2,
"learning_rate": 1.1993493354780628e-05,
"loss": 0.4401,
"step": 221000
},
{
"epoch": 1.2,
"learning_rate": 1.197537908635253e-05,
"loss": 0.4464,
"step": 221500
},
{
"epoch": 1.21,
"learning_rate": 1.1957264817924433e-05,
"loss": 0.44,
"step": 222000
},
{
"epoch": 1.21,
"learning_rate": 1.1939150549496335e-05,
"loss": 0.4378,
"step": 222500
},
{
"epoch": 1.21,
"learning_rate": 1.1921036281068234e-05,
"loss": 0.4333,
"step": 223000
},
{
"epoch": 1.21,
"learning_rate": 1.1902922012640136e-05,
"loss": 0.442,
"step": 223500
},
{
"epoch": 1.22,
"learning_rate": 1.1884807744212039e-05,
"loss": 0.4402,
"step": 224000
},
{
"epoch": 1.22,
"learning_rate": 1.1866693475783941e-05,
"loss": 0.4532,
"step": 224500
},
{
"epoch": 1.22,
"learning_rate": 1.1848579207355844e-05,
"loss": 0.4469,
"step": 225000
},
{
"epoch": 1.23,
"learning_rate": 1.1830464938927746e-05,
"loss": 0.4428,
"step": 225500
},
{
"epoch": 1.23,
"learning_rate": 1.1812350670499647e-05,
"loss": 0.4448,
"step": 226000
},
{
"epoch": 1.23,
"learning_rate": 1.1794236402071549e-05,
"loss": 0.4431,
"step": 226500
},
{
"epoch": 1.23,
"learning_rate": 1.177612213364345e-05,
"loss": 0.4407,
"step": 227000
},
{
"epoch": 1.24,
"learning_rate": 1.1758007865215352e-05,
"loss": 0.4364,
"step": 227500
},
{
"epoch": 1.24,
"learning_rate": 1.1739893596787255e-05,
"loss": 0.4269,
"step": 228000
},
{
"epoch": 1.24,
"learning_rate": 1.1721779328359155e-05,
"loss": 0.4421,
"step": 228500
},
{
"epoch": 1.24,
"learning_rate": 1.1703665059931058e-05,
"loss": 0.4366,
"step": 229000
},
{
"epoch": 1.25,
"learning_rate": 1.168555079150296e-05,
"loss": 0.4459,
"step": 229500
},
{
"epoch": 1.25,
"learning_rate": 1.1667436523074862e-05,
"loss": 0.4415,
"step": 230000
},
{
"epoch": 1.25,
"learning_rate": 1.1649322254646765e-05,
"loss": 0.4333,
"step": 230500
},
{
"epoch": 1.26,
"learning_rate": 1.1631207986218664e-05,
"loss": 0.4407,
"step": 231000
},
{
"epoch": 1.26,
"learning_rate": 1.1613093717790566e-05,
"loss": 0.4346,
"step": 231500
},
{
"epoch": 1.26,
"learning_rate": 1.1594979449362469e-05,
"loss": 0.4321,
"step": 232000
},
{
"epoch": 1.26,
"learning_rate": 1.1576865180934371e-05,
"loss": 0.4289,
"step": 232500
},
{
"epoch": 1.27,
"learning_rate": 1.1558750912506273e-05,
"loss": 0.432,
"step": 233000
},
{
"epoch": 1.27,
"learning_rate": 1.1540636644078176e-05,
"loss": 0.4404,
"step": 233500
},
{
"epoch": 1.27,
"learning_rate": 1.1522522375650076e-05,
"loss": 0.4398,
"step": 234000
},
{
"epoch": 1.27,
"learning_rate": 1.1504408107221979e-05,
"loss": 0.4326,
"step": 234500
},
{
"epoch": 1.28,
"learning_rate": 1.1486293838793881e-05,
"loss": 0.4309,
"step": 235000
},
{
"epoch": 1.28,
"learning_rate": 1.1468179570365784e-05,
"loss": 0.4455,
"step": 235500
},
{
"epoch": 1.28,
"learning_rate": 1.1450065301937686e-05,
"loss": 0.4274,
"step": 236000
},
{
"epoch": 1.29,
"learning_rate": 1.1431951033509585e-05,
"loss": 0.4313,
"step": 236500
},
{
"epoch": 1.29,
"learning_rate": 1.1413836765081487e-05,
"loss": 0.4412,
"step": 237000
},
{
"epoch": 1.29,
"learning_rate": 1.139572249665339e-05,
"loss": 0.4319,
"step": 237500
},
{
"epoch": 1.29,
"learning_rate": 1.1377608228225292e-05,
"loss": 0.432,
"step": 238000
},
{
"epoch": 1.3,
"learning_rate": 1.1359493959797194e-05,
"loss": 0.4306,
"step": 238500
},
{
"epoch": 1.3,
"learning_rate": 1.1341379691369097e-05,
"loss": 0.4291,
"step": 239000
},
{
"epoch": 1.3,
"learning_rate": 1.1323265422940996e-05,
"loss": 0.4266,
"step": 239500
},
{
"epoch": 1.3,
"learning_rate": 1.1305151154512898e-05,
"loss": 0.4338,
"step": 240000
},
{
"epoch": 1.31,
"learning_rate": 1.12870368860848e-05,
"loss": 0.4323,
"step": 240500
},
{
"epoch": 1.31,
"learning_rate": 1.1268922617656703e-05,
"loss": 0.4307,
"step": 241000
},
{
"epoch": 1.31,
"learning_rate": 1.1250808349228605e-05,
"loss": 0.4382,
"step": 241500
},
{
"epoch": 1.32,
"learning_rate": 1.1232694080800506e-05,
"loss": 0.4332,
"step": 242000
},
{
"epoch": 1.32,
"learning_rate": 1.1214579812372408e-05,
"loss": 0.4299,
"step": 242500
},
{
"epoch": 1.32,
"learning_rate": 1.119646554394431e-05,
"loss": 0.4319,
"step": 243000
},
{
"epoch": 1.32,
"learning_rate": 1.1178351275516213e-05,
"loss": 0.4387,
"step": 243500
},
{
"epoch": 1.33,
"learning_rate": 1.1160237007088116e-05,
"loss": 0.4247,
"step": 244000
},
{
"epoch": 1.33,
"learning_rate": 1.1142122738660015e-05,
"loss": 0.425,
"step": 244500
},
{
"epoch": 1.33,
"learning_rate": 1.1124008470231917e-05,
"loss": 0.4255,
"step": 245000
},
{
"epoch": 1.33,
"learning_rate": 1.110589420180382e-05,
"loss": 0.4355,
"step": 245500
},
{
"epoch": 1.34,
"learning_rate": 1.1087779933375722e-05,
"loss": 0.4358,
"step": 246000
},
{
"epoch": 1.34,
"learning_rate": 1.1069665664947624e-05,
"loss": 0.4247,
"step": 246500
},
{
"epoch": 1.34,
"learning_rate": 1.1051551396519526e-05,
"loss": 0.4384,
"step": 247000
},
{
"epoch": 1.34,
"learning_rate": 1.1033437128091427e-05,
"loss": 0.4423,
"step": 247500
},
{
"epoch": 1.35,
"learning_rate": 1.101532285966333e-05,
"loss": 0.4361,
"step": 248000
},
{
"epoch": 1.35,
"learning_rate": 1.0997208591235232e-05,
"loss": 0.4414,
"step": 248500
},
{
"epoch": 1.35,
"learning_rate": 1.0979094322807133e-05,
"loss": 0.4313,
"step": 249000
},
{
"epoch": 1.36,
"learning_rate": 1.0960980054379035e-05,
"loss": 0.4205,
"step": 249500
},
{
"epoch": 1.36,
"learning_rate": 1.0942865785950936e-05,
"loss": 0.431,
"step": 250000
},
{
"epoch": 1.36,
"learning_rate": 1.0924751517522838e-05,
"loss": 0.4307,
"step": 250500
},
{
"epoch": 1.36,
"learning_rate": 1.090663724909474e-05,
"loss": 0.4341,
"step": 251000
},
{
"epoch": 1.37,
"learning_rate": 1.0888522980666643e-05,
"loss": 0.4334,
"step": 251500
},
{
"epoch": 1.37,
"learning_rate": 1.0870408712238545e-05,
"loss": 0.4262,
"step": 252000
},
{
"epoch": 1.37,
"learning_rate": 1.0852294443810444e-05,
"loss": 0.4367,
"step": 252500
},
{
"epoch": 1.37,
"learning_rate": 1.0834180175382347e-05,
"loss": 0.4278,
"step": 253000
},
{
"epoch": 1.38,
"learning_rate": 1.0816065906954249e-05,
"loss": 0.4359,
"step": 253500
},
{
"epoch": 1.38,
"learning_rate": 1.0797951638526151e-05,
"loss": 0.4341,
"step": 254000
},
{
"epoch": 1.38,
"learning_rate": 1.0779837370098054e-05,
"loss": 0.4285,
"step": 254500
},
{
"epoch": 1.39,
"learning_rate": 1.0761723101669956e-05,
"loss": 0.4346,
"step": 255000
},
{
"epoch": 1.39,
"learning_rate": 1.0743608833241857e-05,
"loss": 0.4232,
"step": 255500
},
{
"epoch": 1.39,
"learning_rate": 1.072549456481376e-05,
"loss": 0.4334,
"step": 256000
},
{
"epoch": 1.39,
"learning_rate": 1.0707380296385662e-05,
"loss": 0.4303,
"step": 256500
},
{
"epoch": 1.4,
"learning_rate": 1.0689266027957564e-05,
"loss": 0.4312,
"step": 257000
},
{
"epoch": 1.4,
"learning_rate": 1.0671151759529466e-05,
"loss": 0.4251,
"step": 257500
},
{
"epoch": 1.4,
"learning_rate": 1.0653037491101365e-05,
"loss": 0.4317,
"step": 258000
},
{
"epoch": 1.4,
"learning_rate": 1.0634923222673268e-05,
"loss": 0.4249,
"step": 258500
},
{
"epoch": 1.41,
"learning_rate": 1.061680895424517e-05,
"loss": 0.4256,
"step": 259000
},
{
"epoch": 1.41,
"learning_rate": 1.0598694685817072e-05,
"loss": 0.419,
"step": 259500
},
{
"epoch": 1.41,
"learning_rate": 1.0580580417388975e-05,
"loss": 0.4262,
"step": 260000
},
{
"epoch": 1.42,
"learning_rate": 1.0562466148960876e-05,
"loss": 0.4272,
"step": 260500
},
{
"epoch": 1.42,
"learning_rate": 1.0544351880532778e-05,
"loss": 0.4283,
"step": 261000
},
{
"epoch": 1.42,
"learning_rate": 1.0526237612104679e-05,
"loss": 0.4168,
"step": 261500
},
{
"epoch": 1.42,
"learning_rate": 1.0508123343676581e-05,
"loss": 0.4246,
"step": 262000
},
{
"epoch": 1.43,
"learning_rate": 1.0490009075248483e-05,
"loss": 0.4214,
"step": 262500
},
{
"epoch": 1.43,
"learning_rate": 1.0471894806820386e-05,
"loss": 0.4246,
"step": 263000
},
{
"epoch": 1.43,
"learning_rate": 1.0453780538392286e-05,
"loss": 0.4225,
"step": 263500
},
{
"epoch": 1.43,
"learning_rate": 1.0435666269964189e-05,
"loss": 0.416,
"step": 264000
},
{
"epoch": 1.44,
"learning_rate": 1.0417552001536091e-05,
"loss": 0.4195,
"step": 264500
},
{
"epoch": 1.44,
"learning_rate": 1.0399437733107994e-05,
"loss": 0.4237,
"step": 265000
},
{
"epoch": 1.44,
"learning_rate": 1.0381323464679896e-05,
"loss": 0.4198,
"step": 265500
},
{
"epoch": 1.45,
"learning_rate": 1.0363209196251795e-05,
"loss": 0.4213,
"step": 266000
},
{
"epoch": 1.45,
"learning_rate": 1.0345094927823697e-05,
"loss": 0.4221,
"step": 266500
},
{
"epoch": 1.45,
"learning_rate": 1.03269806593956e-05,
"loss": 0.4242,
"step": 267000
},
{
"epoch": 1.45,
"learning_rate": 1.0308866390967502e-05,
"loss": 0.4248,
"step": 267500
},
{
"epoch": 1.46,
"learning_rate": 1.0290752122539405e-05,
"loss": 0.4233,
"step": 268000
},
{
"epoch": 1.46,
"learning_rate": 1.0272637854111305e-05,
"loss": 0.4348,
"step": 268500
},
{
"epoch": 1.46,
"learning_rate": 1.0254523585683208e-05,
"loss": 0.4205,
"step": 269000
},
{
"epoch": 1.46,
"learning_rate": 1.023640931725511e-05,
"loss": 0.425,
"step": 269500
},
{
"epoch": 1.47,
"learning_rate": 1.0218295048827012e-05,
"loss": 0.422,
"step": 270000
},
{
"epoch": 1.47,
"learning_rate": 1.0200180780398915e-05,
"loss": 0.416,
"step": 270500
},
{
"epoch": 1.47,
"learning_rate": 1.0182066511970815e-05,
"loss": 0.425,
"step": 271000
},
{
"epoch": 1.48,
"learning_rate": 1.0163952243542716e-05,
"loss": 0.4111,
"step": 271500
},
{
"epoch": 1.48,
"learning_rate": 1.0145837975114619e-05,
"loss": 0.4122,
"step": 272000
},
{
"epoch": 1.48,
"learning_rate": 1.0127723706686521e-05,
"loss": 0.4197,
"step": 272500
},
{
"epoch": 1.48,
"learning_rate": 1.0109609438258423e-05,
"loss": 0.4158,
"step": 273000
},
{
"epoch": 1.49,
"learning_rate": 1.0091495169830326e-05,
"loss": 0.4223,
"step": 273500
},
{
"epoch": 1.49,
"learning_rate": 1.0073380901402225e-05,
"loss": 0.421,
"step": 274000
},
{
"epoch": 1.49,
"learning_rate": 1.0055266632974127e-05,
"loss": 0.4187,
"step": 274500
},
{
"epoch": 1.49,
"learning_rate": 1.003715236454603e-05,
"loss": 0.4184,
"step": 275000
},
{
"epoch": 1.5,
"learning_rate": 1.0019038096117932e-05,
"loss": 0.4247,
"step": 275500
},
{
"epoch": 1.5,
"learning_rate": 1.0000923827689834e-05,
"loss": 0.4215,
"step": 276000
},
{
"epoch": 1.5,
"learning_rate": 9.982809559261737e-06,
"loss": 0.4205,
"step": 276500
},
{
"epoch": 1.51,
"learning_rate": 9.964695290833637e-06,
"loss": 0.4129,
"step": 277000
},
{
"epoch": 1.51,
"learning_rate": 9.94658102240554e-06,
"loss": 0.4107,
"step": 277500
},
{
"epoch": 1.51,
"learning_rate": 9.928466753977442e-06,
"loss": 0.4163,
"step": 278000
},
{
"epoch": 1.51,
"learning_rate": 9.910352485549343e-06,
"loss": 0.4215,
"step": 278500
},
{
"epoch": 1.52,
"learning_rate": 9.892238217121245e-06,
"loss": 0.4195,
"step": 279000
},
{
"epoch": 1.52,
"learning_rate": 9.874123948693147e-06,
"loss": 0.4166,
"step": 279500
},
{
"epoch": 1.52,
"learning_rate": 9.856009680265048e-06,
"loss": 0.4144,
"step": 280000
},
{
"epoch": 1.52,
"learning_rate": 9.83789541183695e-06,
"loss": 0.4196,
"step": 280500
},
{
"epoch": 1.53,
"learning_rate": 9.819781143408853e-06,
"loss": 0.419,
"step": 281000
},
{
"epoch": 1.53,
"learning_rate": 9.801666874980754e-06,
"loss": 0.4086,
"step": 281500
},
{
"epoch": 1.53,
"learning_rate": 9.783552606552656e-06,
"loss": 0.414,
"step": 282000
},
{
"epoch": 1.54,
"learning_rate": 9.765438338124558e-06,
"loss": 0.4138,
"step": 282500
},
{
"epoch": 1.54,
"learning_rate": 9.74732406969646e-06,
"loss": 0.419,
"step": 283000
},
{
"epoch": 1.54,
"learning_rate": 9.729209801268361e-06,
"loss": 0.4183,
"step": 283500
},
{
"epoch": 1.54,
"learning_rate": 9.711095532840264e-06,
"loss": 0.418,
"step": 284000
},
{
"epoch": 1.55,
"learning_rate": 9.692981264412166e-06,
"loss": 0.4183,
"step": 284500
},
{
"epoch": 1.55,
"learning_rate": 9.674866995984069e-06,
"loss": 0.411,
"step": 285000
},
{
"epoch": 1.55,
"learning_rate": 9.65675272755597e-06,
"loss": 0.4199,
"step": 285500
},
{
"epoch": 1.55,
"learning_rate": 9.638638459127872e-06,
"loss": 0.4087,
"step": 286000
},
{
"epoch": 1.56,
"learning_rate": 9.620524190699772e-06,
"loss": 0.4163,
"step": 286500
},
{
"epoch": 1.56,
"learning_rate": 9.602409922271675e-06,
"loss": 0.4207,
"step": 287000
},
{
"epoch": 1.56,
"learning_rate": 9.584295653843577e-06,
"loss": 0.4079,
"step": 287500
},
{
"epoch": 1.57,
"learning_rate": 9.566181385415478e-06,
"loss": 0.42,
"step": 288000
},
{
"epoch": 1.57,
"learning_rate": 9.54806711698738e-06,
"loss": 0.4193,
"step": 288500
},
{
"epoch": 1.57,
"learning_rate": 9.529952848559283e-06,
"loss": 0.4206,
"step": 289000
},
{
"epoch": 1.57,
"learning_rate": 9.511838580131183e-06,
"loss": 0.4255,
"step": 289500
},
{
"epoch": 1.58,
"learning_rate": 9.493724311703086e-06,
"loss": 0.4177,
"step": 290000
},
{
"epoch": 1.58,
"learning_rate": 9.475610043274988e-06,
"loss": 0.4164,
"step": 290500
},
{
"epoch": 1.58,
"learning_rate": 9.45749577484689e-06,
"loss": 0.4086,
"step": 291000
},
{
"epoch": 1.58,
"learning_rate": 9.439381506418793e-06,
"loss": 0.4062,
"step": 291500
},
{
"epoch": 1.59,
"learning_rate": 9.421267237990694e-06,
"loss": 0.4027,
"step": 292000
},
{
"epoch": 1.59,
"learning_rate": 9.403152969562596e-06,
"loss": 0.4084,
"step": 292500
},
{
"epoch": 1.59,
"learning_rate": 9.385038701134498e-06,
"loss": 0.4199,
"step": 293000
},
{
"epoch": 1.59,
"learning_rate": 9.366924432706399e-06,
"loss": 0.4071,
"step": 293500
},
{
"epoch": 1.6,
"learning_rate": 9.348810164278301e-06,
"loss": 0.4062,
"step": 294000
},
{
"epoch": 1.6,
"learning_rate": 9.330695895850202e-06,
"loss": 0.4148,
"step": 294500
},
{
"epoch": 1.6,
"learning_rate": 9.312581627422104e-06,
"loss": 0.408,
"step": 295000
},
{
"epoch": 1.61,
"learning_rate": 9.294467358994007e-06,
"loss": 0.4072,
"step": 295500
},
{
"epoch": 1.61,
"learning_rate": 9.276353090565907e-06,
"loss": 0.4084,
"step": 296000
},
{
"epoch": 1.61,
"learning_rate": 9.25823882213781e-06,
"loss": 0.4089,
"step": 296500
},
{
"epoch": 1.61,
"learning_rate": 9.240124553709712e-06,
"loss": 0.4154,
"step": 297000
},
{
"epoch": 1.62,
"learning_rate": 9.222010285281615e-06,
"loss": 0.4074,
"step": 297500
},
{
"epoch": 1.62,
"learning_rate": 9.203896016853517e-06,
"loss": 0.4097,
"step": 298000
},
{
"epoch": 1.62,
"learning_rate": 9.185781748425418e-06,
"loss": 0.4162,
"step": 298500
},
{
"epoch": 1.62,
"learning_rate": 9.16766747999732e-06,
"loss": 0.4076,
"step": 299000
},
{
"epoch": 1.63,
"learning_rate": 9.149553211569222e-06,
"loss": 0.4,
"step": 299500
},
{
"epoch": 1.63,
"learning_rate": 9.131438943141123e-06,
"loss": 0.4035,
"step": 300000
},
{
"epoch": 1.63,
"learning_rate": 9.113324674713026e-06,
"loss": 0.4042,
"step": 300500
},
{
"epoch": 1.64,
"learning_rate": 9.095210406284928e-06,
"loss": 0.4115,
"step": 301000
},
{
"epoch": 1.64,
"learning_rate": 9.077096137856829e-06,
"loss": 0.4042,
"step": 301500
},
{
"epoch": 1.64,
"learning_rate": 9.058981869428731e-06,
"loss": 0.4112,
"step": 302000
},
{
"epoch": 1.64,
"learning_rate": 9.040867601000632e-06,
"loss": 0.4039,
"step": 302500
},
{
"epoch": 1.65,
"learning_rate": 9.022753332572534e-06,
"loss": 0.4083,
"step": 303000
},
{
"epoch": 1.65,
"learning_rate": 9.004639064144436e-06,
"loss": 0.4055,
"step": 303500
},
{
"epoch": 1.65,
"learning_rate": 8.986524795716339e-06,
"loss": 0.4026,
"step": 304000
},
{
"epoch": 1.65,
"learning_rate": 8.968410527288241e-06,
"loss": 0.4144,
"step": 304500
},
{
"epoch": 1.66,
"learning_rate": 8.950296258860142e-06,
"loss": 0.4055,
"step": 305000
},
{
"epoch": 1.66,
"learning_rate": 8.932181990432044e-06,
"loss": 0.4084,
"step": 305500
},
{
"epoch": 1.66,
"learning_rate": 8.914067722003947e-06,
"loss": 0.4082,
"step": 306000
},
{
"epoch": 1.67,
"learning_rate": 8.895953453575847e-06,
"loss": 0.4095,
"step": 306500
},
{
"epoch": 1.67,
"learning_rate": 8.87783918514775e-06,
"loss": 0.4107,
"step": 307000
},
{
"epoch": 1.67,
"learning_rate": 8.859724916719652e-06,
"loss": 0.4046,
"step": 307500
},
{
"epoch": 1.67,
"learning_rate": 8.841610648291553e-06,
"loss": 0.4032,
"step": 308000
},
{
"epoch": 1.68,
"learning_rate": 8.823496379863455e-06,
"loss": 0.411,
"step": 308500
},
{
"epoch": 1.68,
"learning_rate": 8.805382111435358e-06,
"loss": 0.4027,
"step": 309000
},
{
"epoch": 1.68,
"learning_rate": 8.787267843007258e-06,
"loss": 0.3982,
"step": 309500
},
{
"epoch": 1.68,
"learning_rate": 8.76915357457916e-06,
"loss": 0.3971,
"step": 310000
},
{
"epoch": 1.69,
"learning_rate": 8.751039306151063e-06,
"loss": 0.4027,
"step": 310500
},
{
"epoch": 1.69,
"learning_rate": 8.732925037722965e-06,
"loss": 0.4059,
"step": 311000
},
{
"epoch": 1.69,
"learning_rate": 8.714810769294866e-06,
"loss": 0.4021,
"step": 311500
},
{
"epoch": 1.7,
"learning_rate": 8.696696500866769e-06,
"loss": 0.4134,
"step": 312000
},
{
"epoch": 1.7,
"learning_rate": 8.678582232438671e-06,
"loss": 0.3985,
"step": 312500
},
{
"epoch": 1.7,
"learning_rate": 8.660467964010573e-06,
"loss": 0.3918,
"step": 313000
},
{
"epoch": 1.7,
"learning_rate": 8.642353695582474e-06,
"loss": 0.394,
"step": 313500
},
{
"epoch": 1.71,
"learning_rate": 8.624239427154376e-06,
"loss": 0.4117,
"step": 314000
},
{
"epoch": 1.71,
"learning_rate": 8.606125158726277e-06,
"loss": 0.4086,
"step": 314500
},
{
"epoch": 1.71,
"learning_rate": 8.58801089029818e-06,
"loss": 0.4041,
"step": 315000
},
{
"epoch": 1.71,
"learning_rate": 8.569896621870082e-06,
"loss": 0.4001,
"step": 315500
},
{
"epoch": 1.72,
"learning_rate": 8.551782353441982e-06,
"loss": 0.3984,
"step": 316000
},
{
"epoch": 1.72,
"learning_rate": 8.533668085013885e-06,
"loss": 0.4075,
"step": 316500
},
{
"epoch": 1.72,
"learning_rate": 8.515553816585787e-06,
"loss": 0.3936,
"step": 317000
},
{
"epoch": 1.73,
"learning_rate": 8.49743954815769e-06,
"loss": 0.4087,
"step": 317500
},
{
"epoch": 1.73,
"learning_rate": 8.47932527972959e-06,
"loss": 0.4097,
"step": 318000
},
{
"epoch": 1.73,
"learning_rate": 8.461211011301493e-06,
"loss": 0.3958,
"step": 318500
},
{
"epoch": 1.73,
"learning_rate": 8.443096742873395e-06,
"loss": 0.3968,
"step": 319000
},
{
"epoch": 1.74,
"learning_rate": 8.424982474445297e-06,
"loss": 0.4095,
"step": 319500
},
{
"epoch": 1.74,
"learning_rate": 8.406868206017198e-06,
"loss": 0.398,
"step": 320000
},
{
"epoch": 1.74,
"learning_rate": 8.3887539375891e-06,
"loss": 0.3995,
"step": 320500
},
{
"epoch": 1.74,
"learning_rate": 8.370639669161003e-06,
"loss": 0.3942,
"step": 321000
},
{
"epoch": 1.75,
"learning_rate": 8.352525400732904e-06,
"loss": 0.3973,
"step": 321500
},
{
"epoch": 1.75,
"learning_rate": 8.334411132304806e-06,
"loss": 0.3907,
"step": 322000
},
{
"epoch": 1.75,
"learning_rate": 8.316296863876707e-06,
"loss": 0.3982,
"step": 322500
},
{
"epoch": 1.76,
"learning_rate": 8.298182595448609e-06,
"loss": 0.3928,
"step": 323000
},
{
"epoch": 1.76,
"learning_rate": 8.280068327020511e-06,
"loss": 0.4021,
"step": 323500
},
{
"epoch": 1.76,
"learning_rate": 8.261954058592412e-06,
"loss": 0.4004,
"step": 324000
},
{
"epoch": 1.76,
"learning_rate": 8.243839790164315e-06,
"loss": 0.4082,
"step": 324500
},
{
"epoch": 1.77,
"learning_rate": 8.225725521736217e-06,
"loss": 0.3951,
"step": 325000
},
{
"epoch": 1.77,
"learning_rate": 8.20761125330812e-06,
"loss": 0.3935,
"step": 325500
},
{
"epoch": 1.77,
"learning_rate": 8.189496984880022e-06,
"loss": 0.4066,
"step": 326000
},
{
"epoch": 1.77,
"learning_rate": 8.171382716451922e-06,
"loss": 0.3934,
"step": 326500
},
{
"epoch": 1.78,
"learning_rate": 8.153268448023825e-06,
"loss": 0.3937,
"step": 327000
},
{
"epoch": 1.78,
"learning_rate": 8.135154179595727e-06,
"loss": 0.3965,
"step": 327500
},
{
"epoch": 1.78,
"learning_rate": 8.117039911167628e-06,
"loss": 0.4002,
"step": 328000
},
{
"epoch": 1.79,
"learning_rate": 8.09892564273953e-06,
"loss": 0.399,
"step": 328500
},
{
"epoch": 1.79,
"learning_rate": 8.080811374311433e-06,
"loss": 0.4041,
"step": 329000
},
{
"epoch": 1.79,
"learning_rate": 8.062697105883333e-06,
"loss": 0.397,
"step": 329500
},
{
"epoch": 1.79,
"learning_rate": 8.044582837455236e-06,
"loss": 0.3902,
"step": 330000
},
{
"epoch": 1.8,
"learning_rate": 8.026468569027136e-06,
"loss": 0.4092,
"step": 330500
},
{
"epoch": 1.8,
"learning_rate": 8.008354300599039e-06,
"loss": 0.3937,
"step": 331000
},
{
"epoch": 1.8,
"learning_rate": 7.990240032170941e-06,
"loss": 0.3857,
"step": 331500
},
{
"epoch": 1.8,
"learning_rate": 7.972125763742843e-06,
"loss": 0.3958,
"step": 332000
},
{
"epoch": 1.81,
"learning_rate": 7.954011495314746e-06,
"loss": 0.3934,
"step": 332500
},
{
"epoch": 1.81,
"learning_rate": 7.935897226886648e-06,
"loss": 0.3971,
"step": 333000
},
{
"epoch": 1.81,
"learning_rate": 7.917782958458549e-06,
"loss": 0.3908,
"step": 333500
},
{
"epoch": 1.82,
"learning_rate": 7.899668690030451e-06,
"loss": 0.3913,
"step": 334000
},
{
"epoch": 1.82,
"learning_rate": 7.881554421602352e-06,
"loss": 0.3912,
"step": 334500
},
{
"epoch": 1.82,
"learning_rate": 7.863440153174254e-06,
"loss": 0.3939,
"step": 335000
},
{
"epoch": 1.82,
"learning_rate": 7.845325884746157e-06,
"loss": 0.3955,
"step": 335500
},
{
"epoch": 1.83,
"learning_rate": 7.827211616318057e-06,
"loss": 0.3877,
"step": 336000
},
{
"epoch": 1.83,
"learning_rate": 7.80909734788996e-06,
"loss": 0.3938,
"step": 336500
},
{
"epoch": 1.83,
"learning_rate": 7.790983079461862e-06,
"loss": 0.3938,
"step": 337000
},
{
"epoch": 1.83,
"learning_rate": 7.772868811033763e-06,
"loss": 0.4011,
"step": 337500
},
{
"epoch": 1.84,
"learning_rate": 7.754754542605665e-06,
"loss": 0.3809,
"step": 338000
},
{
"epoch": 1.84,
"learning_rate": 7.736640274177568e-06,
"loss": 0.3949,
"step": 338500
},
{
"epoch": 1.84,
"learning_rate": 7.71852600574947e-06,
"loss": 0.3888,
"step": 339000
},
{
"epoch": 1.84,
"learning_rate": 7.70041173732137e-06,
"loss": 0.3905,
"step": 339500
},
{
"epoch": 1.85,
"learning_rate": 7.682297468893273e-06,
"loss": 0.3878,
"step": 340000
},
{
"epoch": 1.85,
"learning_rate": 7.664183200465176e-06,
"loss": 0.3882,
"step": 340500
},
{
"epoch": 1.85,
"learning_rate": 7.646068932037078e-06,
"loss": 0.3965,
"step": 341000
},
{
"epoch": 1.86,
"learning_rate": 7.627954663608979e-06,
"loss": 0.3884,
"step": 341500
},
{
"epoch": 1.86,
"learning_rate": 7.609840395180881e-06,
"loss": 0.3865,
"step": 342000
},
{
"epoch": 1.86,
"learning_rate": 7.591726126752782e-06,
"loss": 0.3914,
"step": 342500
},
{
"epoch": 1.86,
"learning_rate": 7.573611858324684e-06,
"loss": 0.3807,
"step": 343000
},
{
"epoch": 1.87,
"learning_rate": 7.5554975898965864e-06,
"loss": 0.3878,
"step": 343500
},
{
"epoch": 1.87,
"learning_rate": 7.537383321468488e-06,
"loss": 0.392,
"step": 344000
},
{
"epoch": 1.87,
"learning_rate": 7.51926905304039e-06,
"loss": 0.3938,
"step": 344500
},
{
"epoch": 1.87,
"learning_rate": 7.501154784612292e-06,
"loss": 0.3954,
"step": 345000
},
{
"epoch": 1.88,
"learning_rate": 7.4830405161841934e-06,
"loss": 0.3897,
"step": 345500
},
{
"epoch": 1.88,
"learning_rate": 7.464926247756096e-06,
"loss": 0.4023,
"step": 346000
},
{
"epoch": 1.88,
"learning_rate": 7.4468119793279965e-06,
"loss": 0.3944,
"step": 346500
},
{
"epoch": 1.89,
"learning_rate": 7.428697710899899e-06,
"loss": 0.3836,
"step": 347000
},
{
"epoch": 1.89,
"learning_rate": 7.410583442471801e-06,
"loss": 0.3894,
"step": 347500
},
{
"epoch": 1.89,
"learning_rate": 7.392469174043703e-06,
"loss": 0.3939,
"step": 348000
},
{
"epoch": 1.89,
"learning_rate": 7.374354905615605e-06,
"loss": 0.38,
"step": 348500
},
{
"epoch": 1.9,
"learning_rate": 7.356240637187508e-06,
"loss": 0.3906,
"step": 349000
},
{
"epoch": 1.9,
"learning_rate": 7.338126368759408e-06,
"loss": 0.3881,
"step": 349500
},
{
"epoch": 1.9,
"learning_rate": 7.320012100331311e-06,
"loss": 0.3777,
"step": 350000
},
{
"epoch": 1.9,
"learning_rate": 7.301897831903212e-06,
"loss": 0.3849,
"step": 350500
},
{
"epoch": 1.91,
"learning_rate": 7.2837835634751146e-06,
"loss": 0.3947,
"step": 351000
},
{
"epoch": 1.91,
"learning_rate": 7.265669295047016e-06,
"loss": 0.3896,
"step": 351500
},
{
"epoch": 1.91,
"learning_rate": 7.247555026618918e-06,
"loss": 0.3954,
"step": 352000
},
{
"epoch": 1.92,
"learning_rate": 7.22944075819082e-06,
"loss": 0.3864,
"step": 352500
},
{
"epoch": 1.92,
"learning_rate": 7.211326489762722e-06,
"loss": 0.3811,
"step": 353000
},
{
"epoch": 1.92,
"learning_rate": 7.193212221334623e-06,
"loss": 0.3902,
"step": 353500
},
{
"epoch": 1.92,
"learning_rate": 7.1750979529065255e-06,
"loss": 0.3776,
"step": 354000
},
{
"epoch": 1.93,
"learning_rate": 7.156983684478428e-06,
"loss": 0.383,
"step": 354500
},
{
"epoch": 1.93,
"learning_rate": 7.138869416050329e-06,
"loss": 0.3809,
"step": 355000
},
{
"epoch": 1.93,
"learning_rate": 7.120755147622232e-06,
"loss": 0.385,
"step": 355500
},
{
"epoch": 1.93,
"learning_rate": 7.1026408791941325e-06,
"loss": 0.387,
"step": 356000
},
{
"epoch": 1.94,
"learning_rate": 7.084526610766035e-06,
"loss": 0.3834,
"step": 356500
},
{
"epoch": 1.94,
"learning_rate": 7.066412342337937e-06,
"loss": 0.3801,
"step": 357000
},
{
"epoch": 1.94,
"learning_rate": 7.048298073909838e-06,
"loss": 0.3801,
"step": 357500
},
{
"epoch": 1.95,
"learning_rate": 7.03018380548174e-06,
"loss": 0.3878,
"step": 358000
},
{
"epoch": 1.95,
"learning_rate": 7.012069537053643e-06,
"loss": 0.3814,
"step": 358500
},
{
"epoch": 1.95,
"learning_rate": 6.993955268625544e-06,
"loss": 0.3841,
"step": 359000
},
{
"epoch": 1.95,
"learning_rate": 6.975841000197447e-06,
"loss": 0.3708,
"step": 359500
},
{
"epoch": 1.96,
"learning_rate": 6.957726731769347e-06,
"loss": 0.3828,
"step": 360000
},
{
"epoch": 1.96,
"learning_rate": 6.93961246334125e-06,
"loss": 0.3919,
"step": 360500
},
{
"epoch": 1.96,
"learning_rate": 6.921498194913152e-06,
"loss": 0.3845,
"step": 361000
},
{
"epoch": 1.96,
"learning_rate": 6.903383926485054e-06,
"loss": 0.379,
"step": 361500
},
{
"epoch": 1.97,
"learning_rate": 6.885269658056956e-06,
"loss": 0.3895,
"step": 362000
},
{
"epoch": 1.97,
"learning_rate": 6.8671553896288575e-06,
"loss": 0.3765,
"step": 362500
},
{
"epoch": 1.97,
"learning_rate": 6.849041121200759e-06,
"loss": 0.3773,
"step": 363000
},
{
"epoch": 1.98,
"learning_rate": 6.8309268527726614e-06,
"loss": 0.3943,
"step": 363500
},
{
"epoch": 1.98,
"learning_rate": 6.812812584344562e-06,
"loss": 0.3799,
"step": 364000
},
{
"epoch": 1.98,
"learning_rate": 6.7946983159164645e-06,
"loss": 0.3829,
"step": 364500
},
{
"epoch": 1.98,
"learning_rate": 6.776584047488367e-06,
"loss": 0.3889,
"step": 365000
},
{
"epoch": 1.99,
"learning_rate": 6.7584697790602684e-06,
"loss": 0.375,
"step": 365500
},
{
"epoch": 1.99,
"learning_rate": 6.740355510632171e-06,
"loss": 0.3787,
"step": 366000
},
{
"epoch": 1.99,
"learning_rate": 6.722241242204073e-06,
"loss": 0.3792,
"step": 366500
},
{
"epoch": 1.99,
"learning_rate": 6.704126973775974e-06,
"loss": 0.3944,
"step": 367000
},
{
"epoch": 2.0,
"learning_rate": 6.686012705347876e-06,
"loss": 0.3909,
"step": 367500
},
{
"epoch": 2.0,
"learning_rate": 6.667898436919778e-06,
"loss": 0.3905,
"step": 368000
},
{
"epoch": 2.0,
"eval_accuracy": 0.770441767068273,
"eval_loss": 0.7063438892364502,
"eval_runtime": 74.5232,
"eval_samples_per_second": 501.186,
"eval_steps_per_second": 62.652,
"step": 368034
},
{
"epoch": 2.0,
"learning_rate": 6.649784168491679e-06,
"loss": 0.3327,
"step": 368500
},
{
"epoch": 2.01,
"learning_rate": 6.631669900063582e-06,
"loss": 0.3324,
"step": 369000
},
{
"epoch": 2.01,
"learning_rate": 6.613555631635483e-06,
"loss": 0.3303,
"step": 369500
},
{
"epoch": 2.01,
"learning_rate": 6.595441363207386e-06,
"loss": 0.3386,
"step": 370000
},
{
"epoch": 2.01,
"learning_rate": 6.577327094779288e-06,
"loss": 0.3314,
"step": 370500
},
{
"epoch": 2.02,
"learning_rate": 6.559212826351189e-06,
"loss": 0.3249,
"step": 371000
},
{
"epoch": 2.02,
"learning_rate": 6.541098557923091e-06,
"loss": 0.3242,
"step": 371500
},
{
"epoch": 2.02,
"learning_rate": 6.522984289494993e-06,
"loss": 0.3312,
"step": 372000
},
{
"epoch": 2.02,
"learning_rate": 6.504870021066895e-06,
"loss": 0.334,
"step": 372500
},
{
"epoch": 2.03,
"learning_rate": 6.4867557526387966e-06,
"loss": 0.3307,
"step": 373000
},
{
"epoch": 2.03,
"learning_rate": 6.468641484210698e-06,
"loss": 0.3311,
"step": 373500
},
{
"epoch": 2.03,
"learning_rate": 6.4505272157826005e-06,
"loss": 0.3233,
"step": 374000
},
{
"epoch": 2.04,
"learning_rate": 6.432412947354503e-06,
"loss": 0.3323,
"step": 374500
},
{
"epoch": 2.04,
"learning_rate": 6.4142986789264035e-06,
"loss": 0.3191,
"step": 375000
},
{
"epoch": 2.04,
"learning_rate": 6.396184410498306e-06,
"loss": 0.3294,
"step": 375500
},
{
"epoch": 2.04,
"learning_rate": 6.3780701420702075e-06,
"loss": 0.3317,
"step": 376000
},
{
"epoch": 2.05,
"learning_rate": 6.35995587364211e-06,
"loss": 0.3356,
"step": 376500
},
{
"epoch": 2.05,
"learning_rate": 6.341841605214012e-06,
"loss": 0.3266,
"step": 377000
},
{
"epoch": 2.05,
"learning_rate": 6.323727336785913e-06,
"loss": 0.3246,
"step": 377500
},
{
"epoch": 2.05,
"learning_rate": 6.305613068357815e-06,
"loss": 0.322,
"step": 378000
},
{
"epoch": 2.06,
"learning_rate": 6.287498799929718e-06,
"loss": 0.3238,
"step": 378500
},
{
"epoch": 2.06,
"learning_rate": 6.269384531501619e-06,
"loss": 0.3319,
"step": 379000
},
{
"epoch": 2.06,
"learning_rate": 6.251270263073521e-06,
"loss": 0.3215,
"step": 379500
},
{
"epoch": 2.07,
"learning_rate": 6.233155994645422e-06,
"loss": 0.3225,
"step": 380000
},
{
"epoch": 2.07,
"learning_rate": 6.215041726217325e-06,
"loss": 0.3282,
"step": 380500
},
{
"epoch": 2.07,
"learning_rate": 6.196927457789227e-06,
"loss": 0.331,
"step": 381000
},
{
"epoch": 2.07,
"learning_rate": 6.178813189361128e-06,
"loss": 0.3234,
"step": 381500
},
{
"epoch": 2.08,
"learning_rate": 6.16069892093303e-06,
"loss": 0.3316,
"step": 382000
},
{
"epoch": 2.08,
"learning_rate": 6.1425846525049325e-06,
"loss": 0.3183,
"step": 382500
},
{
"epoch": 2.08,
"learning_rate": 6.124470384076834e-06,
"loss": 0.3203,
"step": 383000
},
{
"epoch": 2.08,
"learning_rate": 6.1063561156487364e-06,
"loss": 0.3341,
"step": 383500
},
{
"epoch": 2.09,
"learning_rate": 6.088241847220637e-06,
"loss": 0.3238,
"step": 384000
},
{
"epoch": 2.09,
"learning_rate": 6.0701275787925395e-06,
"loss": 0.33,
"step": 384500
},
{
"epoch": 2.09,
"learning_rate": 6.052013310364442e-06,
"loss": 0.3208,
"step": 385000
},
{
"epoch": 2.09,
"learning_rate": 6.0338990419363434e-06,
"loss": 0.3271,
"step": 385500
},
{
"epoch": 2.1,
"learning_rate": 6.015784773508245e-06,
"loss": 0.3296,
"step": 386000
},
{
"epoch": 2.1,
"learning_rate": 5.997670505080147e-06,
"loss": 0.3259,
"step": 386500
},
{
"epoch": 2.1,
"learning_rate": 5.979556236652049e-06,
"loss": 0.3241,
"step": 387000
},
{
"epoch": 2.11,
"learning_rate": 5.961441968223951e-06,
"loss": 0.3187,
"step": 387500
},
{
"epoch": 2.11,
"learning_rate": 5.943327699795852e-06,
"loss": 0.3211,
"step": 388000
},
{
"epoch": 2.11,
"learning_rate": 5.925213431367754e-06,
"loss": 0.3265,
"step": 388500
},
{
"epoch": 2.11,
"learning_rate": 5.907099162939657e-06,
"loss": 0.3233,
"step": 389000
},
{
"epoch": 2.12,
"learning_rate": 5.888984894511558e-06,
"loss": 0.3191,
"step": 389500
},
{
"epoch": 2.12,
"learning_rate": 5.870870626083461e-06,
"loss": 0.3254,
"step": 390000
},
{
"epoch": 2.12,
"learning_rate": 5.852756357655362e-06,
"loss": 0.3185,
"step": 390500
},
{
"epoch": 2.12,
"learning_rate": 5.834642089227264e-06,
"loss": 0.3197,
"step": 391000
},
{
"epoch": 2.13,
"learning_rate": 5.816527820799166e-06,
"loss": 0.3278,
"step": 391500
},
{
"epoch": 2.13,
"learning_rate": 5.798413552371067e-06,
"loss": 0.3269,
"step": 392000
},
{
"epoch": 2.13,
"learning_rate": 5.780299283942969e-06,
"loss": 0.3243,
"step": 392500
},
{
"epoch": 2.14,
"learning_rate": 5.7621850155148716e-06,
"loss": 0.3174,
"step": 393000
},
{
"epoch": 2.14,
"learning_rate": 5.744070747086773e-06,
"loss": 0.3188,
"step": 393500
},
{
"epoch": 2.14,
"learning_rate": 5.7259564786586755e-06,
"loss": 0.3301,
"step": 394000
},
{
"epoch": 2.14,
"learning_rate": 5.707842210230578e-06,
"loss": 0.3162,
"step": 394500
},
{
"epoch": 2.15,
"learning_rate": 5.6897279418024785e-06,
"loss": 0.3206,
"step": 395000
},
{
"epoch": 2.15,
"learning_rate": 5.671613673374381e-06,
"loss": 0.3179,
"step": 395500
},
{
"epoch": 2.15,
"learning_rate": 5.6534994049462825e-06,
"loss": 0.3274,
"step": 396000
},
{
"epoch": 2.15,
"learning_rate": 5.635385136518184e-06,
"loss": 0.322,
"step": 396500
},
{
"epoch": 2.16,
"learning_rate": 5.617270868090086e-06,
"loss": 0.3134,
"step": 397000
},
{
"epoch": 2.16,
"learning_rate": 5.599156599661988e-06,
"loss": 0.3213,
"step": 397500
},
{
"epoch": 2.16,
"learning_rate": 5.58104233123389e-06,
"loss": 0.3103,
"step": 398000
},
{
"epoch": 2.17,
"learning_rate": 5.562928062805793e-06,
"loss": 0.3338,
"step": 398500
},
{
"epoch": 2.17,
"learning_rate": 5.544813794377693e-06,
"loss": 0.3216,
"step": 399000
},
{
"epoch": 2.17,
"learning_rate": 5.526699525949596e-06,
"loss": 0.3245,
"step": 399500
},
{
"epoch": 2.17,
"learning_rate": 5.508585257521497e-06,
"loss": 0.3249,
"step": 400000
},
{
"epoch": 2.18,
"learning_rate": 5.4904709890934e-06,
"loss": 0.3168,
"step": 400500
},
{
"epoch": 2.18,
"learning_rate": 5.472356720665302e-06,
"loss": 0.3207,
"step": 401000
},
{
"epoch": 2.18,
"learning_rate": 5.454242452237203e-06,
"loss": 0.3269,
"step": 401500
},
{
"epoch": 2.18,
"learning_rate": 5.436128183809105e-06,
"loss": 0.3082,
"step": 402000
},
{
"epoch": 2.19,
"learning_rate": 5.4180139153810075e-06,
"loss": 0.3296,
"step": 402500
},
{
"epoch": 2.19,
"learning_rate": 5.399899646952908e-06,
"loss": 0.3191,
"step": 403000
},
{
"epoch": 2.19,
"learning_rate": 5.381785378524811e-06,
"loss": 0.3091,
"step": 403500
},
{
"epoch": 2.2,
"learning_rate": 5.363671110096712e-06,
"loss": 0.3171,
"step": 404000
},
{
"epoch": 2.2,
"learning_rate": 5.3455568416686145e-06,
"loss": 0.3215,
"step": 404500
},
{
"epoch": 2.2,
"learning_rate": 5.327442573240517e-06,
"loss": 0.3137,
"step": 405000
},
{
"epoch": 2.2,
"learning_rate": 5.3093283048124176e-06,
"loss": 0.3252,
"step": 405500
},
{
"epoch": 2.21,
"learning_rate": 5.29121403638432e-06,
"loss": 0.3205,
"step": 406000
},
{
"epoch": 2.21,
"learning_rate": 5.273099767956222e-06,
"loss": 0.3262,
"step": 406500
},
{
"epoch": 2.21,
"learning_rate": 5.254985499528124e-06,
"loss": 0.3187,
"step": 407000
},
{
"epoch": 2.21,
"learning_rate": 5.236871231100025e-06,
"loss": 0.3104,
"step": 407500
},
{
"epoch": 2.22,
"learning_rate": 5.218756962671927e-06,
"loss": 0.3193,
"step": 408000
},
{
"epoch": 2.22,
"learning_rate": 5.200642694243829e-06,
"loss": 0.3203,
"step": 408500
},
{
"epoch": 2.22,
"learning_rate": 5.182528425815732e-06,
"loss": 0.3218,
"step": 409000
},
{
"epoch": 2.23,
"learning_rate": 5.164414157387632e-06,
"loss": 0.3234,
"step": 409500
},
{
"epoch": 2.23,
"learning_rate": 5.146299888959535e-06,
"loss": 0.3153,
"step": 410000
},
{
"epoch": 2.23,
"learning_rate": 5.128185620531437e-06,
"loss": 0.3163,
"step": 410500
},
{
"epoch": 2.23,
"learning_rate": 5.110071352103339e-06,
"loss": 0.3253,
"step": 411000
},
{
"epoch": 2.24,
"learning_rate": 5.091957083675241e-06,
"loss": 0.3221,
"step": 411500
},
{
"epoch": 2.24,
"learning_rate": 5.073842815247142e-06,
"loss": 0.3189,
"step": 412000
},
{
"epoch": 2.24,
"learning_rate": 5.055728546819044e-06,
"loss": 0.3084,
"step": 412500
},
{
"epoch": 2.24,
"learning_rate": 5.0376142783909465e-06,
"loss": 0.3185,
"step": 413000
},
{
"epoch": 2.25,
"learning_rate": 5.019500009962848e-06,
"loss": 0.3149,
"step": 413500
},
{
"epoch": 2.25,
"learning_rate": 5.00138574153475e-06,
"loss": 0.3249,
"step": 414000
},
{
"epoch": 2.25,
"learning_rate": 4.983271473106652e-06,
"loss": 0.3187,
"step": 414500
},
{
"epoch": 2.26,
"learning_rate": 4.9651572046785535e-06,
"loss": 0.3179,
"step": 415000
},
{
"epoch": 2.26,
"learning_rate": 4.947042936250456e-06,
"loss": 0.3156,
"step": 415500
},
{
"epoch": 2.26,
"learning_rate": 4.9289286678223575e-06,
"loss": 0.3176,
"step": 416000
},
{
"epoch": 2.26,
"learning_rate": 4.910814399394259e-06,
"loss": 0.3172,
"step": 416500
},
{
"epoch": 2.27,
"learning_rate": 4.8927001309661605e-06,
"loss": 0.3225,
"step": 417000
},
{
"epoch": 2.27,
"learning_rate": 4.874585862538063e-06,
"loss": 0.3123,
"step": 417500
},
{
"epoch": 2.27,
"learning_rate": 4.856471594109965e-06,
"loss": 0.323,
"step": 418000
},
{
"epoch": 2.27,
"learning_rate": 4.838357325681867e-06,
"loss": 0.312,
"step": 418500
},
{
"epoch": 2.28,
"learning_rate": 4.820243057253768e-06,
"loss": 0.3177,
"step": 419000
},
{
"epoch": 2.28,
"learning_rate": 4.802128788825671e-06,
"loss": 0.3103,
"step": 419500
},
{
"epoch": 2.28,
"learning_rate": 4.784014520397572e-06,
"loss": 0.3129,
"step": 420000
},
{
"epoch": 2.29,
"learning_rate": 4.765900251969474e-06,
"loss": 0.3263,
"step": 420500
},
{
"epoch": 2.29,
"learning_rate": 4.747785983541376e-06,
"loss": 0.3068,
"step": 421000
},
{
"epoch": 2.29,
"learning_rate": 4.729671715113278e-06,
"loss": 0.3114,
"step": 421500
},
{
"epoch": 2.29,
"learning_rate": 4.71155744668518e-06,
"loss": 0.3218,
"step": 422000
},
{
"epoch": 2.3,
"learning_rate": 4.693443178257082e-06,
"loss": 0.3237,
"step": 422500
},
{
"epoch": 2.3,
"learning_rate": 4.675328909828983e-06,
"loss": 0.323,
"step": 423000
},
{
"epoch": 2.3,
"learning_rate": 4.657214641400886e-06,
"loss": 0.3315,
"step": 423500
},
{
"epoch": 2.3,
"learning_rate": 4.639100372972787e-06,
"loss": 0.317,
"step": 424000
},
{
"epoch": 2.31,
"learning_rate": 4.6209861045446895e-06,
"loss": 0.3129,
"step": 424500
},
{
"epoch": 2.31,
"learning_rate": 4.602871836116591e-06,
"loss": 0.319,
"step": 425000
},
{
"epoch": 2.31,
"learning_rate": 4.584757567688493e-06,
"loss": 0.3066,
"step": 425500
},
{
"epoch": 2.32,
"learning_rate": 4.566643299260395e-06,
"loss": 0.3144,
"step": 426000
},
{
"epoch": 2.32,
"learning_rate": 4.5485290308322965e-06,
"loss": 0.3187,
"step": 426500
},
{
"epoch": 2.32,
"learning_rate": 4.530414762404198e-06,
"loss": 0.3157,
"step": 427000
},
{
"epoch": 2.32,
"learning_rate": 4.5123004939761e-06,
"loss": 0.3108,
"step": 427500
},
{
"epoch": 2.33,
"learning_rate": 4.494186225548002e-06,
"loss": 0.3138,
"step": 428000
},
{
"epoch": 2.33,
"learning_rate": 4.476071957119904e-06,
"loss": 0.3132,
"step": 428500
},
{
"epoch": 2.33,
"learning_rate": 4.457957688691806e-06,
"loss": 0.3145,
"step": 429000
},
{
"epoch": 2.33,
"learning_rate": 4.439843420263708e-06,
"loss": 0.3175,
"step": 429500
},
{
"epoch": 2.34,
"learning_rate": 4.42172915183561e-06,
"loss": 0.3146,
"step": 430000
},
{
"epoch": 2.34,
"learning_rate": 4.403614883407511e-06,
"loss": 0.3142,
"step": 430500
},
{
"epoch": 2.34,
"learning_rate": 4.385500614979413e-06,
"loss": 0.3209,
"step": 431000
},
{
"epoch": 2.34,
"learning_rate": 4.367386346551315e-06,
"loss": 0.3088,
"step": 431500
},
{
"epoch": 2.35,
"learning_rate": 4.349272078123218e-06,
"loss": 0.3215,
"step": 432000
},
{
"epoch": 2.35,
"learning_rate": 4.331157809695119e-06,
"loss": 0.3235,
"step": 432500
},
{
"epoch": 2.35,
"learning_rate": 4.313043541267021e-06,
"loss": 0.3181,
"step": 433000
},
{
"epoch": 2.36,
"learning_rate": 4.294929272838923e-06,
"loss": 0.3111,
"step": 433500
},
{
"epoch": 2.36,
"learning_rate": 4.276815004410825e-06,
"loss": 0.3144,
"step": 434000
},
{
"epoch": 2.36,
"learning_rate": 4.258700735982726e-06,
"loss": 0.3135,
"step": 434500
},
{
"epoch": 2.36,
"learning_rate": 4.2405864675546285e-06,
"loss": 0.3088,
"step": 435000
},
{
"epoch": 2.37,
"learning_rate": 4.222472199126531e-06,
"loss": 0.3086,
"step": 435500
},
{
"epoch": 2.37,
"learning_rate": 4.2043579306984324e-06,
"loss": 0.3203,
"step": 436000
},
{
"epoch": 2.37,
"learning_rate": 4.186243662270334e-06,
"loss": 0.3093,
"step": 436500
},
{
"epoch": 2.37,
"learning_rate": 4.1681293938422355e-06,
"loss": 0.311,
"step": 437000
},
{
"epoch": 2.38,
"learning_rate": 4.150015125414138e-06,
"loss": 0.3084,
"step": 437500
},
{
"epoch": 2.38,
"learning_rate": 4.1319008569860394e-06,
"loss": 0.3176,
"step": 438000
},
{
"epoch": 2.38,
"learning_rate": 4.113786588557942e-06,
"loss": 0.3035,
"step": 438500
},
{
"epoch": 2.39,
"learning_rate": 4.095672320129843e-06,
"loss": 0.308,
"step": 439000
},
{
"epoch": 2.39,
"learning_rate": 4.077558051701746e-06,
"loss": 0.3134,
"step": 439500
},
{
"epoch": 2.39,
"learning_rate": 4.059443783273647e-06,
"loss": 0.2974,
"step": 440000
},
{
"epoch": 2.39,
"learning_rate": 4.041329514845549e-06,
"loss": 0.3137,
"step": 440500
},
{
"epoch": 2.4,
"learning_rate": 4.02321524641745e-06,
"loss": 0.3142,
"step": 441000
},
{
"epoch": 2.4,
"learning_rate": 4.005100977989353e-06,
"loss": 0.3102,
"step": 441500
},
{
"epoch": 2.4,
"learning_rate": 3.986986709561254e-06,
"loss": 0.3107,
"step": 442000
},
{
"epoch": 2.4,
"learning_rate": 3.968872441133157e-06,
"loss": 0.3053,
"step": 442500
},
{
"epoch": 2.41,
"learning_rate": 3.950758172705058e-06,
"loss": 0.3134,
"step": 443000
},
{
"epoch": 2.41,
"learning_rate": 3.9326439042769606e-06,
"loss": 0.3072,
"step": 443500
},
{
"epoch": 2.41,
"learning_rate": 3.914529635848862e-06,
"loss": 0.3073,
"step": 444000
},
{
"epoch": 2.42,
"learning_rate": 3.896415367420764e-06,
"loss": 0.3058,
"step": 444500
},
{
"epoch": 2.42,
"learning_rate": 3.878301098992665e-06,
"loss": 0.3052,
"step": 445000
},
{
"epoch": 2.42,
"learning_rate": 3.8601868305645676e-06,
"loss": 0.3153,
"step": 445500
},
{
"epoch": 2.42,
"learning_rate": 3.84207256213647e-06,
"loss": 0.3158,
"step": 446000
},
{
"epoch": 2.43,
"learning_rate": 3.8239582937083715e-06,
"loss": 0.3132,
"step": 446500
},
{
"epoch": 2.43,
"learning_rate": 3.805844025280273e-06,
"loss": 0.316,
"step": 447000
},
{
"epoch": 2.43,
"learning_rate": 3.7877297568521754e-06,
"loss": 0.3138,
"step": 447500
},
{
"epoch": 2.43,
"learning_rate": 3.769615488424077e-06,
"loss": 0.312,
"step": 448000
},
{
"epoch": 2.44,
"learning_rate": 3.751501219995979e-06,
"loss": 0.3064,
"step": 448500
},
{
"epoch": 2.44,
"learning_rate": 3.7333869515678804e-06,
"loss": 0.3072,
"step": 449000
},
{
"epoch": 2.44,
"learning_rate": 3.715272683139783e-06,
"loss": 0.3068,
"step": 449500
},
{
"epoch": 2.45,
"learning_rate": 3.6971584147116848e-06,
"loss": 0.3157,
"step": 450000
},
{
"epoch": 2.45,
"learning_rate": 3.6790441462835863e-06,
"loss": 0.3047,
"step": 450500
},
{
"epoch": 2.45,
"learning_rate": 3.660929877855488e-06,
"loss": 0.3107,
"step": 451000
},
{
"epoch": 2.45,
"learning_rate": 3.6428156094273902e-06,
"loss": 0.3156,
"step": 451500
},
{
"epoch": 2.46,
"learning_rate": 3.624701340999292e-06,
"loss": 0.2987,
"step": 452000
},
{
"epoch": 2.46,
"learning_rate": 3.6065870725711937e-06,
"loss": 0.3088,
"step": 452500
},
{
"epoch": 2.46,
"learning_rate": 3.5884728041430957e-06,
"loss": 0.3117,
"step": 453000
},
{
"epoch": 2.46,
"learning_rate": 3.5703585357149976e-06,
"loss": 0.3109,
"step": 453500
},
{
"epoch": 2.47,
"learning_rate": 3.5522442672868996e-06,
"loss": 0.3197,
"step": 454000
},
{
"epoch": 2.47,
"learning_rate": 3.534129998858801e-06,
"loss": 0.3121,
"step": 454500
},
{
"epoch": 2.47,
"learning_rate": 3.516015730430703e-06,
"loss": 0.3017,
"step": 455000
},
{
"epoch": 2.48,
"learning_rate": 3.4979014620026055e-06,
"loss": 0.3129,
"step": 455500
},
{
"epoch": 2.48,
"learning_rate": 3.479787193574507e-06,
"loss": 0.3093,
"step": 456000
},
{
"epoch": 2.48,
"learning_rate": 3.4616729251464086e-06,
"loss": 0.3062,
"step": 456500
},
{
"epoch": 2.48,
"learning_rate": 3.4435586567183105e-06,
"loss": 0.301,
"step": 457000
},
{
"epoch": 2.49,
"learning_rate": 3.425444388290213e-06,
"loss": 0.3082,
"step": 457500
},
{
"epoch": 2.49,
"learning_rate": 3.4073301198621144e-06,
"loss": 0.3069,
"step": 458000
},
{
"epoch": 2.49,
"learning_rate": 3.3892158514340164e-06,
"loss": 0.3084,
"step": 458500
},
{
"epoch": 2.49,
"learning_rate": 3.371101583005918e-06,
"loss": 0.3104,
"step": 459000
},
{
"epoch": 2.5,
"learning_rate": 3.3529873145778203e-06,
"loss": 0.3015,
"step": 459500
},
{
"epoch": 2.5,
"learning_rate": 3.334873046149722e-06,
"loss": 0.3113,
"step": 460000
},
{
"epoch": 2.5,
"learning_rate": 3.316758777721624e-06,
"loss": 0.3018,
"step": 460500
},
{
"epoch": 2.51,
"learning_rate": 3.2986445092935253e-06,
"loss": 0.3158,
"step": 461000
},
{
"epoch": 2.51,
"learning_rate": 3.2805302408654277e-06,
"loss": 0.3079,
"step": 461500
},
{
"epoch": 2.51,
"learning_rate": 3.2624159724373293e-06,
"loss": 0.3153,
"step": 462000
},
{
"epoch": 2.51,
"learning_rate": 3.2443017040092312e-06,
"loss": 0.2989,
"step": 462500
},
{
"epoch": 2.52,
"learning_rate": 3.2261874355811328e-06,
"loss": 0.3113,
"step": 463000
},
{
"epoch": 2.52,
"learning_rate": 3.208073167153035e-06,
"loss": 0.3077,
"step": 463500
},
{
"epoch": 2.52,
"learning_rate": 3.189958898724937e-06,
"loss": 0.3,
"step": 464000
},
{
"epoch": 2.52,
"learning_rate": 3.1718446302968386e-06,
"loss": 0.3044,
"step": 464500
},
{
"epoch": 2.53,
"learning_rate": 3.15373036186874e-06,
"loss": 0.296,
"step": 465000
},
{
"epoch": 2.53,
"learning_rate": 3.1356160934406426e-06,
"loss": 0.3072,
"step": 465500
},
{
"epoch": 2.53,
"learning_rate": 3.1175018250125445e-06,
"loss": 0.3002,
"step": 466000
},
{
"epoch": 2.54,
"learning_rate": 3.099387556584446e-06,
"loss": 0.3048,
"step": 466500
},
{
"epoch": 2.54,
"learning_rate": 3.081273288156348e-06,
"loss": 0.2984,
"step": 467000
},
{
"epoch": 2.54,
"learning_rate": 3.06315901972825e-06,
"loss": 0.3168,
"step": 467500
},
{
"epoch": 2.54,
"learning_rate": 3.045044751300152e-06,
"loss": 0.2973,
"step": 468000
},
{
"epoch": 2.55,
"learning_rate": 3.0269304828720535e-06,
"loss": 0.3085,
"step": 468500
},
{
"epoch": 2.55,
"learning_rate": 3.0088162144439554e-06,
"loss": 0.3066,
"step": 469000
},
{
"epoch": 2.55,
"learning_rate": 2.990701946015858e-06,
"loss": 0.3099,
"step": 469500
},
{
"epoch": 2.55,
"learning_rate": 2.9725876775877593e-06,
"loss": 0.2948,
"step": 470000
},
{
"epoch": 2.56,
"learning_rate": 2.954473409159661e-06,
"loss": 0.3089,
"step": 470500
},
{
"epoch": 2.56,
"learning_rate": 2.9363591407315633e-06,
"loss": 0.3003,
"step": 471000
},
{
"epoch": 2.56,
"learning_rate": 2.9182448723034652e-06,
"loss": 0.297,
"step": 471500
},
{
"epoch": 2.56,
"learning_rate": 2.9001306038753668e-06,
"loss": 0.302,
"step": 472000
},
{
"epoch": 2.57,
"learning_rate": 2.8820163354472687e-06,
"loss": 0.3137,
"step": 472500
},
{
"epoch": 2.57,
"learning_rate": 2.8639020670191707e-06,
"loss": 0.3104,
"step": 473000
},
{
"epoch": 2.57,
"learning_rate": 2.8457877985910726e-06,
"loss": 0.3063,
"step": 473500
},
{
"epoch": 2.58,
"learning_rate": 2.827673530162974e-06,
"loss": 0.3039,
"step": 474000
},
{
"epoch": 2.58,
"learning_rate": 2.809559261734876e-06,
"loss": 0.3042,
"step": 474500
},
{
"epoch": 2.58,
"learning_rate": 2.7914449933067785e-06,
"loss": 0.3072,
"step": 475000
},
{
"epoch": 2.58,
"learning_rate": 2.77333072487868e-06,
"loss": 0.3025,
"step": 475500
},
{
"epoch": 2.59,
"learning_rate": 2.7552164564505816e-06,
"loss": 0.3048,
"step": 476000
},
{
"epoch": 2.59,
"learning_rate": 2.7371021880224836e-06,
"loss": 0.314,
"step": 476500
},
{
"epoch": 2.59,
"learning_rate": 2.718987919594386e-06,
"loss": 0.3,
"step": 477000
},
{
"epoch": 2.59,
"learning_rate": 2.7008736511662875e-06,
"loss": 0.3074,
"step": 477500
},
{
"epoch": 2.6,
"learning_rate": 2.6827593827381894e-06,
"loss": 0.3116,
"step": 478000
},
{
"epoch": 2.6,
"learning_rate": 2.664645114310091e-06,
"loss": 0.2957,
"step": 478500
},
{
"epoch": 2.6,
"learning_rate": 2.6465308458819933e-06,
"loss": 0.307,
"step": 479000
},
{
"epoch": 2.61,
"learning_rate": 2.628416577453895e-06,
"loss": 0.304,
"step": 479500
},
{
"epoch": 2.61,
"learning_rate": 2.610302309025797e-06,
"loss": 0.2963,
"step": 480000
},
{
"epoch": 2.61,
"learning_rate": 2.5921880405976984e-06,
"loss": 0.306,
"step": 480500
},
{
"epoch": 2.61,
"learning_rate": 2.5740737721696008e-06,
"loss": 0.3084,
"step": 481000
},
{
"epoch": 2.62,
"learning_rate": 2.5559595037415023e-06,
"loss": 0.3028,
"step": 481500
},
{
"epoch": 2.62,
"learning_rate": 2.5378452353134043e-06,
"loss": 0.3041,
"step": 482000
},
{
"epoch": 2.62,
"learning_rate": 2.519730966885306e-06,
"loss": 0.3007,
"step": 482500
},
{
"epoch": 2.62,
"learning_rate": 2.501616698457208e-06,
"loss": 0.3054,
"step": 483000
},
{
"epoch": 2.63,
"learning_rate": 2.48350243002911e-06,
"loss": 0.3012,
"step": 483500
},
{
"epoch": 2.63,
"learning_rate": 2.4653881616010117e-06,
"loss": 0.3051,
"step": 484000
},
{
"epoch": 2.63,
"learning_rate": 2.4472738931729136e-06,
"loss": 0.2977,
"step": 484500
},
{
"epoch": 2.64,
"learning_rate": 2.4291596247448156e-06,
"loss": 0.2999,
"step": 485000
},
{
"epoch": 2.64,
"learning_rate": 2.4110453563167176e-06,
"loss": 0.3,
"step": 485500
},
{
"epoch": 2.64,
"learning_rate": 2.392931087888619e-06,
"loss": 0.2982,
"step": 486000
},
{
"epoch": 2.64,
"learning_rate": 2.374816819460521e-06,
"loss": 0.2997,
"step": 486500
},
{
"epoch": 2.65,
"learning_rate": 2.356702551032423e-06,
"loss": 0.3103,
"step": 487000
},
{
"epoch": 2.65,
"learning_rate": 2.338588282604325e-06,
"loss": 0.2953,
"step": 487500
},
{
"epoch": 2.65,
"learning_rate": 2.3204740141762265e-06,
"loss": 0.299,
"step": 488000
},
{
"epoch": 2.65,
"learning_rate": 2.3023597457481285e-06,
"loss": 0.3016,
"step": 488500
},
{
"epoch": 2.66,
"learning_rate": 2.2842454773200304e-06,
"loss": 0.3065,
"step": 489000
},
{
"epoch": 2.66,
"learning_rate": 2.2661312088919324e-06,
"loss": 0.2985,
"step": 489500
},
{
"epoch": 2.66,
"learning_rate": 2.248016940463834e-06,
"loss": 0.2986,
"step": 490000
},
{
"epoch": 2.67,
"learning_rate": 2.2299026720357363e-06,
"loss": 0.2939,
"step": 490500
},
{
"epoch": 2.67,
"learning_rate": 2.211788403607638e-06,
"loss": 0.2975,
"step": 491000
},
{
"epoch": 2.67,
"learning_rate": 2.19367413517954e-06,
"loss": 0.297,
"step": 491500
},
{
"epoch": 2.67,
"learning_rate": 2.1755598667514418e-06,
"loss": 0.2951,
"step": 492000
},
{
"epoch": 2.68,
"learning_rate": 2.1574455983233437e-06,
"loss": 0.2988,
"step": 492500
},
{
"epoch": 2.68,
"learning_rate": 2.1393313298952453e-06,
"loss": 0.301,
"step": 493000
},
{
"epoch": 2.68,
"learning_rate": 2.121217061467147e-06,
"loss": 0.3037,
"step": 493500
},
{
"epoch": 2.68,
"learning_rate": 2.103102793039049e-06,
"loss": 0.2951,
"step": 494000
},
{
"epoch": 2.69,
"learning_rate": 2.084988524610951e-06,
"loss": 0.3028,
"step": 494500
},
{
"epoch": 2.69,
"learning_rate": 2.0668742561828527e-06,
"loss": 0.2997,
"step": 495000
},
{
"epoch": 2.69,
"learning_rate": 2.0487599877547546e-06,
"loss": 0.3091,
"step": 495500
},
{
"epoch": 2.7,
"learning_rate": 2.0306457193266566e-06,
"loss": 0.2846,
"step": 496000
},
{
"epoch": 2.7,
"learning_rate": 2.0125314508985585e-06,
"loss": 0.2981,
"step": 496500
},
{
"epoch": 2.7,
"learning_rate": 1.99441718247046e-06,
"loss": 0.3015,
"step": 497000
},
{
"epoch": 2.7,
"learning_rate": 1.9763029140423625e-06,
"loss": 0.3045,
"step": 497500
},
{
"epoch": 2.71,
"learning_rate": 1.958188645614264e-06,
"loss": 0.3114,
"step": 498000
},
{
"epoch": 2.71,
"learning_rate": 1.940074377186166e-06,
"loss": 0.2938,
"step": 498500
},
{
"epoch": 2.71,
"learning_rate": 1.921960108758068e-06,
"loss": 0.2945,
"step": 499000
},
{
"epoch": 2.71,
"learning_rate": 1.9038458403299699e-06,
"loss": 0.3022,
"step": 499500
},
{
"epoch": 2.72,
"learning_rate": 1.8857315719018714e-06,
"loss": 0.2936,
"step": 500000
},
{
"epoch": 2.72,
"learning_rate": 1.8676173034737736e-06,
"loss": 0.3015,
"step": 500500
},
{
"epoch": 2.72,
"learning_rate": 1.8495030350456753e-06,
"loss": 0.2953,
"step": 501000
},
{
"epoch": 2.73,
"learning_rate": 1.8313887666175773e-06,
"loss": 0.2917,
"step": 501500
},
{
"epoch": 2.73,
"learning_rate": 1.813274498189479e-06,
"loss": 0.3005,
"step": 502000
},
{
"epoch": 2.73,
"learning_rate": 1.795160229761381e-06,
"loss": 0.2939,
"step": 502500
},
{
"epoch": 2.73,
"learning_rate": 1.7770459613332827e-06,
"loss": 0.2995,
"step": 503000
},
{
"epoch": 2.74,
"learning_rate": 1.7589316929051847e-06,
"loss": 0.3113,
"step": 503500
},
{
"epoch": 2.74,
"learning_rate": 1.7408174244770865e-06,
"loss": 0.3078,
"step": 504000
},
{
"epoch": 2.74,
"learning_rate": 1.7227031560489884e-06,
"loss": 0.3027,
"step": 504500
},
{
"epoch": 2.74,
"learning_rate": 1.7045888876208902e-06,
"loss": 0.2968,
"step": 505000
},
{
"epoch": 2.75,
"learning_rate": 1.6864746191927921e-06,
"loss": 0.3045,
"step": 505500
},
{
"epoch": 2.75,
"learning_rate": 1.6683603507646939e-06,
"loss": 0.3058,
"step": 506000
},
{
"epoch": 2.75,
"learning_rate": 1.650246082336596e-06,
"loss": 0.2926,
"step": 506500
},
{
"epoch": 2.76,
"learning_rate": 1.6321318139084976e-06,
"loss": 0.3029,
"step": 507000
},
{
"epoch": 2.76,
"learning_rate": 1.6140175454803997e-06,
"loss": 0.3055,
"step": 507500
},
{
"epoch": 2.76,
"learning_rate": 1.5959032770523015e-06,
"loss": 0.2957,
"step": 508000
},
{
"epoch": 2.76,
"learning_rate": 1.5777890086242035e-06,
"loss": 0.3008,
"step": 508500
},
{
"epoch": 2.77,
"learning_rate": 1.5596747401961052e-06,
"loss": 0.2984,
"step": 509000
},
{
"epoch": 2.77,
"learning_rate": 1.5415604717680072e-06,
"loss": 0.2984,
"step": 509500
},
{
"epoch": 2.77,
"learning_rate": 1.523446203339909e-06,
"loss": 0.2904,
"step": 510000
},
{
"epoch": 2.77,
"learning_rate": 1.5053319349118109e-06,
"loss": 0.3063,
"step": 510500
},
{
"epoch": 2.78,
"learning_rate": 1.4872176664837126e-06,
"loss": 0.2866,
"step": 511000
},
{
"epoch": 2.78,
"learning_rate": 1.4691033980556146e-06,
"loss": 0.2963,
"step": 511500
},
{
"epoch": 2.78,
"learning_rate": 1.4509891296275163e-06,
"loss": 0.2982,
"step": 512000
},
{
"epoch": 2.79,
"learning_rate": 1.4328748611994183e-06,
"loss": 0.2941,
"step": 512500
},
{
"epoch": 2.79,
"learning_rate": 1.41476059277132e-06,
"loss": 0.2851,
"step": 513000
},
{
"epoch": 2.79,
"learning_rate": 1.3966463243432222e-06,
"loss": 0.2976,
"step": 513500
},
{
"epoch": 2.79,
"learning_rate": 1.3785320559151237e-06,
"loss": 0.2935,
"step": 514000
},
{
"epoch": 2.8,
"learning_rate": 1.360417787487026e-06,
"loss": 0.3003,
"step": 514500
},
{
"epoch": 2.8,
"learning_rate": 1.3423035190589277e-06,
"loss": 0.3009,
"step": 515000
},
{
"epoch": 2.8,
"learning_rate": 1.3241892506308296e-06,
"loss": 0.2948,
"step": 515500
},
{
"epoch": 2.8,
"learning_rate": 1.3060749822027314e-06,
"loss": 0.2905,
"step": 516000
},
{
"epoch": 2.81,
"learning_rate": 1.2879607137746333e-06,
"loss": 0.296,
"step": 516500
},
{
"epoch": 2.81,
"learning_rate": 1.269846445346535e-06,
"loss": 0.2953,
"step": 517000
},
{
"epoch": 2.81,
"learning_rate": 1.251732176918437e-06,
"loss": 0.2974,
"step": 517500
},
{
"epoch": 2.81,
"learning_rate": 1.2336179084903388e-06,
"loss": 0.2864,
"step": 518000
},
{
"epoch": 2.82,
"learning_rate": 1.2155036400622407e-06,
"loss": 0.2926,
"step": 518500
},
{
"epoch": 2.82,
"learning_rate": 1.1973893716341425e-06,
"loss": 0.3051,
"step": 519000
},
{
"epoch": 2.82,
"learning_rate": 1.1792751032060445e-06,
"loss": 0.2924,
"step": 519500
},
{
"epoch": 2.83,
"learning_rate": 1.1611608347779462e-06,
"loss": 0.2983,
"step": 520000
},
{
"epoch": 2.83,
"learning_rate": 1.1430465663498482e-06,
"loss": 0.297,
"step": 520500
},
{
"epoch": 2.83,
"learning_rate": 1.1249322979217501e-06,
"loss": 0.2863,
"step": 521000
},
{
"epoch": 2.83,
"learning_rate": 1.1068180294936519e-06,
"loss": 0.2988,
"step": 521500
},
{
"epoch": 2.84,
"learning_rate": 1.0887037610655538e-06,
"loss": 0.2848,
"step": 522000
},
{
"epoch": 2.84,
"learning_rate": 1.0705894926374556e-06,
"loss": 0.2922,
"step": 522500
},
{
"epoch": 2.84,
"learning_rate": 1.0524752242093575e-06,
"loss": 0.2935,
"step": 523000
},
{
"epoch": 2.84,
"learning_rate": 1.0343609557812593e-06,
"loss": 0.296,
"step": 523500
},
{
"epoch": 2.85,
"learning_rate": 1.0162466873531612e-06,
"loss": 0.2974,
"step": 524000
},
{
"epoch": 2.85,
"learning_rate": 9.981324189250632e-07,
"loss": 0.2866,
"step": 524500
},
{
"epoch": 2.85,
"learning_rate": 9.80018150496965e-07,
"loss": 0.301,
"step": 525000
},
{
"epoch": 2.86,
"learning_rate": 9.61903882068867e-07,
"loss": 0.2959,
"step": 525500
},
{
"epoch": 2.86,
"learning_rate": 9.437896136407688e-07,
"loss": 0.3035,
"step": 526000
},
{
"epoch": 2.86,
"learning_rate": 9.256753452126706e-07,
"loss": 0.2796,
"step": 526500
},
{
"epoch": 2.86,
"learning_rate": 9.075610767845725e-07,
"loss": 0.2928,
"step": 527000
},
{
"epoch": 2.87,
"learning_rate": 8.894468083564743e-07,
"loss": 0.2943,
"step": 527500
},
{
"epoch": 2.87,
"learning_rate": 8.713325399283762e-07,
"loss": 0.293,
"step": 528000
},
{
"epoch": 2.87,
"learning_rate": 8.53218271500278e-07,
"loss": 0.2813,
"step": 528500
},
{
"epoch": 2.87,
"learning_rate": 8.351040030721799e-07,
"loss": 0.2936,
"step": 529000
},
{
"epoch": 2.88,
"learning_rate": 8.169897346440818e-07,
"loss": 0.2967,
"step": 529500
},
{
"epoch": 2.88,
"learning_rate": 7.988754662159838e-07,
"loss": 0.2939,
"step": 530000
},
{
"epoch": 2.88,
"learning_rate": 7.807611977878857e-07,
"loss": 0.2938,
"step": 530500
},
{
"epoch": 2.89,
"learning_rate": 7.626469293597875e-07,
"loss": 0.2956,
"step": 531000
},
{
"epoch": 2.89,
"learning_rate": 7.445326609316894e-07,
"loss": 0.2902,
"step": 531500
},
{
"epoch": 2.89,
"learning_rate": 7.264183925035913e-07,
"loss": 0.2943,
"step": 532000
},
{
"epoch": 2.89,
"learning_rate": 7.083041240754932e-07,
"loss": 0.2999,
"step": 532500
},
{
"epoch": 2.9,
"learning_rate": 6.90189855647395e-07,
"loss": 0.2913,
"step": 533000
},
{
"epoch": 2.9,
"learning_rate": 6.720755872192969e-07,
"loss": 0.291,
"step": 533500
},
{
"epoch": 2.9,
"learning_rate": 6.539613187911987e-07,
"loss": 0.3031,
"step": 534000
},
{
"epoch": 2.9,
"learning_rate": 6.358470503631006e-07,
"loss": 0.2961,
"step": 534500
},
{
"epoch": 2.91,
"learning_rate": 6.177327819350024e-07,
"loss": 0.2979,
"step": 535000
},
{
"epoch": 2.91,
"learning_rate": 5.996185135069043e-07,
"loss": 0.2888,
"step": 535500
},
{
"epoch": 2.91,
"learning_rate": 5.815042450788062e-07,
"loss": 0.3008,
"step": 536000
},
{
"epoch": 2.92,
"learning_rate": 5.63389976650708e-07,
"loss": 0.3043,
"step": 536500
},
{
"epoch": 2.92,
"learning_rate": 5.452757082226099e-07,
"loss": 0.2952,
"step": 537000
},
{
"epoch": 2.92,
"learning_rate": 5.271614397945118e-07,
"loss": 0.3015,
"step": 537500
},
{
"epoch": 2.92,
"learning_rate": 5.090471713664137e-07,
"loss": 0.2915,
"step": 538000
},
{
"epoch": 2.93,
"learning_rate": 4.909329029383155e-07,
"loss": 0.3014,
"step": 538500
},
{
"epoch": 2.93,
"learning_rate": 4.728186345102174e-07,
"loss": 0.2994,
"step": 539000
},
{
"epoch": 2.93,
"learning_rate": 4.5470436608211923e-07,
"loss": 0.3036,
"step": 539500
},
{
"epoch": 2.93,
"learning_rate": 4.3659009765402114e-07,
"loss": 0.29,
"step": 540000
},
{
"epoch": 2.94,
"learning_rate": 4.18475829225923e-07,
"loss": 0.2824,
"step": 540500
},
{
"epoch": 2.94,
"learning_rate": 4.0036156079782485e-07,
"loss": 0.2917,
"step": 541000
},
{
"epoch": 2.94,
"learning_rate": 3.8224729236972675e-07,
"loss": 0.2915,
"step": 541500
},
{
"epoch": 2.95,
"learning_rate": 3.641330239416286e-07,
"loss": 0.2932,
"step": 542000
},
{
"epoch": 2.95,
"learning_rate": 3.4601875551353046e-07,
"loss": 0.3033,
"step": 542500
},
{
"epoch": 2.95,
"learning_rate": 3.279044870854323e-07,
"loss": 0.2907,
"step": 543000
},
{
"epoch": 2.95,
"learning_rate": 3.097902186573342e-07,
"loss": 0.2925,
"step": 543500
},
{
"epoch": 2.96,
"learning_rate": 2.916759502292361e-07,
"loss": 0.2901,
"step": 544000
},
{
"epoch": 2.96,
"learning_rate": 2.73561681801138e-07,
"loss": 0.2898,
"step": 544500
},
{
"epoch": 2.96,
"learning_rate": 2.5544741337303984e-07,
"loss": 0.2861,
"step": 545000
},
{
"epoch": 2.96,
"learning_rate": 2.3733314494494172e-07,
"loss": 0.2947,
"step": 545500
},
{
"epoch": 2.97,
"learning_rate": 2.192188765168436e-07,
"loss": 0.2855,
"step": 546000
},
{
"epoch": 2.97,
"learning_rate": 2.0110460808874545e-07,
"loss": 0.2899,
"step": 546500
},
{
"epoch": 2.97,
"learning_rate": 1.8299033966064733e-07,
"loss": 0.2993,
"step": 547000
},
{
"epoch": 2.98,
"learning_rate": 1.6487607123254918e-07,
"loss": 0.3013,
"step": 547500
},
{
"epoch": 2.98,
"learning_rate": 1.4676180280445104e-07,
"loss": 0.2946,
"step": 548000
},
{
"epoch": 2.98,
"learning_rate": 1.2864753437635292e-07,
"loss": 0.2937,
"step": 548500
},
{
"epoch": 2.98,
"learning_rate": 1.1053326594825478e-07,
"loss": 0.2912,
"step": 549000
},
{
"epoch": 2.99,
"learning_rate": 9.241899752015665e-08,
"loss": 0.2912,
"step": 549500
},
{
"epoch": 2.99,
"learning_rate": 7.430472909205853e-08,
"loss": 0.2927,
"step": 550000
},
{
"epoch": 2.99,
"learning_rate": 5.6190460663960404e-08,
"loss": 0.2956,
"step": 550500
},
{
"epoch": 2.99,
"learning_rate": 3.807619223586227e-08,
"loss": 0.2959,
"step": 551000
},
{
"epoch": 3.0,
"learning_rate": 1.9961923807764138e-08,
"loss": 0.2791,
"step": 551500
},
{
"epoch": 3.0,
"learning_rate": 1.8476553796660092e-09,
"loss": 0.2951,
"step": 552000
},
{
"epoch": 3.0,
"eval_accuracy": 0.7710575635876841,
"eval_loss": 0.8383136987686157,
"eval_runtime": 72.6822,
"eval_samples_per_second": 513.881,
"eval_steps_per_second": 64.239,
"step": 552051
}
],
"max_steps": 552051,
"num_train_epochs": 3,
"total_flos": 1.162013440270395e+18,
"trial_name": null,
"trial_params": null
}