ArabianGPT-03B / trainer_state.json
riotu-lab's picture
Upload 12 Files
1000026
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.976631448884397,
"global_step": 1090500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4e-05,
"loss": 9.6608,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 8e-05,
"loss": 8.6223,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 0.00012,
"loss": 8.3175,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 0.00016,
"loss": 7.9745,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 0.0002,
"loss": 7.6776,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 0.00024,
"loss": 7.4451,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 0.00028,
"loss": 7.2587,
"step": 700
},
{
"epoch": 0.0,
"learning_rate": 0.00032,
"loss": 7.0977,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 0.00036,
"loss": 6.9377,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 0.0004,
"loss": 6.8182,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 0.0003999999998815762,
"loss": 6.6945,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 0.0003999999995263047,
"loss": 6.5851,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 0.00039999999893418564,
"loss": 6.476,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 0.0003999999981052189,
"loss": 6.3753,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 0.00039999999703940455,
"loss": 6.2997,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 0.00039999933291862616,
"loss": 5.9559,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 0.0003999994734068435,
"loss": 6.1649,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 0.0003999988151660478,
"loss": 5.8819,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 0.000399997893630147,
"loss": 5.8437,
"step": 5000
},
{
"epoch": 0.01,
"learning_rate": 0.0003999967088003543,
"loss": 5.857,
"step": 6000
},
{
"epoch": 0.01,
"learning_rate": 0.00039999526067822954,
"loss": 5.7574,
"step": 7000
},
{
"epoch": 0.01,
"learning_rate": 0.00039999354926567907,
"loss": 5.6647,
"step": 8000
},
{
"epoch": 0.02,
"learning_rate": 0.00039999157456495604,
"loss": 5.598,
"step": 9000
},
{
"epoch": 0.02,
"learning_rate": 0.00039998933657865997,
"loss": 5.528,
"step": 10000
},
{
"epoch": 0.02,
"learning_rate": 0.00039998683530973725,
"loss": 5.4848,
"step": 11000
},
{
"epoch": 0.02,
"learning_rate": 0.0003999840707614807,
"loss": 5.4314,
"step": 12000
},
{
"epoch": 0.02,
"learning_rate": 0.0003999810429375299,
"loss": 5.3931,
"step": 13000
},
{
"epoch": 0.03,
"learning_rate": 0.00039997775526446917,
"loss": 5.3531,
"step": 14000
},
{
"epoch": 0.03,
"learning_rate": 0.00039997420116469963,
"loss": 5.3126,
"step": 15000
},
{
"epoch": 0.03,
"learning_rate": 0.0003999703877510894,
"loss": 5.2782,
"step": 16000
},
{
"epoch": 0.03,
"learning_rate": 0.000399966307394198,
"loss": 5.2575,
"step": 17000
},
{
"epoch": 0.03,
"learning_rate": 0.0003999619682600994,
"loss": 5.2297,
"step": 18000
},
{
"epoch": 0.03,
"learning_rate": 0.0003999573616675516,
"loss": 5.1976,
"step": 19000
},
{
"epoch": 0.04,
"learning_rate": 0.00039995249683579117,
"loss": 5.182,
"step": 20000
},
{
"epoch": 0.04,
"learning_rate": 0.00039994736403182074,
"loss": 5.1576,
"step": 21000
},
{
"epoch": 0.04,
"learning_rate": 0.00039994197352799087,
"loss": 5.1435,
"step": 22000
},
{
"epoch": 0.04,
"learning_rate": 0.0003999363145395998,
"loss": 5.1286,
"step": 23000
},
{
"epoch": 0.04,
"learning_rate": 0.0003999303983920581,
"loss": 5.1065,
"step": 24000
},
{
"epoch": 0.05,
"learning_rate": 0.0003999242132490164,
"loss": 5.0946,
"step": 25000
},
{
"epoch": 0.05,
"learning_rate": 0.0003999177714888857,
"loss": 5.0748,
"step": 26000
},
{
"epoch": 0.05,
"learning_rate": 0.00039991106022373136,
"loss": 5.0674,
"step": 27000
},
{
"epoch": 0.05,
"learning_rate": 0.0003999040928848998,
"loss": 5.0493,
"step": 28000
},
{
"epoch": 0.05,
"learning_rate": 0.0003998968555329385,
"loss": 5.039,
"step": 29000
},
{
"epoch": 0.05,
"learning_rate": 0.0003998893626520587,
"loss": 5.0348,
"step": 30000
},
{
"epoch": 0.06,
"learning_rate": 0.0003998815992513638,
"loss": 5.0201,
"step": 31000
},
{
"epoch": 0.06,
"learning_rate": 0.00039987357270987667,
"loss": 5.0161,
"step": 32000
},
{
"epoch": 0.06,
"learning_rate": 0.0003998652914592657,
"loss": 4.9991,
"step": 33000
},
{
"epoch": 0.06,
"learning_rate": 0.00039985673893135445,
"loss": 4.9971,
"step": 34000
},
{
"epoch": 0.06,
"learning_rate": 0.0003998479232953792,
"loss": 4.9871,
"step": 35000
},
{
"epoch": 0.07,
"learning_rate": 0.0003998388445629455,
"loss": 4.9771,
"step": 36000
},
{
"epoch": 0.07,
"learning_rate": 0.0003998295122192289,
"loss": 4.9726,
"step": 37000
},
{
"epoch": 0.07,
"learning_rate": 0.0003998199075931465,
"loss": 4.9669,
"step": 38000
},
{
"epoch": 0.07,
"learning_rate": 0.0003998100499065675,
"loss": 4.9583,
"step": 39000
},
{
"epoch": 0.07,
"learning_rate": 0.00039979992969921984,
"loss": 4.9556,
"step": 40000
},
{
"epoch": 0.07,
"learning_rate": 0.0003997895364597799,
"loss": 4.942,
"step": 41000
},
{
"epoch": 0.08,
"learning_rate": 0.0003997788909883795,
"loss": 4.9406,
"step": 42000
},
{
"epoch": 0.08,
"learning_rate": 0.00039976797198678043,
"loss": 4.9323,
"step": 43000
},
{
"epoch": 0.08,
"learning_rate": 0.00039975680130732954,
"loss": 4.9277,
"step": 44000
},
{
"epoch": 0.08,
"learning_rate": 0.0003997453566010126,
"loss": 4.923,
"step": 45000
},
{
"epoch": 0.08,
"learning_rate": 0.0003997336489332646,
"loss": 4.9197,
"step": 46000
},
{
"epoch": 0.09,
"learning_rate": 0.0003997216904214485,
"loss": 4.9051,
"step": 47000
},
{
"epoch": 0.09,
"learning_rate": 0.00039970945714034553,
"loss": 4.9077,
"step": 48000
},
{
"epoch": 0.09,
"learning_rate": 0.0003996969609450725,
"loss": 4.9002,
"step": 49000
},
{
"epoch": 0.09,
"learning_rate": 0.0003996842147424852,
"loss": 4.9013,
"step": 50000
},
{
"epoch": 0.09,
"learning_rate": 0.00039967119303144363,
"loss": 4.8946,
"step": 51000
},
{
"epoch": 0.09,
"learning_rate": 0.00039965792187247553,
"loss": 4.8882,
"step": 52000
},
{
"epoch": 0.1,
"learning_rate": 0.00039964437471416833,
"loss": 4.8894,
"step": 53000
},
{
"epoch": 0.1,
"learning_rate": 0.0003996305786686345,
"loss": 4.8764,
"step": 54000
},
{
"epoch": 0.1,
"learning_rate": 0.0003996165061343288,
"loss": 4.8782,
"step": 55000
},
{
"epoch": 0.1,
"learning_rate": 0.0003996021852748057,
"loss": 4.8759,
"step": 56000
},
{
"epoch": 0.1,
"learning_rate": 0.00039958758743853225,
"loss": 4.8727,
"step": 57000
},
{
"epoch": 0.11,
"learning_rate": 0.0003995727418403572,
"loss": 4.8669,
"step": 58000
},
{
"epoch": 0.11,
"learning_rate": 0.0003995576187789104,
"loss": 4.8694,
"step": 59000
},
{
"epoch": 0.11,
"learning_rate": 0.00039954224852018107,
"loss": 4.8688,
"step": 60000
},
{
"epoch": 0.11,
"learning_rate": 0.0003995266003131184,
"loss": 4.86,
"step": 61000
},
{
"epoch": 0.11,
"learning_rate": 0.00039951070547469266,
"loss": 4.8559,
"step": 62000
},
{
"epoch": 0.12,
"learning_rate": 0.00039949453220433417,
"loss": 4.8543,
"step": 63000
},
{
"epoch": 0.12,
"learning_rate": 0.00039947811286982935,
"loss": 4.8515,
"step": 64000
},
{
"epoch": 0.12,
"learning_rate": 0.0003994614146212571,
"loss": 4.8498,
"step": 65000
},
{
"epoch": 0.12,
"learning_rate": 0.00039944447087704996,
"loss": 4.8443,
"step": 66000
},
{
"epoch": 0.12,
"learning_rate": 0.0003994272477381079,
"loss": 4.8399,
"step": 67000
},
{
"epoch": 0.12,
"learning_rate": 0.0003994097796733338,
"loss": 4.8381,
"step": 68000
},
{
"epoch": 0.13,
"learning_rate": 0.00039939203173462723,
"loss": 4.8381,
"step": 69000
},
{
"epoch": 0.13,
"learning_rate": 0.00039937403944117984,
"loss": 4.8354,
"step": 70000
},
{
"epoch": 0.13,
"learning_rate": 0.00039935576679607466,
"loss": 4.834,
"step": 71000
},
{
"epoch": 0.13,
"learning_rate": 0.0003993372503686054,
"loss": 4.8337,
"step": 72000
},
{
"epoch": 0.13,
"learning_rate": 0.0003993184531132279,
"loss": 4.8304,
"step": 73000
},
{
"epoch": 0.14,
"learning_rate": 0.00039929939345843064,
"loss": 4.8254,
"step": 74000
},
{
"epoch": 0.14,
"learning_rate": 0.000399280090882382,
"loss": 4.8248,
"step": 75000
},
{
"epoch": 0.14,
"learning_rate": 0.0003992605067667017,
"loss": 4.8248,
"step": 76000
},
{
"epoch": 0.14,
"learning_rate": 0.0003992406803053476,
"loss": 4.8246,
"step": 77000
},
{
"epoch": 0.14,
"learning_rate": 0.00039922057183181,
"loss": 4.8173,
"step": 78000
},
{
"epoch": 0.14,
"learning_rate": 0.00039920024209092803,
"loss": 4.8128,
"step": 79000
},
{
"epoch": 0.15,
"learning_rate": 0.00039917960962754717,
"loss": 4.818,
"step": 80000
},
{
"epoch": 0.15,
"learning_rate": 0.00039915871494753167,
"loss": 4.8107,
"step": 81000
},
{
"epoch": 0.15,
"learning_rate": 0.00039913755807838893,
"loss": 4.8121,
"step": 82000
},
{
"epoch": 0.15,
"learning_rate": 0.00039911613904797174,
"loss": 4.8116,
"step": 83000
},
{
"epoch": 0.15,
"learning_rate": 0.0003990944796965674,
"loss": 4.8057,
"step": 84000
},
{
"epoch": 0.16,
"learning_rate": 0.0003990725366906298,
"loss": 4.8055,
"step": 85000
},
{
"epoch": 0.16,
"learning_rate": 0.0003990503762807127,
"loss": 4.8028,
"step": 86000
},
{
"epoch": 0.16,
"learning_rate": 0.00039902790967672147,
"loss": 4.7969,
"step": 87000
},
{
"epoch": 0.16,
"learning_rate": 0.0003990052039152944,
"loss": 4.8025,
"step": 88000
},
{
"epoch": 0.16,
"learning_rate": 0.00039898221356934855,
"loss": 4.8017,
"step": 89000
},
{
"epoch": 0.16,
"learning_rate": 0.00039895896126663653,
"loss": 4.7986,
"step": 90000
},
{
"epoch": 0.17,
"learning_rate": 0.0003989354470377698,
"loss": 4.7991,
"step": 91000
},
{
"epoch": 0.17,
"learning_rate": 0.00039891169482063473,
"loss": 4.7965,
"step": 92000
},
{
"epoch": 0.17,
"learning_rate": 0.00039888765709451975,
"loss": 4.792,
"step": 93000
},
{
"epoch": 0.17,
"learning_rate": 0.00039886338196645364,
"loss": 4.7862,
"step": 94000
},
{
"epoch": 0.17,
"learning_rate": 0.00039883882086954475,
"loss": 4.7916,
"step": 95000
},
{
"epoch": 0.18,
"learning_rate": 0.000398814022958251,
"loss": 4.7883,
"step": 96000
},
{
"epoch": 0.18,
"learning_rate": 0.00039878893861975594,
"loss": 4.7908,
"step": 97000
},
{
"epoch": 0.18,
"learning_rate": 0.00039876359257893807,
"loss": 4.7877,
"step": 98000
},
{
"epoch": 0.18,
"learning_rate": 0.0003987379848691651,
"loss": 4.7873,
"step": 99000
},
{
"epoch": 0.18,
"learning_rate": 0.00039871214152416957,
"loss": 4.7876,
"step": 100000
},
{
"epoch": 0.18,
"learning_rate": 0.00039868601083955114,
"loss": 4.7883,
"step": 101000
},
{
"epoch": 0.19,
"learning_rate": 0.00039865964511100514,
"loss": 4.7893,
"step": 102000
},
{
"epoch": 0.19,
"learning_rate": 0.0003986329915890061,
"loss": 4.7789,
"step": 103000
},
{
"epoch": 0.19,
"learning_rate": 0.00039860610361561096,
"loss": 4.7815,
"step": 104000
},
{
"epoch": 0.19,
"learning_rate": 0.0003985789273964466,
"loss": 4.7738,
"step": 105000
},
{
"epoch": 0.19,
"learning_rate": 0.0003985515173196509,
"loss": 4.7753,
"step": 106000
},
{
"epoch": 0.2,
"learning_rate": 0.00039852381854628627,
"loss": 4.7724,
"step": 107000
},
{
"epoch": 0.2,
"learning_rate": 0.00039849588651028544,
"loss": 4.7726,
"step": 108000
},
{
"epoch": 0.2,
"learning_rate": 0.0003984676653284346,
"loss": 4.7685,
"step": 109000
},
{
"epoch": 0.2,
"learning_rate": 0.0003984392114801697,
"loss": 4.7715,
"step": 110000
},
{
"epoch": 0.2,
"learning_rate": 0.0003984104680382948,
"loss": 4.7713,
"step": 111000
},
{
"epoch": 0.2,
"learning_rate": 0.00039838149252745204,
"loss": 4.7698,
"step": 112000
},
{
"epoch": 0.21,
"learning_rate": 0.0003983522269767629,
"loss": 4.7753,
"step": 113000
},
{
"epoch": 0.21,
"learning_rate": 0.00039832272995577275,
"loss": 4.7652,
"step": 114000
},
{
"epoch": 0.21,
"learning_rate": 0.0003982929424502255,
"loss": 4.7664,
"step": 115000
},
{
"epoch": 0.21,
"learning_rate": 0.00039826292407426207,
"loss": 4.7713,
"step": 116000
},
{
"epoch": 0.21,
"learning_rate": 0.00039823264521022384,
"loss": 4.7628,
"step": 117000
},
{
"epoch": 0.22,
"learning_rate": 0.0003982020751975389,
"loss": 4.7682,
"step": 118000
},
{
"epoch": 0.22,
"learning_rate": 0.00039817124425512714,
"loss": 4.7644,
"step": 119000
},
{
"epoch": 0.22,
"learning_rate": 0.0003981401524235768,
"loss": 4.758,
"step": 120000
},
{
"epoch": 0.22,
"learning_rate": 0.00039810883122677967,
"loss": 4.7622,
"step": 121000
},
{
"epoch": 0.22,
"learning_rate": 0.0003980772180008777,
"loss": 4.762,
"step": 122000
},
{
"epoch": 0.22,
"learning_rate": 0.0003980453760138509,
"loss": 4.7571,
"step": 123000
},
{
"epoch": 0.23,
"learning_rate": 0.00039801324155990393,
"loss": 4.7619,
"step": 124000
},
{
"epoch": 0.23,
"learning_rate": 0.00039798091147522796,
"loss": 4.7618,
"step": 125000
},
{
"epoch": 0.23,
"learning_rate": 0.0003979482562229017,
"loss": 4.762,
"step": 126000
},
{
"epoch": 0.23,
"learning_rate": 0.000397915340374997,
"loss": 4.7562,
"step": 127000
},
{
"epoch": 0.23,
"learning_rate": 0.00039788216397484706,
"loss": 4.7528,
"step": 128000
},
{
"epoch": 0.24,
"learning_rate": 0.00039784876063314606,
"loss": 4.7567,
"step": 129000
},
{
"epoch": 0.24,
"learning_rate": 0.00039781506352031947,
"loss": 4.7554,
"step": 130000
},
{
"epoch": 0.24,
"learning_rate": 0.00039778114007485855,
"loss": 4.7494,
"step": 131000
},
{
"epoch": 0.24,
"learning_rate": 0.00039774692242662465,
"loss": 4.7591,
"step": 132000
},
{
"epoch": 0.24,
"learning_rate": 0.00039771244444786484,
"loss": 4.7605,
"step": 133000
},
{
"epoch": 0.24,
"learning_rate": 0.0003976777061839689,
"loss": 4.7469,
"step": 134000
},
{
"epoch": 0.25,
"learning_rate": 0.00039764274280914674,
"loss": 4.7506,
"step": 135000
},
{
"epoch": 0.25,
"learning_rate": 0.00039760748437268835,
"loss": 4.7506,
"step": 136000
},
{
"epoch": 0.25,
"learning_rate": 0.0003975720014377832,
"loss": 4.7509,
"step": 137000
},
{
"epoch": 0.25,
"learning_rate": 0.00039753622301424524,
"loss": 4.7488,
"step": 138000
},
{
"epoch": 0.25,
"learning_rate": 0.00039750022070592105,
"loss": 4.7544,
"step": 139000
},
{
"epoch": 0.26,
"learning_rate": 0.0003974639224835218,
"loss": 4.7502,
"step": 140000
},
{
"epoch": 0.26,
"learning_rate": 0.0003974274009911748,
"loss": 4.7433,
"step": 141000
},
{
"epoch": 0.26,
"learning_rate": 0.00039739058316086716,
"loss": 4.7466,
"step": 142000
},
{
"epoch": 0.26,
"learning_rate": 0.0003973535798838411,
"loss": 4.7469,
"step": 143000
},
{
"epoch": 0.26,
"learning_rate": 0.0003973162428990996,
"loss": 4.7414,
"step": 144000
},
{
"epoch": 0.26,
"learning_rate": 0.00039727864615081464,
"loss": 4.7418,
"step": 145000
},
{
"epoch": 0.27,
"learning_rate": 0.0003972407896884818,
"loss": 4.7484,
"step": 146000
},
{
"epoch": 0.27,
"learning_rate": 0.00039720271180775053,
"loss": 4.7454,
"step": 147000
},
{
"epoch": 0.27,
"learning_rate": 0.0003971643363267646,
"loss": 4.744,
"step": 148000
},
{
"epoch": 0.27,
"learning_rate": 0.00039712577881131754,
"loss": 4.7369,
"step": 149000
},
{
"epoch": 0.27,
"learning_rate": 0.00039708688477304655,
"loss": 4.7375,
"step": 150000
},
{
"epoch": 0.28,
"learning_rate": 0.0003970477312731783,
"loss": 4.7414,
"step": 151000
},
{
"epoch": 0.28,
"learning_rate": 0.0003970083183632576,
"loss": 4.7389,
"step": 152000
},
{
"epoch": 0.28,
"learning_rate": 0.0003969686858969712,
"loss": 4.7378,
"step": 153000
},
{
"epoch": 0.28,
"learning_rate": 0.0003969287545822263,
"loss": 4.7372,
"step": 154000
},
{
"epoch": 0.28,
"learning_rate": 0.00039688860433410763,
"loss": 4.7393,
"step": 155000
},
{
"epoch": 0.28,
"learning_rate": 0.00039684815482460387,
"loss": 4.7315,
"step": 156000
},
{
"epoch": 0.29,
"learning_rate": 0.00039680748700586993,
"loss": 4.7371,
"step": 157000
},
{
"epoch": 0.29,
"learning_rate": 0.00039676651951439873,
"loss": 4.7353,
"step": 158000
},
{
"epoch": 0.29,
"learning_rate": 0.0003967253343389894,
"loss": 4.7315,
"step": 159000
},
{
"epoch": 0.29,
"learning_rate": 0.00039668384908106706,
"loss": 4.7358,
"step": 160000
},
{
"epoch": 0.29,
"learning_rate": 0.00039664210489213713,
"loss": 4.7339,
"step": 161000
},
{
"epoch": 0.3,
"learning_rate": 0.00039660010182715526,
"loss": 4.737,
"step": 162000
},
{
"epoch": 0.3,
"learning_rate": 0.000396557882332566,
"loss": 4.7261,
"step": 163000
},
{
"epoch": 0.3,
"learning_rate": 0.0003965153619404471,
"loss": 4.7342,
"step": 164000
},
{
"epoch": 0.3,
"learning_rate": 0.0003964726257474391,
"loss": 4.7293,
"step": 165000
},
{
"epoch": 0.3,
"learning_rate": 0.0003964295882518688,
"loss": 4.7301,
"step": 166000
},
{
"epoch": 0.3,
"learning_rate": 0.00039638633558526285,
"loss": 4.7316,
"step": 167000
},
{
"epoch": 0.31,
"learning_rate": 0.00039634278121264703,
"loss": 4.7295,
"step": 168000
},
{
"epoch": 0.31,
"learning_rate": 0.0003962990122999811,
"loss": 4.7332,
"step": 169000
},
{
"epoch": 0.31,
"learning_rate": 0.0003962549412794449,
"loss": 4.732,
"step": 170000
},
{
"epoch": 0.31,
"learning_rate": 0.0003962106563509727,
"loss": 4.7321,
"step": 171000
},
{
"epoch": 0.31,
"learning_rate": 0.00039616606891435896,
"loss": 4.7276,
"step": 172000
},
{
"epoch": 0.32,
"learning_rate": 0.00039612122322838677,
"loss": 4.7245,
"step": 173000
},
{
"epoch": 0.32,
"learning_rate": 0.0003960761645849172,
"loss": 4.7286,
"step": 174000
},
{
"epoch": 0.32,
"learning_rate": 0.0003960308028357847,
"loss": 4.7239,
"step": 175000
},
{
"epoch": 0.64,
"learning_rate": 0.0003840903997775841,
"loss": 4.6145,
"step": 176000
},
{
"epoch": 0.65,
"learning_rate": 0.0003839104648613638,
"loss": 4.5905,
"step": 177000
},
{
"epoch": 0.65,
"learning_rate": 0.0003837297421617577,
"loss": 4.5891,
"step": 178000
},
{
"epoch": 0.65,
"learning_rate": 0.000383548053178735,
"loss": 4.5817,
"step": 179000
},
{
"epoch": 0.66,
"learning_rate": 0.0003833652155473882,
"loss": 4.5765,
"step": 180000
},
{
"epoch": 0.66,
"learning_rate": 0.00038318141161813824,
"loss": 4.574,
"step": 181000
},
{
"epoch": 0.66,
"learning_rate": 0.0003829966423595951,
"loss": 4.5725,
"step": 182000
},
{
"epoch": 0.67,
"learning_rate": 0.00038281109496044006,
"loss": 4.5666,
"step": 183000
},
{
"epoch": 0.67,
"learning_rate": 0.00038262439893236937,
"loss": 4.5631,
"step": 184000
},
{
"epoch": 0.68,
"learning_rate": 0.00038243692864915963,
"loss": 4.5591,
"step": 185000
},
{
"epoch": 0.68,
"learning_rate": 0.0003822483097830243,
"loss": 4.5552,
"step": 186000
},
{
"epoch": 0.68,
"learning_rate": 0.00038205873050485524,
"loss": 4.5543,
"step": 187000
},
{
"epoch": 0.69,
"learning_rate": 0.0003818683828312813,
"loss": 4.5512,
"step": 188000
},
{
"epoch": 0.69,
"learning_rate": 0.00038167688668914063,
"loss": 4.5484,
"step": 189000
},
{
"epoch": 0.69,
"learning_rate": 0.0003814844331462512,
"loss": 4.5501,
"step": 190000
},
{
"epoch": 0.7,
"learning_rate": 0.0003812912171041104,
"loss": 4.5431,
"step": 191000
},
{
"epoch": 0.7,
"learning_rate": 0.0003810968527621949,
"loss": 4.5418,
"step": 192000
},
{
"epoch": 0.7,
"learning_rate": 0.00038090153407619305,
"loss": 4.5379,
"step": 193000
},
{
"epoch": 0.71,
"learning_rate": 0.00038070526207539536,
"loss": 4.538,
"step": 194000
},
{
"epoch": 0.71,
"learning_rate": 0.0003805082354937156,
"loss": 4.5377,
"step": 195000
},
{
"epoch": 0.72,
"learning_rate": 0.0003803100609220069,
"loss": 4.5354,
"step": 196000
},
{
"epoch": 0.72,
"learning_rate": 0.0003801111357514916,
"loss": 4.5321,
"step": 197000
},
{
"epoch": 0.72,
"learning_rate": 0.000379911062782051,
"loss": 4.5327,
"step": 198000
},
{
"epoch": 0.73,
"learning_rate": 0.00037971004171739956,
"loss": 4.5342,
"step": 199000
},
{
"epoch": 0.73,
"learning_rate": 0.00037950827605766894,
"loss": 4.527,
"step": 200000
},
{
"epoch": 0.73,
"learning_rate": 0.00037930536293104657,
"loss": 4.5297,
"step": 201000
},
{
"epoch": 0.74,
"learning_rate": 0.00037910170923078203,
"loss": 4.5252,
"step": 202000
},
{
"epoch": 0.74,
"learning_rate": 0.00037889690831515295,
"loss": 4.5228,
"step": 203000
},
{
"epoch": 0.74,
"learning_rate": 0.0003786911646487036,
"loss": 4.5211,
"step": 204000
},
{
"epoch": 0.75,
"learning_rate": 0.00037848447931566176,
"loss": 4.521,
"step": 205000
},
{
"epoch": 0.75,
"learning_rate": 0.00037827685340521773,
"loss": 4.5257,
"step": 206000
},
{
"epoch": 0.76,
"learning_rate": 0.0003780684970458185,
"loss": 4.5204,
"step": 207000
},
{
"epoch": 0.76,
"learning_rate": 0.0003778589942057952,
"loss": 4.5209,
"step": 208000
},
{
"epoch": 0.76,
"learning_rate": 0.0003776487649924752,
"loss": 4.5167,
"step": 209000
},
{
"epoch": 0.77,
"learning_rate": 0.0003774373896346034,
"loss": 4.5142,
"step": 210000
},
{
"epoch": 0.77,
"learning_rate": 0.00037722507921728195,
"loss": 4.5166,
"step": 211000
},
{
"epoch": 0.77,
"learning_rate": 0.0003770122622793867,
"loss": 4.5127,
"step": 212000
},
{
"epoch": 0.78,
"learning_rate": 0.00037679808696909655,
"loss": 4.5163,
"step": 213000
},
{
"epoch": 0.78,
"learning_rate": 0.00037658297996835357,
"loss": 4.513,
"step": 214000
},
{
"epoch": 0.78,
"learning_rate": 0.0003763669424107285,
"loss": 4.5078,
"step": 215000
},
{
"epoch": 0.79,
"learning_rate": 0.00037614997543469595,
"loss": 4.5114,
"step": 216000
},
{
"epoch": 0.79,
"learning_rate": 0.00037593208018362834,
"loss": 4.5097,
"step": 217000
},
{
"epoch": 0.8,
"learning_rate": 0.00037571369637505247,
"loss": 4.5072,
"step": 218000
},
{
"epoch": 0.8,
"learning_rate": 0.00037549394987438647,
"loss": 4.5084,
"step": 219000
},
{
"epoch": 0.8,
"learning_rate": 0.00037527327855580843,
"loss": 4.5071,
"step": 220000
},
{
"epoch": 0.81,
"learning_rate": 0.0003750519056381631,
"loss": 4.5061,
"step": 221000
},
{
"epoch": 0.81,
"learning_rate": 0.00037482938909921175,
"loss": 4.5075,
"step": 222000
},
{
"epoch": 0.81,
"learning_rate": 0.0003746059512444505,
"loss": 4.5079,
"step": 223000
},
{
"epoch": 0.82,
"learning_rate": 0.0003743815932513518,
"loss": 4.5071,
"step": 224000
},
{
"epoch": 0.82,
"learning_rate": 0.00037415631630223755,
"loss": 4.5033,
"step": 225000
},
{
"epoch": 0.83,
"learning_rate": 0.00037393012158427186,
"loss": 4.505,
"step": 226000
},
{
"epoch": 0.83,
"learning_rate": 0.00037370323785818266,
"loss": 4.5032,
"step": 227000
},
{
"epoch": 0.83,
"learning_rate": 0.00037347521209812743,
"loss": 4.5017,
"step": 228000
},
{
"epoch": 0.84,
"learning_rate": 0.0003732465015546745,
"loss": 4.502,
"step": 229000
},
{
"epoch": 0.84,
"learning_rate": 0.00037301664955431804,
"loss": 4.4998,
"step": 230000
},
{
"epoch": 0.84,
"learning_rate": 0.0003727858857909254,
"loss": 4.4994,
"step": 231000
},
{
"epoch": 0.85,
"learning_rate": 0.0003725544436092979,
"loss": 4.4985,
"step": 232000
},
{
"epoch": 0.85,
"learning_rate": 0.000372321860881582,
"loss": 4.499,
"step": 233000
},
{
"epoch": 0.85,
"learning_rate": 0.00037208837005222694,
"loss": 4.4919,
"step": 234000
},
{
"epoch": 0.86,
"learning_rate": 0.0003718542072019544,
"loss": 4.4965,
"step": 235000
},
{
"epoch": 0.86,
"learning_rate": 0.00037161890477046666,
"loss": 4.4972,
"step": 236000
},
{
"epoch": 0.87,
"learning_rate": 0.00037138293459993847,
"loss": 4.4988,
"step": 237000
},
{
"epoch": 0.08,
"learning_rate": 0.00039974046056824423,
"loss": 5.0173,
"step": 238000
},
{
"epoch": 0.08,
"learning_rate": 0.0003997382653105697,
"loss": 5.1254,
"step": 239000
},
{
"epoch": 0.08,
"learning_rate": 0.0003997360630230883,
"loss": 5.137,
"step": 240000
},
{
"epoch": 0.08,
"learning_rate": 0.0003997338515152591,
"loss": 5.1396,
"step": 241000
},
{
"epoch": 0.08,
"learning_rate": 0.0003997316285596137,
"loss": 5.1539,
"step": 242000
},
{
"epoch": 0.08,
"learning_rate": 0.00039972939860216607,
"loss": 5.1836,
"step": 243000
},
{
"epoch": 0.08,
"learning_rate": 0.00039972715717864,
"loss": 5.1907,
"step": 244000
},
{
"epoch": 0.08,
"learning_rate": 0.00039972490651670964,
"loss": 5.2177,
"step": 245000
},
{
"epoch": 0.08,
"learning_rate": 0.00039972264888099373,
"loss": 5.2218,
"step": 246000
},
{
"epoch": 0.34,
"learning_rate": 0.0003955398710520662,
"loss": 4.9553,
"step": 247000
},
{
"epoch": 0.34,
"learning_rate": 0.000395503657852559,
"loss": 4.8679,
"step": 248000
},
{
"epoch": 0.34,
"learning_rate": 0.00039546729990487664,
"loss": 4.8395,
"step": 249000
},
{
"epoch": 0.34,
"learning_rate": 0.0003954307972359379,
"loss": 4.8217,
"step": 250000
},
{
"epoch": 0.34,
"learning_rate": 0.000395394149872769,
"loss": 4.8152,
"step": 251000
},
{
"epoch": 0.35,
"learning_rate": 0.0003953573947067854,
"loss": 4.8026,
"step": 252000
},
{
"epoch": 0.35,
"learning_rate": 0.0003953204581812889,
"loss": 4.8017,
"step": 253000
},
{
"epoch": 0.35,
"learning_rate": 0.0003952834141966186,
"loss": 4.7977,
"step": 254000
},
{
"epoch": 0.35,
"learning_rate": 0.00039524618861807426,
"loss": 4.7963,
"step": 255000
},
{
"epoch": 0.35,
"learning_rate": 0.0003952088184819814,
"loss": 4.79,
"step": 256000
},
{
"epoch": 0.35,
"learning_rate": 0.0003951713414028577,
"loss": 4.7877,
"step": 257000
},
{
"epoch": 0.35,
"learning_rate": 0.0003951336823792677,
"loss": 4.7854,
"step": 258000
},
{
"epoch": 0.35,
"learning_rate": 0.0003950959167570807,
"loss": 4.7945,
"step": 259000
},
{
"epoch": 0.36,
"learning_rate": 0.00039505796895741114,
"loss": 4.7845,
"step": 260000
},
{
"epoch": 0.36,
"learning_rate": 0.00039501991490389356,
"loss": 4.7821,
"step": 261000
},
{
"epoch": 0.36,
"learning_rate": 0.000394981678440416,
"loss": 4.7798,
"step": 262000
},
{
"epoch": 0.36,
"learning_rate": 0.00039494333606815397,
"loss": 4.7892,
"step": 263000
},
{
"epoch": 0.36,
"learning_rate": 0.00039490481105399416,
"loss": 4.7885,
"step": 264000
},
{
"epoch": 0.36,
"learning_rate": 0.000394866141735037,
"loss": 4.7838,
"step": 265000
},
{
"epoch": 0.36,
"learning_rate": 0.0003948273670255641,
"loss": 4.7812,
"step": 266000
},
{
"epoch": 0.37,
"learning_rate": 0.00039478840932724265,
"loss": 4.7749,
"step": 267000
},
{
"epoch": 0.37,
"learning_rate": 0.00039474934658425046,
"loss": 4.7823,
"step": 268000
},
{
"epoch": 0.37,
"learning_rate": 0.00039471010062182423,
"loss": 4.7809,
"step": 269000
},
{
"epoch": 0.37,
"learning_rate": 0.00039467074996088307,
"loss": 4.785,
"step": 270000
},
{
"epoch": 0.37,
"learning_rate": 0.0003946312158504645,
"loss": 4.7753,
"step": 271000
},
{
"epoch": 0.37,
"learning_rate": 0.00039459157738799654,
"loss": 4.775,
"step": 272000
},
{
"epoch": 0.37,
"learning_rate": 0.0003945517552465506,
"loss": 4.7755,
"step": 273000
},
{
"epoch": 0.38,
"learning_rate": 0.0003945118290998296,
"loss": 4.7849,
"step": 274000
},
{
"epoch": 0.38,
"learning_rate": 0.00039447175922715307,
"loss": 4.7806,
"step": 275000
},
{
"epoch": 0.38,
"learning_rate": 0.00039443150533232405,
"loss": 4.7791,
"step": 276000
},
{
"epoch": 0.38,
"learning_rate": 0.00039439110748312647,
"loss": 4.7798,
"step": 277000
},
{
"epoch": 0.38,
"learning_rate": 0.00039435056570947044,
"loss": 4.7794,
"step": 278000
},
{
"epoch": 0.38,
"learning_rate": 0.0003943099207989059,
"loss": 4.7821,
"step": 279000
},
{
"epoch": 0.38,
"learning_rate": 0.0003942690914103384,
"loss": 4.7815,
"step": 280000
},
{
"epoch": 0.38,
"learning_rate": 0.00039422811818765134,
"loss": 4.7713,
"step": 281000
},
{
"epoch": 0.39,
"learning_rate": 0.00039418704235002724,
"loss": 4.7707,
"step": 282000
},
{
"epoch": 0.39,
"learning_rate": 0.00039414582302643454,
"loss": 4.7764,
"step": 283000
},
{
"epoch": 0.39,
"learning_rate": 0.0003941044187712859,
"loss": 4.7864,
"step": 284000
},
{
"epoch": 0.39,
"learning_rate": 0.00039406287080393925,
"loss": 4.774,
"step": 285000
},
{
"epoch": 0.39,
"learning_rate": 0.0003940211791551559,
"loss": 4.7698,
"step": 286000
},
{
"epoch": 0.39,
"learning_rate": 0.00039397938576284634,
"loss": 4.7754,
"step": 287000
},
{
"epoch": 0.39,
"learning_rate": 0.00039393740698750394,
"loss": 4.7764,
"step": 288000
},
{
"epoch": 0.4,
"learning_rate": 0.0003938952846236165,
"loss": 4.7764,
"step": 289000
},
{
"epoch": 0.4,
"learning_rate": 0.00039385301870237103,
"loss": 4.7747,
"step": 290000
},
{
"epoch": 0.4,
"learning_rate": 0.00039381065173618853,
"loss": 4.7784,
"step": 291000
},
{
"epoch": 0.4,
"learning_rate": 0.00039376809893769117,
"loss": 4.7792,
"step": 292000
},
{
"epoch": 0.4,
"learning_rate": 0.00039372544544391313,
"loss": 4.7726,
"step": 293000
},
{
"epoch": 0.4,
"learning_rate": 0.0003936826488052433,
"loss": 4.7736,
"step": 294000
},
{
"epoch": 0.4,
"learning_rate": 0.0003936396659988803,
"loss": 4.7759,
"step": 295000
},
{
"epoch": 0.41,
"learning_rate": 0.00039359653982441555,
"loss": 4.7719,
"step": 296000
},
{
"epoch": 0.41,
"learning_rate": 0.00039355327031377916,
"loss": 4.7775,
"step": 297000
},
{
"epoch": 0.41,
"learning_rate": 0.0003935099009833917,
"loss": 4.7814,
"step": 298000
},
{
"epoch": 0.41,
"learning_rate": 0.00039346634503988233,
"loss": 4.7722,
"step": 299000
},
{
"epoch": 0.41,
"learning_rate": 0.0003934226458565957,
"loss": 4.7745,
"step": 300000
},
{
"epoch": 0.41,
"learning_rate": 0.000393378847379798,
"loss": 4.7748,
"step": 301000
},
{
"epoch": 0.41,
"learning_rate": 0.00039333486195728426,
"loss": 4.7774,
"step": 302000
},
{
"epoch": 0.41,
"learning_rate": 0.00039329077759239523,
"loss": 4.7777,
"step": 303000
},
{
"epoch": 0.42,
"learning_rate": 0.000393246506060789,
"loss": 4.7707,
"step": 304000
},
{
"epoch": 0.42,
"learning_rate": 0.0003932021359382358,
"loss": 4.7792,
"step": 305000
},
{
"epoch": 0.42,
"learning_rate": 0.000393157578428518,
"loss": 4.7711,
"step": 306000
},
{
"epoch": 0.42,
"learning_rate": 0.0003931128779076294,
"loss": 4.7712,
"step": 307000
},
{
"epoch": 0.42,
"learning_rate": 0.0003930680793235711,
"loss": 4.7732,
"step": 308000
},
{
"epoch": 0.42,
"learning_rate": 0.00039302309302266194,
"loss": 4.7753,
"step": 309000
},
{
"epoch": 0.42,
"learning_rate": 0.00039297800901073876,
"loss": 4.7747,
"step": 310000
},
{
"epoch": 0.43,
"learning_rate": 0.0003929327370629047,
"loss": 4.7756,
"step": 311000
},
{
"epoch": 0.43,
"learning_rate": 0.0003928873222703692,
"loss": 4.7733,
"step": 312000
},
{
"epoch": 0.43,
"learning_rate": 0.0003928418102956833,
"loss": 4.7714,
"step": 313000
},
{
"epoch": 0.43,
"learning_rate": 0.0003927961100574846,
"loss": 4.773,
"step": 314000
},
{
"epoch": 0.43,
"learning_rate": 0.0003927503129900122,
"loss": 4.7742,
"step": 315000
},
{
"epoch": 0.43,
"learning_rate": 0.0003927043274413583,
"loss": 4.7757,
"step": 316000
},
{
"epoch": 0.43,
"learning_rate": 0.0003926582454165936,
"loss": 4.7738,
"step": 317000
},
{
"epoch": 0.44,
"learning_rate": 0.00039261202103549754,
"loss": 4.7675,
"step": 318000
},
{
"epoch": 0.44,
"learning_rate": 0.0003925656078478171,
"loss": 4.7782,
"step": 319000
},
{
"epoch": 0.44,
"learning_rate": 0.00039251905208725256,
"loss": 4.7703,
"step": 320000
},
{
"epoch": 0.44,
"learning_rate": 0.00039247235378827314,
"loss": 4.7726,
"step": 321000
},
{
"epoch": 0.44,
"learning_rate": 0.0003924255598974257,
"loss": 4.7679,
"step": 322000
},
{
"epoch": 0.44,
"learning_rate": 0.00039237857676789823,
"loss": 4.767,
"step": 323000
},
{
"epoch": 0.44,
"learning_rate": 0.0003923314984006603,
"loss": 4.7621,
"step": 324000
},
{
"epoch": 0.44,
"learning_rate": 0.0003922842305795883,
"loss": 4.7717,
"step": 325000
},
{
"epoch": 0.45,
"learning_rate": 0.00039223686787524505,
"loss": 4.7682,
"step": 326000
},
{
"epoch": 0.45,
"learning_rate": 0.0003921893155024742,
"loss": 4.783,
"step": 327000
},
{
"epoch": 0.45,
"learning_rate": 0.0003921416686011523,
"loss": 4.7705,
"step": 328000
},
{
"epoch": 0.45,
"learning_rate": 0.0003920938318173703,
"loss": 4.7678,
"step": 329000
},
{
"epoch": 0.45,
"learning_rate": 0.0003920459008600368,
"loss": 4.7697,
"step": 330000
},
{
"epoch": 0.45,
"learning_rate": 0.0003919977798067727,
"loss": 4.7749,
"step": 331000
},
{
"epoch": 0.45,
"learning_rate": 0.00039194956493523547,
"loss": 4.7797,
"step": 332000
},
{
"epoch": 0.46,
"learning_rate": 0.00039190115975485935,
"loss": 4.7678,
"step": 333000
},
{
"epoch": 0.46,
"learning_rate": 0.0003918526124935473,
"loss": 4.7674,
"step": 334000
},
{
"epoch": 0.46,
"learning_rate": 0.0003918039719474887,
"loss": 4.7711,
"step": 335000
},
{
"epoch": 0.46,
"learning_rate": 0.0003917551407742319,
"loss": 4.7686,
"step": 336000
},
{
"epoch": 0.46,
"learning_rate": 0.00039170621667219887,
"loss": 4.7708,
"step": 337000
},
{
"epoch": 0.46,
"learning_rate": 0.00039165710173146836,
"loss": 4.7681,
"step": 338000
},
{
"epoch": 0.46,
"learning_rate": 0.0003916078942182069,
"loss": 4.7697,
"step": 339000
},
{
"epoch": 0.47,
"learning_rate": 0.0003915584956553133,
"loss": 4.7665,
"step": 340000
},
{
"epoch": 0.47,
"learning_rate": 0.00039150900487640804,
"loss": 4.7757,
"step": 341000
},
{
"epoch": 0.47,
"learning_rate": 0.00039145932283750107,
"loss": 4.7653,
"step": 342000
},
{
"epoch": 0.7,
"learning_rate": 0.00038084013217180266,
"loss": 4.7047,
"step": 343000
},
{
"epoch": 0.71,
"learning_rate": 0.0003807297139737221,
"loss": 4.6748,
"step": 344000
},
{
"epoch": 0.71,
"learning_rate": 0.0003806191054905468,
"loss": 4.6694,
"step": 345000
},
{
"epoch": 0.71,
"learning_rate": 0.00038050808546821253,
"loss": 4.6753,
"step": 346000
},
{
"epoch": 0.71,
"learning_rate": 0.0003803967646603707,
"loss": 4.6691,
"step": 347000
},
{
"epoch": 0.71,
"learning_rate": 0.0003802851432525181,
"loss": 4.6667,
"step": 348000
},
{
"epoch": 0.72,
"learning_rate": 0.0003801733335024691,
"loss": 4.6662,
"step": 349000
},
{
"epoch": 0.72,
"learning_rate": 0.0003800611117532231,
"loss": 4.6614,
"step": 350000
},
{
"epoch": 0.72,
"learning_rate": 0.000379948589963274,
"loss": 4.6566,
"step": 351000
},
{
"epoch": 0.72,
"learning_rate": 0.00037983588129147694,
"loss": 4.6586,
"step": 352000
},
{
"epoch": 0.72,
"learning_rate": 0.0003797227602826864,
"loss": 4.6598,
"step": 353000
},
{
"epoch": 0.73,
"learning_rate": 0.00037960933979699685,
"loss": 4.6561,
"step": 354000
},
{
"epoch": 0.73,
"learning_rate": 0.0003794956200234039,
"loss": 4.649,
"step": 355000
},
{
"epoch": 0.73,
"learning_rate": 0.00037938171531961043,
"loss": 4.6508,
"step": 356000
},
{
"epoch": 0.73,
"learning_rate": 0.0003792673978380055,
"loss": 4.6515,
"step": 357000
},
{
"epoch": 0.74,
"learning_rate": 0.00037915301116867755,
"loss": 4.6502,
"step": 358000
},
{
"epoch": 0.74,
"learning_rate": 0.00037903821210187236,
"loss": 4.6446,
"step": 359000
},
{
"epoch": 0.74,
"learning_rate": 0.00037892299993410043,
"loss": 4.6457,
"step": 360000
},
{
"epoch": 0.74,
"learning_rate": 0.0003788074896220918,
"loss": 4.6399,
"step": 361000
},
{
"epoch": 0.74,
"learning_rate": 0.0003786916813583244,
"loss": 4.6416,
"step": 362000
},
{
"epoch": 0.75,
"learning_rate": 0.0003785755753357728,
"loss": 4.6394,
"step": 363000
},
{
"epoch": 0.75,
"learning_rate": 0.00037845917174790744,
"loss": 4.644,
"step": 364000
},
{
"epoch": 0.75,
"learning_rate": 0.0003783425876381264,
"loss": 4.6455,
"step": 365000
},
{
"epoch": 0.75,
"learning_rate": 0.0003782255897991082,
"loss": 4.6427,
"step": 366000
},
{
"epoch": 0.75,
"learning_rate": 0.00037810841242106534,
"loss": 4.6383,
"step": 367000
},
{
"epoch": 0.76,
"learning_rate": 0.0003779908211099408,
"loss": 4.6398,
"step": 368000
},
{
"epoch": 0.76,
"learning_rate": 0.0003778729332078945,
"loss": 4.639,
"step": 369000
},
{
"epoch": 0.76,
"learning_rate": 0.00037775474891136603,
"loss": 4.642,
"step": 370000
},
{
"epoch": 0.76,
"learning_rate": 0.0003776363870456683,
"loss": 4.6378,
"step": 371000
},
{
"epoch": 0.76,
"learning_rate": 0.00037751761084737167,
"loss": 4.6251,
"step": 372000
},
{
"epoch": 0.77,
"learning_rate": 0.0003773986580663642,
"loss": 4.6378,
"step": 373000
},
{
"epoch": 0.77,
"learning_rate": 0.0003772792907571875,
"loss": 4.638,
"step": 374000
},
{
"epoch": 0.77,
"learning_rate": 0.0003771598676628421,
"loss": 4.6329,
"step": 375000
},
{
"epoch": 0.77,
"learning_rate": 0.0003770399103327158,
"loss": 4.6331,
"step": 376000
},
{
"epoch": 0.77,
"learning_rate": 0.000376919657996196,
"loss": 4.6307,
"step": 377000
},
{
"epoch": 0.78,
"learning_rate": 0.0003767992315479937,
"loss": 4.6366,
"step": 378000
},
{
"epoch": 0.78,
"learning_rate": 0.0003766783900948219,
"loss": 4.6312,
"step": 379000
},
{
"epoch": 0.78,
"learning_rate": 0.0003765572542376675,
"loss": 4.6322,
"step": 380000
},
{
"epoch": 0.78,
"learning_rate": 0.00037643582417838255,
"loss": 4.6272,
"step": 381000
},
{
"epoch": 0.78,
"learning_rate": 0.0003763142219901536,
"loss": 4.6261,
"step": 382000
},
{
"epoch": 0.79,
"learning_rate": 0.0003761922044278193,
"loss": 4.6332,
"step": 383000
},
{
"epoch": 0.79,
"learning_rate": 0.0003760698932716468,
"loss": 4.6285,
"step": 384000
},
{
"epoch": 0.79,
"learning_rate": 0.0003759472887254464,
"loss": 4.6315,
"step": 385000
},
{
"epoch": 0.79,
"learning_rate": 0.00037582451403762754,
"loss": 4.6252,
"step": 386000
},
{
"epoch": 0.79,
"learning_rate": 0.00037570132361763626,
"loss": 4.6238,
"step": 387000
},
{
"epoch": 0.8,
"learning_rate": 0.00037557808768022013,
"loss": 4.6309,
"step": 388000
},
{
"epoch": 0.8,
"learning_rate": 0.0003754543124991863,
"loss": 4.6227,
"step": 389000
},
{
"epoch": 0.8,
"learning_rate": 0.0003753302449538835,
"loss": 4.6264,
"step": 390000
},
{
"epoch": 0.8,
"learning_rate": 0.0003752058852510489,
"loss": 4.6297,
"step": 391000
},
{
"epoch": 0.81,
"learning_rate": 0.00037508135839531953,
"loss": 4.6229,
"step": 392000
},
{
"epoch": 0.81,
"learning_rate": 0.0003749564152912182,
"loss": 4.6277,
"step": 393000
},
{
"epoch": 0.81,
"learning_rate": 0.0003748313060326983,
"loss": 4.6258,
"step": 394000
},
{
"epoch": 0.81,
"learning_rate": 0.0003747057803592816,
"loss": 4.6298,
"step": 395000
},
{
"epoch": 0.81,
"learning_rate": 0.0003745799635688954,
"loss": 4.6275,
"step": 396000
},
{
"epoch": 0.82,
"learning_rate": 0.0003744538558711915,
"loss": 4.6305,
"step": 397000
},
{
"epoch": 0.82,
"learning_rate": 0.00037432758401983454,
"loss": 4.6254,
"step": 398000
},
{
"epoch": 0.82,
"learning_rate": 0.0003742008954287709,
"loss": 4.6232,
"step": 399000
},
{
"epoch": 0.82,
"learning_rate": 0.00037407404368583003,
"loss": 4.6243,
"step": 400000
},
{
"epoch": 0.82,
"learning_rate": 0.0003739467750449806,
"loss": 4.6271,
"step": 401000
},
{
"epoch": 0.83,
"learning_rate": 0.00037381921655191264,
"loss": 4.625,
"step": 402000
},
{
"epoch": 0.83,
"learning_rate": 0.0003736914964119172,
"loss": 4.6207,
"step": 403000
},
{
"epoch": 0.83,
"learning_rate": 0.0003735633591418774,
"loss": 4.6222,
"step": 404000
},
{
"epoch": 0.83,
"learning_rate": 0.0003734349326585155,
"loss": 4.6274,
"step": 405000
},
{
"epoch": 0.83,
"learning_rate": 0.00037330621717583185,
"loss": 4.6215,
"step": 406000
},
{
"epoch": 0.84,
"learning_rate": 0.00037317734205675264,
"loss": 4.6239,
"step": 407000
},
{
"epoch": 0.84,
"learning_rate": 0.00037304817894443345,
"loss": 4.6213,
"step": 408000
},
{
"epoch": 0.84,
"learning_rate": 0.0003729185983290953,
"loss": 4.6217,
"step": 409000
},
{
"epoch": 0.84,
"learning_rate": 0.00037278872957481737,
"loss": 4.6203,
"step": 410000
},
{
"epoch": 0.84,
"learning_rate": 0.00037265870319842543,
"loss": 4.6233,
"step": 411000
},
{
"epoch": 0.85,
"learning_rate": 0.0003725282591035563,
"loss": 4.6189,
"step": 412000
},
{
"epoch": 0.85,
"learning_rate": 0.0003723976583952915,
"loss": 4.6208,
"step": 413000
},
{
"epoch": 0.85,
"learning_rate": 0.0003722666398284116,
"loss": 4.6228,
"step": 414000
},
{
"epoch": 0.85,
"learning_rate": 0.000372135334208968,
"loss": 4.6152,
"step": 415000
},
{
"epoch": 0.85,
"learning_rate": 0.00037200374175575874,
"loss": 4.6127,
"step": 416000
},
{
"epoch": 0.86,
"learning_rate": 0.00037187199471021856,
"loss": 4.6182,
"step": 417000
},
{
"epoch": 0.86,
"learning_rate": 0.0003717399618422258,
"loss": 4.6196,
"step": 418000
},
{
"epoch": 0.86,
"learning_rate": 0.0003716075107774151,
"loss": 4.6225,
"step": 419000
},
{
"epoch": 0.86,
"learning_rate": 0.00037147477375836516,
"loss": 4.6181,
"step": 420000
},
{
"epoch": 0.86,
"learning_rate": 0.0003713418841716614,
"loss": 4.6207,
"step": 421000
},
{
"epoch": 0.87,
"learning_rate": 0.00037120857619355976,
"loss": 4.6168,
"step": 422000
},
{
"epoch": 0.87,
"learning_rate": 0.00037107511666167,
"loss": 4.6148,
"step": 423000
},
{
"epoch": 0.87,
"learning_rate": 0.0003709412386121666,
"loss": 4.6241,
"step": 424000
},
{
"epoch": 0.87,
"learning_rate": 0.00037080707571865136,
"loss": 4.6167,
"step": 425000
},
{
"epoch": 0.87,
"learning_rate": 0.000370672762794291,
"loss": 4.618,
"step": 426000
},
{
"epoch": 0.88,
"learning_rate": 0.0003705380311681886,
"loss": 4.6185,
"step": 427000
},
{
"epoch": 0.88,
"learning_rate": 0.00037040301536994983,
"loss": 4.6159,
"step": 428000
},
{
"epoch": 0.88,
"learning_rate": 0.00037026771562455524,
"loss": 4.6172,
"step": 429000
},
{
"epoch": 0.88,
"learning_rate": 0.0003701322678825694,
"loss": 4.6185,
"step": 430000
},
{
"epoch": 0.89,
"learning_rate": 0.0003699964012030795,
"loss": 4.6142,
"step": 431000
},
{
"epoch": 0.89,
"learning_rate": 0.0003698605238364365,
"loss": 4.619,
"step": 432000
},
{
"epoch": 0.89,
"learning_rate": 0.0003697240914104684,
"loss": 4.6125,
"step": 433000
},
{
"epoch": 0.89,
"learning_rate": 0.0003695873761686538,
"loss": 4.613,
"step": 434000
},
{
"epoch": 0.89,
"learning_rate": 0.00036945037833880495,
"loss": 4.6193,
"step": 435000
},
{
"epoch": 0.9,
"learning_rate": 0.00036931337299122744,
"loss": 4.6195,
"step": 436000
},
{
"epoch": 0.9,
"learning_rate": 0.00036917581123466377,
"loss": 4.6155,
"step": 437000
},
{
"epoch": 0.9,
"learning_rate": 0.0003690379675758677,
"loss": 4.6124,
"step": 438000
},
{
"epoch": 0.9,
"learning_rate": 0.0003688998422445319,
"loss": 4.6118,
"step": 439000
},
{
"epoch": 0.9,
"learning_rate": 0.0003687617125650919,
"loss": 4.6118,
"step": 440000
},
{
"epoch": 0.91,
"learning_rate": 0.00036862302514182444,
"loss": 4.6115,
"step": 441000
},
{
"epoch": 0.91,
"learning_rate": 0.0003684841958461244,
"loss": 4.6145,
"step": 442000
},
{
"epoch": 0.91,
"learning_rate": 0.0003683449469728375,
"loss": 4.6107,
"step": 443000
},
{
"epoch": 0.91,
"learning_rate": 0.00036820541758180987,
"loss": 4.6125,
"step": 444000
},
{
"epoch": 0.91,
"learning_rate": 0.00036806574785514423,
"loss": 4.6076,
"step": 445000
},
{
"epoch": 0.92,
"learning_rate": 0.0003679256584065426,
"loss": 4.6135,
"step": 446000
},
{
"epoch": 0.92,
"learning_rate": 0.00036778528913887205,
"loss": 4.611,
"step": 447000
},
{
"epoch": 0.92,
"learning_rate": 0.0003676447810744613,
"loss": 4.6169,
"step": 448000
},
{
"epoch": 0.92,
"learning_rate": 0.00036750385315005585,
"loss": 4.6124,
"step": 449000
},
{
"epoch": 0.92,
"learning_rate": 0.0003673626461094468,
"loss": 4.6091,
"step": 450000
},
{
"epoch": 0.93,
"learning_rate": 0.00036722130181307566,
"loss": 4.6084,
"step": 451000
},
{
"epoch": 0.93,
"learning_rate": 0.0003670795375249432,
"loss": 4.6093,
"step": 452000
},
{
"epoch": 0.93,
"learning_rate": 0.0003669376370093399,
"loss": 4.6098,
"step": 453000
},
{
"epoch": 0.93,
"learning_rate": 0.00036679531641764155,
"loss": 4.6088,
"step": 454000
},
{
"epoch": 0.93,
"learning_rate": 0.00036665271789039375,
"loss": 4.6135,
"step": 455000
},
{
"epoch": 0.94,
"learning_rate": 0.00036650984166521224,
"loss": 4.6111,
"step": 456000
},
{
"epoch": 0.94,
"learning_rate": 0.00036636697456429214,
"loss": 4.6087,
"step": 457000
},
{
"epoch": 0.94,
"learning_rate": 0.00036622354421214545,
"loss": 4.6105,
"step": 458000
},
{
"epoch": 0.94,
"learning_rate": 0.0003660798368772088,
"loss": 4.6142,
"step": 459000
},
{
"epoch": 0.94,
"learning_rate": 0.00036593599692117735,
"loss": 4.6036,
"step": 460000
},
{
"epoch": 0.95,
"learning_rate": 0.00036579173661589563,
"loss": 4.6129,
"step": 461000
},
{
"epoch": 0.95,
"learning_rate": 0.00036564720004735664,
"loss": 4.6066,
"step": 462000
},
{
"epoch": 0.95,
"learning_rate": 0.00036550253240678936,
"loss": 4.6065,
"step": 463000
},
{
"epoch": 0.95,
"learning_rate": 0.0003653574443103918,
"loss": 4.6065,
"step": 464000
},
{
"epoch": 0.95,
"learning_rate": 0.0003652122261755973,
"loss": 4.6055,
"step": 465000
},
{
"epoch": 0.96,
"learning_rate": 0.00036506658751743075,
"loss": 4.6077,
"step": 466000
},
{
"epoch": 0.96,
"learning_rate": 0.0003649206738043425,
"loss": 4.6113,
"step": 467000
},
{
"epoch": 0.96,
"learning_rate": 0.00036477463160518477,
"loss": 4.6056,
"step": 468000
},
{
"epoch": 0.96,
"learning_rate": 0.00036462816878657725,
"loss": 4.608,
"step": 469000
},
{
"epoch": 0.97,
"learning_rate": 0.000364481431643597,
"loss": 4.6023,
"step": 470000
},
{
"epoch": 0.97,
"learning_rate": 0.0003643345675688004,
"loss": 4.6027,
"step": 471000
},
{
"epoch": 0.97,
"learning_rate": 0.00036418728278478005,
"loss": 4.6068,
"step": 472000
},
{
"epoch": 0.97,
"learning_rate": 0.00036403972441104724,
"loss": 4.6072,
"step": 473000
},
{
"epoch": 0.97,
"learning_rate": 0.0003638920406616534,
"loss": 4.6038,
"step": 474000
},
{
"epoch": 0.98,
"learning_rate": 0.00036374393611956704,
"loss": 4.6014,
"step": 475000
},
{
"epoch": 0.98,
"learning_rate": 0.00036359555872652883,
"loss": 4.6013,
"step": 476000
},
{
"epoch": 0.98,
"learning_rate": 0.00036344705751586385,
"loss": 4.606,
"step": 477000
},
{
"epoch": 2.18,
"learning_rate": 0.00024019467959966674,
"loss": 4.4837,
"step": 478000
},
{
"epoch": 2.19,
"learning_rate": 0.00023963222037118084,
"loss": 4.4223,
"step": 479000
},
{
"epoch": 2.19,
"learning_rate": 0.00023906943468937218,
"loss": 4.39,
"step": 480000
},
{
"epoch": 2.2,
"learning_rate": 0.00023850689045664867,
"loss": 4.3772,
"step": 481000
},
{
"epoch": 2.2,
"learning_rate": 0.00023794346609281965,
"loss": 4.3726,
"step": 482000
},
{
"epoch": 2.2,
"learning_rate": 0.00023737972918605284,
"loss": 4.3631,
"step": 483000
},
{
"epoch": 2.21,
"learning_rate": 0.00023681624857694363,
"loss": 4.3497,
"step": 484000
},
{
"epoch": 2.21,
"learning_rate": 0.00023625190081838816,
"loss": 4.3482,
"step": 485000
},
{
"epoch": 2.22,
"learning_rate": 0.00023568781924440977,
"loss": 4.3443,
"step": 486000
},
{
"epoch": 2.22,
"learning_rate": 0.0002351228792097228,
"loss": 4.3383,
"step": 487000
},
{
"epoch": 2.23,
"learning_rate": 0.000234558215237771,
"loss": 4.3393,
"step": 488000
},
{
"epoch": 2.23,
"learning_rate": 0.0002339927015221048,
"loss": 4.3349,
"step": 489000
},
{
"epoch": 2.24,
"learning_rate": 0.00023342690780622,
"loss": 4.3287,
"step": 490000
},
{
"epoch": 2.24,
"learning_rate": 0.00023286083875059848,
"loss": 4.3263,
"step": 491000
},
{
"epoch": 2.25,
"learning_rate": 0.0002322950654913731,
"loss": 4.3202,
"step": 492000
},
{
"epoch": 2.25,
"learning_rate": 0.0002317284600104378,
"loss": 4.3198,
"step": 493000
},
{
"epoch": 2.25,
"learning_rate": 0.00023116216017581755,
"loss": 4.3163,
"step": 494000
},
{
"epoch": 2.26,
"learning_rate": 0.00023059503691953928,
"loss": 4.3163,
"step": 495000
},
{
"epoch": 2.26,
"learning_rate": 0.00023002766164983935,
"loss": 4.3123,
"step": 496000
},
{
"epoch": 2.27,
"learning_rate": 0.00022946060678482666,
"loss": 4.3091,
"step": 497000
},
{
"epoch": 2.27,
"learning_rate": 0.00022889274175117623,
"loss": 4.3067,
"step": 498000
},
{
"epoch": 2.28,
"learning_rate": 0.00022832463872602635,
"loss": 4.3056,
"step": 499000
},
{
"epoch": 2.28,
"learning_rate": 0.00022775687084019932,
"loss": 4.3042,
"step": 500000
},
{
"epoch": 2.29,
"learning_rate": 0.00022718887477616112,
"loss": 4.306,
"step": 501000
},
{
"epoch": 2.29,
"learning_rate": 0.00022662008630440305,
"loss": 4.3007,
"step": 502000
},
{
"epoch": 2.3,
"learning_rate": 0.0002260510785611647,
"loss": 4.2996,
"step": 503000
},
{
"epoch": 2.3,
"learning_rate": 0.00022548185623340192,
"loss": 4.2993,
"step": 504000
},
{
"epoch": 2.3,
"learning_rate": 0.00022491299354534364,
"loss": 4.2962,
"step": 505000
},
{
"epoch": 2.31,
"learning_rate": 0.0002243433563192932,
"loss": 4.2954,
"step": 506000
},
{
"epoch": 2.31,
"learning_rate": 0.00022377408851168427,
"loss": 4.2964,
"step": 507000
},
{
"epoch": 2.32,
"learning_rate": 0.00022320405513710757,
"loss": 4.2958,
"step": 508000
},
{
"epoch": 2.32,
"learning_rate": 0.00022263440094754997,
"loss": 4.2934,
"step": 509000
},
{
"epoch": 2.33,
"learning_rate": 0.0002220639901872479,
"loss": 4.2935,
"step": 510000
},
{
"epoch": 2.33,
"learning_rate": 0.00022149396836606137,
"loss": 4.2906,
"step": 511000
},
{
"epoch": 2.34,
"learning_rate": 0.00022092319899525643,
"loss": 4.2897,
"step": 512000
},
{
"epoch": 2.34,
"learning_rate": 0.00022035282830486165,
"loss": 4.2916,
"step": 513000
},
{
"epoch": 2.35,
"learning_rate": 0.00021978171911058022,
"loss": 4.2891,
"step": 514000
},
{
"epoch": 2.35,
"learning_rate": 0.00021921101832488073,
"loss": 4.2867,
"step": 515000
},
{
"epoch": 2.35,
"learning_rate": 0.00021863958810533452,
"loss": 4.288,
"step": 516000
},
{
"epoch": 2.36,
"learning_rate": 0.00021806800435022003,
"loss": 4.2857,
"step": 517000
},
{
"epoch": 2.36,
"learning_rate": 0.00021749684357306648,
"loss": 4.285,
"step": 518000
},
{
"epoch": 2.37,
"learning_rate": 0.00021692496701433082,
"loss": 4.2832,
"step": 519000
},
{
"epoch": 2.37,
"learning_rate": 0.00021635352312751783,
"loss": 4.2824,
"step": 520000
},
{
"epoch": 2.38,
"learning_rate": 0.0002157819448048862,
"loss": 4.2806,
"step": 521000
},
{
"epoch": 2.38,
"learning_rate": 0.0002152096644013863,
"loss": 4.2772,
"step": 522000
},
{
"epoch": 2.39,
"learning_rate": 0.00021463725871483544,
"loss": 4.2798,
"step": 523000
},
{
"epoch": 2.39,
"learning_rate": 0.0002140647324601787,
"loss": 4.2798,
"step": 524000
},
{
"epoch": 2.4,
"learning_rate": 0.00021349266305175916,
"loss": 4.2802,
"step": 525000
},
{
"epoch": 2.4,
"learning_rate": 0.00021291990991843793,
"loss": 4.2786,
"step": 526000
},
{
"epoch": 2.41,
"learning_rate": 0.0002123476232740738,
"loss": 4.2791,
"step": 527000
},
{
"epoch": 2.41,
"learning_rate": 0.00021177466211441055,
"loss": 4.274,
"step": 528000
},
{
"epoch": 2.41,
"learning_rate": 0.0002112021770710695,
"loss": 4.2765,
"step": 529000
},
{
"epoch": 2.42,
"learning_rate": 0.00021062959993907988,
"loss": 4.2751,
"step": 530000
},
{
"epoch": 2.42,
"learning_rate": 0.00021005636214541413,
"loss": 4.2751,
"step": 531000
},
{
"epoch": 2.43,
"learning_rate": 0.00020948304151680226,
"loss": 4.2744,
"step": 532000
},
{
"epoch": 2.43,
"learning_rate": 0.00020891021621191204,
"loss": 4.273,
"step": 533000
},
{
"epoch": 2.44,
"learning_rate": 0.00020833674415252564,
"loss": 4.2769,
"step": 534000
},
{
"epoch": 2.44,
"learning_rate": 0.00020776320342280467,
"loss": 4.2695,
"step": 535000
},
{
"epoch": 2.45,
"learning_rate": 0.00020718959874704363,
"loss": 4.2689,
"step": 536000
},
{
"epoch": 2.45,
"learning_rate": 0.00020661650854196894,
"loss": 4.2671,
"step": 537000
},
{
"epoch": 2.46,
"learning_rate": 0.0002060427902012143,
"loss": 4.2677,
"step": 538000
},
{
"epoch": 2.46,
"learning_rate": 0.0002054701696683469,
"loss": 4.2713,
"step": 539000
},
{
"epoch": 2.46,
"learning_rate": 0.00020489635658938387,
"loss": 4.2659,
"step": 540000
},
{
"epoch": 2.47,
"learning_rate": 0.0002043225031787951,
"loss": 4.2666,
"step": 541000
},
{
"epoch": 2.47,
"learning_rate": 0.00020374861416345058,
"loss": 4.2609,
"step": 542000
},
{
"epoch": 2.48,
"learning_rate": 0.0002031746942705136,
"loss": 4.2631,
"step": 543000
},
{
"epoch": 2.48,
"learning_rate": 0.0002026013221849334,
"loss": 4.2656,
"step": 544000
},
{
"epoch": 2.49,
"learning_rate": 0.0002020273547383406,
"loss": 4.2688,
"step": 545000
},
{
"epoch": 2.49,
"learning_rate": 0.00020145394458320146,
"loss": 4.2592,
"step": 546000
},
{
"epoch": 2.5,
"learning_rate": 0.00020087994847524482,
"loss": 4.2617,
"step": 547000
},
{
"epoch": 2.5,
"learning_rate": 0.00020030651912449513,
"loss": 4.2613,
"step": 548000
},
{
"epoch": 2.51,
"learning_rate": 0.00019973251324840986,
"loss": 4.2557,
"step": 549000
},
{
"epoch": 2.51,
"learning_rate": 0.0001991590835766299,
"loss": 4.2604,
"step": 550000
},
{
"epoch": 2.51,
"learning_rate": 0.00019858508682597277,
"loss": 4.2614,
"step": 551000
},
{
"epoch": 2.52,
"learning_rate": 0.00019801167570775345,
"loss": 4.2587,
"step": 552000
},
{
"epoch": 2.52,
"learning_rate": 0.0001974377069757808,
"loss": 4.2567,
"step": 553000
},
{
"epoch": 2.53,
"learning_rate": 0.000196863759349592,
"loss": 4.2542,
"step": 554000
},
{
"epoch": 2.53,
"learning_rate": 0.0001962904114641484,
"loss": 4.2528,
"step": 555000
},
{
"epoch": 2.54,
"learning_rate": 0.00019571652019933017,
"loss": 4.2529,
"step": 556000
},
{
"epoch": 2.54,
"learning_rate": 0.00019514323805461362,
"loss": 4.2504,
"step": 557000
},
{
"epoch": 2.55,
"learning_rate": 0.00019456999583540802,
"loss": 4.2557,
"step": 558000
},
{
"epoch": 2.55,
"learning_rate": 0.00019399622450669583,
"loss": 4.2527,
"step": 559000
},
{
"epoch": 2.56,
"learning_rate": 0.00019342250263149486,
"loss": 4.2493,
"step": 560000
},
{
"epoch": 2.56,
"learning_rate": 0.0001928488349355918,
"loss": 4.2533,
"step": 561000
},
{
"epoch": 2.56,
"learning_rate": 0.00019227579972212256,
"loss": 4.251,
"step": 562000
},
{
"epoch": 2.57,
"learning_rate": 0.00019170225449436132,
"loss": 4.2442,
"step": 563000
},
{
"epoch": 2.57,
"learning_rate": 0.00019112935105686604,
"loss": 4.2508,
"step": 564000
},
{
"epoch": 2.58,
"learning_rate": 0.00019055652054145262,
"loss": 4.2482,
"step": 565000
},
{
"epoch": 2.58,
"learning_rate": 0.00018998319437138936,
"loss": 4.2453,
"step": 566000
},
{
"epoch": 2.59,
"learning_rate": 0.0001894099507104425,
"loss": 4.2467,
"step": 567000
},
{
"epoch": 2.59,
"learning_rate": 0.00018883679428045936,
"loss": 4.2429,
"step": 568000
},
{
"epoch": 2.6,
"learning_rate": 0.00018826430281954561,
"loss": 4.2436,
"step": 569000
},
{
"epoch": 2.6,
"learning_rate": 0.00018769190783313742,
"loss": 4.2462,
"step": 570000
},
{
"epoch": 2.61,
"learning_rate": 0.00018711904121225677,
"loss": 4.2429,
"step": 571000
},
{
"epoch": 2.61,
"learning_rate": 0.000186546280692719,
"loss": 4.2415,
"step": 572000
},
{
"epoch": 2.61,
"learning_rate": 0.0001859736309923917,
"loss": 4.242,
"step": 573000
},
{
"epoch": 2.62,
"learning_rate": 0.00018540166930311399,
"loss": 4.2415,
"step": 574000
},
{
"epoch": 2.62,
"learning_rate": 0.00018482925526851332,
"loss": 4.2394,
"step": 575000
},
{
"epoch": 2.63,
"learning_rate": 0.00018425696619637965,
"loss": 4.2393,
"step": 576000
},
{
"epoch": 2.63,
"learning_rate": 0.00018368537889375085,
"loss": 4.2374,
"step": 577000
},
{
"epoch": 2.64,
"learning_rate": 0.00018311335375069304,
"loss": 4.2376,
"step": 578000
},
{
"epoch": 2.64,
"learning_rate": 0.00018254203951910075,
"loss": 4.2361,
"step": 579000
},
{
"epoch": 2.65,
"learning_rate": 0.00018197029713347917,
"loss": 4.2363,
"step": 580000
},
{
"epoch": 2.65,
"learning_rate": 0.000181399274777884,
"loss": 4.2322,
"step": 581000
},
{
"epoch": 2.66,
"learning_rate": 0.00018082783396875207,
"loss": 4.235,
"step": 582000
},
{
"epoch": 2.66,
"learning_rate": 0.00018025655108206925,
"loss": 4.2327,
"step": 583000
},
{
"epoch": 2.67,
"learning_rate": 0.0001796854308235321,
"loss": 4.2323,
"step": 584000
},
{
"epoch": 2.67,
"learning_rate": 0.0001791150487652753,
"loss": 4.2297,
"step": 585000
},
{
"epoch": 2.67,
"learning_rate": 0.00017854426770033718,
"loss": 4.2339,
"step": 586000
},
{
"epoch": 2.68,
"learning_rate": 0.00017797423388223084,
"loss": 4.2315,
"step": 587000
},
{
"epoch": 2.68,
"learning_rate": 0.00017740381079830306,
"loss": 4.2289,
"step": 588000
},
{
"epoch": 2.69,
"learning_rate": 0.0001768341439831626,
"loss": 4.2285,
"step": 589000
},
{
"epoch": 2.69,
"learning_rate": 0.00017626409765587338,
"loss": 4.2273,
"step": 590000
},
{
"epoch": 2.7,
"learning_rate": 0.000175694246842843,
"loss": 4.2272,
"step": 591000
},
{
"epoch": 2.7,
"learning_rate": 0.00017512459623797167,
"loss": 4.2267,
"step": 592000
},
{
"epoch": 2.71,
"learning_rate": 0.00017455571987530613,
"loss": 4.2242,
"step": 593000
},
{
"epoch": 2.71,
"learning_rate": 0.00017398648354988546,
"loss": 4.2238,
"step": 594000
},
{
"epoch": 2.72,
"learning_rate": 0.00017341803041304732,
"loss": 4.2245,
"step": 595000
},
{
"epoch": 2.72,
"learning_rate": 0.00017284922710364303,
"loss": 4.2219,
"step": 596000
},
{
"epoch": 2.72,
"learning_rate": 0.00017228121590341918,
"loss": 4.2215,
"step": 597000
},
{
"epoch": 2.73,
"learning_rate": 0.0001717128643323442,
"loss": 4.2196,
"step": 598000
},
{
"epoch": 2.73,
"learning_rate": 0.00017114474576434977,
"loss": 4.2186,
"step": 599000
},
{
"epoch": 2.74,
"learning_rate": 0.00017057686487906743,
"loss": 4.2218,
"step": 600000
},
{
"epoch": 2.74,
"learning_rate": 0.00017000922635417116,
"loss": 4.2175,
"step": 601000
},
{
"epoch": 2.75,
"learning_rate": 0.00016944183486533842,
"loss": 4.2174,
"step": 602000
},
{
"epoch": 2.75,
"learning_rate": 0.00016887582911145858,
"loss": 4.2208,
"step": 603000
},
{
"epoch": 2.76,
"learning_rate": 0.00016830894519618436,
"loss": 4.2176,
"step": 604000
},
{
"epoch": 2.76,
"learning_rate": 0.00016774232232230643,
"loss": 4.2131,
"step": 605000
},
{
"epoch": 2.77,
"learning_rate": 0.00016717596515713635,
"loss": 4.2148,
"step": 606000
},
{
"epoch": 2.77,
"learning_rate": 0.00016661044431598456,
"loss": 4.2163,
"step": 607000
},
{
"epoch": 2.77,
"learning_rate": 0.0001660446322840068,
"loss": 4.2121,
"step": 608000
},
{
"epoch": 2.78,
"learning_rate": 0.0001654796653358085,
"loss": 4.2126,
"step": 609000
},
{
"epoch": 2.78,
"learning_rate": 0.0001649144170608772,
"loss": 4.2111,
"step": 610000
},
{
"epoch": 2.79,
"learning_rate": 0.00016435002260167044,
"loss": 4.2093,
"step": 611000
},
{
"epoch": 2.79,
"learning_rate": 0.0001637853566890836,
"loss": 4.2104,
"step": 612000
},
{
"epoch": 2.8,
"learning_rate": 0.00016322155329606282,
"loss": 4.2104,
"step": 613000
},
{
"epoch": 2.8,
"learning_rate": 0.00016265748833194975,
"loss": 4.2095,
"step": 614000
},
{
"epoch": 2.81,
"learning_rate": 0.00016209373096067142,
"loss": 4.2061,
"step": 615000
},
{
"epoch": 2.81,
"learning_rate": 0.00016153141240150847,
"loss": 4.2059,
"step": 616000
},
{
"epoch": 2.82,
"learning_rate": 0.0001609682835060673,
"loss": 4.2093,
"step": 617000
},
{
"epoch": 2.82,
"learning_rate": 0.00016040547611755718,
"loss": 4.2025,
"step": 618000
},
{
"epoch": 2.82,
"learning_rate": 0.00015984299487186134,
"loss": 4.2069,
"step": 619000
},
{
"epoch": 2.83,
"learning_rate": 0.00015928140638588216,
"loss": 4.2031,
"step": 620000
},
{
"epoch": 2.83,
"learning_rate": 0.00015872015263128903,
"loss": 4.2021,
"step": 621000
},
{
"epoch": 2.84,
"learning_rate": 0.00015815867691759442,
"loss": 4.2014,
"step": 622000
},
{
"epoch": 2.84,
"learning_rate": 0.00015759754585375357,
"loss": 4.2014,
"step": 623000
},
{
"epoch": 2.85,
"learning_rate": 0.00015703676406184148,
"loss": 4.2015,
"step": 624000
},
{
"epoch": 2.85,
"learning_rate": 0.0001564768964106519,
"loss": 4.2018,
"step": 625000
},
{
"epoch": 2.86,
"learning_rate": 0.000155917386545611,
"loss": 4.2003,
"step": 626000
},
{
"epoch": 2.86,
"learning_rate": 0.00015535767954213264,
"loss": 4.1976,
"step": 627000
},
{
"epoch": 2.87,
"learning_rate": 0.00015479834026051583,
"loss": 4.1972,
"step": 628000
},
{
"epoch": 2.87,
"learning_rate": 0.00015423937330807675,
"loss": 4.1957,
"step": 629000
},
{
"epoch": 2.88,
"learning_rate": 0.00015368134168927352,
"loss": 4.1951,
"step": 630000
},
{
"epoch": 2.88,
"learning_rate": 0.00015312313282100077,
"loss": 4.1952,
"step": 631000
},
{
"epoch": 2.88,
"learning_rate": 0.00015256586770904422,
"loss": 4.1928,
"step": 632000
},
{
"epoch": 2.89,
"learning_rate": 0.00015200843529853173,
"loss": 4.1941,
"step": 633000
},
{
"epoch": 2.89,
"learning_rate": 0.00015145195503595184,
"loss": 4.1938,
"step": 634000
},
{
"epoch": 2.9,
"learning_rate": 0.00015089531743123636,
"loss": 4.1933,
"step": 635000
},
{
"epoch": 2.9,
"learning_rate": 0.00015033964033472967,
"loss": 4.1919,
"step": 636000
},
{
"epoch": 2.91,
"learning_rate": 0.00014978381585768676,
"loss": 4.191,
"step": 637000
},
{
"epoch": 2.91,
"learning_rate": 0.0001492289602175133,
"loss": 4.1907,
"step": 638000
},
{
"epoch": 2.92,
"learning_rate": 0.00014867396716325404,
"loss": 4.1906,
"step": 639000
},
{
"epoch": 2.92,
"learning_rate": 0.00014811995124263547,
"loss": 4.19,
"step": 640000
},
{
"epoch": 2.93,
"learning_rate": 0.00014756580787890456,
"loss": 4.1854,
"step": 641000
},
{
"epoch": 2.93,
"learning_rate": 0.0001470126499134229,
"loss": 4.1862,
"step": 642000
},
{
"epoch": 2.93,
"learning_rate": 0.0001464593744799972,
"loss": 4.1804,
"step": 643000
},
{
"epoch": 2.94,
"learning_rate": 0.00014590709267699477,
"loss": 4.1812,
"step": 644000
},
{
"epoch": 2.94,
"learning_rate": 0.00014535470338508303,
"loss": 4.1811,
"step": 645000
},
{
"epoch": 2.95,
"learning_rate": 0.0001448033159230627,
"loss": 4.1812,
"step": 646000
},
{
"epoch": 2.95,
"learning_rate": 0.00014425238221106002,
"loss": 4.1827,
"step": 647000
},
{
"epoch": 2.96,
"learning_rate": 0.00014370135598273356,
"loss": 4.1792,
"step": 648000
},
{
"epoch": 2.96,
"learning_rate": 0.00014315079349020695,
"loss": 4.1829,
"step": 649000
},
{
"epoch": 2.97,
"learning_rate": 0.00014260069926850117,
"loss": 4.1798,
"step": 650000
},
{
"epoch": 2.97,
"learning_rate": 0.00014205162723252818,
"loss": 4.1813,
"step": 651000
},
{
"epoch": 2.98,
"learning_rate": 0.00014150248266247203,
"loss": 4.1771,
"step": 652000
},
{
"epoch": 2.98,
"learning_rate": 0.0001409543683610207,
"loss": 4.1744,
"step": 653000
},
{
"epoch": 2.98,
"learning_rate": 0.00014040673945025616,
"loss": 4.1791,
"step": 654000
},
{
"epoch": 2.99,
"learning_rate": 0.00013985905299225343,
"loss": 4.1795,
"step": 655000
},
{
"epoch": 2.99,
"learning_rate": 0.00013931186191936434,
"loss": 4.1764,
"step": 656000
},
{
"epoch": 3.0,
"learning_rate": 0.0001387651707388392,
"loss": 4.1717,
"step": 657000
},
{
"epoch": 3.0,
"learning_rate": 0.0001382195298871527,
"loss": 4.1585,
"step": 658000
},
{
"epoch": 3.01,
"learning_rate": 0.00013767385148545907,
"loss": 4.1503,
"step": 659000
},
{
"epoch": 3.01,
"learning_rate": 0.0001371292313756203,
"loss": 4.1517,
"step": 660000
},
{
"epoch": 3.02,
"learning_rate": 0.00013658458371390849,
"loss": 4.1508,
"step": 661000
},
{
"epoch": 3.02,
"learning_rate": 0.00013604100227223385,
"loss": 4.1545,
"step": 662000
},
{
"epoch": 3.03,
"learning_rate": 0.00013549740327772723,
"loss": 4.1505,
"step": 663000
},
{
"epoch": 3.03,
"learning_rate": 0.00013495542119768334,
"loss": 4.1496,
"step": 664000
},
{
"epoch": 3.03,
"learning_rate": 0.00013441288822507396,
"loss": 4.1504,
"step": 665000
},
{
"epoch": 3.04,
"learning_rate": 0.0001338708954980116,
"loss": 4.1514,
"step": 666000
},
{
"epoch": 3.04,
"learning_rate": 0.0001333299886553773,
"loss": 4.1504,
"step": 667000
},
{
"epoch": 3.05,
"learning_rate": 0.00013278908925682,
"loss": 4.1553,
"step": 668000
},
{
"epoch": 3.05,
"learning_rate": 0.0001322487434791535,
"loss": 4.154,
"step": 669000
},
{
"epoch": 3.06,
"learning_rate": 0.00013170895577324293,
"loss": 4.1501,
"step": 670000
},
{
"epoch": 3.06,
"learning_rate": 0.00013117026952808839,
"loss": 4.1481,
"step": 671000
},
{
"epoch": 3.07,
"learning_rate": 0.00013063161073068494,
"loss": 4.153,
"step": 672000
},
{
"epoch": 3.07,
"learning_rate": 0.00013009406112599048,
"loss": 4.148,
"step": 673000
},
{
"epoch": 3.08,
"learning_rate": 0.00012955708619025508,
"loss": 4.1457,
"step": 674000
},
{
"epoch": 3.08,
"learning_rate": 0.00012902015369654687,
"loss": 4.1496,
"step": 675000
},
{
"epoch": 3.09,
"learning_rate": 0.00012848434192302686,
"loss": 4.1481,
"step": 676000
},
{
"epoch": 3.09,
"learning_rate": 0.00012794858258770753,
"loss": 4.148,
"step": 677000
},
{
"epoch": 3.09,
"learning_rate": 0.00012741341674486485,
"loss": 4.1484,
"step": 678000
},
{
"epoch": 3.1,
"learning_rate": 0.00012687884880269694,
"loss": 4.1446,
"step": 679000
},
{
"epoch": 3.1,
"learning_rate": 0.00012634541682779958,
"loss": 4.1428,
"step": 680000
},
{
"epoch": 3.11,
"learning_rate": 0.00012581205728294073,
"loss": 4.1455,
"step": 681000
},
{
"epoch": 3.11,
"learning_rate": 0.00012527984127101713,
"loss": 4.1422,
"step": 682000
},
{
"epoch": 3.12,
"learning_rate": 0.0001247477076791393,
"loss": 4.1427,
"step": 683000
},
{
"epoch": 3.12,
"learning_rate": 0.00012421672514822168,
"loss": 4.1434,
"step": 684000
},
{
"epoch": 3.13,
"learning_rate": 0.00012368583502464424,
"loss": 4.1414,
"step": 685000
},
{
"epoch": 3.13,
"learning_rate": 0.00012315610345216445,
"loss": 4.1437,
"step": 686000
},
{
"epoch": 3.14,
"learning_rate": 0.00012262647427127763,
"loss": 4.1419,
"step": 687000
},
{
"epoch": 3.14,
"learning_rate": 0.0001220980110934919,
"loss": 4.1379,
"step": 688000
},
{
"epoch": 3.14,
"learning_rate": 0.00012156966028818173,
"loss": 4.1382,
"step": 689000
},
{
"epoch": 3.15,
"learning_rate": 0.00012104248289959676,
"loss": 4.1365,
"step": 690000
},
{
"epoch": 3.15,
"learning_rate": 0.00012051542786067112,
"loss": 4.1394,
"step": 691000
},
{
"epoch": 3.16,
"learning_rate": 0.00011998955361347148,
"loss": 4.1366,
"step": 692000
},
{
"epoch": 3.16,
"learning_rate": 0.00011946381168908787,
"loss": 4.1347,
"step": 693000
},
{
"epoch": 3.17,
"learning_rate": 0.00011893873314682198,
"loss": 4.1357,
"step": 694000
},
{
"epoch": 3.17,
"learning_rate": 0.00011841432231178195,
"loss": 4.1337,
"step": 695000
},
{
"epoch": 3.18,
"learning_rate": 0.0001178911069052703,
"loss": 4.1347,
"step": 696000
},
{
"epoch": 3.18,
"learning_rate": 0.00011736804375947676,
"loss": 4.1351,
"step": 697000
},
{
"epoch": 3.19,
"learning_rate": 0.00011684618329987129,
"loss": 4.1297,
"step": 698000
},
{
"epoch": 3.19,
"learning_rate": 0.00011632448506008744,
"loss": 4.1351,
"step": 699000
},
{
"epoch": 3.19,
"learning_rate": 0.00011580399672456457,
"loss": 4.1329,
"step": 700000
},
{
"epoch": 3.2,
"learning_rate": 0.00011528368056262728,
"loss": 4.1313,
"step": 701000
},
{
"epoch": 3.2,
"learning_rate": 0.00011476458148319966,
"loss": 4.1265,
"step": 702000
},
{
"epoch": 3.21,
"learning_rate": 0.00011424566452545455,
"loss": 4.129,
"step": 703000
},
{
"epoch": 3.21,
"learning_rate": 0.00011372797178840713,
"loss": 4.1299,
"step": 704000
},
{
"epoch": 3.22,
"learning_rate": 0.00011321047111514422,
"loss": 4.1257,
"step": 705000
},
{
"epoch": 3.22,
"learning_rate": 0.0001126942017604717,
"loss": 4.1281,
"step": 706000
},
{
"epoch": 3.23,
"learning_rate": 0.00011217813440536418,
"loss": 4.1266,
"step": 707000
},
{
"epoch": 3.23,
"learning_rate": 0.00011166279044499894,
"loss": 4.1249,
"step": 708000
},
{
"epoch": 3.24,
"learning_rate": 0.00011114817412429949,
"loss": 4.1247,
"step": 709000
},
{
"epoch": 3.24,
"learning_rate": 0.00011063428968219605,
"loss": 4.1229,
"step": 710000
},
{
"epoch": 3.24,
"learning_rate": 0.00011012114135158998,
"loss": 4.1245,
"step": 711000
},
{
"epoch": 3.25,
"learning_rate": 0.00010960924539610728,
"loss": 4.1261,
"step": 712000
},
{
"epoch": 3.25,
"learning_rate": 0.00010909758121624652,
"loss": 4.1228,
"step": 713000
},
{
"epoch": 3.26,
"learning_rate": 0.00010858717634585534,
"loss": 4.1197,
"step": 714000
},
{
"epoch": 3.26,
"learning_rate": 0.00010807701315830314,
"loss": 4.1174,
"step": 715000
},
{
"epoch": 3.27,
"learning_rate": 0.00010756862520028245,
"loss": 4.1188,
"step": 716000
},
{
"epoch": 3.27,
"learning_rate": 0.00010705997903373485,
"loss": 4.1191,
"step": 717000
},
{
"epoch": 3.28,
"learning_rate": 0.00010655209842052723,
"loss": 4.114,
"step": 718000
},
{
"epoch": 3.28,
"learning_rate": 0.00010604549426910888,
"loss": 4.1175,
"step": 719000
},
{
"epoch": 3.29,
"learning_rate": 0.00010553915653058473,
"loss": 4.1166,
"step": 720000
},
{
"epoch": 3.29,
"learning_rate": 0.00010503359687251983,
"loss": 4.1143,
"step": 721000
},
{
"epoch": 3.29,
"learning_rate": 0.00010452881945924391,
"loss": 4.1152,
"step": 722000
},
{
"epoch": 3.3,
"learning_rate": 0.00010402533204546334,
"loss": 4.1116,
"step": 723000
},
{
"epoch": 3.3,
"learning_rate": 0.00010352213079632074,
"loss": 4.1111,
"step": 724000
},
{
"epoch": 3.31,
"learning_rate": 0.00010301972424201705,
"loss": 4.1103,
"step": 725000
},
{
"epoch": 3.31,
"learning_rate": 0.00010251861772823774,
"loss": 4.1111,
"step": 726000
},
{
"epoch": 3.32,
"learning_rate": 0.00010201781216707713,
"loss": 4.1106,
"step": 727000
},
{
"epoch": 3.32,
"learning_rate": 0.00010151831328589558,
"loss": 4.1069,
"step": 728000
},
{
"epoch": 3.33,
"learning_rate": 0.00010101962398354699,
"loss": 4.1067,
"step": 729000
},
{
"epoch": 3.33,
"learning_rate": 0.00010052174835955799,
"loss": 4.1035,
"step": 730000
},
{
"epoch": 3.34,
"learning_rate": 0.00010002419336242872,
"loss": 4.1095,
"step": 731000
},
{
"epoch": 3.34,
"learning_rate": 9.952746187288931e-05,
"loss": 4.1049,
"step": 732000
},
{
"epoch": 3.35,
"learning_rate": 9.903155798255135e-05,
"loss": 4.0988,
"step": 733000
},
{
"epoch": 3.35,
"learning_rate": 9.853648577620898e-05,
"loss": 4.1043,
"step": 734000
},
{
"epoch": 3.35,
"learning_rate": 9.804274314943199e-05,
"loss": 4.1043,
"step": 735000
},
{
"epoch": 3.36,
"learning_rate": 9.754934569616405e-05,
"loss": 4.1018,
"step": 736000
},
{
"epoch": 3.36,
"learning_rate": 9.705777639819362e-05,
"loss": 4.099,
"step": 737000
},
{
"epoch": 3.37,
"learning_rate": 9.656606908833878e-05,
"loss": 4.1011,
"step": 738000
},
{
"epoch": 3.37,
"learning_rate": 9.6075213770881e-05,
"loss": 4.1025,
"step": 739000
},
{
"epoch": 3.38,
"learning_rate": 9.558570405937759e-05,
"loss": 4.1005,
"step": 740000
},
{
"epoch": 3.38,
"learning_rate": 9.509656398720454e-05,
"loss": 4.0979,
"step": 741000
},
{
"epoch": 3.39,
"learning_rate": 9.46082880118432e-05,
"loss": 4.0973,
"step": 742000
},
{
"epoch": 3.39,
"learning_rate": 9.412088015525628e-05,
"loss": 4.0912,
"step": 743000
},
{
"epoch": 3.4,
"learning_rate": 9.363434443225589e-05,
"loss": 4.0913,
"step": 744000
},
{
"epoch": 3.4,
"learning_rate": 9.31486848504702e-05,
"loss": 4.0922,
"step": 745000
},
{
"epoch": 3.4,
"learning_rate": 9.266390541031052e-05,
"loss": 4.095,
"step": 746000
},
{
"epoch": 3.41,
"learning_rate": 9.218049355729118e-05,
"loss": 4.0916,
"step": 747000
},
{
"epoch": 3.41,
"learning_rate": 9.169748548247643e-05,
"loss": 4.0896,
"step": 748000
},
{
"epoch": 3.42,
"learning_rate": 9.121585117197211e-05,
"loss": 4.0896,
"step": 749000
},
{
"epoch": 3.42,
"learning_rate": 9.073463036084202e-05,
"loss": 4.0849,
"step": 750000
},
{
"epoch": 3.43,
"learning_rate": 9.025430957607068e-05,
"loss": 4.0861,
"step": 751000
},
{
"epoch": 3.43,
"learning_rate": 8.977489277409341e-05,
"loss": 4.0873,
"step": 752000
},
{
"epoch": 3.44,
"learning_rate": 8.929686195794506e-05,
"loss": 4.083,
"step": 753000
},
{
"epoch": 3.44,
"learning_rate": 8.882021833036489e-05,
"loss": 4.0873,
"step": 754000
},
{
"epoch": 3.45,
"learning_rate": 8.834353438745977e-05,
"loss": 4.083,
"step": 755000
},
{
"epoch": 3.45,
"learning_rate": 8.786824547005008e-05,
"loss": 4.0832,
"step": 756000
},
{
"epoch": 3.45,
"learning_rate": 8.739340396441291e-05,
"loss": 4.0828,
"step": 757000
},
{
"epoch": 3.46,
"learning_rate": 8.691949000704588e-05,
"loss": 4.087,
"step": 758000
},
{
"epoch": 3.46,
"learning_rate": 8.644650750161096e-05,
"loss": 4.0797,
"step": 759000
},
{
"epoch": 3.47,
"learning_rate": 8.597446034409749e-05,
"loss": 4.0808,
"step": 760000
},
{
"epoch": 3.47,
"learning_rate": 8.55033524227903e-05,
"loss": 4.0762,
"step": 761000
},
{
"epoch": 3.48,
"learning_rate": 8.503365731066581e-05,
"loss": 4.0769,
"step": 762000
},
{
"epoch": 3.48,
"learning_rate": 8.456443854672643e-05,
"loss": 4.0744,
"step": 763000
},
{
"epoch": 3.49,
"learning_rate": 8.409617063343962e-05,
"loss": 4.0785,
"step": 764000
},
{
"epoch": 3.49,
"learning_rate": 8.362885742796067e-05,
"loss": 4.074,
"step": 765000
},
{
"epoch": 3.5,
"learning_rate": 8.316296865415034e-05,
"loss": 4.0752,
"step": 766000
},
{
"epoch": 3.5,
"learning_rate": 8.269757543994949e-05,
"loss": 4.0721,
"step": 767000
},
{
"epoch": 3.5,
"learning_rate": 8.223314845388103e-05,
"loss": 4.0745,
"step": 768000
},
{
"epoch": 3.51,
"learning_rate": 8.176969152146221e-05,
"loss": 4.0721,
"step": 769000
},
{
"epoch": 3.51,
"learning_rate": 8.130767045556329e-05,
"loss": 4.0721,
"step": 770000
},
{
"epoch": 3.52,
"learning_rate": 8.084616409542043e-05,
"loss": 4.0681,
"step": 771000
},
{
"epoch": 3.52,
"learning_rate": 8.038609924698259e-05,
"loss": 4.0728,
"step": 772000
},
{
"epoch": 3.53,
"learning_rate": 7.992701769691633e-05,
"loss": 4.0687,
"step": 773000
},
{
"epoch": 3.53,
"learning_rate": 7.946846516190165e-05,
"loss": 4.0675,
"step": 774000
},
{
"epoch": 3.54,
"learning_rate": 7.90109054534227e-05,
"loss": 4.0665,
"step": 775000
},
{
"epoch": 3.54,
"learning_rate": 7.855434234043022e-05,
"loss": 4.0655,
"step": 776000
},
{
"epoch": 3.55,
"learning_rate": 7.80992346454953e-05,
"loss": 4.0671,
"step": 777000
},
{
"epoch": 3.55,
"learning_rate": 7.764512904833741e-05,
"loss": 4.0638,
"step": 778000
},
{
"epoch": 3.56,
"learning_rate": 7.71915762338268e-05,
"loss": 4.0613,
"step": 779000
},
{
"epoch": 3.56,
"learning_rate": 7.673948703544935e-05,
"loss": 4.0572,
"step": 780000
},
{
"epoch": 3.56,
"learning_rate": 7.628796009423646e-05,
"loss": 4.0639,
"step": 781000
},
{
"epoch": 3.57,
"learning_rate": 7.583745217759814e-05,
"loss": 4.0613,
"step": 782000
},
{
"epoch": 3.57,
"learning_rate": 7.538841596949084e-05,
"loss": 4.0572,
"step": 783000
},
{
"epoch": 3.58,
"learning_rate": 7.493995619788687e-05,
"loss": 4.0566,
"step": 784000
},
{
"epoch": 3.58,
"learning_rate": 7.44925265544582e-05,
"loss": 4.0562,
"step": 785000
},
{
"epoch": 3.59,
"learning_rate": 7.404613072471351e-05,
"loss": 4.056,
"step": 786000
},
{
"epoch": 3.59,
"learning_rate": 7.360077238564593e-05,
"loss": 4.0507,
"step": 787000
},
{
"epoch": 3.6,
"learning_rate": 7.315645520570287e-05,
"loss": 4.0505,
"step": 788000
},
{
"epoch": 3.6,
"learning_rate": 7.271362559401307e-05,
"loss": 4.0534,
"step": 789000
},
{
"epoch": 3.61,
"learning_rate": 7.22714006530347e-05,
"loss": 4.0539,
"step": 790000
},
{
"epoch": 3.61,
"learning_rate": 7.183066846739989e-05,
"loss": 4.0536,
"step": 791000
},
{
"epoch": 3.61,
"learning_rate": 7.139055032133843e-05,
"loss": 4.0522,
"step": 792000
},
{
"epoch": 3.62,
"learning_rate": 7.095236859806331e-05,
"loss": 4.05,
"step": 793000
},
{
"epoch": 3.62,
"learning_rate": 7.051437066874354e-05,
"loss": 4.0474,
"step": 794000
},
{
"epoch": 3.63,
"learning_rate": 7.007743932145127e-05,
"loss": 4.0424,
"step": 795000
},
{
"epoch": 3.63,
"learning_rate": 6.964157815522e-05,
"loss": 4.0445,
"step": 796000
},
{
"epoch": 3.64,
"learning_rate": 6.920679076026799e-05,
"loss": 4.0437,
"step": 797000
},
{
"epoch": 3.64,
"learning_rate": 6.877308071796904e-05,
"loss": 4.0428,
"step": 798000
},
{
"epoch": 3.65,
"learning_rate": 6.834088368883074e-05,
"loss": 4.0413,
"step": 799000
},
{
"epoch": 3.65,
"learning_rate": 6.790933797416663e-05,
"loss": 4.0423,
"step": 800000
},
{
"epoch": 3.66,
"learning_rate": 6.747888029936322e-05,
"loss": 4.0412,
"step": 801000
},
{
"epoch": 3.66,
"learning_rate": 6.704994302979443e-05,
"loss": 4.0394,
"step": 802000
},
{
"epoch": 3.66,
"learning_rate": 6.66216709659637e-05,
"loss": 4.0399,
"step": 803000
},
{
"epoch": 3.67,
"learning_rate": 6.619449754859523e-05,
"loss": 4.0385,
"step": 804000
},
{
"epoch": 3.67,
"learning_rate": 6.576885181589794e-05,
"loss": 4.0375,
"step": 805000
},
{
"epoch": 3.68,
"learning_rate": 6.534388513092143e-05,
"loss": 4.0376,
"step": 806000
},
{
"epoch": 3.68,
"learning_rate": 6.492002761761704e-05,
"loss": 4.0367,
"step": 807000
},
{
"epoch": 3.69,
"learning_rate": 6.44972827673282e-05,
"loss": 4.0361,
"step": 808000
},
{
"epoch": 3.69,
"learning_rate": 6.40760751322673e-05,
"loss": 4.0294,
"step": 809000
},
{
"epoch": 3.7,
"learning_rate": 6.365556492400127e-05,
"loss": 4.0324,
"step": 810000
},
{
"epoch": 3.7,
"learning_rate": 6.323659661921848e-05,
"loss": 4.0314,
"step": 811000
},
{
"epoch": 3.71,
"learning_rate": 6.281833489418096e-05,
"loss": 4.0306,
"step": 812000
},
{
"epoch": 3.71,
"learning_rate": 6.240161971012996e-05,
"loss": 4.0301,
"step": 813000
},
{
"epoch": 3.71,
"learning_rate": 6.198562023551751e-05,
"loss": 4.0285,
"step": 814000
},
{
"epoch": 3.72,
"learning_rate": 6.157158618328416e-05,
"loss": 4.0266,
"step": 815000
},
{
"epoch": 3.72,
"learning_rate": 6.115786150987899e-05,
"loss": 4.0292,
"step": 816000
},
{
"epoch": 3.73,
"learning_rate": 6.0745280488710155e-05,
"loss": 4.0234,
"step": 817000
},
{
"epoch": 3.73,
"learning_rate": 6.0333846518236035e-05,
"loss": 4.0236,
"step": 818000
},
{
"epoch": 3.74,
"learning_rate": 5.9923562987466307e-05,
"loss": 4.0237,
"step": 819000
},
{
"epoch": 3.74,
"learning_rate": 5.951484182819116e-05,
"loss": 4.021,
"step": 820000
},
{
"epoch": 3.75,
"learning_rate": 5.910727554160531e-05,
"loss": 4.0171,
"step": 821000
},
{
"epoch": 3.75,
"learning_rate": 5.8700461244659956e-05,
"loss": 4.0197,
"step": 822000
},
{
"epoch": 3.76,
"learning_rate": 5.829481084172575e-05,
"loss": 4.0153,
"step": 823000
},
{
"epoch": 3.76,
"learning_rate": 5.789032767417306e-05,
"loss": 4.0209,
"step": 824000
},
{
"epoch": 3.76,
"learning_rate": 5.748701507375753e-05,
"loss": 4.0169,
"step": 825000
},
{
"epoch": 3.77,
"learning_rate": 5.708487636259276e-05,
"loss": 4.0142,
"step": 826000
},
{
"epoch": 3.77,
"learning_rate": 5.6684315225520025e-05,
"loss": 4.0168,
"step": 827000
},
{
"epoch": 3.78,
"learning_rate": 5.628453303834178e-05,
"loss": 4.0179,
"step": 828000
},
{
"epoch": 3.78,
"learning_rate": 5.588633265133554e-05,
"loss": 4.0114,
"step": 829000
},
{
"epoch": 3.79,
"learning_rate": 5.5489316965551574e-05,
"loss": 4.0113,
"step": 830000
},
{
"epoch": 3.79,
"learning_rate": 5.5093093617013605e-05,
"loss": 4.0101,
"step": 831000
},
{
"epoch": 3.8,
"learning_rate": 5.469806387662206e-05,
"loss": 4.012,
"step": 832000
},
{
"epoch": 3.8,
"learning_rate": 5.4304230998263825e-05,
"loss": 4.011,
"step": 833000
},
{
"epoch": 3.81,
"learning_rate": 5.391199025820963e-05,
"loss": 4.0085,
"step": 834000
},
{
"epoch": 3.81,
"learning_rate": 5.352055962116598e-05,
"loss": 4.0103,
"step": 835000
},
{
"epoch": 3.82,
"learning_rate": 5.313033554533935e-05,
"loss": 4.007,
"step": 836000
},
{
"epoch": 3.82,
"learning_rate": 5.2741321245032015e-05,
"loss": 4.0057,
"step": 837000
},
{
"epoch": 3.82,
"learning_rate": 5.235429431454388e-05,
"loss": 4.0023,
"step": 838000
},
{
"epoch": 3.83,
"learning_rate": 5.196770673276694e-05,
"loss": 4.0024,
"step": 839000
},
{
"epoch": 3.83,
"learning_rate": 5.158233850316285e-05,
"loss": 4.0052,
"step": 840000
},
{
"epoch": 3.84,
"learning_rate": 5.119895986925622e-05,
"loss": 3.9984,
"step": 841000
},
{
"epoch": 3.84,
"learning_rate": 5.0816037402308914e-05,
"loss": 4.0024,
"step": 842000
},
{
"epoch": 3.85,
"learning_rate": 5.0434343773913936e-05,
"loss": 3.9997,
"step": 843000
},
{
"epoch": 3.85,
"learning_rate": 5.005388212810789e-05,
"loss": 4.0001,
"step": 844000
},
{
"epoch": 3.86,
"learning_rate": 4.967465559877949e-05,
"loss": 3.9953,
"step": 845000
},
{
"epoch": 3.86,
"learning_rate": 4.929666730964366e-05,
"loss": 3.9974,
"step": 846000
},
{
"epoch": 3.87,
"learning_rate": 4.8920296500061624e-05,
"loss": 3.9978,
"step": 847000
},
{
"epoch": 3.87,
"learning_rate": 4.854479277562882e-05,
"loss": 3.9965,
"step": 848000
},
{
"epoch": 3.87,
"learning_rate": 4.8170910230147306e-05,
"loss": 3.9938,
"step": 849000
},
{
"epoch": 3.88,
"learning_rate": 4.77979034302229e-05,
"loss": 3.9955,
"step": 850000
},
{
"epoch": 3.88,
"learning_rate": 4.7426521455285876e-05,
"loss": 3.9919,
"step": 851000
},
{
"epoch": 3.89,
"learning_rate": 4.705602385748844e-05,
"loss": 3.9902,
"step": 852000
},
{
"epoch": 3.89,
"learning_rate": 4.668678606973318e-05,
"loss": 3.9888,
"step": 853000
},
{
"epoch": 3.9,
"learning_rate": 4.631881113345728e-05,
"loss": 3.9864,
"step": 854000
},
{
"epoch": 3.9,
"learning_rate": 4.5952834232442806e-05,
"loss": 3.9886,
"step": 855000
},
{
"epoch": 3.91,
"learning_rate": 4.5587391540988944e-05,
"loss": 3.9877,
"step": 856000
},
{
"epoch": 3.91,
"learning_rate": 4.5223220756802585e-05,
"loss": 3.9858,
"step": 857000
},
{
"epoch": 3.92,
"learning_rate": 4.4860324879583624e-05,
"loss": 3.9823,
"step": 858000
},
{
"epoch": 3.92,
"learning_rate": 4.4498706898530285e-05,
"loss": 3.9823,
"step": 859000
},
{
"epoch": 3.92,
"learning_rate": 4.413836979231471e-05,
"loss": 3.9826,
"step": 860000
},
{
"epoch": 3.93,
"learning_rate": 4.3779674940056856e-05,
"loss": 3.9845,
"step": 861000
},
{
"epoch": 3.93,
"learning_rate": 4.342190718903205e-05,
"loss": 3.9797,
"step": 862000
},
{
"epoch": 3.94,
"learning_rate": 4.3066140849412765e-05,
"loss": 3.9826,
"step": 863000
},
{
"epoch": 3.94,
"learning_rate": 4.271095293545859e-05,
"loss": 3.9786,
"step": 864000
},
{
"epoch": 3.95,
"learning_rate": 4.235706062219449e-05,
"loss": 3.9789,
"step": 865000
},
{
"epoch": 3.95,
"learning_rate": 4.200481876887719e-05,
"loss": 3.9804,
"step": 866000
},
{
"epoch": 3.96,
"learning_rate": 4.165352508853595e-05,
"loss": 3.9759,
"step": 867000
},
{
"epoch": 3.96,
"learning_rate": 4.13038850558964e-05,
"loss": 3.9766,
"step": 868000
},
{
"epoch": 3.97,
"learning_rate": 4.095520157140329e-05,
"loss": 3.9754,
"step": 869000
},
{
"epoch": 3.97,
"learning_rate": 4.0608174867936735e-05,
"loss": 3.9726,
"step": 870000
},
{
"epoch": 3.97,
"learning_rate": 4.026211305630183e-05,
"loss": 3.9728,
"step": 871000
},
{
"epoch": 3.98,
"learning_rate": 3.9917367016619276e-05,
"loss": 3.9719,
"step": 872000
},
{
"epoch": 3.98,
"learning_rate": 3.9573939588586015e-05,
"loss": 3.9689,
"step": 873000
},
{
"epoch": 3.99,
"learning_rate": 3.923183360103733e-05,
"loss": 3.9669,
"step": 874000
},
{
"epoch": 3.99,
"learning_rate": 3.88910518719237e-05,
"loss": 3.9705,
"step": 875000
},
{
"epoch": 4.0,
"learning_rate": 3.8551935999150546e-05,
"loss": 3.969,
"step": 876000
},
{
"epoch": 4.0,
"learning_rate": 3.821414732678987e-05,
"loss": 3.9472,
"step": 877000
},
{
"epoch": 4.01,
"learning_rate": 3.7877352503423325e-05,
"loss": 3.9284,
"step": 878000
},
{
"epoch": 4.01,
"learning_rate": 3.7541893095445734e-05,
"loss": 3.9269,
"step": 879000
},
{
"epoch": 4.02,
"learning_rate": 3.720810531795154e-05,
"loss": 3.9298,
"step": 880000
},
{
"epoch": 4.02,
"learning_rate": 3.687532367703408e-05,
"loss": 3.9306,
"step": 881000
},
{
"epoch": 4.03,
"learning_rate": 3.65438857052858e-05,
"loss": 3.9292,
"step": 882000
},
{
"epoch": 4.03,
"learning_rate": 3.6213794132784204e-05,
"loss": 3.9297,
"step": 883000
},
{
"epoch": 4.03,
"learning_rate": 3.588537974618371e-05,
"loss": 3.9289,
"step": 884000
},
{
"epoch": 4.04,
"learning_rate": 3.555798776484851e-05,
"loss": 3.9268,
"step": 885000
},
{
"epoch": 4.04,
"learning_rate": 3.5232601024993396e-05,
"loss": 3.933,
"step": 886000
},
{
"epoch": 4.05,
"learning_rate": 3.490791805247826e-05,
"loss": 3.9279,
"step": 887000
},
{
"epoch": 4.05,
"learning_rate": 3.458459495478781e-05,
"loss": 3.9265,
"step": 888000
},
{
"epoch": 4.06,
"learning_rate": 3.4262634395156536e-05,
"loss": 3.9279,
"step": 889000
},
{
"epoch": 4.06,
"learning_rate": 3.394235893817297e-05,
"loss": 3.9273,
"step": 890000
},
{
"epoch": 4.07,
"learning_rate": 3.3623130030302484e-05,
"loss": 3.924,
"step": 891000
},
{
"epoch": 4.07,
"learning_rate": 3.330527158014394e-05,
"loss": 3.9239,
"step": 892000
},
{
"epoch": 4.08,
"learning_rate": 3.298910200457324e-05,
"loss": 3.9256,
"step": 893000
},
{
"epoch": 4.08,
"learning_rate": 3.267399093621268e-05,
"loss": 3.9225,
"step": 894000
},
{
"epoch": 4.08,
"learning_rate": 3.2360258143687926e-05,
"loss": 3.9213,
"step": 895000
},
{
"epoch": 4.09,
"learning_rate": 3.204821787257311e-05,
"loss": 3.923,
"step": 896000
},
{
"epoch": 4.09,
"learning_rate": 3.173724798834707e-05,
"loss": 3.9165,
"step": 897000
},
{
"epoch": 4.1,
"learning_rate": 3.142797298671269e-05,
"loss": 3.9243,
"step": 898000
},
{
"epoch": 4.1,
"learning_rate": 3.111977624644229e-05,
"loss": 3.9195,
"step": 899000
},
{
"epoch": 4.11,
"learning_rate": 3.081297058418091e-05,
"loss": 3.9208,
"step": 900000
},
{
"epoch": 4.11,
"learning_rate": 3.0507558527107828e-05,
"loss": 3.92,
"step": 901000
},
{
"epoch": 4.12,
"learning_rate": 3.0203845908662563e-05,
"loss": 3.9204,
"step": 902000
},
{
"epoch": 4.12,
"learning_rate": 2.9901227197694415e-05,
"loss": 3.9196,
"step": 903000
},
{
"epoch": 4.13,
"learning_rate": 2.9600310118919393e-05,
"loss": 3.9197,
"step": 904000
},
{
"epoch": 4.13,
"learning_rate": 2.9300494714831896e-05,
"loss": 3.9205,
"step": 905000
},
{
"epoch": 4.13,
"learning_rate": 2.9002383080493055e-05,
"loss": 3.9151,
"step": 906000
},
{
"epoch": 4.14,
"learning_rate": 2.8705380851790375e-05,
"loss": 3.9172,
"step": 907000
},
{
"epoch": 4.14,
"learning_rate": 2.8409789588637402e-05,
"loss": 3.9158,
"step": 908000
},
{
"epoch": 4.15,
"learning_rate": 2.8115611725839808e-05,
"loss": 3.9135,
"step": 909000
},
{
"epoch": 4.15,
"learning_rate": 2.7823141740592663e-05,
"loss": 3.9149,
"step": 910000
},
{
"epoch": 4.16,
"learning_rate": 2.7531796516897657e-05,
"loss": 3.9118,
"step": 911000
},
{
"epoch": 4.16,
"learning_rate": 2.7242161139836732e-05,
"loss": 3.9082,
"step": 912000
},
{
"epoch": 4.17,
"learning_rate": 2.6953658144950188e-05,
"loss": 3.9131,
"step": 913000
},
{
"epoch": 4.17,
"learning_rate": 2.666686690950142e-05,
"loss": 3.9105,
"step": 914000
},
{
"epoch": 4.18,
"learning_rate": 2.6381215639576494e-05,
"loss": 3.9091,
"step": 915000
},
{
"epoch": 4.18,
"learning_rate": 2.6097277985549907e-05,
"loss": 3.9072,
"step": 916000
},
{
"epoch": 4.18,
"learning_rate": 2.581476991673275e-05,
"loss": 3.9109,
"step": 917000
},
{
"epoch": 4.19,
"learning_rate": 2.553341311615387e-05,
"loss": 3.9079,
"step": 918000
},
{
"epoch": 4.19,
"learning_rate": 2.5253493408841024e-05,
"loss": 3.9061,
"step": 919000
},
{
"epoch": 4.2,
"learning_rate": 2.4975290861076127e-05,
"loss": 3.9058,
"step": 920000
},
{
"epoch": 4.2,
"learning_rate": 2.469825080275776e-05,
"loss": 3.9049,
"step": 921000
},
{
"epoch": 4.21,
"learning_rate": 2.4422929591059718e-05,
"loss": 3.9037,
"step": 922000
},
{
"epoch": 4.21,
"learning_rate": 2.4148778300583463e-05,
"loss": 3.9052,
"step": 923000
},
{
"epoch": 4.22,
"learning_rate": 2.3876075508705364e-05,
"loss": 3.906,
"step": 924000
},
{
"epoch": 4.22,
"learning_rate": 2.3605364516460604e-05,
"loss": 3.9016,
"step": 925000
},
{
"epoch": 4.23,
"learning_rate": 2.3335562540463497e-05,
"loss": 3.902,
"step": 926000
},
{
"epoch": 4.23,
"learning_rate": 2.3067215761578686e-05,
"loss": 3.901,
"step": 927000
},
{
"epoch": 4.24,
"learning_rate": 2.2800326390197003e-05,
"loss": 3.9034,
"step": 928000
},
{
"epoch": 4.24,
"learning_rate": 2.2535161324668153e-05,
"loss": 3.8986,
"step": 929000
},
{
"epoch": 4.24,
"learning_rate": 2.227119188854776e-05,
"loss": 3.9001,
"step": 930000
},
{
"epoch": 4.25,
"learning_rate": 2.200868641683378e-05,
"loss": 3.896,
"step": 931000
},
{
"epoch": 4.25,
"learning_rate": 2.1747647071801923e-05,
"loss": 3.8955,
"step": 932000
},
{
"epoch": 4.26,
"learning_rate": 2.1488593679023983e-05,
"loss": 3.896,
"step": 933000
},
{
"epoch": 4.26,
"learning_rate": 2.1230490082903298e-05,
"loss": 3.8937,
"step": 934000
},
{
"epoch": 4.27,
"learning_rate": 2.0973859023521336e-05,
"loss": 3.893,
"step": 935000
},
{
"epoch": 4.27,
"learning_rate": 2.0718957033886022e-05,
"loss": 3.8921,
"step": 936000
},
{
"epoch": 4.28,
"learning_rate": 2.0465275899699664e-05,
"loss": 3.8936,
"step": 937000
},
{
"epoch": 4.28,
"learning_rate": 2.021307360537388e-05,
"loss": 3.8894,
"step": 938000
},
{
"epoch": 4.29,
"learning_rate": 1.9962352228316283e-05,
"loss": 3.8913,
"step": 939000
},
{
"epoch": 4.29,
"learning_rate": 1.9713362330696583e-05,
"loss": 3.8938,
"step": 940000
},
{
"epoch": 4.29,
"learning_rate": 1.946560748553077e-05,
"loss": 3.8904,
"step": 941000
},
{
"epoch": 4.3,
"learning_rate": 1.921958523886409e-05,
"loss": 3.8881,
"step": 942000
},
{
"epoch": 4.3,
"learning_rate": 1.8974805080506908e-05,
"loss": 3.8859,
"step": 943000
},
{
"epoch": 4.31,
"learning_rate": 1.8732001127734854e-05,
"loss": 3.8898,
"step": 944000
},
{
"epoch": 4.31,
"learning_rate": 1.8490202219074714e-05,
"loss": 3.888,
"step": 945000
},
{
"epoch": 4.32,
"learning_rate": 1.8250137975426186e-05,
"loss": 3.8873,
"step": 946000
},
{
"epoch": 4.32,
"learning_rate": 1.8011329765448747e-05,
"loss": 3.8839,
"step": 947000
},
{
"epoch": 4.33,
"learning_rate": 1.7774020608654827e-05,
"loss": 3.8851,
"step": 948000
},
{
"epoch": 4.33,
"learning_rate": 1.753821245977625e-05,
"loss": 3.8815,
"step": 949000
},
{
"epoch": 4.34,
"learning_rate": 1.730414081501248e-05,
"loss": 3.8797,
"step": 950000
},
{
"epoch": 4.34,
"learning_rate": 1.7071338990848274e-05,
"loss": 3.8825,
"step": 951000
},
{
"epoch": 4.34,
"learning_rate": 1.6840274504384723e-05,
"loss": 3.8792,
"step": 952000
},
{
"epoch": 4.35,
"learning_rate": 1.6610715701279632e-05,
"loss": 3.8822,
"step": 953000
},
{
"epoch": 4.35,
"learning_rate": 1.6382436945055167e-05,
"loss": 3.8788,
"step": 954000
},
{
"epoch": 4.36,
"learning_rate": 1.615567065931629e-05,
"loss": 3.8825,
"step": 955000
},
{
"epoch": 4.36,
"learning_rate": 1.5930643206869322e-05,
"loss": 3.8789,
"step": 956000
},
{
"epoch": 4.37,
"learning_rate": 1.5706905936180028e-05,
"loss": 3.8776,
"step": 957000
},
{
"epoch": 4.37,
"learning_rate": 1.5484908160738844e-05,
"loss": 3.88,
"step": 958000
},
{
"epoch": 4.38,
"learning_rate": 1.5264207269471153e-05,
"loss": 3.8763,
"step": 959000
},
{
"epoch": 4.38,
"learning_rate": 1.5045028059623756e-05,
"loss": 3.8744,
"step": 960000
},
{
"epoch": 4.39,
"learning_rate": 1.4827372336590928e-05,
"loss": 3.8755,
"step": 961000
},
{
"epoch": 4.39,
"learning_rate": 1.4611457261190308e-05,
"loss": 3.8775,
"step": 962000
},
{
"epoch": 4.39,
"learning_rate": 1.4397066191369536e-05,
"loss": 3.8748,
"step": 963000
},
{
"epoch": 4.4,
"learning_rate": 1.4183988576170026e-05,
"loss": 3.8712,
"step": 964000
},
{
"epoch": 4.4,
"learning_rate": 1.3972441540226522e-05,
"loss": 3.8697,
"step": 965000
},
{
"epoch": 4.41,
"learning_rate": 1.3762426826066322e-05,
"loss": 3.8722,
"step": 966000
},
{
"epoch": 4.41,
"learning_rate": 1.3554153877426224e-05,
"loss": 3.8705,
"step": 967000
},
{
"epoch": 4.42,
"learning_rate": 1.3347207447291144e-05,
"loss": 3.8681,
"step": 968000
},
{
"epoch": 4.42,
"learning_rate": 1.314179848903565e-05,
"loss": 3.8657,
"step": 969000
},
{
"epoch": 4.43,
"learning_rate": 1.2938131795049502e-05,
"loss": 3.8674,
"step": 970000
},
{
"epoch": 4.43,
"learning_rate": 1.2735801302100369e-05,
"loss": 3.8668,
"step": 971000
},
{
"epoch": 4.44,
"learning_rate": 1.2535213334175821e-05,
"loss": 3.8706,
"step": 972000
},
{
"epoch": 4.44,
"learning_rate": 1.2335967966295303e-05,
"loss": 3.8628,
"step": 973000
},
{
"epoch": 4.44,
"learning_rate": 1.2138268399943431e-05,
"loss": 3.8666,
"step": 974000
},
{
"epoch": 4.45,
"learning_rate": 1.1942116263585212e-05,
"loss": 3.8637,
"step": 975000
},
{
"epoch": 4.45,
"learning_rate": 1.1747707001746943e-05,
"loss": 3.8626,
"step": 976000
},
{
"epoch": 4.46,
"learning_rate": 1.1554653008327055e-05,
"loss": 3.8644,
"step": 977000
},
{
"epoch": 4.46,
"learning_rate": 1.1363532705509805e-05,
"loss": 3.8628,
"step": 978000
},
{
"epoch": 4.47,
"learning_rate": 1.1173581654855314e-05,
"loss": 3.864,
"step": 979000
},
{
"epoch": 4.47,
"learning_rate": 1.0985185980385471e-05,
"loss": 3.8634,
"step": 980000
},
{
"epoch": 4.48,
"learning_rate": 1.079834723392832e-05,
"loss": 3.8611,
"step": 981000
},
{
"epoch": 4.48,
"learning_rate": 1.0613066954487539e-05,
"loss": 3.8614,
"step": 982000
},
{
"epoch": 4.49,
"learning_rate": 1.0429529608794375e-05,
"loss": 3.8612,
"step": 983000
},
{
"epoch": 4.49,
"learning_rate": 1.024736926677754e-05,
"loss": 3.8581,
"step": 984000
},
{
"epoch": 4.5,
"learning_rate": 1.0066951746339515e-05,
"loss": 3.8562,
"step": 985000
},
{
"epoch": 4.5,
"learning_rate": 9.887917337602925e-06,
"loss": 3.8566,
"step": 986000
},
{
"epoch": 4.5,
"learning_rate": 9.71044889515631e-06,
"loss": 3.8534,
"step": 987000
},
{
"epoch": 4.51,
"learning_rate": 9.534722998420087e-06,
"loss": 3.8577,
"step": 988000
},
{
"epoch": 4.51,
"learning_rate": 9.360389291505156e-06,
"loss": 3.8599,
"step": 989000
},
{
"epoch": 4.52,
"learning_rate": 9.187625896164997e-06,
"loss": 3.8554,
"step": 990000
},
{
"epoch": 4.52,
"learning_rate": 9.016434235463455e-06,
"loss": 3.8554,
"step": 991000
},
{
"epoch": 4.53,
"learning_rate": 8.846984551782144e-06,
"loss": 3.8531,
"step": 992000
},
{
"epoch": 4.53,
"learning_rate": 8.678939002516817e-06,
"loss": 3.8537,
"step": 993000
},
{
"epoch": 4.54,
"learning_rate": 8.512635059971796e-06,
"loss": 3.8556,
"step": 994000
},
{
"epoch": 4.54,
"learning_rate": 8.34774115340684e-06,
"loss": 3.8552,
"step": 995000
},
{
"epoch": 4.55,
"learning_rate": 8.184588425936723e-06,
"loss": 3.8518,
"step": 996000
},
{
"epoch": 4.55,
"learning_rate": 8.022851589599123e-06,
"loss": 3.8519,
"step": 997000
},
{
"epoch": 4.55,
"learning_rate": 7.862855447419604e-06,
"loss": 3.8497,
"step": 998000
},
{
"epoch": 4.56,
"learning_rate": 7.70428100492051e-06,
"loss": 3.8534,
"step": 999000
},
{
"epoch": 4.56,
"learning_rate": 7.54729051547387e-06,
"loss": 3.8516,
"step": 1000000
},
{
"epoch": 4.57,
"learning_rate": 7.392039885206847e-06,
"loss": 3.8476,
"step": 1001000
},
{
"epoch": 4.57,
"learning_rate": 7.238219581070471e-06,
"loss": 3.8473,
"step": 1002000
},
{
"epoch": 4.58,
"learning_rate": 7.085987068966549e-06,
"loss": 3.8497,
"step": 1003000
},
{
"epoch": 4.58,
"learning_rate": 6.935343602844757e-06,
"loss": 3.8455,
"step": 1004000
},
{
"epoch": 4.59,
"learning_rate": 6.786438681986962e-06,
"loss": 3.8503,
"step": 1005000
},
{
"epoch": 4.59,
"learning_rate": 6.638975425188365e-06,
"loss": 3.8475,
"step": 1006000
},
{
"epoch": 4.6,
"learning_rate": 6.493249970997628e-06,
"loss": 3.8456,
"step": 1007000
},
{
"epoch": 4.6,
"learning_rate": 6.349115258944571e-06,
"loss": 3.8407,
"step": 1008000
},
{
"epoch": 4.6,
"learning_rate": 6.2064305863833495e-06,
"loss": 3.8426,
"step": 1009000
},
{
"epoch": 4.61,
"learning_rate": 6.065342204771441e-06,
"loss": 3.8456,
"step": 1010000
},
{
"epoch": 4.61,
"learning_rate": 5.92585127626355e-06,
"loss": 3.8446,
"step": 1011000
},
{
"epoch": 4.62,
"learning_rate": 5.7880960433015715e-06,
"loss": 3.8439,
"step": 1012000
},
{
"epoch": 4.62,
"learning_rate": 5.651801854522143e-06,
"loss": 3.8432,
"step": 1013000
},
{
"epoch": 4.63,
"learning_rate": 5.517108525207015e-06,
"loss": 3.8411,
"step": 1014000
},
{
"epoch": 4.63,
"learning_rate": 5.384017164834387e-06,
"loss": 3.8405,
"step": 1015000
},
{
"epoch": 4.64,
"learning_rate": 5.25265955688945e-06,
"loss": 3.8406,
"step": 1016000
},
{
"epoch": 4.64,
"learning_rate": 5.122773805360459e-06,
"loss": 3.8425,
"step": 1017000
},
{
"epoch": 4.65,
"learning_rate": 4.9946207493118515e-06,
"loss": 3.8429,
"step": 1018000
},
{
"epoch": 4.65,
"learning_rate": 4.867944881850673e-06,
"loss": 3.84,
"step": 1019000
},
{
"epoch": 4.65,
"learning_rate": 4.743000595890457e-06,
"loss": 3.8434,
"step": 1020000
},
{
"epoch": 4.66,
"learning_rate": 4.619538782067134e-06,
"loss": 3.8404,
"step": 1021000
},
{
"epoch": 4.66,
"learning_rate": 4.497686330529982e-06,
"loss": 3.8382,
"step": 1022000
},
{
"epoch": 4.67,
"learning_rate": 4.377444244986006e-06,
"loss": 3.8392,
"step": 1023000
},
{
"epoch": 4.67,
"learning_rate": 4.2589313414077795e-06,
"loss": 3.8395,
"step": 1024000
},
{
"epoch": 4.68,
"learning_rate": 4.1420275474132856e-06,
"loss": 3.8404,
"step": 1025000
},
{
"epoch": 4.68,
"learning_rate": 4.02661922185521e-06,
"loss": 3.841,
"step": 1026000
},
{
"epoch": 4.69,
"learning_rate": 3.912938129952815e-06,
"loss": 3.8376,
"step": 1027000
},
{
"epoch": 4.69,
"learning_rate": 3.8007576184877935e-06,
"loss": 3.8383,
"step": 1028000
},
{
"epoch": 4.7,
"learning_rate": 3.6901932136656604e-06,
"loss": 3.837,
"step": 1029000
},
{
"epoch": 4.7,
"learning_rate": 3.5812458262129755e-06,
"loss": 3.8348,
"step": 1030000
},
{
"epoch": 4.71,
"learning_rate": 3.4740228745658187e-06,
"loss": 3.8393,
"step": 1031000
},
{
"epoch": 4.71,
"learning_rate": 3.368310581510614e-06,
"loss": 3.8335,
"step": 1032000
},
{
"epoch": 4.71,
"learning_rate": 3.2643212405075284e-06,
"loss": 3.833,
"step": 1033000
},
{
"epoch": 4.72,
"learning_rate": 3.161949185382773e-06,
"loss": 3.8357,
"step": 1034000
},
{
"epoch": 4.72,
"learning_rate": 3.0610952138760753e-06,
"loss": 3.8334,
"step": 1035000
},
{
"epoch": 4.73,
"learning_rate": 2.9618634416622936e-06,
"loss": 3.8355,
"step": 1036000
},
{
"epoch": 4.73,
"learning_rate": 2.864351483910399e-06,
"loss": 3.8369,
"step": 1037000
},
{
"epoch": 4.74,
"learning_rate": 2.768364924832545e-06,
"loss": 3.8369,
"step": 1038000
},
{
"epoch": 4.74,
"learning_rate": 2.6740029762871932e-06,
"loss": 3.8313,
"step": 1039000
},
{
"epoch": 4.75,
"learning_rate": 2.581358339964313e-06,
"loss": 3.8306,
"step": 1040000
},
{
"epoch": 4.75,
"learning_rate": 2.4902463043641854e-06,
"loss": 3.8327,
"step": 1041000
},
{
"epoch": 4.76,
"learning_rate": 2.4007611701787116e-06,
"loss": 3.834,
"step": 1042000
},
{
"epoch": 4.76,
"learning_rate": 2.3129036745030752e-06,
"loss": 3.8316,
"step": 1043000
},
{
"epoch": 4.76,
"learning_rate": 2.226759956554547e-06,
"loss": 3.8326,
"step": 1044000
},
{
"epoch": 4.77,
"learning_rate": 2.1421582661275585e-06,
"loss": 3.8354,
"step": 1045000
},
{
"epoch": 4.77,
"learning_rate": 2.0592685019640958e-06,
"loss": 3.8307,
"step": 1046000
},
{
"epoch": 4.78,
"learning_rate": 1.977925401473013e-06,
"loss": 3.8299,
"step": 1047000
},
{
"epoch": 4.78,
"learning_rate": 1.8982923194333036e-06,
"loss": 3.8297,
"step": 1048000
},
{
"epoch": 4.79,
"learning_rate": 1.8202104863079827e-06,
"loss": 3.8298,
"step": 1049000
},
{
"epoch": 4.79,
"learning_rate": 1.7438367075362172e-06,
"loss": 3.8303,
"step": 1050000
},
{
"epoch": 4.8,
"learning_rate": 1.6690927139422218e-06,
"loss": 3.8317,
"step": 1051000
},
{
"epoch": 4.8,
"learning_rate": 1.5959067502205883e-06,
"loss": 3.8296,
"step": 1052000
},
{
"epoch": 4.81,
"learning_rate": 1.5243550546499618e-06,
"loss": 3.8309,
"step": 1053000
},
{
"epoch": 4.81,
"learning_rate": 1.4544382166065795e-06,
"loss": 3.8339,
"step": 1054000
},
{
"epoch": 4.81,
"learning_rate": 1.3861568120002276e-06,
"loss": 3.831,
"step": 1055000
},
{
"epoch": 4.82,
"learning_rate": 1.3195114032695576e-06,
"loss": 3.8265,
"step": 1056000
},
{
"epoch": 4.82,
"learning_rate": 1.2545667306077758e-06,
"loss": 3.8309,
"step": 1057000
},
{
"epoch": 4.83,
"learning_rate": 1.1911933096932392e-06,
"loss": 3.8254,
"step": 1058000
},
{
"epoch": 4.83,
"learning_rate": 1.1294574905821087e-06,
"loss": 3.8288,
"step": 1059000
},
{
"epoch": 4.84,
"learning_rate": 1.0694190611034273e-06,
"loss": 3.8261,
"step": 1060000
},
{
"epoch": 4.84,
"learning_rate": 1.0109583188243843e-06,
"loss": 3.8253,
"step": 1061000
},
{
"epoch": 4.85,
"learning_rate": 9.541366629567838e-07,
"loss": 3.8274,
"step": 1062000
},
{
"epoch": 4.85,
"learning_rate": 8.989545615444961e-07,
"loss": 3.8264,
"step": 1063000
},
{
"epoch": 4.86,
"learning_rate": 8.454651918863299e-07,
"loss": 3.8278,
"step": 1064000
},
{
"epoch": 4.86,
"learning_rate": 7.935619088263124e-07,
"loss": 3.8267,
"step": 1065000
},
{
"epoch": 4.86,
"learning_rate": 7.433489455357823e-07,
"loss": 3.8298,
"step": 1066000
},
{
"epoch": 4.87,
"learning_rate": 6.94726189238426e-07,
"loss": 3.8305,
"step": 1067000
},
{
"epoch": 4.87,
"learning_rate": 6.477912850886725e-07,
"loss": 3.8305,
"step": 1068000
},
{
"epoch": 4.88,
"learning_rate": 6.024951762708009e-07,
"loss": 3.8248,
"step": 1069000
},
{
"epoch": 4.88,
"learning_rate": 5.588382351461308e-07,
"loss": 3.826,
"step": 1070000
},
{
"epoch": 4.89,
"learning_rate": 5.16779582648863e-07,
"loss": 3.8261,
"step": 1071000
},
{
"epoch": 4.89,
"learning_rate": 4.76364087147263e-07,
"loss": 3.8238,
"step": 1072000
},
{
"epoch": 4.9,
"learning_rate": 4.375920815465229e-07,
"loss": 3.8293,
"step": 1073000
},
{
"epoch": 4.9,
"learning_rate": 4.004638852143083e-07,
"loss": 3.8244,
"step": 1074000
},
{
"epoch": 4.91,
"learning_rate": 3.6497980397816043e-07,
"loss": 3.8275,
"step": 1075000
},
{
"epoch": 4.91,
"learning_rate": 3.3117314832133985e-07,
"loss": 3.8278,
"step": 1076000
},
{
"epoch": 4.92,
"learning_rate": 2.989765157657809e-07,
"loss": 3.824,
"step": 1077000
},
{
"epoch": 4.92,
"learning_rate": 2.684545642082537e-07,
"loss": 3.8268,
"step": 1078000
},
{
"epoch": 4.92,
"learning_rate": 2.395464400940739e-07,
"loss": 3.8244,
"step": 1079000
},
{
"epoch": 4.93,
"learning_rate": 2.1228375656396903e-07,
"loss": 3.8236,
"step": 1080000
},
{
"epoch": 4.93,
"learning_rate": 1.8666673818257262e-07,
"loss": 3.8255,
"step": 1081000
},
{
"epoch": 4.94,
"learning_rate": 1.6271874491924355e-07,
"loss": 3.828,
"step": 1082000
},
{
"epoch": 4.94,
"learning_rate": 1.4041353457650008e-07,
"loss": 3.8259,
"step": 1083000
},
{
"epoch": 4.95,
"learning_rate": 1.1973143077612658e-07,
"loss": 3.8265,
"step": 1084000
},
{
"epoch": 4.95,
"learning_rate": 1.0069575448430346e-07,
"loss": 3.826,
"step": 1085000
},
{
"epoch": 4.96,
"learning_rate": 8.330666249920515e-08,
"loss": 3.8267,
"step": 1086000
},
{
"epoch": 4.96,
"learning_rate": 6.759413926236135e-08,
"loss": 3.8269,
"step": 1087000
},
{
"epoch": 4.97,
"learning_rate": 5.349533819716257e-08,
"loss": 3.8255,
"step": 1088000
},
{
"epoch": 4.97,
"learning_rate": 4.1043510231775216e-08,
"loss": 3.8275,
"step": 1089000
},
{
"epoch": 4.97,
"learning_rate": 3.0248739940019756e-08,
"loss": 3.8277,
"step": 1090000
}
],
"max_steps": 1095620,
"num_train_epochs": 5,
"total_flos": 7.86932880566174e+19,
"trial_name": null,
"trial_params": null
}