{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.976631448884397, "global_step": 1090500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4e-05, "loss": 9.6608, "step": 100 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 8.6223, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.00012, "loss": 8.3175, "step": 300 }, { "epoch": 0.0, "learning_rate": 0.00016, "loss": 7.9745, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 7.6776, "step": 500 }, { "epoch": 0.0, "learning_rate": 0.00024, "loss": 7.4451, "step": 600 }, { "epoch": 0.0, "learning_rate": 0.00028, "loss": 7.2587, "step": 700 }, { "epoch": 0.0, "learning_rate": 0.00032, "loss": 7.0977, "step": 800 }, { "epoch": 0.0, "learning_rate": 0.00036, "loss": 6.9377, "step": 900 }, { "epoch": 0.01, "learning_rate": 0.0004, "loss": 6.8182, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.0003999999998815762, "loss": 6.6945, "step": 1100 }, { "epoch": 0.01, "learning_rate": 0.0003999999995263047, "loss": 6.5851, "step": 1200 }, { "epoch": 0.01, "learning_rate": 0.00039999999893418564, "loss": 6.476, "step": 1300 }, { "epoch": 0.01, "learning_rate": 0.0003999999981052189, "loss": 6.3753, "step": 1400 }, { "epoch": 0.01, "learning_rate": 0.00039999999703940455, "loss": 6.2997, "step": 1500 }, { "epoch": 0.01, "learning_rate": 0.00039999933291862616, "loss": 5.9559, "step": 2000 }, { "epoch": 0.01, "learning_rate": 0.0003999994734068435, "loss": 6.1649, "step": 3000 }, { "epoch": 0.01, "learning_rate": 0.0003999988151660478, "loss": 5.8819, "step": 4000 }, { "epoch": 0.01, "learning_rate": 0.000399997893630147, "loss": 5.8437, "step": 5000 }, { "epoch": 0.01, "learning_rate": 0.0003999967088003543, "loss": 5.857, "step": 6000 }, { "epoch": 0.01, "learning_rate": 0.00039999526067822954, "loss": 5.7574, "step": 7000 }, { "epoch": 0.01, "learning_rate": 0.00039999354926567907, "loss": 5.6647, "step": 8000 }, { "epoch": 0.02, "learning_rate": 0.00039999157456495604, "loss": 5.598, "step": 9000 }, { "epoch": 0.02, "learning_rate": 0.00039998933657865997, "loss": 5.528, "step": 10000 }, { "epoch": 0.02, "learning_rate": 0.00039998683530973725, "loss": 5.4848, "step": 11000 }, { "epoch": 0.02, "learning_rate": 0.0003999840707614807, "loss": 5.4314, "step": 12000 }, { "epoch": 0.02, "learning_rate": 0.0003999810429375299, "loss": 5.3931, "step": 13000 }, { "epoch": 0.03, "learning_rate": 0.00039997775526446917, "loss": 5.3531, "step": 14000 }, { "epoch": 0.03, "learning_rate": 0.00039997420116469963, "loss": 5.3126, "step": 15000 }, { "epoch": 0.03, "learning_rate": 0.0003999703877510894, "loss": 5.2782, "step": 16000 }, { "epoch": 0.03, "learning_rate": 0.000399966307394198, "loss": 5.2575, "step": 17000 }, { "epoch": 0.03, "learning_rate": 0.0003999619682600994, "loss": 5.2297, "step": 18000 }, { "epoch": 0.03, "learning_rate": 0.0003999573616675516, "loss": 5.1976, "step": 19000 }, { "epoch": 0.04, "learning_rate": 0.00039995249683579117, "loss": 5.182, "step": 20000 }, { "epoch": 0.04, "learning_rate": 0.00039994736403182074, "loss": 5.1576, "step": 21000 }, { "epoch": 0.04, "learning_rate": 0.00039994197352799087, "loss": 5.1435, "step": 22000 }, { "epoch": 0.04, "learning_rate": 0.0003999363145395998, "loss": 5.1286, "step": 23000 }, { "epoch": 0.04, "learning_rate": 0.0003999303983920581, "loss": 5.1065, "step": 24000 }, { "epoch": 0.05, "learning_rate": 0.0003999242132490164, "loss": 5.0946, "step": 25000 }, { "epoch": 0.05, "learning_rate": 0.0003999177714888857, "loss": 5.0748, "step": 26000 }, { "epoch": 0.05, "learning_rate": 0.00039991106022373136, "loss": 5.0674, "step": 27000 }, { "epoch": 0.05, "learning_rate": 0.0003999040928848998, "loss": 5.0493, "step": 28000 }, { "epoch": 0.05, "learning_rate": 0.0003998968555329385, "loss": 5.039, "step": 29000 }, { "epoch": 0.05, "learning_rate": 0.0003998893626520587, "loss": 5.0348, "step": 30000 }, { "epoch": 0.06, "learning_rate": 0.0003998815992513638, "loss": 5.0201, "step": 31000 }, { "epoch": 0.06, "learning_rate": 0.00039987357270987667, "loss": 5.0161, "step": 32000 }, { "epoch": 0.06, "learning_rate": 0.0003998652914592657, "loss": 4.9991, "step": 33000 }, { "epoch": 0.06, "learning_rate": 0.00039985673893135445, "loss": 4.9971, "step": 34000 }, { "epoch": 0.06, "learning_rate": 0.0003998479232953792, "loss": 4.9871, "step": 35000 }, { "epoch": 0.07, "learning_rate": 0.0003998388445629455, "loss": 4.9771, "step": 36000 }, { "epoch": 0.07, "learning_rate": 0.0003998295122192289, "loss": 4.9726, "step": 37000 }, { "epoch": 0.07, "learning_rate": 0.0003998199075931465, "loss": 4.9669, "step": 38000 }, { "epoch": 0.07, "learning_rate": 0.0003998100499065675, "loss": 4.9583, "step": 39000 }, { "epoch": 0.07, "learning_rate": 0.00039979992969921984, "loss": 4.9556, "step": 40000 }, { "epoch": 0.07, "learning_rate": 0.0003997895364597799, "loss": 4.942, "step": 41000 }, { "epoch": 0.08, "learning_rate": 0.0003997788909883795, "loss": 4.9406, "step": 42000 }, { "epoch": 0.08, "learning_rate": 0.00039976797198678043, "loss": 4.9323, "step": 43000 }, { "epoch": 0.08, "learning_rate": 0.00039975680130732954, "loss": 4.9277, "step": 44000 }, { "epoch": 0.08, "learning_rate": 0.0003997453566010126, "loss": 4.923, "step": 45000 }, { "epoch": 0.08, "learning_rate": 0.0003997336489332646, "loss": 4.9197, "step": 46000 }, { "epoch": 0.09, "learning_rate": 0.0003997216904214485, "loss": 4.9051, "step": 47000 }, { "epoch": 0.09, "learning_rate": 0.00039970945714034553, "loss": 4.9077, "step": 48000 }, { "epoch": 0.09, "learning_rate": 0.0003996969609450725, "loss": 4.9002, "step": 49000 }, { "epoch": 0.09, "learning_rate": 0.0003996842147424852, "loss": 4.9013, "step": 50000 }, { "epoch": 0.09, "learning_rate": 0.00039967119303144363, "loss": 4.8946, "step": 51000 }, { "epoch": 0.09, "learning_rate": 0.00039965792187247553, "loss": 4.8882, "step": 52000 }, { "epoch": 0.1, "learning_rate": 0.00039964437471416833, "loss": 4.8894, "step": 53000 }, { "epoch": 0.1, "learning_rate": 0.0003996305786686345, "loss": 4.8764, "step": 54000 }, { "epoch": 0.1, "learning_rate": 0.0003996165061343288, "loss": 4.8782, "step": 55000 }, { "epoch": 0.1, "learning_rate": 0.0003996021852748057, "loss": 4.8759, "step": 56000 }, { "epoch": 0.1, "learning_rate": 0.00039958758743853225, "loss": 4.8727, "step": 57000 }, { "epoch": 0.11, "learning_rate": 0.0003995727418403572, "loss": 4.8669, "step": 58000 }, { "epoch": 0.11, "learning_rate": 0.0003995576187789104, "loss": 4.8694, "step": 59000 }, { "epoch": 0.11, "learning_rate": 0.00039954224852018107, "loss": 4.8688, "step": 60000 }, { "epoch": 0.11, "learning_rate": 0.0003995266003131184, "loss": 4.86, "step": 61000 }, { "epoch": 0.11, "learning_rate": 0.00039951070547469266, "loss": 4.8559, "step": 62000 }, { "epoch": 0.12, "learning_rate": 0.00039949453220433417, "loss": 4.8543, "step": 63000 }, { "epoch": 0.12, "learning_rate": 0.00039947811286982935, "loss": 4.8515, "step": 64000 }, { "epoch": 0.12, "learning_rate": 0.0003994614146212571, "loss": 4.8498, "step": 65000 }, { "epoch": 0.12, "learning_rate": 0.00039944447087704996, "loss": 4.8443, "step": 66000 }, { "epoch": 0.12, "learning_rate": 0.0003994272477381079, "loss": 4.8399, "step": 67000 }, { "epoch": 0.12, "learning_rate": 0.0003994097796733338, "loss": 4.8381, "step": 68000 }, { "epoch": 0.13, "learning_rate": 0.00039939203173462723, "loss": 4.8381, "step": 69000 }, { "epoch": 0.13, "learning_rate": 0.00039937403944117984, "loss": 4.8354, "step": 70000 }, { "epoch": 0.13, "learning_rate": 0.00039935576679607466, "loss": 4.834, "step": 71000 }, { "epoch": 0.13, "learning_rate": 0.0003993372503686054, "loss": 4.8337, "step": 72000 }, { "epoch": 0.13, "learning_rate": 0.0003993184531132279, "loss": 4.8304, "step": 73000 }, { "epoch": 0.14, "learning_rate": 0.00039929939345843064, "loss": 4.8254, "step": 74000 }, { "epoch": 0.14, "learning_rate": 0.000399280090882382, "loss": 4.8248, "step": 75000 }, { "epoch": 0.14, "learning_rate": 0.0003992605067667017, "loss": 4.8248, "step": 76000 }, { "epoch": 0.14, "learning_rate": 0.0003992406803053476, "loss": 4.8246, "step": 77000 }, { "epoch": 0.14, "learning_rate": 0.00039922057183181, "loss": 4.8173, "step": 78000 }, { "epoch": 0.14, "learning_rate": 0.00039920024209092803, "loss": 4.8128, "step": 79000 }, { "epoch": 0.15, "learning_rate": 0.00039917960962754717, "loss": 4.818, "step": 80000 }, { "epoch": 0.15, "learning_rate": 0.00039915871494753167, "loss": 4.8107, "step": 81000 }, { "epoch": 0.15, "learning_rate": 0.00039913755807838893, "loss": 4.8121, "step": 82000 }, { "epoch": 0.15, "learning_rate": 0.00039911613904797174, "loss": 4.8116, "step": 83000 }, { "epoch": 0.15, "learning_rate": 0.0003990944796965674, "loss": 4.8057, "step": 84000 }, { "epoch": 0.16, "learning_rate": 0.0003990725366906298, "loss": 4.8055, "step": 85000 }, { "epoch": 0.16, "learning_rate": 0.0003990503762807127, "loss": 4.8028, "step": 86000 }, { "epoch": 0.16, "learning_rate": 0.00039902790967672147, "loss": 4.7969, "step": 87000 }, { "epoch": 0.16, "learning_rate": 0.0003990052039152944, "loss": 4.8025, "step": 88000 }, { "epoch": 0.16, "learning_rate": 0.00039898221356934855, "loss": 4.8017, "step": 89000 }, { "epoch": 0.16, "learning_rate": 0.00039895896126663653, "loss": 4.7986, "step": 90000 }, { "epoch": 0.17, "learning_rate": 0.0003989354470377698, "loss": 4.7991, "step": 91000 }, { "epoch": 0.17, "learning_rate": 0.00039891169482063473, "loss": 4.7965, "step": 92000 }, { "epoch": 0.17, "learning_rate": 0.00039888765709451975, "loss": 4.792, "step": 93000 }, { "epoch": 0.17, "learning_rate": 0.00039886338196645364, "loss": 4.7862, "step": 94000 }, { "epoch": 0.17, "learning_rate": 0.00039883882086954475, "loss": 4.7916, "step": 95000 }, { "epoch": 0.18, "learning_rate": 0.000398814022958251, "loss": 4.7883, "step": 96000 }, { "epoch": 0.18, "learning_rate": 0.00039878893861975594, "loss": 4.7908, "step": 97000 }, { "epoch": 0.18, "learning_rate": 0.00039876359257893807, "loss": 4.7877, "step": 98000 }, { "epoch": 0.18, "learning_rate": 0.0003987379848691651, "loss": 4.7873, "step": 99000 }, { "epoch": 0.18, "learning_rate": 0.00039871214152416957, "loss": 4.7876, "step": 100000 }, { "epoch": 0.18, "learning_rate": 0.00039868601083955114, "loss": 4.7883, "step": 101000 }, { "epoch": 0.19, "learning_rate": 0.00039865964511100514, "loss": 4.7893, "step": 102000 }, { "epoch": 0.19, "learning_rate": 0.0003986329915890061, "loss": 4.7789, "step": 103000 }, { "epoch": 0.19, "learning_rate": 0.00039860610361561096, "loss": 4.7815, "step": 104000 }, { "epoch": 0.19, "learning_rate": 0.0003985789273964466, "loss": 4.7738, "step": 105000 }, { "epoch": 0.19, "learning_rate": 0.0003985515173196509, "loss": 4.7753, "step": 106000 }, { "epoch": 0.2, "learning_rate": 0.00039852381854628627, "loss": 4.7724, "step": 107000 }, { "epoch": 0.2, "learning_rate": 0.00039849588651028544, "loss": 4.7726, "step": 108000 }, { "epoch": 0.2, "learning_rate": 0.0003984676653284346, "loss": 4.7685, "step": 109000 }, { "epoch": 0.2, "learning_rate": 0.0003984392114801697, "loss": 4.7715, "step": 110000 }, { "epoch": 0.2, "learning_rate": 0.0003984104680382948, "loss": 4.7713, "step": 111000 }, { "epoch": 0.2, "learning_rate": 0.00039838149252745204, "loss": 4.7698, "step": 112000 }, { "epoch": 0.21, "learning_rate": 0.0003983522269767629, "loss": 4.7753, "step": 113000 }, { "epoch": 0.21, "learning_rate": 0.00039832272995577275, "loss": 4.7652, "step": 114000 }, { "epoch": 0.21, "learning_rate": 0.0003982929424502255, "loss": 4.7664, "step": 115000 }, { "epoch": 0.21, "learning_rate": 0.00039826292407426207, "loss": 4.7713, "step": 116000 }, { "epoch": 0.21, "learning_rate": 0.00039823264521022384, "loss": 4.7628, "step": 117000 }, { "epoch": 0.22, "learning_rate": 0.0003982020751975389, "loss": 4.7682, "step": 118000 }, { "epoch": 0.22, "learning_rate": 0.00039817124425512714, "loss": 4.7644, "step": 119000 }, { "epoch": 0.22, "learning_rate": 0.0003981401524235768, "loss": 4.758, "step": 120000 }, { "epoch": 0.22, "learning_rate": 0.00039810883122677967, "loss": 4.7622, "step": 121000 }, { "epoch": 0.22, "learning_rate": 0.0003980772180008777, "loss": 4.762, "step": 122000 }, { "epoch": 0.22, "learning_rate": 0.0003980453760138509, "loss": 4.7571, "step": 123000 }, { "epoch": 0.23, "learning_rate": 0.00039801324155990393, "loss": 4.7619, "step": 124000 }, { "epoch": 0.23, "learning_rate": 0.00039798091147522796, "loss": 4.7618, "step": 125000 }, { "epoch": 0.23, "learning_rate": 0.0003979482562229017, "loss": 4.762, "step": 126000 }, { "epoch": 0.23, "learning_rate": 0.000397915340374997, "loss": 4.7562, "step": 127000 }, { "epoch": 0.23, "learning_rate": 0.00039788216397484706, "loss": 4.7528, "step": 128000 }, { "epoch": 0.24, "learning_rate": 0.00039784876063314606, "loss": 4.7567, "step": 129000 }, { "epoch": 0.24, "learning_rate": 0.00039781506352031947, "loss": 4.7554, "step": 130000 }, { "epoch": 0.24, "learning_rate": 0.00039778114007485855, "loss": 4.7494, "step": 131000 }, { "epoch": 0.24, "learning_rate": 0.00039774692242662465, "loss": 4.7591, "step": 132000 }, { "epoch": 0.24, "learning_rate": 0.00039771244444786484, "loss": 4.7605, "step": 133000 }, { "epoch": 0.24, "learning_rate": 0.0003976777061839689, "loss": 4.7469, "step": 134000 }, { "epoch": 0.25, "learning_rate": 0.00039764274280914674, "loss": 4.7506, "step": 135000 }, { "epoch": 0.25, "learning_rate": 0.00039760748437268835, "loss": 4.7506, "step": 136000 }, { "epoch": 0.25, "learning_rate": 0.0003975720014377832, "loss": 4.7509, "step": 137000 }, { "epoch": 0.25, "learning_rate": 0.00039753622301424524, "loss": 4.7488, "step": 138000 }, { "epoch": 0.25, "learning_rate": 0.00039750022070592105, "loss": 4.7544, "step": 139000 }, { "epoch": 0.26, "learning_rate": 0.0003974639224835218, "loss": 4.7502, "step": 140000 }, { "epoch": 0.26, "learning_rate": 0.0003974274009911748, "loss": 4.7433, "step": 141000 }, { "epoch": 0.26, "learning_rate": 0.00039739058316086716, "loss": 4.7466, "step": 142000 }, { "epoch": 0.26, "learning_rate": 0.0003973535798838411, "loss": 4.7469, "step": 143000 }, { "epoch": 0.26, "learning_rate": 0.0003973162428990996, "loss": 4.7414, "step": 144000 }, { "epoch": 0.26, "learning_rate": 0.00039727864615081464, "loss": 4.7418, "step": 145000 }, { "epoch": 0.27, "learning_rate": 0.0003972407896884818, "loss": 4.7484, "step": 146000 }, { "epoch": 0.27, "learning_rate": 0.00039720271180775053, "loss": 4.7454, "step": 147000 }, { "epoch": 0.27, "learning_rate": 0.0003971643363267646, "loss": 4.744, "step": 148000 }, { "epoch": 0.27, "learning_rate": 0.00039712577881131754, "loss": 4.7369, "step": 149000 }, { "epoch": 0.27, "learning_rate": 0.00039708688477304655, "loss": 4.7375, "step": 150000 }, { "epoch": 0.28, "learning_rate": 0.0003970477312731783, "loss": 4.7414, "step": 151000 }, { "epoch": 0.28, "learning_rate": 0.0003970083183632576, "loss": 4.7389, "step": 152000 }, { "epoch": 0.28, "learning_rate": 0.0003969686858969712, "loss": 4.7378, "step": 153000 }, { "epoch": 0.28, "learning_rate": 0.0003969287545822263, "loss": 4.7372, "step": 154000 }, { "epoch": 0.28, "learning_rate": 0.00039688860433410763, "loss": 4.7393, "step": 155000 }, { "epoch": 0.28, "learning_rate": 0.00039684815482460387, "loss": 4.7315, "step": 156000 }, { "epoch": 0.29, "learning_rate": 0.00039680748700586993, "loss": 4.7371, "step": 157000 }, { "epoch": 0.29, "learning_rate": 0.00039676651951439873, "loss": 4.7353, "step": 158000 }, { "epoch": 0.29, "learning_rate": 0.0003967253343389894, "loss": 4.7315, "step": 159000 }, { "epoch": 0.29, "learning_rate": 0.00039668384908106706, "loss": 4.7358, "step": 160000 }, { "epoch": 0.29, "learning_rate": 0.00039664210489213713, "loss": 4.7339, "step": 161000 }, { "epoch": 0.3, "learning_rate": 0.00039660010182715526, "loss": 4.737, "step": 162000 }, { "epoch": 0.3, "learning_rate": 0.000396557882332566, "loss": 4.7261, "step": 163000 }, { "epoch": 0.3, "learning_rate": 0.0003965153619404471, "loss": 4.7342, "step": 164000 }, { "epoch": 0.3, "learning_rate": 0.0003964726257474391, "loss": 4.7293, "step": 165000 }, { "epoch": 0.3, "learning_rate": 0.0003964295882518688, "loss": 4.7301, "step": 166000 }, { "epoch": 0.3, "learning_rate": 0.00039638633558526285, "loss": 4.7316, "step": 167000 }, { "epoch": 0.31, "learning_rate": 0.00039634278121264703, "loss": 4.7295, "step": 168000 }, { "epoch": 0.31, "learning_rate": 0.0003962990122999811, "loss": 4.7332, "step": 169000 }, { "epoch": 0.31, "learning_rate": 0.0003962549412794449, "loss": 4.732, "step": 170000 }, { "epoch": 0.31, "learning_rate": 0.0003962106563509727, "loss": 4.7321, "step": 171000 }, { "epoch": 0.31, "learning_rate": 0.00039616606891435896, "loss": 4.7276, "step": 172000 }, { "epoch": 0.32, "learning_rate": 0.00039612122322838677, "loss": 4.7245, "step": 173000 }, { "epoch": 0.32, "learning_rate": 0.0003960761645849172, "loss": 4.7286, "step": 174000 }, { "epoch": 0.32, "learning_rate": 0.0003960308028357847, "loss": 4.7239, "step": 175000 }, { "epoch": 0.64, "learning_rate": 0.0003840903997775841, "loss": 4.6145, "step": 176000 }, { "epoch": 0.65, "learning_rate": 0.0003839104648613638, "loss": 4.5905, "step": 177000 }, { "epoch": 0.65, "learning_rate": 0.0003837297421617577, "loss": 4.5891, "step": 178000 }, { "epoch": 0.65, "learning_rate": 0.000383548053178735, "loss": 4.5817, "step": 179000 }, { "epoch": 0.66, "learning_rate": 0.0003833652155473882, "loss": 4.5765, "step": 180000 }, { "epoch": 0.66, "learning_rate": 0.00038318141161813824, "loss": 4.574, "step": 181000 }, { "epoch": 0.66, "learning_rate": 0.0003829966423595951, "loss": 4.5725, "step": 182000 }, { "epoch": 0.67, "learning_rate": 0.00038281109496044006, "loss": 4.5666, "step": 183000 }, { "epoch": 0.67, "learning_rate": 0.00038262439893236937, "loss": 4.5631, "step": 184000 }, { "epoch": 0.68, "learning_rate": 0.00038243692864915963, "loss": 4.5591, "step": 185000 }, { "epoch": 0.68, "learning_rate": 0.0003822483097830243, "loss": 4.5552, "step": 186000 }, { "epoch": 0.68, "learning_rate": 0.00038205873050485524, "loss": 4.5543, "step": 187000 }, { "epoch": 0.69, "learning_rate": 0.0003818683828312813, "loss": 4.5512, "step": 188000 }, { "epoch": 0.69, "learning_rate": 0.00038167688668914063, "loss": 4.5484, "step": 189000 }, { "epoch": 0.69, "learning_rate": 0.0003814844331462512, "loss": 4.5501, "step": 190000 }, { "epoch": 0.7, "learning_rate": 0.0003812912171041104, "loss": 4.5431, "step": 191000 }, { "epoch": 0.7, "learning_rate": 0.0003810968527621949, "loss": 4.5418, "step": 192000 }, { "epoch": 0.7, "learning_rate": 0.00038090153407619305, "loss": 4.5379, "step": 193000 }, { "epoch": 0.71, "learning_rate": 0.00038070526207539536, "loss": 4.538, "step": 194000 }, { "epoch": 0.71, "learning_rate": 0.0003805082354937156, "loss": 4.5377, "step": 195000 }, { "epoch": 0.72, "learning_rate": 0.0003803100609220069, "loss": 4.5354, "step": 196000 }, { "epoch": 0.72, "learning_rate": 0.0003801111357514916, "loss": 4.5321, "step": 197000 }, { "epoch": 0.72, "learning_rate": 0.000379911062782051, "loss": 4.5327, "step": 198000 }, { "epoch": 0.73, "learning_rate": 0.00037971004171739956, "loss": 4.5342, "step": 199000 }, { "epoch": 0.73, "learning_rate": 0.00037950827605766894, "loss": 4.527, "step": 200000 }, { "epoch": 0.73, "learning_rate": 0.00037930536293104657, "loss": 4.5297, "step": 201000 }, { "epoch": 0.74, "learning_rate": 0.00037910170923078203, "loss": 4.5252, "step": 202000 }, { "epoch": 0.74, "learning_rate": 0.00037889690831515295, "loss": 4.5228, "step": 203000 }, { "epoch": 0.74, "learning_rate": 0.0003786911646487036, "loss": 4.5211, "step": 204000 }, { "epoch": 0.75, "learning_rate": 0.00037848447931566176, "loss": 4.521, "step": 205000 }, { "epoch": 0.75, "learning_rate": 0.00037827685340521773, "loss": 4.5257, "step": 206000 }, { "epoch": 0.76, "learning_rate": 0.0003780684970458185, "loss": 4.5204, "step": 207000 }, { "epoch": 0.76, "learning_rate": 0.0003778589942057952, "loss": 4.5209, "step": 208000 }, { "epoch": 0.76, "learning_rate": 0.0003776487649924752, "loss": 4.5167, "step": 209000 }, { "epoch": 0.77, "learning_rate": 0.0003774373896346034, "loss": 4.5142, "step": 210000 }, { "epoch": 0.77, "learning_rate": 0.00037722507921728195, "loss": 4.5166, "step": 211000 }, { "epoch": 0.77, "learning_rate": 0.0003770122622793867, "loss": 4.5127, "step": 212000 }, { "epoch": 0.78, "learning_rate": 0.00037679808696909655, "loss": 4.5163, "step": 213000 }, { "epoch": 0.78, "learning_rate": 0.00037658297996835357, "loss": 4.513, "step": 214000 }, { "epoch": 0.78, "learning_rate": 0.0003763669424107285, "loss": 4.5078, "step": 215000 }, { "epoch": 0.79, "learning_rate": 0.00037614997543469595, "loss": 4.5114, "step": 216000 }, { "epoch": 0.79, "learning_rate": 0.00037593208018362834, "loss": 4.5097, "step": 217000 }, { "epoch": 0.8, "learning_rate": 0.00037571369637505247, "loss": 4.5072, "step": 218000 }, { "epoch": 0.8, "learning_rate": 0.00037549394987438647, "loss": 4.5084, "step": 219000 }, { "epoch": 0.8, "learning_rate": 0.00037527327855580843, "loss": 4.5071, "step": 220000 }, { "epoch": 0.81, "learning_rate": 0.0003750519056381631, "loss": 4.5061, "step": 221000 }, { "epoch": 0.81, "learning_rate": 0.00037482938909921175, "loss": 4.5075, "step": 222000 }, { "epoch": 0.81, "learning_rate": 0.0003746059512444505, "loss": 4.5079, "step": 223000 }, { "epoch": 0.82, "learning_rate": 0.0003743815932513518, "loss": 4.5071, "step": 224000 }, { "epoch": 0.82, "learning_rate": 0.00037415631630223755, "loss": 4.5033, "step": 225000 }, { "epoch": 0.83, "learning_rate": 0.00037393012158427186, "loss": 4.505, "step": 226000 }, { "epoch": 0.83, "learning_rate": 0.00037370323785818266, "loss": 4.5032, "step": 227000 }, { "epoch": 0.83, "learning_rate": 0.00037347521209812743, "loss": 4.5017, "step": 228000 }, { "epoch": 0.84, "learning_rate": 0.0003732465015546745, "loss": 4.502, "step": 229000 }, { "epoch": 0.84, "learning_rate": 0.00037301664955431804, "loss": 4.4998, "step": 230000 }, { "epoch": 0.84, "learning_rate": 0.0003727858857909254, "loss": 4.4994, "step": 231000 }, { "epoch": 0.85, "learning_rate": 0.0003725544436092979, "loss": 4.4985, "step": 232000 }, { "epoch": 0.85, "learning_rate": 0.000372321860881582, "loss": 4.499, "step": 233000 }, { "epoch": 0.85, "learning_rate": 0.00037208837005222694, "loss": 4.4919, "step": 234000 }, { "epoch": 0.86, "learning_rate": 0.0003718542072019544, "loss": 4.4965, "step": 235000 }, { "epoch": 0.86, "learning_rate": 0.00037161890477046666, "loss": 4.4972, "step": 236000 }, { "epoch": 0.87, "learning_rate": 0.00037138293459993847, "loss": 4.4988, "step": 237000 }, { "epoch": 0.08, "learning_rate": 0.00039974046056824423, "loss": 5.0173, "step": 238000 }, { "epoch": 0.08, "learning_rate": 0.0003997382653105697, "loss": 5.1254, "step": 239000 }, { "epoch": 0.08, "learning_rate": 0.0003997360630230883, "loss": 5.137, "step": 240000 }, { "epoch": 0.08, "learning_rate": 0.0003997338515152591, "loss": 5.1396, "step": 241000 }, { "epoch": 0.08, "learning_rate": 0.0003997316285596137, "loss": 5.1539, "step": 242000 }, { "epoch": 0.08, "learning_rate": 0.00039972939860216607, "loss": 5.1836, "step": 243000 }, { "epoch": 0.08, "learning_rate": 0.00039972715717864, "loss": 5.1907, "step": 244000 }, { "epoch": 0.08, "learning_rate": 0.00039972490651670964, "loss": 5.2177, "step": 245000 }, { "epoch": 0.08, "learning_rate": 0.00039972264888099373, "loss": 5.2218, "step": 246000 }, { "epoch": 0.34, "learning_rate": 0.0003955398710520662, "loss": 4.9553, "step": 247000 }, { "epoch": 0.34, "learning_rate": 0.000395503657852559, "loss": 4.8679, "step": 248000 }, { "epoch": 0.34, "learning_rate": 0.00039546729990487664, "loss": 4.8395, "step": 249000 }, { "epoch": 0.34, "learning_rate": 0.0003954307972359379, "loss": 4.8217, "step": 250000 }, { "epoch": 0.34, "learning_rate": 0.000395394149872769, "loss": 4.8152, "step": 251000 }, { "epoch": 0.35, "learning_rate": 0.0003953573947067854, "loss": 4.8026, "step": 252000 }, { "epoch": 0.35, "learning_rate": 0.0003953204581812889, "loss": 4.8017, "step": 253000 }, { "epoch": 0.35, "learning_rate": 0.0003952834141966186, "loss": 4.7977, "step": 254000 }, { "epoch": 0.35, "learning_rate": 0.00039524618861807426, "loss": 4.7963, "step": 255000 }, { "epoch": 0.35, "learning_rate": 0.0003952088184819814, "loss": 4.79, "step": 256000 }, { "epoch": 0.35, "learning_rate": 0.0003951713414028577, "loss": 4.7877, "step": 257000 }, { "epoch": 0.35, "learning_rate": 0.0003951336823792677, "loss": 4.7854, "step": 258000 }, { "epoch": 0.35, "learning_rate": 0.0003950959167570807, "loss": 4.7945, "step": 259000 }, { "epoch": 0.36, "learning_rate": 0.00039505796895741114, "loss": 4.7845, "step": 260000 }, { "epoch": 0.36, "learning_rate": 0.00039501991490389356, "loss": 4.7821, "step": 261000 }, { "epoch": 0.36, "learning_rate": 0.000394981678440416, "loss": 4.7798, "step": 262000 }, { "epoch": 0.36, "learning_rate": 0.00039494333606815397, "loss": 4.7892, "step": 263000 }, { "epoch": 0.36, "learning_rate": 0.00039490481105399416, "loss": 4.7885, "step": 264000 }, { "epoch": 0.36, "learning_rate": 0.000394866141735037, "loss": 4.7838, "step": 265000 }, { "epoch": 0.36, "learning_rate": 0.0003948273670255641, "loss": 4.7812, "step": 266000 }, { "epoch": 0.37, "learning_rate": 0.00039478840932724265, "loss": 4.7749, "step": 267000 }, { "epoch": 0.37, "learning_rate": 0.00039474934658425046, "loss": 4.7823, "step": 268000 }, { "epoch": 0.37, "learning_rate": 0.00039471010062182423, "loss": 4.7809, "step": 269000 }, { "epoch": 0.37, "learning_rate": 0.00039467074996088307, "loss": 4.785, "step": 270000 }, { "epoch": 0.37, "learning_rate": 0.0003946312158504645, "loss": 4.7753, "step": 271000 }, { "epoch": 0.37, "learning_rate": 0.00039459157738799654, "loss": 4.775, "step": 272000 }, { "epoch": 0.37, "learning_rate": 0.0003945517552465506, "loss": 4.7755, "step": 273000 }, { "epoch": 0.38, "learning_rate": 0.0003945118290998296, "loss": 4.7849, "step": 274000 }, { "epoch": 0.38, "learning_rate": 0.00039447175922715307, "loss": 4.7806, "step": 275000 }, { "epoch": 0.38, "learning_rate": 0.00039443150533232405, "loss": 4.7791, "step": 276000 }, { "epoch": 0.38, "learning_rate": 0.00039439110748312647, "loss": 4.7798, "step": 277000 }, { "epoch": 0.38, "learning_rate": 0.00039435056570947044, "loss": 4.7794, "step": 278000 }, { "epoch": 0.38, "learning_rate": 0.0003943099207989059, "loss": 4.7821, "step": 279000 }, { "epoch": 0.38, "learning_rate": 0.0003942690914103384, "loss": 4.7815, "step": 280000 }, { "epoch": 0.38, "learning_rate": 0.00039422811818765134, "loss": 4.7713, "step": 281000 }, { "epoch": 0.39, "learning_rate": 0.00039418704235002724, "loss": 4.7707, "step": 282000 }, { "epoch": 0.39, "learning_rate": 0.00039414582302643454, "loss": 4.7764, "step": 283000 }, { "epoch": 0.39, "learning_rate": 0.0003941044187712859, "loss": 4.7864, "step": 284000 }, { "epoch": 0.39, "learning_rate": 0.00039406287080393925, "loss": 4.774, "step": 285000 }, { "epoch": 0.39, "learning_rate": 0.0003940211791551559, "loss": 4.7698, "step": 286000 }, { "epoch": 0.39, "learning_rate": 0.00039397938576284634, "loss": 4.7754, "step": 287000 }, { "epoch": 0.39, "learning_rate": 0.00039393740698750394, "loss": 4.7764, "step": 288000 }, { "epoch": 0.4, "learning_rate": 0.0003938952846236165, "loss": 4.7764, "step": 289000 }, { "epoch": 0.4, "learning_rate": 0.00039385301870237103, "loss": 4.7747, "step": 290000 }, { "epoch": 0.4, "learning_rate": 0.00039381065173618853, "loss": 4.7784, "step": 291000 }, { "epoch": 0.4, "learning_rate": 0.00039376809893769117, "loss": 4.7792, "step": 292000 }, { "epoch": 0.4, "learning_rate": 0.00039372544544391313, "loss": 4.7726, "step": 293000 }, { "epoch": 0.4, "learning_rate": 0.0003936826488052433, "loss": 4.7736, "step": 294000 }, { "epoch": 0.4, "learning_rate": 0.0003936396659988803, "loss": 4.7759, "step": 295000 }, { "epoch": 0.41, "learning_rate": 0.00039359653982441555, "loss": 4.7719, "step": 296000 }, { "epoch": 0.41, "learning_rate": 0.00039355327031377916, "loss": 4.7775, "step": 297000 }, { "epoch": 0.41, "learning_rate": 0.0003935099009833917, "loss": 4.7814, "step": 298000 }, { "epoch": 0.41, "learning_rate": 0.00039346634503988233, "loss": 4.7722, "step": 299000 }, { "epoch": 0.41, "learning_rate": 0.0003934226458565957, "loss": 4.7745, "step": 300000 }, { "epoch": 0.41, "learning_rate": 0.000393378847379798, "loss": 4.7748, "step": 301000 }, { "epoch": 0.41, "learning_rate": 0.00039333486195728426, "loss": 4.7774, "step": 302000 }, { "epoch": 0.41, "learning_rate": 0.00039329077759239523, "loss": 4.7777, "step": 303000 }, { "epoch": 0.42, "learning_rate": 0.000393246506060789, "loss": 4.7707, "step": 304000 }, { "epoch": 0.42, "learning_rate": 0.0003932021359382358, "loss": 4.7792, "step": 305000 }, { "epoch": 0.42, "learning_rate": 0.000393157578428518, "loss": 4.7711, "step": 306000 }, { "epoch": 0.42, "learning_rate": 0.0003931128779076294, "loss": 4.7712, "step": 307000 }, { "epoch": 0.42, "learning_rate": 0.0003930680793235711, "loss": 4.7732, "step": 308000 }, { "epoch": 0.42, "learning_rate": 0.00039302309302266194, "loss": 4.7753, "step": 309000 }, { "epoch": 0.42, "learning_rate": 0.00039297800901073876, "loss": 4.7747, "step": 310000 }, { "epoch": 0.43, "learning_rate": 0.0003929327370629047, "loss": 4.7756, "step": 311000 }, { "epoch": 0.43, "learning_rate": 0.0003928873222703692, "loss": 4.7733, "step": 312000 }, { "epoch": 0.43, "learning_rate": 0.0003928418102956833, "loss": 4.7714, "step": 313000 }, { "epoch": 0.43, "learning_rate": 0.0003927961100574846, "loss": 4.773, "step": 314000 }, { "epoch": 0.43, "learning_rate": 0.0003927503129900122, "loss": 4.7742, "step": 315000 }, { "epoch": 0.43, "learning_rate": 0.0003927043274413583, "loss": 4.7757, "step": 316000 }, { "epoch": 0.43, "learning_rate": 0.0003926582454165936, "loss": 4.7738, "step": 317000 }, { "epoch": 0.44, "learning_rate": 0.00039261202103549754, "loss": 4.7675, "step": 318000 }, { "epoch": 0.44, "learning_rate": 0.0003925656078478171, "loss": 4.7782, "step": 319000 }, { "epoch": 0.44, "learning_rate": 0.00039251905208725256, "loss": 4.7703, "step": 320000 }, { "epoch": 0.44, "learning_rate": 0.00039247235378827314, "loss": 4.7726, "step": 321000 }, { "epoch": 0.44, "learning_rate": 0.0003924255598974257, "loss": 4.7679, "step": 322000 }, { "epoch": 0.44, "learning_rate": 0.00039237857676789823, "loss": 4.767, "step": 323000 }, { "epoch": 0.44, "learning_rate": 0.0003923314984006603, "loss": 4.7621, "step": 324000 }, { "epoch": 0.44, "learning_rate": 0.0003922842305795883, "loss": 4.7717, "step": 325000 }, { "epoch": 0.45, "learning_rate": 0.00039223686787524505, "loss": 4.7682, "step": 326000 }, { "epoch": 0.45, "learning_rate": 0.0003921893155024742, "loss": 4.783, "step": 327000 }, { "epoch": 0.45, "learning_rate": 0.0003921416686011523, "loss": 4.7705, "step": 328000 }, { "epoch": 0.45, "learning_rate": 0.0003920938318173703, "loss": 4.7678, "step": 329000 }, { "epoch": 0.45, "learning_rate": 0.0003920459008600368, "loss": 4.7697, "step": 330000 }, { "epoch": 0.45, "learning_rate": 0.0003919977798067727, "loss": 4.7749, "step": 331000 }, { "epoch": 0.45, "learning_rate": 0.00039194956493523547, "loss": 4.7797, "step": 332000 }, { "epoch": 0.46, "learning_rate": 0.00039190115975485935, "loss": 4.7678, "step": 333000 }, { "epoch": 0.46, "learning_rate": 0.0003918526124935473, "loss": 4.7674, "step": 334000 }, { "epoch": 0.46, "learning_rate": 0.0003918039719474887, "loss": 4.7711, "step": 335000 }, { "epoch": 0.46, "learning_rate": 0.0003917551407742319, "loss": 4.7686, "step": 336000 }, { "epoch": 0.46, "learning_rate": 0.00039170621667219887, "loss": 4.7708, "step": 337000 }, { "epoch": 0.46, "learning_rate": 0.00039165710173146836, "loss": 4.7681, "step": 338000 }, { "epoch": 0.46, "learning_rate": 0.0003916078942182069, "loss": 4.7697, "step": 339000 }, { "epoch": 0.47, "learning_rate": 0.0003915584956553133, "loss": 4.7665, "step": 340000 }, { "epoch": 0.47, "learning_rate": 0.00039150900487640804, "loss": 4.7757, "step": 341000 }, { "epoch": 0.47, "learning_rate": 0.00039145932283750107, "loss": 4.7653, "step": 342000 }, { "epoch": 0.7, "learning_rate": 0.00038084013217180266, "loss": 4.7047, "step": 343000 }, { "epoch": 0.71, "learning_rate": 0.0003807297139737221, "loss": 4.6748, "step": 344000 }, { "epoch": 0.71, "learning_rate": 0.0003806191054905468, "loss": 4.6694, "step": 345000 }, { "epoch": 0.71, "learning_rate": 0.00038050808546821253, "loss": 4.6753, "step": 346000 }, { "epoch": 0.71, "learning_rate": 0.0003803967646603707, "loss": 4.6691, "step": 347000 }, { "epoch": 0.71, "learning_rate": 0.0003802851432525181, "loss": 4.6667, "step": 348000 }, { "epoch": 0.72, "learning_rate": 0.0003801733335024691, "loss": 4.6662, "step": 349000 }, { "epoch": 0.72, "learning_rate": 0.0003800611117532231, "loss": 4.6614, "step": 350000 }, { "epoch": 0.72, "learning_rate": 0.000379948589963274, "loss": 4.6566, "step": 351000 }, { "epoch": 0.72, "learning_rate": 0.00037983588129147694, "loss": 4.6586, "step": 352000 }, { "epoch": 0.72, "learning_rate": 0.0003797227602826864, "loss": 4.6598, "step": 353000 }, { "epoch": 0.73, "learning_rate": 0.00037960933979699685, "loss": 4.6561, "step": 354000 }, { "epoch": 0.73, "learning_rate": 0.0003794956200234039, "loss": 4.649, "step": 355000 }, { "epoch": 0.73, "learning_rate": 0.00037938171531961043, "loss": 4.6508, "step": 356000 }, { "epoch": 0.73, "learning_rate": 0.0003792673978380055, "loss": 4.6515, "step": 357000 }, { "epoch": 0.74, "learning_rate": 0.00037915301116867755, "loss": 4.6502, "step": 358000 }, { "epoch": 0.74, "learning_rate": 0.00037903821210187236, "loss": 4.6446, "step": 359000 }, { "epoch": 0.74, "learning_rate": 0.00037892299993410043, "loss": 4.6457, "step": 360000 }, { "epoch": 0.74, "learning_rate": 0.0003788074896220918, "loss": 4.6399, "step": 361000 }, { "epoch": 0.74, "learning_rate": 0.0003786916813583244, "loss": 4.6416, "step": 362000 }, { "epoch": 0.75, "learning_rate": 0.0003785755753357728, "loss": 4.6394, "step": 363000 }, { "epoch": 0.75, "learning_rate": 0.00037845917174790744, "loss": 4.644, "step": 364000 }, { "epoch": 0.75, "learning_rate": 0.0003783425876381264, "loss": 4.6455, "step": 365000 }, { "epoch": 0.75, "learning_rate": 0.0003782255897991082, "loss": 4.6427, "step": 366000 }, { "epoch": 0.75, "learning_rate": 0.00037810841242106534, "loss": 4.6383, "step": 367000 }, { "epoch": 0.76, "learning_rate": 0.0003779908211099408, "loss": 4.6398, "step": 368000 }, { "epoch": 0.76, "learning_rate": 0.0003778729332078945, "loss": 4.639, "step": 369000 }, { "epoch": 0.76, "learning_rate": 0.00037775474891136603, "loss": 4.642, "step": 370000 }, { "epoch": 0.76, "learning_rate": 0.0003776363870456683, "loss": 4.6378, "step": 371000 }, { "epoch": 0.76, "learning_rate": 0.00037751761084737167, "loss": 4.6251, "step": 372000 }, { "epoch": 0.77, "learning_rate": 0.0003773986580663642, "loss": 4.6378, "step": 373000 }, { "epoch": 0.77, "learning_rate": 0.0003772792907571875, "loss": 4.638, "step": 374000 }, { "epoch": 0.77, "learning_rate": 0.0003771598676628421, "loss": 4.6329, "step": 375000 }, { "epoch": 0.77, "learning_rate": 0.0003770399103327158, "loss": 4.6331, "step": 376000 }, { "epoch": 0.77, "learning_rate": 0.000376919657996196, "loss": 4.6307, "step": 377000 }, { "epoch": 0.78, "learning_rate": 0.0003767992315479937, "loss": 4.6366, "step": 378000 }, { "epoch": 0.78, "learning_rate": 0.0003766783900948219, "loss": 4.6312, "step": 379000 }, { "epoch": 0.78, "learning_rate": 0.0003765572542376675, "loss": 4.6322, "step": 380000 }, { "epoch": 0.78, "learning_rate": 0.00037643582417838255, "loss": 4.6272, "step": 381000 }, { "epoch": 0.78, "learning_rate": 0.0003763142219901536, "loss": 4.6261, "step": 382000 }, { "epoch": 0.79, "learning_rate": 0.0003761922044278193, "loss": 4.6332, "step": 383000 }, { "epoch": 0.79, "learning_rate": 0.0003760698932716468, "loss": 4.6285, "step": 384000 }, { "epoch": 0.79, "learning_rate": 0.0003759472887254464, "loss": 4.6315, "step": 385000 }, { "epoch": 0.79, "learning_rate": 0.00037582451403762754, "loss": 4.6252, "step": 386000 }, { "epoch": 0.79, "learning_rate": 0.00037570132361763626, "loss": 4.6238, "step": 387000 }, { "epoch": 0.8, "learning_rate": 0.00037557808768022013, "loss": 4.6309, "step": 388000 }, { "epoch": 0.8, "learning_rate": 0.0003754543124991863, "loss": 4.6227, "step": 389000 }, { "epoch": 0.8, "learning_rate": 0.0003753302449538835, "loss": 4.6264, "step": 390000 }, { "epoch": 0.8, "learning_rate": 0.0003752058852510489, "loss": 4.6297, "step": 391000 }, { "epoch": 0.81, "learning_rate": 0.00037508135839531953, "loss": 4.6229, "step": 392000 }, { "epoch": 0.81, "learning_rate": 0.0003749564152912182, "loss": 4.6277, "step": 393000 }, { "epoch": 0.81, "learning_rate": 0.0003748313060326983, "loss": 4.6258, "step": 394000 }, { "epoch": 0.81, "learning_rate": 0.0003747057803592816, "loss": 4.6298, "step": 395000 }, { "epoch": 0.81, "learning_rate": 0.0003745799635688954, "loss": 4.6275, "step": 396000 }, { "epoch": 0.82, "learning_rate": 0.0003744538558711915, "loss": 4.6305, "step": 397000 }, { "epoch": 0.82, "learning_rate": 0.00037432758401983454, "loss": 4.6254, "step": 398000 }, { "epoch": 0.82, "learning_rate": 0.0003742008954287709, "loss": 4.6232, "step": 399000 }, { "epoch": 0.82, "learning_rate": 0.00037407404368583003, "loss": 4.6243, "step": 400000 }, { "epoch": 0.82, "learning_rate": 0.0003739467750449806, "loss": 4.6271, "step": 401000 }, { "epoch": 0.83, "learning_rate": 0.00037381921655191264, "loss": 4.625, "step": 402000 }, { "epoch": 0.83, "learning_rate": 0.0003736914964119172, "loss": 4.6207, "step": 403000 }, { "epoch": 0.83, "learning_rate": 0.0003735633591418774, "loss": 4.6222, "step": 404000 }, { "epoch": 0.83, "learning_rate": 0.0003734349326585155, "loss": 4.6274, "step": 405000 }, { "epoch": 0.83, "learning_rate": 0.00037330621717583185, "loss": 4.6215, "step": 406000 }, { "epoch": 0.84, "learning_rate": 0.00037317734205675264, "loss": 4.6239, "step": 407000 }, { "epoch": 0.84, "learning_rate": 0.00037304817894443345, "loss": 4.6213, "step": 408000 }, { "epoch": 0.84, "learning_rate": 0.0003729185983290953, "loss": 4.6217, "step": 409000 }, { "epoch": 0.84, "learning_rate": 0.00037278872957481737, "loss": 4.6203, "step": 410000 }, { "epoch": 0.84, "learning_rate": 0.00037265870319842543, "loss": 4.6233, "step": 411000 }, { "epoch": 0.85, "learning_rate": 0.0003725282591035563, "loss": 4.6189, "step": 412000 }, { "epoch": 0.85, "learning_rate": 0.0003723976583952915, "loss": 4.6208, "step": 413000 }, { "epoch": 0.85, "learning_rate": 0.0003722666398284116, "loss": 4.6228, "step": 414000 }, { "epoch": 0.85, "learning_rate": 0.000372135334208968, "loss": 4.6152, "step": 415000 }, { "epoch": 0.85, "learning_rate": 0.00037200374175575874, "loss": 4.6127, "step": 416000 }, { "epoch": 0.86, "learning_rate": 0.00037187199471021856, "loss": 4.6182, "step": 417000 }, { "epoch": 0.86, "learning_rate": 0.0003717399618422258, "loss": 4.6196, "step": 418000 }, { "epoch": 0.86, "learning_rate": 0.0003716075107774151, "loss": 4.6225, "step": 419000 }, { "epoch": 0.86, "learning_rate": 0.00037147477375836516, "loss": 4.6181, "step": 420000 }, { "epoch": 0.86, "learning_rate": 0.0003713418841716614, "loss": 4.6207, "step": 421000 }, { "epoch": 0.87, "learning_rate": 0.00037120857619355976, "loss": 4.6168, "step": 422000 }, { "epoch": 0.87, "learning_rate": 0.00037107511666167, "loss": 4.6148, "step": 423000 }, { "epoch": 0.87, "learning_rate": 0.0003709412386121666, "loss": 4.6241, "step": 424000 }, { "epoch": 0.87, "learning_rate": 0.00037080707571865136, "loss": 4.6167, "step": 425000 }, { "epoch": 0.87, "learning_rate": 0.000370672762794291, "loss": 4.618, "step": 426000 }, { "epoch": 0.88, "learning_rate": 0.0003705380311681886, "loss": 4.6185, "step": 427000 }, { "epoch": 0.88, "learning_rate": 0.00037040301536994983, "loss": 4.6159, "step": 428000 }, { "epoch": 0.88, "learning_rate": 0.00037026771562455524, "loss": 4.6172, "step": 429000 }, { "epoch": 0.88, "learning_rate": 0.0003701322678825694, "loss": 4.6185, "step": 430000 }, { "epoch": 0.89, "learning_rate": 0.0003699964012030795, "loss": 4.6142, "step": 431000 }, { "epoch": 0.89, "learning_rate": 0.0003698605238364365, "loss": 4.619, "step": 432000 }, { "epoch": 0.89, "learning_rate": 0.0003697240914104684, "loss": 4.6125, "step": 433000 }, { "epoch": 0.89, "learning_rate": 0.0003695873761686538, "loss": 4.613, "step": 434000 }, { "epoch": 0.89, "learning_rate": 0.00036945037833880495, "loss": 4.6193, "step": 435000 }, { "epoch": 0.9, "learning_rate": 0.00036931337299122744, "loss": 4.6195, "step": 436000 }, { "epoch": 0.9, "learning_rate": 0.00036917581123466377, "loss": 4.6155, "step": 437000 }, { "epoch": 0.9, "learning_rate": 0.0003690379675758677, "loss": 4.6124, "step": 438000 }, { "epoch": 0.9, "learning_rate": 0.0003688998422445319, "loss": 4.6118, "step": 439000 }, { "epoch": 0.9, "learning_rate": 0.0003687617125650919, "loss": 4.6118, "step": 440000 }, { "epoch": 0.91, "learning_rate": 0.00036862302514182444, "loss": 4.6115, "step": 441000 }, { "epoch": 0.91, "learning_rate": 0.0003684841958461244, "loss": 4.6145, "step": 442000 }, { "epoch": 0.91, "learning_rate": 0.0003683449469728375, "loss": 4.6107, "step": 443000 }, { "epoch": 0.91, "learning_rate": 0.00036820541758180987, "loss": 4.6125, "step": 444000 }, { "epoch": 0.91, "learning_rate": 0.00036806574785514423, "loss": 4.6076, "step": 445000 }, { "epoch": 0.92, "learning_rate": 0.0003679256584065426, "loss": 4.6135, "step": 446000 }, { "epoch": 0.92, "learning_rate": 0.00036778528913887205, "loss": 4.611, "step": 447000 }, { "epoch": 0.92, "learning_rate": 0.0003676447810744613, "loss": 4.6169, "step": 448000 }, { "epoch": 0.92, "learning_rate": 0.00036750385315005585, "loss": 4.6124, "step": 449000 }, { "epoch": 0.92, "learning_rate": 0.0003673626461094468, "loss": 4.6091, "step": 450000 }, { "epoch": 0.93, "learning_rate": 0.00036722130181307566, "loss": 4.6084, "step": 451000 }, { "epoch": 0.93, "learning_rate": 0.0003670795375249432, "loss": 4.6093, "step": 452000 }, { "epoch": 0.93, "learning_rate": 0.0003669376370093399, "loss": 4.6098, "step": 453000 }, { "epoch": 0.93, "learning_rate": 0.00036679531641764155, "loss": 4.6088, "step": 454000 }, { "epoch": 0.93, "learning_rate": 0.00036665271789039375, "loss": 4.6135, "step": 455000 }, { "epoch": 0.94, "learning_rate": 0.00036650984166521224, "loss": 4.6111, "step": 456000 }, { "epoch": 0.94, "learning_rate": 0.00036636697456429214, "loss": 4.6087, "step": 457000 }, { "epoch": 0.94, "learning_rate": 0.00036622354421214545, "loss": 4.6105, "step": 458000 }, { "epoch": 0.94, "learning_rate": 0.0003660798368772088, "loss": 4.6142, "step": 459000 }, { "epoch": 0.94, "learning_rate": 0.00036593599692117735, "loss": 4.6036, "step": 460000 }, { "epoch": 0.95, "learning_rate": 0.00036579173661589563, "loss": 4.6129, "step": 461000 }, { "epoch": 0.95, "learning_rate": 0.00036564720004735664, "loss": 4.6066, "step": 462000 }, { "epoch": 0.95, "learning_rate": 0.00036550253240678936, "loss": 4.6065, "step": 463000 }, { "epoch": 0.95, "learning_rate": 0.0003653574443103918, "loss": 4.6065, "step": 464000 }, { "epoch": 0.95, "learning_rate": 0.0003652122261755973, "loss": 4.6055, "step": 465000 }, { "epoch": 0.96, "learning_rate": 0.00036506658751743075, "loss": 4.6077, "step": 466000 }, { "epoch": 0.96, "learning_rate": 0.0003649206738043425, "loss": 4.6113, "step": 467000 }, { "epoch": 0.96, "learning_rate": 0.00036477463160518477, "loss": 4.6056, "step": 468000 }, { "epoch": 0.96, "learning_rate": 0.00036462816878657725, "loss": 4.608, "step": 469000 }, { "epoch": 0.97, "learning_rate": 0.000364481431643597, "loss": 4.6023, "step": 470000 }, { "epoch": 0.97, "learning_rate": 0.0003643345675688004, "loss": 4.6027, "step": 471000 }, { "epoch": 0.97, "learning_rate": 0.00036418728278478005, "loss": 4.6068, "step": 472000 }, { "epoch": 0.97, "learning_rate": 0.00036403972441104724, "loss": 4.6072, "step": 473000 }, { "epoch": 0.97, "learning_rate": 0.0003638920406616534, "loss": 4.6038, "step": 474000 }, { "epoch": 0.98, "learning_rate": 0.00036374393611956704, "loss": 4.6014, "step": 475000 }, { "epoch": 0.98, "learning_rate": 0.00036359555872652883, "loss": 4.6013, "step": 476000 }, { "epoch": 0.98, "learning_rate": 0.00036344705751586385, "loss": 4.606, "step": 477000 }, { "epoch": 2.18, "learning_rate": 0.00024019467959966674, "loss": 4.4837, "step": 478000 }, { "epoch": 2.19, "learning_rate": 0.00023963222037118084, "loss": 4.4223, "step": 479000 }, { "epoch": 2.19, "learning_rate": 0.00023906943468937218, "loss": 4.39, "step": 480000 }, { "epoch": 2.2, "learning_rate": 0.00023850689045664867, "loss": 4.3772, "step": 481000 }, { "epoch": 2.2, "learning_rate": 0.00023794346609281965, "loss": 4.3726, "step": 482000 }, { "epoch": 2.2, "learning_rate": 0.00023737972918605284, "loss": 4.3631, "step": 483000 }, { "epoch": 2.21, "learning_rate": 0.00023681624857694363, "loss": 4.3497, "step": 484000 }, { "epoch": 2.21, "learning_rate": 0.00023625190081838816, "loss": 4.3482, "step": 485000 }, { "epoch": 2.22, "learning_rate": 0.00023568781924440977, "loss": 4.3443, "step": 486000 }, { "epoch": 2.22, "learning_rate": 0.0002351228792097228, "loss": 4.3383, "step": 487000 }, { "epoch": 2.23, "learning_rate": 0.000234558215237771, "loss": 4.3393, "step": 488000 }, { "epoch": 2.23, "learning_rate": 0.0002339927015221048, "loss": 4.3349, "step": 489000 }, { "epoch": 2.24, "learning_rate": 0.00023342690780622, "loss": 4.3287, "step": 490000 }, { "epoch": 2.24, "learning_rate": 0.00023286083875059848, "loss": 4.3263, "step": 491000 }, { "epoch": 2.25, "learning_rate": 0.0002322950654913731, "loss": 4.3202, "step": 492000 }, { "epoch": 2.25, "learning_rate": 0.0002317284600104378, "loss": 4.3198, "step": 493000 }, { "epoch": 2.25, "learning_rate": 0.00023116216017581755, "loss": 4.3163, "step": 494000 }, { "epoch": 2.26, "learning_rate": 0.00023059503691953928, "loss": 4.3163, "step": 495000 }, { "epoch": 2.26, "learning_rate": 0.00023002766164983935, "loss": 4.3123, "step": 496000 }, { "epoch": 2.27, "learning_rate": 0.00022946060678482666, "loss": 4.3091, "step": 497000 }, { "epoch": 2.27, "learning_rate": 0.00022889274175117623, "loss": 4.3067, "step": 498000 }, { "epoch": 2.28, "learning_rate": 0.00022832463872602635, "loss": 4.3056, "step": 499000 }, { "epoch": 2.28, "learning_rate": 0.00022775687084019932, "loss": 4.3042, "step": 500000 }, { "epoch": 2.29, "learning_rate": 0.00022718887477616112, "loss": 4.306, "step": 501000 }, { "epoch": 2.29, "learning_rate": 0.00022662008630440305, "loss": 4.3007, "step": 502000 }, { "epoch": 2.3, "learning_rate": 0.0002260510785611647, "loss": 4.2996, "step": 503000 }, { "epoch": 2.3, "learning_rate": 0.00022548185623340192, "loss": 4.2993, "step": 504000 }, { "epoch": 2.3, "learning_rate": 0.00022491299354534364, "loss": 4.2962, "step": 505000 }, { "epoch": 2.31, "learning_rate": 0.0002243433563192932, "loss": 4.2954, "step": 506000 }, { "epoch": 2.31, "learning_rate": 0.00022377408851168427, "loss": 4.2964, "step": 507000 }, { "epoch": 2.32, "learning_rate": 0.00022320405513710757, "loss": 4.2958, "step": 508000 }, { "epoch": 2.32, "learning_rate": 0.00022263440094754997, "loss": 4.2934, "step": 509000 }, { "epoch": 2.33, "learning_rate": 0.0002220639901872479, "loss": 4.2935, "step": 510000 }, { "epoch": 2.33, "learning_rate": 0.00022149396836606137, "loss": 4.2906, "step": 511000 }, { "epoch": 2.34, "learning_rate": 0.00022092319899525643, "loss": 4.2897, "step": 512000 }, { "epoch": 2.34, "learning_rate": 0.00022035282830486165, "loss": 4.2916, "step": 513000 }, { "epoch": 2.35, "learning_rate": 0.00021978171911058022, "loss": 4.2891, "step": 514000 }, { "epoch": 2.35, "learning_rate": 0.00021921101832488073, "loss": 4.2867, "step": 515000 }, { "epoch": 2.35, "learning_rate": 0.00021863958810533452, "loss": 4.288, "step": 516000 }, { "epoch": 2.36, "learning_rate": 0.00021806800435022003, "loss": 4.2857, "step": 517000 }, { "epoch": 2.36, "learning_rate": 0.00021749684357306648, "loss": 4.285, "step": 518000 }, { "epoch": 2.37, "learning_rate": 0.00021692496701433082, "loss": 4.2832, "step": 519000 }, { "epoch": 2.37, "learning_rate": 0.00021635352312751783, "loss": 4.2824, "step": 520000 }, { "epoch": 2.38, "learning_rate": 0.0002157819448048862, "loss": 4.2806, "step": 521000 }, { "epoch": 2.38, "learning_rate": 0.0002152096644013863, "loss": 4.2772, "step": 522000 }, { "epoch": 2.39, "learning_rate": 0.00021463725871483544, "loss": 4.2798, "step": 523000 }, { "epoch": 2.39, "learning_rate": 0.0002140647324601787, "loss": 4.2798, "step": 524000 }, { "epoch": 2.4, "learning_rate": 0.00021349266305175916, "loss": 4.2802, "step": 525000 }, { "epoch": 2.4, "learning_rate": 0.00021291990991843793, "loss": 4.2786, "step": 526000 }, { "epoch": 2.41, "learning_rate": 0.0002123476232740738, "loss": 4.2791, "step": 527000 }, { "epoch": 2.41, "learning_rate": 0.00021177466211441055, "loss": 4.274, "step": 528000 }, { "epoch": 2.41, "learning_rate": 0.0002112021770710695, "loss": 4.2765, "step": 529000 }, { "epoch": 2.42, "learning_rate": 0.00021062959993907988, "loss": 4.2751, "step": 530000 }, { "epoch": 2.42, "learning_rate": 0.00021005636214541413, "loss": 4.2751, "step": 531000 }, { "epoch": 2.43, "learning_rate": 0.00020948304151680226, "loss": 4.2744, "step": 532000 }, { "epoch": 2.43, "learning_rate": 0.00020891021621191204, "loss": 4.273, "step": 533000 }, { "epoch": 2.44, "learning_rate": 0.00020833674415252564, "loss": 4.2769, "step": 534000 }, { "epoch": 2.44, "learning_rate": 0.00020776320342280467, "loss": 4.2695, "step": 535000 }, { "epoch": 2.45, "learning_rate": 0.00020718959874704363, "loss": 4.2689, "step": 536000 }, { "epoch": 2.45, "learning_rate": 0.00020661650854196894, "loss": 4.2671, "step": 537000 }, { "epoch": 2.46, "learning_rate": 0.0002060427902012143, "loss": 4.2677, "step": 538000 }, { "epoch": 2.46, "learning_rate": 0.0002054701696683469, "loss": 4.2713, "step": 539000 }, { "epoch": 2.46, "learning_rate": 0.00020489635658938387, "loss": 4.2659, "step": 540000 }, { "epoch": 2.47, "learning_rate": 0.0002043225031787951, "loss": 4.2666, "step": 541000 }, { "epoch": 2.47, "learning_rate": 0.00020374861416345058, "loss": 4.2609, "step": 542000 }, { "epoch": 2.48, "learning_rate": 0.0002031746942705136, "loss": 4.2631, "step": 543000 }, { "epoch": 2.48, "learning_rate": 0.0002026013221849334, "loss": 4.2656, "step": 544000 }, { "epoch": 2.49, "learning_rate": 0.0002020273547383406, "loss": 4.2688, "step": 545000 }, { "epoch": 2.49, "learning_rate": 0.00020145394458320146, "loss": 4.2592, "step": 546000 }, { "epoch": 2.5, "learning_rate": 0.00020087994847524482, "loss": 4.2617, "step": 547000 }, { "epoch": 2.5, "learning_rate": 0.00020030651912449513, "loss": 4.2613, "step": 548000 }, { "epoch": 2.51, "learning_rate": 0.00019973251324840986, "loss": 4.2557, "step": 549000 }, { "epoch": 2.51, "learning_rate": 0.0001991590835766299, "loss": 4.2604, "step": 550000 }, { "epoch": 2.51, "learning_rate": 0.00019858508682597277, "loss": 4.2614, "step": 551000 }, { "epoch": 2.52, "learning_rate": 0.00019801167570775345, "loss": 4.2587, "step": 552000 }, { "epoch": 2.52, "learning_rate": 0.0001974377069757808, "loss": 4.2567, "step": 553000 }, { "epoch": 2.53, "learning_rate": 0.000196863759349592, "loss": 4.2542, "step": 554000 }, { "epoch": 2.53, "learning_rate": 0.0001962904114641484, "loss": 4.2528, "step": 555000 }, { "epoch": 2.54, "learning_rate": 0.00019571652019933017, "loss": 4.2529, "step": 556000 }, { "epoch": 2.54, "learning_rate": 0.00019514323805461362, "loss": 4.2504, "step": 557000 }, { "epoch": 2.55, "learning_rate": 0.00019456999583540802, "loss": 4.2557, "step": 558000 }, { "epoch": 2.55, "learning_rate": 0.00019399622450669583, "loss": 4.2527, "step": 559000 }, { "epoch": 2.56, "learning_rate": 0.00019342250263149486, "loss": 4.2493, "step": 560000 }, { "epoch": 2.56, "learning_rate": 0.0001928488349355918, "loss": 4.2533, "step": 561000 }, { "epoch": 2.56, "learning_rate": 0.00019227579972212256, "loss": 4.251, "step": 562000 }, { "epoch": 2.57, "learning_rate": 0.00019170225449436132, "loss": 4.2442, "step": 563000 }, { "epoch": 2.57, "learning_rate": 0.00019112935105686604, "loss": 4.2508, "step": 564000 }, { "epoch": 2.58, "learning_rate": 0.00019055652054145262, "loss": 4.2482, "step": 565000 }, { "epoch": 2.58, "learning_rate": 0.00018998319437138936, "loss": 4.2453, "step": 566000 }, { "epoch": 2.59, "learning_rate": 0.0001894099507104425, "loss": 4.2467, "step": 567000 }, { "epoch": 2.59, "learning_rate": 0.00018883679428045936, "loss": 4.2429, "step": 568000 }, { "epoch": 2.6, "learning_rate": 0.00018826430281954561, "loss": 4.2436, "step": 569000 }, { "epoch": 2.6, "learning_rate": 0.00018769190783313742, "loss": 4.2462, "step": 570000 }, { "epoch": 2.61, "learning_rate": 0.00018711904121225677, "loss": 4.2429, "step": 571000 }, { "epoch": 2.61, "learning_rate": 0.000186546280692719, "loss": 4.2415, "step": 572000 }, { "epoch": 2.61, "learning_rate": 0.0001859736309923917, "loss": 4.242, "step": 573000 }, { "epoch": 2.62, "learning_rate": 0.00018540166930311399, "loss": 4.2415, "step": 574000 }, { "epoch": 2.62, "learning_rate": 0.00018482925526851332, "loss": 4.2394, "step": 575000 }, { "epoch": 2.63, "learning_rate": 0.00018425696619637965, "loss": 4.2393, "step": 576000 }, { "epoch": 2.63, "learning_rate": 0.00018368537889375085, "loss": 4.2374, "step": 577000 }, { "epoch": 2.64, "learning_rate": 0.00018311335375069304, "loss": 4.2376, "step": 578000 }, { "epoch": 2.64, "learning_rate": 0.00018254203951910075, "loss": 4.2361, "step": 579000 }, { "epoch": 2.65, "learning_rate": 0.00018197029713347917, "loss": 4.2363, "step": 580000 }, { "epoch": 2.65, "learning_rate": 0.000181399274777884, "loss": 4.2322, "step": 581000 }, { "epoch": 2.66, "learning_rate": 0.00018082783396875207, "loss": 4.235, "step": 582000 }, { "epoch": 2.66, "learning_rate": 0.00018025655108206925, "loss": 4.2327, "step": 583000 }, { "epoch": 2.67, "learning_rate": 0.0001796854308235321, "loss": 4.2323, "step": 584000 }, { "epoch": 2.67, "learning_rate": 0.0001791150487652753, "loss": 4.2297, "step": 585000 }, { "epoch": 2.67, "learning_rate": 0.00017854426770033718, "loss": 4.2339, "step": 586000 }, { "epoch": 2.68, "learning_rate": 0.00017797423388223084, "loss": 4.2315, "step": 587000 }, { "epoch": 2.68, "learning_rate": 0.00017740381079830306, "loss": 4.2289, "step": 588000 }, { "epoch": 2.69, "learning_rate": 0.0001768341439831626, "loss": 4.2285, "step": 589000 }, { "epoch": 2.69, "learning_rate": 0.00017626409765587338, "loss": 4.2273, "step": 590000 }, { "epoch": 2.7, "learning_rate": 0.000175694246842843, "loss": 4.2272, "step": 591000 }, { "epoch": 2.7, "learning_rate": 0.00017512459623797167, "loss": 4.2267, "step": 592000 }, { "epoch": 2.71, "learning_rate": 0.00017455571987530613, "loss": 4.2242, "step": 593000 }, { "epoch": 2.71, "learning_rate": 0.00017398648354988546, "loss": 4.2238, "step": 594000 }, { "epoch": 2.72, "learning_rate": 0.00017341803041304732, "loss": 4.2245, "step": 595000 }, { "epoch": 2.72, "learning_rate": 0.00017284922710364303, "loss": 4.2219, "step": 596000 }, { "epoch": 2.72, "learning_rate": 0.00017228121590341918, "loss": 4.2215, "step": 597000 }, { "epoch": 2.73, "learning_rate": 0.0001717128643323442, "loss": 4.2196, "step": 598000 }, { "epoch": 2.73, "learning_rate": 0.00017114474576434977, "loss": 4.2186, "step": 599000 }, { "epoch": 2.74, "learning_rate": 0.00017057686487906743, "loss": 4.2218, "step": 600000 }, { "epoch": 2.74, "learning_rate": 0.00017000922635417116, "loss": 4.2175, "step": 601000 }, { "epoch": 2.75, "learning_rate": 0.00016944183486533842, "loss": 4.2174, "step": 602000 }, { "epoch": 2.75, "learning_rate": 0.00016887582911145858, "loss": 4.2208, "step": 603000 }, { "epoch": 2.76, "learning_rate": 0.00016830894519618436, "loss": 4.2176, "step": 604000 }, { "epoch": 2.76, "learning_rate": 0.00016774232232230643, "loss": 4.2131, "step": 605000 }, { "epoch": 2.77, "learning_rate": 0.00016717596515713635, "loss": 4.2148, "step": 606000 }, { "epoch": 2.77, "learning_rate": 0.00016661044431598456, "loss": 4.2163, "step": 607000 }, { "epoch": 2.77, "learning_rate": 0.0001660446322840068, "loss": 4.2121, "step": 608000 }, { "epoch": 2.78, "learning_rate": 0.0001654796653358085, "loss": 4.2126, "step": 609000 }, { "epoch": 2.78, "learning_rate": 0.0001649144170608772, "loss": 4.2111, "step": 610000 }, { "epoch": 2.79, "learning_rate": 0.00016435002260167044, "loss": 4.2093, "step": 611000 }, { "epoch": 2.79, "learning_rate": 0.0001637853566890836, "loss": 4.2104, "step": 612000 }, { "epoch": 2.8, "learning_rate": 0.00016322155329606282, "loss": 4.2104, "step": 613000 }, { "epoch": 2.8, "learning_rate": 0.00016265748833194975, "loss": 4.2095, "step": 614000 }, { "epoch": 2.81, "learning_rate": 0.00016209373096067142, "loss": 4.2061, "step": 615000 }, { "epoch": 2.81, "learning_rate": 0.00016153141240150847, "loss": 4.2059, "step": 616000 }, { "epoch": 2.82, "learning_rate": 0.0001609682835060673, "loss": 4.2093, "step": 617000 }, { "epoch": 2.82, "learning_rate": 0.00016040547611755718, "loss": 4.2025, "step": 618000 }, { "epoch": 2.82, "learning_rate": 0.00015984299487186134, "loss": 4.2069, "step": 619000 }, { "epoch": 2.83, "learning_rate": 0.00015928140638588216, "loss": 4.2031, "step": 620000 }, { "epoch": 2.83, "learning_rate": 0.00015872015263128903, "loss": 4.2021, "step": 621000 }, { "epoch": 2.84, "learning_rate": 0.00015815867691759442, "loss": 4.2014, "step": 622000 }, { "epoch": 2.84, "learning_rate": 0.00015759754585375357, "loss": 4.2014, "step": 623000 }, { "epoch": 2.85, "learning_rate": 0.00015703676406184148, "loss": 4.2015, "step": 624000 }, { "epoch": 2.85, "learning_rate": 0.0001564768964106519, "loss": 4.2018, "step": 625000 }, { "epoch": 2.86, "learning_rate": 0.000155917386545611, "loss": 4.2003, "step": 626000 }, { "epoch": 2.86, "learning_rate": 0.00015535767954213264, "loss": 4.1976, "step": 627000 }, { "epoch": 2.87, "learning_rate": 0.00015479834026051583, "loss": 4.1972, "step": 628000 }, { "epoch": 2.87, "learning_rate": 0.00015423937330807675, "loss": 4.1957, "step": 629000 }, { "epoch": 2.88, "learning_rate": 0.00015368134168927352, "loss": 4.1951, "step": 630000 }, { "epoch": 2.88, "learning_rate": 0.00015312313282100077, "loss": 4.1952, "step": 631000 }, { "epoch": 2.88, "learning_rate": 0.00015256586770904422, "loss": 4.1928, "step": 632000 }, { "epoch": 2.89, "learning_rate": 0.00015200843529853173, "loss": 4.1941, "step": 633000 }, { "epoch": 2.89, "learning_rate": 0.00015145195503595184, "loss": 4.1938, "step": 634000 }, { "epoch": 2.9, "learning_rate": 0.00015089531743123636, "loss": 4.1933, "step": 635000 }, { "epoch": 2.9, "learning_rate": 0.00015033964033472967, "loss": 4.1919, "step": 636000 }, { "epoch": 2.91, "learning_rate": 0.00014978381585768676, "loss": 4.191, "step": 637000 }, { "epoch": 2.91, "learning_rate": 0.0001492289602175133, "loss": 4.1907, "step": 638000 }, { "epoch": 2.92, "learning_rate": 0.00014867396716325404, "loss": 4.1906, "step": 639000 }, { "epoch": 2.92, "learning_rate": 0.00014811995124263547, "loss": 4.19, "step": 640000 }, { "epoch": 2.93, "learning_rate": 0.00014756580787890456, "loss": 4.1854, "step": 641000 }, { "epoch": 2.93, "learning_rate": 0.0001470126499134229, "loss": 4.1862, "step": 642000 }, { "epoch": 2.93, "learning_rate": 0.0001464593744799972, "loss": 4.1804, "step": 643000 }, { "epoch": 2.94, "learning_rate": 0.00014590709267699477, "loss": 4.1812, "step": 644000 }, { "epoch": 2.94, "learning_rate": 0.00014535470338508303, "loss": 4.1811, "step": 645000 }, { "epoch": 2.95, "learning_rate": 0.0001448033159230627, "loss": 4.1812, "step": 646000 }, { "epoch": 2.95, "learning_rate": 0.00014425238221106002, "loss": 4.1827, "step": 647000 }, { "epoch": 2.96, "learning_rate": 0.00014370135598273356, "loss": 4.1792, "step": 648000 }, { "epoch": 2.96, "learning_rate": 0.00014315079349020695, "loss": 4.1829, "step": 649000 }, { "epoch": 2.97, "learning_rate": 0.00014260069926850117, "loss": 4.1798, "step": 650000 }, { "epoch": 2.97, "learning_rate": 0.00014205162723252818, "loss": 4.1813, "step": 651000 }, { "epoch": 2.98, "learning_rate": 0.00014150248266247203, "loss": 4.1771, "step": 652000 }, { "epoch": 2.98, "learning_rate": 0.0001409543683610207, "loss": 4.1744, "step": 653000 }, { "epoch": 2.98, "learning_rate": 0.00014040673945025616, "loss": 4.1791, "step": 654000 }, { "epoch": 2.99, "learning_rate": 0.00013985905299225343, "loss": 4.1795, "step": 655000 }, { "epoch": 2.99, "learning_rate": 0.00013931186191936434, "loss": 4.1764, "step": 656000 }, { "epoch": 3.0, "learning_rate": 0.0001387651707388392, "loss": 4.1717, "step": 657000 }, { "epoch": 3.0, "learning_rate": 0.0001382195298871527, "loss": 4.1585, "step": 658000 }, { "epoch": 3.01, "learning_rate": 0.00013767385148545907, "loss": 4.1503, "step": 659000 }, { "epoch": 3.01, "learning_rate": 0.0001371292313756203, "loss": 4.1517, "step": 660000 }, { "epoch": 3.02, "learning_rate": 0.00013658458371390849, "loss": 4.1508, "step": 661000 }, { "epoch": 3.02, "learning_rate": 0.00013604100227223385, "loss": 4.1545, "step": 662000 }, { "epoch": 3.03, "learning_rate": 0.00013549740327772723, "loss": 4.1505, "step": 663000 }, { "epoch": 3.03, "learning_rate": 0.00013495542119768334, "loss": 4.1496, "step": 664000 }, { "epoch": 3.03, "learning_rate": 0.00013441288822507396, "loss": 4.1504, "step": 665000 }, { "epoch": 3.04, "learning_rate": 0.0001338708954980116, "loss": 4.1514, "step": 666000 }, { "epoch": 3.04, "learning_rate": 0.0001333299886553773, "loss": 4.1504, "step": 667000 }, { "epoch": 3.05, "learning_rate": 0.00013278908925682, "loss": 4.1553, "step": 668000 }, { "epoch": 3.05, "learning_rate": 0.0001322487434791535, "loss": 4.154, "step": 669000 }, { "epoch": 3.06, "learning_rate": 0.00013170895577324293, "loss": 4.1501, "step": 670000 }, { "epoch": 3.06, "learning_rate": 0.00013117026952808839, "loss": 4.1481, "step": 671000 }, { "epoch": 3.07, "learning_rate": 0.00013063161073068494, "loss": 4.153, "step": 672000 }, { "epoch": 3.07, "learning_rate": 0.00013009406112599048, "loss": 4.148, "step": 673000 }, { "epoch": 3.08, "learning_rate": 0.00012955708619025508, "loss": 4.1457, "step": 674000 }, { "epoch": 3.08, "learning_rate": 0.00012902015369654687, "loss": 4.1496, "step": 675000 }, { "epoch": 3.09, "learning_rate": 0.00012848434192302686, "loss": 4.1481, "step": 676000 }, { "epoch": 3.09, "learning_rate": 0.00012794858258770753, "loss": 4.148, "step": 677000 }, { "epoch": 3.09, "learning_rate": 0.00012741341674486485, "loss": 4.1484, "step": 678000 }, { "epoch": 3.1, "learning_rate": 0.00012687884880269694, "loss": 4.1446, "step": 679000 }, { "epoch": 3.1, "learning_rate": 0.00012634541682779958, "loss": 4.1428, "step": 680000 }, { "epoch": 3.11, "learning_rate": 0.00012581205728294073, "loss": 4.1455, "step": 681000 }, { "epoch": 3.11, "learning_rate": 0.00012527984127101713, "loss": 4.1422, "step": 682000 }, { "epoch": 3.12, "learning_rate": 0.0001247477076791393, "loss": 4.1427, "step": 683000 }, { "epoch": 3.12, "learning_rate": 0.00012421672514822168, "loss": 4.1434, "step": 684000 }, { "epoch": 3.13, "learning_rate": 0.00012368583502464424, "loss": 4.1414, "step": 685000 }, { "epoch": 3.13, "learning_rate": 0.00012315610345216445, "loss": 4.1437, "step": 686000 }, { "epoch": 3.14, "learning_rate": 0.00012262647427127763, "loss": 4.1419, "step": 687000 }, { "epoch": 3.14, "learning_rate": 0.0001220980110934919, "loss": 4.1379, "step": 688000 }, { "epoch": 3.14, "learning_rate": 0.00012156966028818173, "loss": 4.1382, "step": 689000 }, { "epoch": 3.15, "learning_rate": 0.00012104248289959676, "loss": 4.1365, "step": 690000 }, { "epoch": 3.15, "learning_rate": 0.00012051542786067112, "loss": 4.1394, "step": 691000 }, { "epoch": 3.16, "learning_rate": 0.00011998955361347148, "loss": 4.1366, "step": 692000 }, { "epoch": 3.16, "learning_rate": 0.00011946381168908787, "loss": 4.1347, "step": 693000 }, { "epoch": 3.17, "learning_rate": 0.00011893873314682198, "loss": 4.1357, "step": 694000 }, { "epoch": 3.17, "learning_rate": 0.00011841432231178195, "loss": 4.1337, "step": 695000 }, { "epoch": 3.18, "learning_rate": 0.0001178911069052703, "loss": 4.1347, "step": 696000 }, { "epoch": 3.18, "learning_rate": 0.00011736804375947676, "loss": 4.1351, "step": 697000 }, { "epoch": 3.19, "learning_rate": 0.00011684618329987129, "loss": 4.1297, "step": 698000 }, { "epoch": 3.19, "learning_rate": 0.00011632448506008744, "loss": 4.1351, "step": 699000 }, { "epoch": 3.19, "learning_rate": 0.00011580399672456457, "loss": 4.1329, "step": 700000 }, { "epoch": 3.2, "learning_rate": 0.00011528368056262728, "loss": 4.1313, "step": 701000 }, { "epoch": 3.2, "learning_rate": 0.00011476458148319966, "loss": 4.1265, "step": 702000 }, { "epoch": 3.21, "learning_rate": 0.00011424566452545455, "loss": 4.129, "step": 703000 }, { "epoch": 3.21, "learning_rate": 0.00011372797178840713, "loss": 4.1299, "step": 704000 }, { "epoch": 3.22, "learning_rate": 0.00011321047111514422, "loss": 4.1257, "step": 705000 }, { "epoch": 3.22, "learning_rate": 0.0001126942017604717, "loss": 4.1281, "step": 706000 }, { "epoch": 3.23, "learning_rate": 0.00011217813440536418, "loss": 4.1266, "step": 707000 }, { "epoch": 3.23, "learning_rate": 0.00011166279044499894, "loss": 4.1249, "step": 708000 }, { "epoch": 3.24, "learning_rate": 0.00011114817412429949, "loss": 4.1247, "step": 709000 }, { "epoch": 3.24, "learning_rate": 0.00011063428968219605, "loss": 4.1229, "step": 710000 }, { "epoch": 3.24, "learning_rate": 0.00011012114135158998, "loss": 4.1245, "step": 711000 }, { "epoch": 3.25, "learning_rate": 0.00010960924539610728, "loss": 4.1261, "step": 712000 }, { "epoch": 3.25, "learning_rate": 0.00010909758121624652, "loss": 4.1228, "step": 713000 }, { "epoch": 3.26, "learning_rate": 0.00010858717634585534, "loss": 4.1197, "step": 714000 }, { "epoch": 3.26, "learning_rate": 0.00010807701315830314, "loss": 4.1174, "step": 715000 }, { "epoch": 3.27, "learning_rate": 0.00010756862520028245, "loss": 4.1188, "step": 716000 }, { "epoch": 3.27, "learning_rate": 0.00010705997903373485, "loss": 4.1191, "step": 717000 }, { "epoch": 3.28, "learning_rate": 0.00010655209842052723, "loss": 4.114, "step": 718000 }, { "epoch": 3.28, "learning_rate": 0.00010604549426910888, "loss": 4.1175, "step": 719000 }, { "epoch": 3.29, "learning_rate": 0.00010553915653058473, "loss": 4.1166, "step": 720000 }, { "epoch": 3.29, "learning_rate": 0.00010503359687251983, "loss": 4.1143, "step": 721000 }, { "epoch": 3.29, "learning_rate": 0.00010452881945924391, "loss": 4.1152, "step": 722000 }, { "epoch": 3.3, "learning_rate": 0.00010402533204546334, "loss": 4.1116, "step": 723000 }, { "epoch": 3.3, "learning_rate": 0.00010352213079632074, "loss": 4.1111, "step": 724000 }, { "epoch": 3.31, "learning_rate": 0.00010301972424201705, "loss": 4.1103, "step": 725000 }, { "epoch": 3.31, "learning_rate": 0.00010251861772823774, "loss": 4.1111, "step": 726000 }, { "epoch": 3.32, "learning_rate": 0.00010201781216707713, "loss": 4.1106, "step": 727000 }, { "epoch": 3.32, "learning_rate": 0.00010151831328589558, "loss": 4.1069, "step": 728000 }, { "epoch": 3.33, "learning_rate": 0.00010101962398354699, "loss": 4.1067, "step": 729000 }, { "epoch": 3.33, "learning_rate": 0.00010052174835955799, "loss": 4.1035, "step": 730000 }, { "epoch": 3.34, "learning_rate": 0.00010002419336242872, "loss": 4.1095, "step": 731000 }, { "epoch": 3.34, "learning_rate": 9.952746187288931e-05, "loss": 4.1049, "step": 732000 }, { "epoch": 3.35, "learning_rate": 9.903155798255135e-05, "loss": 4.0988, "step": 733000 }, { "epoch": 3.35, "learning_rate": 9.853648577620898e-05, "loss": 4.1043, "step": 734000 }, { "epoch": 3.35, "learning_rate": 9.804274314943199e-05, "loss": 4.1043, "step": 735000 }, { "epoch": 3.36, "learning_rate": 9.754934569616405e-05, "loss": 4.1018, "step": 736000 }, { "epoch": 3.36, "learning_rate": 9.705777639819362e-05, "loss": 4.099, "step": 737000 }, { "epoch": 3.37, "learning_rate": 9.656606908833878e-05, "loss": 4.1011, "step": 738000 }, { "epoch": 3.37, "learning_rate": 9.6075213770881e-05, "loss": 4.1025, "step": 739000 }, { "epoch": 3.38, "learning_rate": 9.558570405937759e-05, "loss": 4.1005, "step": 740000 }, { "epoch": 3.38, "learning_rate": 9.509656398720454e-05, "loss": 4.0979, "step": 741000 }, { "epoch": 3.39, "learning_rate": 9.46082880118432e-05, "loss": 4.0973, "step": 742000 }, { "epoch": 3.39, "learning_rate": 9.412088015525628e-05, "loss": 4.0912, "step": 743000 }, { "epoch": 3.4, "learning_rate": 9.363434443225589e-05, "loss": 4.0913, "step": 744000 }, { "epoch": 3.4, "learning_rate": 9.31486848504702e-05, "loss": 4.0922, "step": 745000 }, { "epoch": 3.4, "learning_rate": 9.266390541031052e-05, "loss": 4.095, "step": 746000 }, { "epoch": 3.41, "learning_rate": 9.218049355729118e-05, "loss": 4.0916, "step": 747000 }, { "epoch": 3.41, "learning_rate": 9.169748548247643e-05, "loss": 4.0896, "step": 748000 }, { "epoch": 3.42, "learning_rate": 9.121585117197211e-05, "loss": 4.0896, "step": 749000 }, { "epoch": 3.42, "learning_rate": 9.073463036084202e-05, "loss": 4.0849, "step": 750000 }, { "epoch": 3.43, "learning_rate": 9.025430957607068e-05, "loss": 4.0861, "step": 751000 }, { "epoch": 3.43, "learning_rate": 8.977489277409341e-05, "loss": 4.0873, "step": 752000 }, { "epoch": 3.44, "learning_rate": 8.929686195794506e-05, "loss": 4.083, "step": 753000 }, { "epoch": 3.44, "learning_rate": 8.882021833036489e-05, "loss": 4.0873, "step": 754000 }, { "epoch": 3.45, "learning_rate": 8.834353438745977e-05, "loss": 4.083, "step": 755000 }, { "epoch": 3.45, "learning_rate": 8.786824547005008e-05, "loss": 4.0832, "step": 756000 }, { "epoch": 3.45, "learning_rate": 8.739340396441291e-05, "loss": 4.0828, "step": 757000 }, { "epoch": 3.46, "learning_rate": 8.691949000704588e-05, "loss": 4.087, "step": 758000 }, { "epoch": 3.46, "learning_rate": 8.644650750161096e-05, "loss": 4.0797, "step": 759000 }, { "epoch": 3.47, "learning_rate": 8.597446034409749e-05, "loss": 4.0808, "step": 760000 }, { "epoch": 3.47, "learning_rate": 8.55033524227903e-05, "loss": 4.0762, "step": 761000 }, { "epoch": 3.48, "learning_rate": 8.503365731066581e-05, "loss": 4.0769, "step": 762000 }, { "epoch": 3.48, "learning_rate": 8.456443854672643e-05, "loss": 4.0744, "step": 763000 }, { "epoch": 3.49, "learning_rate": 8.409617063343962e-05, "loss": 4.0785, "step": 764000 }, { "epoch": 3.49, "learning_rate": 8.362885742796067e-05, "loss": 4.074, "step": 765000 }, { "epoch": 3.5, "learning_rate": 8.316296865415034e-05, "loss": 4.0752, "step": 766000 }, { "epoch": 3.5, "learning_rate": 8.269757543994949e-05, "loss": 4.0721, "step": 767000 }, { "epoch": 3.5, "learning_rate": 8.223314845388103e-05, "loss": 4.0745, "step": 768000 }, { "epoch": 3.51, "learning_rate": 8.176969152146221e-05, "loss": 4.0721, "step": 769000 }, { "epoch": 3.51, "learning_rate": 8.130767045556329e-05, "loss": 4.0721, "step": 770000 }, { "epoch": 3.52, "learning_rate": 8.084616409542043e-05, "loss": 4.0681, "step": 771000 }, { "epoch": 3.52, "learning_rate": 8.038609924698259e-05, "loss": 4.0728, "step": 772000 }, { "epoch": 3.53, "learning_rate": 7.992701769691633e-05, "loss": 4.0687, "step": 773000 }, { "epoch": 3.53, "learning_rate": 7.946846516190165e-05, "loss": 4.0675, "step": 774000 }, { "epoch": 3.54, "learning_rate": 7.90109054534227e-05, "loss": 4.0665, "step": 775000 }, { "epoch": 3.54, "learning_rate": 7.855434234043022e-05, "loss": 4.0655, "step": 776000 }, { "epoch": 3.55, "learning_rate": 7.80992346454953e-05, "loss": 4.0671, "step": 777000 }, { "epoch": 3.55, "learning_rate": 7.764512904833741e-05, "loss": 4.0638, "step": 778000 }, { "epoch": 3.56, "learning_rate": 7.71915762338268e-05, "loss": 4.0613, "step": 779000 }, { "epoch": 3.56, "learning_rate": 7.673948703544935e-05, "loss": 4.0572, "step": 780000 }, { "epoch": 3.56, "learning_rate": 7.628796009423646e-05, "loss": 4.0639, "step": 781000 }, { "epoch": 3.57, "learning_rate": 7.583745217759814e-05, "loss": 4.0613, "step": 782000 }, { "epoch": 3.57, "learning_rate": 7.538841596949084e-05, "loss": 4.0572, "step": 783000 }, { "epoch": 3.58, "learning_rate": 7.493995619788687e-05, "loss": 4.0566, "step": 784000 }, { "epoch": 3.58, "learning_rate": 7.44925265544582e-05, "loss": 4.0562, "step": 785000 }, { "epoch": 3.59, "learning_rate": 7.404613072471351e-05, "loss": 4.056, "step": 786000 }, { "epoch": 3.59, "learning_rate": 7.360077238564593e-05, "loss": 4.0507, "step": 787000 }, { "epoch": 3.6, "learning_rate": 7.315645520570287e-05, "loss": 4.0505, "step": 788000 }, { "epoch": 3.6, "learning_rate": 7.271362559401307e-05, "loss": 4.0534, "step": 789000 }, { "epoch": 3.61, "learning_rate": 7.22714006530347e-05, "loss": 4.0539, "step": 790000 }, { "epoch": 3.61, "learning_rate": 7.183066846739989e-05, "loss": 4.0536, "step": 791000 }, { "epoch": 3.61, "learning_rate": 7.139055032133843e-05, "loss": 4.0522, "step": 792000 }, { "epoch": 3.62, "learning_rate": 7.095236859806331e-05, "loss": 4.05, "step": 793000 }, { "epoch": 3.62, "learning_rate": 7.051437066874354e-05, "loss": 4.0474, "step": 794000 }, { "epoch": 3.63, "learning_rate": 7.007743932145127e-05, "loss": 4.0424, "step": 795000 }, { "epoch": 3.63, "learning_rate": 6.964157815522e-05, "loss": 4.0445, "step": 796000 }, { "epoch": 3.64, "learning_rate": 6.920679076026799e-05, "loss": 4.0437, "step": 797000 }, { "epoch": 3.64, "learning_rate": 6.877308071796904e-05, "loss": 4.0428, "step": 798000 }, { "epoch": 3.65, "learning_rate": 6.834088368883074e-05, "loss": 4.0413, "step": 799000 }, { "epoch": 3.65, "learning_rate": 6.790933797416663e-05, "loss": 4.0423, "step": 800000 }, { "epoch": 3.66, "learning_rate": 6.747888029936322e-05, "loss": 4.0412, "step": 801000 }, { "epoch": 3.66, "learning_rate": 6.704994302979443e-05, "loss": 4.0394, "step": 802000 }, { "epoch": 3.66, "learning_rate": 6.66216709659637e-05, "loss": 4.0399, "step": 803000 }, { "epoch": 3.67, "learning_rate": 6.619449754859523e-05, "loss": 4.0385, "step": 804000 }, { "epoch": 3.67, "learning_rate": 6.576885181589794e-05, "loss": 4.0375, "step": 805000 }, { "epoch": 3.68, "learning_rate": 6.534388513092143e-05, "loss": 4.0376, "step": 806000 }, { "epoch": 3.68, "learning_rate": 6.492002761761704e-05, "loss": 4.0367, "step": 807000 }, { "epoch": 3.69, "learning_rate": 6.44972827673282e-05, "loss": 4.0361, "step": 808000 }, { "epoch": 3.69, "learning_rate": 6.40760751322673e-05, "loss": 4.0294, "step": 809000 }, { "epoch": 3.7, "learning_rate": 6.365556492400127e-05, "loss": 4.0324, "step": 810000 }, { "epoch": 3.7, "learning_rate": 6.323659661921848e-05, "loss": 4.0314, "step": 811000 }, { "epoch": 3.71, "learning_rate": 6.281833489418096e-05, "loss": 4.0306, "step": 812000 }, { "epoch": 3.71, "learning_rate": 6.240161971012996e-05, "loss": 4.0301, "step": 813000 }, { "epoch": 3.71, "learning_rate": 6.198562023551751e-05, "loss": 4.0285, "step": 814000 }, { "epoch": 3.72, "learning_rate": 6.157158618328416e-05, "loss": 4.0266, "step": 815000 }, { "epoch": 3.72, "learning_rate": 6.115786150987899e-05, "loss": 4.0292, "step": 816000 }, { "epoch": 3.73, "learning_rate": 6.0745280488710155e-05, "loss": 4.0234, "step": 817000 }, { "epoch": 3.73, "learning_rate": 6.0333846518236035e-05, "loss": 4.0236, "step": 818000 }, { "epoch": 3.74, "learning_rate": 5.9923562987466307e-05, "loss": 4.0237, "step": 819000 }, { "epoch": 3.74, "learning_rate": 5.951484182819116e-05, "loss": 4.021, "step": 820000 }, { "epoch": 3.75, "learning_rate": 5.910727554160531e-05, "loss": 4.0171, "step": 821000 }, { "epoch": 3.75, "learning_rate": 5.8700461244659956e-05, "loss": 4.0197, "step": 822000 }, { "epoch": 3.76, "learning_rate": 5.829481084172575e-05, "loss": 4.0153, "step": 823000 }, { "epoch": 3.76, "learning_rate": 5.789032767417306e-05, "loss": 4.0209, "step": 824000 }, { "epoch": 3.76, "learning_rate": 5.748701507375753e-05, "loss": 4.0169, "step": 825000 }, { "epoch": 3.77, "learning_rate": 5.708487636259276e-05, "loss": 4.0142, "step": 826000 }, { "epoch": 3.77, "learning_rate": 5.6684315225520025e-05, "loss": 4.0168, "step": 827000 }, { "epoch": 3.78, "learning_rate": 5.628453303834178e-05, "loss": 4.0179, "step": 828000 }, { "epoch": 3.78, "learning_rate": 5.588633265133554e-05, "loss": 4.0114, "step": 829000 }, { "epoch": 3.79, "learning_rate": 5.5489316965551574e-05, "loss": 4.0113, "step": 830000 }, { "epoch": 3.79, "learning_rate": 5.5093093617013605e-05, "loss": 4.0101, "step": 831000 }, { "epoch": 3.8, "learning_rate": 5.469806387662206e-05, "loss": 4.012, "step": 832000 }, { "epoch": 3.8, "learning_rate": 5.4304230998263825e-05, "loss": 4.011, "step": 833000 }, { "epoch": 3.81, "learning_rate": 5.391199025820963e-05, "loss": 4.0085, "step": 834000 }, { "epoch": 3.81, "learning_rate": 5.352055962116598e-05, "loss": 4.0103, "step": 835000 }, { "epoch": 3.82, "learning_rate": 5.313033554533935e-05, "loss": 4.007, "step": 836000 }, { "epoch": 3.82, "learning_rate": 5.2741321245032015e-05, "loss": 4.0057, "step": 837000 }, { "epoch": 3.82, "learning_rate": 5.235429431454388e-05, "loss": 4.0023, "step": 838000 }, { "epoch": 3.83, "learning_rate": 5.196770673276694e-05, "loss": 4.0024, "step": 839000 }, { "epoch": 3.83, "learning_rate": 5.158233850316285e-05, "loss": 4.0052, "step": 840000 }, { "epoch": 3.84, "learning_rate": 5.119895986925622e-05, "loss": 3.9984, "step": 841000 }, { "epoch": 3.84, "learning_rate": 5.0816037402308914e-05, "loss": 4.0024, "step": 842000 }, { "epoch": 3.85, "learning_rate": 5.0434343773913936e-05, "loss": 3.9997, "step": 843000 }, { "epoch": 3.85, "learning_rate": 5.005388212810789e-05, "loss": 4.0001, "step": 844000 }, { "epoch": 3.86, "learning_rate": 4.967465559877949e-05, "loss": 3.9953, "step": 845000 }, { "epoch": 3.86, "learning_rate": 4.929666730964366e-05, "loss": 3.9974, "step": 846000 }, { "epoch": 3.87, "learning_rate": 4.8920296500061624e-05, "loss": 3.9978, "step": 847000 }, { "epoch": 3.87, "learning_rate": 4.854479277562882e-05, "loss": 3.9965, "step": 848000 }, { "epoch": 3.87, "learning_rate": 4.8170910230147306e-05, "loss": 3.9938, "step": 849000 }, { "epoch": 3.88, "learning_rate": 4.77979034302229e-05, "loss": 3.9955, "step": 850000 }, { "epoch": 3.88, "learning_rate": 4.7426521455285876e-05, "loss": 3.9919, "step": 851000 }, { "epoch": 3.89, "learning_rate": 4.705602385748844e-05, "loss": 3.9902, "step": 852000 }, { "epoch": 3.89, "learning_rate": 4.668678606973318e-05, "loss": 3.9888, "step": 853000 }, { "epoch": 3.9, "learning_rate": 4.631881113345728e-05, "loss": 3.9864, "step": 854000 }, { "epoch": 3.9, "learning_rate": 4.5952834232442806e-05, "loss": 3.9886, "step": 855000 }, { "epoch": 3.91, "learning_rate": 4.5587391540988944e-05, "loss": 3.9877, "step": 856000 }, { "epoch": 3.91, "learning_rate": 4.5223220756802585e-05, "loss": 3.9858, "step": 857000 }, { "epoch": 3.92, "learning_rate": 4.4860324879583624e-05, "loss": 3.9823, "step": 858000 }, { "epoch": 3.92, "learning_rate": 4.4498706898530285e-05, "loss": 3.9823, "step": 859000 }, { "epoch": 3.92, "learning_rate": 4.413836979231471e-05, "loss": 3.9826, "step": 860000 }, { "epoch": 3.93, "learning_rate": 4.3779674940056856e-05, "loss": 3.9845, "step": 861000 }, { "epoch": 3.93, "learning_rate": 4.342190718903205e-05, "loss": 3.9797, "step": 862000 }, { "epoch": 3.94, "learning_rate": 4.3066140849412765e-05, "loss": 3.9826, "step": 863000 }, { "epoch": 3.94, "learning_rate": 4.271095293545859e-05, "loss": 3.9786, "step": 864000 }, { "epoch": 3.95, "learning_rate": 4.235706062219449e-05, "loss": 3.9789, "step": 865000 }, { "epoch": 3.95, "learning_rate": 4.200481876887719e-05, "loss": 3.9804, "step": 866000 }, { "epoch": 3.96, "learning_rate": 4.165352508853595e-05, "loss": 3.9759, "step": 867000 }, { "epoch": 3.96, "learning_rate": 4.13038850558964e-05, "loss": 3.9766, "step": 868000 }, { "epoch": 3.97, "learning_rate": 4.095520157140329e-05, "loss": 3.9754, "step": 869000 }, { "epoch": 3.97, "learning_rate": 4.0608174867936735e-05, "loss": 3.9726, "step": 870000 }, { "epoch": 3.97, "learning_rate": 4.026211305630183e-05, "loss": 3.9728, "step": 871000 }, { "epoch": 3.98, "learning_rate": 3.9917367016619276e-05, "loss": 3.9719, "step": 872000 }, { "epoch": 3.98, "learning_rate": 3.9573939588586015e-05, "loss": 3.9689, "step": 873000 }, { "epoch": 3.99, "learning_rate": 3.923183360103733e-05, "loss": 3.9669, "step": 874000 }, { "epoch": 3.99, "learning_rate": 3.88910518719237e-05, "loss": 3.9705, "step": 875000 }, { "epoch": 4.0, "learning_rate": 3.8551935999150546e-05, "loss": 3.969, "step": 876000 }, { "epoch": 4.0, "learning_rate": 3.821414732678987e-05, "loss": 3.9472, "step": 877000 }, { "epoch": 4.01, "learning_rate": 3.7877352503423325e-05, "loss": 3.9284, "step": 878000 }, { "epoch": 4.01, "learning_rate": 3.7541893095445734e-05, "loss": 3.9269, "step": 879000 }, { "epoch": 4.02, "learning_rate": 3.720810531795154e-05, "loss": 3.9298, "step": 880000 }, { "epoch": 4.02, "learning_rate": 3.687532367703408e-05, "loss": 3.9306, "step": 881000 }, { "epoch": 4.03, "learning_rate": 3.65438857052858e-05, "loss": 3.9292, "step": 882000 }, { "epoch": 4.03, "learning_rate": 3.6213794132784204e-05, "loss": 3.9297, "step": 883000 }, { "epoch": 4.03, "learning_rate": 3.588537974618371e-05, "loss": 3.9289, "step": 884000 }, { "epoch": 4.04, "learning_rate": 3.555798776484851e-05, "loss": 3.9268, "step": 885000 }, { "epoch": 4.04, "learning_rate": 3.5232601024993396e-05, "loss": 3.933, "step": 886000 }, { "epoch": 4.05, "learning_rate": 3.490791805247826e-05, "loss": 3.9279, "step": 887000 }, { "epoch": 4.05, "learning_rate": 3.458459495478781e-05, "loss": 3.9265, "step": 888000 }, { "epoch": 4.06, "learning_rate": 3.4262634395156536e-05, "loss": 3.9279, "step": 889000 }, { "epoch": 4.06, "learning_rate": 3.394235893817297e-05, "loss": 3.9273, "step": 890000 }, { "epoch": 4.07, "learning_rate": 3.3623130030302484e-05, "loss": 3.924, "step": 891000 }, { "epoch": 4.07, "learning_rate": 3.330527158014394e-05, "loss": 3.9239, "step": 892000 }, { "epoch": 4.08, "learning_rate": 3.298910200457324e-05, "loss": 3.9256, "step": 893000 }, { "epoch": 4.08, "learning_rate": 3.267399093621268e-05, "loss": 3.9225, "step": 894000 }, { "epoch": 4.08, "learning_rate": 3.2360258143687926e-05, "loss": 3.9213, "step": 895000 }, { "epoch": 4.09, "learning_rate": 3.204821787257311e-05, "loss": 3.923, "step": 896000 }, { "epoch": 4.09, "learning_rate": 3.173724798834707e-05, "loss": 3.9165, "step": 897000 }, { "epoch": 4.1, "learning_rate": 3.142797298671269e-05, "loss": 3.9243, "step": 898000 }, { "epoch": 4.1, "learning_rate": 3.111977624644229e-05, "loss": 3.9195, "step": 899000 }, { "epoch": 4.11, "learning_rate": 3.081297058418091e-05, "loss": 3.9208, "step": 900000 }, { "epoch": 4.11, "learning_rate": 3.0507558527107828e-05, "loss": 3.92, "step": 901000 }, { "epoch": 4.12, "learning_rate": 3.0203845908662563e-05, "loss": 3.9204, "step": 902000 }, { "epoch": 4.12, "learning_rate": 2.9901227197694415e-05, "loss": 3.9196, "step": 903000 }, { "epoch": 4.13, "learning_rate": 2.9600310118919393e-05, "loss": 3.9197, "step": 904000 }, { "epoch": 4.13, "learning_rate": 2.9300494714831896e-05, "loss": 3.9205, "step": 905000 }, { "epoch": 4.13, "learning_rate": 2.9002383080493055e-05, "loss": 3.9151, "step": 906000 }, { "epoch": 4.14, "learning_rate": 2.8705380851790375e-05, "loss": 3.9172, "step": 907000 }, { "epoch": 4.14, "learning_rate": 2.8409789588637402e-05, "loss": 3.9158, "step": 908000 }, { "epoch": 4.15, "learning_rate": 2.8115611725839808e-05, "loss": 3.9135, "step": 909000 }, { "epoch": 4.15, "learning_rate": 2.7823141740592663e-05, "loss": 3.9149, "step": 910000 }, { "epoch": 4.16, "learning_rate": 2.7531796516897657e-05, "loss": 3.9118, "step": 911000 }, { "epoch": 4.16, "learning_rate": 2.7242161139836732e-05, "loss": 3.9082, "step": 912000 }, { "epoch": 4.17, "learning_rate": 2.6953658144950188e-05, "loss": 3.9131, "step": 913000 }, { "epoch": 4.17, "learning_rate": 2.666686690950142e-05, "loss": 3.9105, "step": 914000 }, { "epoch": 4.18, "learning_rate": 2.6381215639576494e-05, "loss": 3.9091, "step": 915000 }, { "epoch": 4.18, "learning_rate": 2.6097277985549907e-05, "loss": 3.9072, "step": 916000 }, { "epoch": 4.18, "learning_rate": 2.581476991673275e-05, "loss": 3.9109, "step": 917000 }, { "epoch": 4.19, "learning_rate": 2.553341311615387e-05, "loss": 3.9079, "step": 918000 }, { "epoch": 4.19, "learning_rate": 2.5253493408841024e-05, "loss": 3.9061, "step": 919000 }, { "epoch": 4.2, "learning_rate": 2.4975290861076127e-05, "loss": 3.9058, "step": 920000 }, { "epoch": 4.2, "learning_rate": 2.469825080275776e-05, "loss": 3.9049, "step": 921000 }, { "epoch": 4.21, "learning_rate": 2.4422929591059718e-05, "loss": 3.9037, "step": 922000 }, { "epoch": 4.21, "learning_rate": 2.4148778300583463e-05, "loss": 3.9052, "step": 923000 }, { "epoch": 4.22, "learning_rate": 2.3876075508705364e-05, "loss": 3.906, "step": 924000 }, { "epoch": 4.22, "learning_rate": 2.3605364516460604e-05, "loss": 3.9016, "step": 925000 }, { "epoch": 4.23, "learning_rate": 2.3335562540463497e-05, "loss": 3.902, "step": 926000 }, { "epoch": 4.23, "learning_rate": 2.3067215761578686e-05, "loss": 3.901, "step": 927000 }, { "epoch": 4.24, "learning_rate": 2.2800326390197003e-05, "loss": 3.9034, "step": 928000 }, { "epoch": 4.24, "learning_rate": 2.2535161324668153e-05, "loss": 3.8986, "step": 929000 }, { "epoch": 4.24, "learning_rate": 2.227119188854776e-05, "loss": 3.9001, "step": 930000 }, { "epoch": 4.25, "learning_rate": 2.200868641683378e-05, "loss": 3.896, "step": 931000 }, { "epoch": 4.25, "learning_rate": 2.1747647071801923e-05, "loss": 3.8955, "step": 932000 }, { "epoch": 4.26, "learning_rate": 2.1488593679023983e-05, "loss": 3.896, "step": 933000 }, { "epoch": 4.26, "learning_rate": 2.1230490082903298e-05, "loss": 3.8937, "step": 934000 }, { "epoch": 4.27, "learning_rate": 2.0973859023521336e-05, "loss": 3.893, "step": 935000 }, { "epoch": 4.27, "learning_rate": 2.0718957033886022e-05, "loss": 3.8921, "step": 936000 }, { "epoch": 4.28, "learning_rate": 2.0465275899699664e-05, "loss": 3.8936, "step": 937000 }, { "epoch": 4.28, "learning_rate": 2.021307360537388e-05, "loss": 3.8894, "step": 938000 }, { "epoch": 4.29, "learning_rate": 1.9962352228316283e-05, "loss": 3.8913, "step": 939000 }, { "epoch": 4.29, "learning_rate": 1.9713362330696583e-05, "loss": 3.8938, "step": 940000 }, { "epoch": 4.29, "learning_rate": 1.946560748553077e-05, "loss": 3.8904, "step": 941000 }, { "epoch": 4.3, "learning_rate": 1.921958523886409e-05, "loss": 3.8881, "step": 942000 }, { "epoch": 4.3, "learning_rate": 1.8974805080506908e-05, "loss": 3.8859, "step": 943000 }, { "epoch": 4.31, "learning_rate": 1.8732001127734854e-05, "loss": 3.8898, "step": 944000 }, { "epoch": 4.31, "learning_rate": 1.8490202219074714e-05, "loss": 3.888, "step": 945000 }, { "epoch": 4.32, "learning_rate": 1.8250137975426186e-05, "loss": 3.8873, "step": 946000 }, { "epoch": 4.32, "learning_rate": 1.8011329765448747e-05, "loss": 3.8839, "step": 947000 }, { "epoch": 4.33, "learning_rate": 1.7774020608654827e-05, "loss": 3.8851, "step": 948000 }, { "epoch": 4.33, "learning_rate": 1.753821245977625e-05, "loss": 3.8815, "step": 949000 }, { "epoch": 4.34, "learning_rate": 1.730414081501248e-05, "loss": 3.8797, "step": 950000 }, { "epoch": 4.34, "learning_rate": 1.7071338990848274e-05, "loss": 3.8825, "step": 951000 }, { "epoch": 4.34, "learning_rate": 1.6840274504384723e-05, "loss": 3.8792, "step": 952000 }, { "epoch": 4.35, "learning_rate": 1.6610715701279632e-05, "loss": 3.8822, "step": 953000 }, { "epoch": 4.35, "learning_rate": 1.6382436945055167e-05, "loss": 3.8788, "step": 954000 }, { "epoch": 4.36, "learning_rate": 1.615567065931629e-05, "loss": 3.8825, "step": 955000 }, { "epoch": 4.36, "learning_rate": 1.5930643206869322e-05, "loss": 3.8789, "step": 956000 }, { "epoch": 4.37, "learning_rate": 1.5706905936180028e-05, "loss": 3.8776, "step": 957000 }, { "epoch": 4.37, "learning_rate": 1.5484908160738844e-05, "loss": 3.88, "step": 958000 }, { "epoch": 4.38, "learning_rate": 1.5264207269471153e-05, "loss": 3.8763, "step": 959000 }, { "epoch": 4.38, "learning_rate": 1.5045028059623756e-05, "loss": 3.8744, "step": 960000 }, { "epoch": 4.39, "learning_rate": 1.4827372336590928e-05, "loss": 3.8755, "step": 961000 }, { "epoch": 4.39, "learning_rate": 1.4611457261190308e-05, "loss": 3.8775, "step": 962000 }, { "epoch": 4.39, "learning_rate": 1.4397066191369536e-05, "loss": 3.8748, "step": 963000 }, { "epoch": 4.4, "learning_rate": 1.4183988576170026e-05, "loss": 3.8712, "step": 964000 }, { "epoch": 4.4, "learning_rate": 1.3972441540226522e-05, "loss": 3.8697, "step": 965000 }, { "epoch": 4.41, "learning_rate": 1.3762426826066322e-05, "loss": 3.8722, "step": 966000 }, { "epoch": 4.41, "learning_rate": 1.3554153877426224e-05, "loss": 3.8705, "step": 967000 }, { "epoch": 4.42, "learning_rate": 1.3347207447291144e-05, "loss": 3.8681, "step": 968000 }, { "epoch": 4.42, "learning_rate": 1.314179848903565e-05, "loss": 3.8657, "step": 969000 }, { "epoch": 4.43, "learning_rate": 1.2938131795049502e-05, "loss": 3.8674, "step": 970000 }, { "epoch": 4.43, "learning_rate": 1.2735801302100369e-05, "loss": 3.8668, "step": 971000 }, { "epoch": 4.44, "learning_rate": 1.2535213334175821e-05, "loss": 3.8706, "step": 972000 }, { "epoch": 4.44, "learning_rate": 1.2335967966295303e-05, "loss": 3.8628, "step": 973000 }, { "epoch": 4.44, "learning_rate": 1.2138268399943431e-05, "loss": 3.8666, "step": 974000 }, { "epoch": 4.45, "learning_rate": 1.1942116263585212e-05, "loss": 3.8637, "step": 975000 }, { "epoch": 4.45, "learning_rate": 1.1747707001746943e-05, "loss": 3.8626, "step": 976000 }, { "epoch": 4.46, "learning_rate": 1.1554653008327055e-05, "loss": 3.8644, "step": 977000 }, { "epoch": 4.46, "learning_rate": 1.1363532705509805e-05, "loss": 3.8628, "step": 978000 }, { "epoch": 4.47, "learning_rate": 1.1173581654855314e-05, "loss": 3.864, "step": 979000 }, { "epoch": 4.47, "learning_rate": 1.0985185980385471e-05, "loss": 3.8634, "step": 980000 }, { "epoch": 4.48, "learning_rate": 1.079834723392832e-05, "loss": 3.8611, "step": 981000 }, { "epoch": 4.48, "learning_rate": 1.0613066954487539e-05, "loss": 3.8614, "step": 982000 }, { "epoch": 4.49, "learning_rate": 1.0429529608794375e-05, "loss": 3.8612, "step": 983000 }, { "epoch": 4.49, "learning_rate": 1.024736926677754e-05, "loss": 3.8581, "step": 984000 }, { "epoch": 4.5, "learning_rate": 1.0066951746339515e-05, "loss": 3.8562, "step": 985000 }, { "epoch": 4.5, "learning_rate": 9.887917337602925e-06, "loss": 3.8566, "step": 986000 }, { "epoch": 4.5, "learning_rate": 9.71044889515631e-06, "loss": 3.8534, "step": 987000 }, { "epoch": 4.51, "learning_rate": 9.534722998420087e-06, "loss": 3.8577, "step": 988000 }, { "epoch": 4.51, "learning_rate": 9.360389291505156e-06, "loss": 3.8599, "step": 989000 }, { "epoch": 4.52, "learning_rate": 9.187625896164997e-06, "loss": 3.8554, "step": 990000 }, { "epoch": 4.52, "learning_rate": 9.016434235463455e-06, "loss": 3.8554, "step": 991000 }, { "epoch": 4.53, "learning_rate": 8.846984551782144e-06, "loss": 3.8531, "step": 992000 }, { "epoch": 4.53, "learning_rate": 8.678939002516817e-06, "loss": 3.8537, "step": 993000 }, { "epoch": 4.54, "learning_rate": 8.512635059971796e-06, "loss": 3.8556, "step": 994000 }, { "epoch": 4.54, "learning_rate": 8.34774115340684e-06, "loss": 3.8552, "step": 995000 }, { "epoch": 4.55, "learning_rate": 8.184588425936723e-06, "loss": 3.8518, "step": 996000 }, { "epoch": 4.55, "learning_rate": 8.022851589599123e-06, "loss": 3.8519, "step": 997000 }, { "epoch": 4.55, "learning_rate": 7.862855447419604e-06, "loss": 3.8497, "step": 998000 }, { "epoch": 4.56, "learning_rate": 7.70428100492051e-06, "loss": 3.8534, "step": 999000 }, { "epoch": 4.56, "learning_rate": 7.54729051547387e-06, "loss": 3.8516, "step": 1000000 }, { "epoch": 4.57, "learning_rate": 7.392039885206847e-06, "loss": 3.8476, "step": 1001000 }, { "epoch": 4.57, "learning_rate": 7.238219581070471e-06, "loss": 3.8473, "step": 1002000 }, { "epoch": 4.58, "learning_rate": 7.085987068966549e-06, "loss": 3.8497, "step": 1003000 }, { "epoch": 4.58, "learning_rate": 6.935343602844757e-06, "loss": 3.8455, "step": 1004000 }, { "epoch": 4.59, "learning_rate": 6.786438681986962e-06, "loss": 3.8503, "step": 1005000 }, { "epoch": 4.59, "learning_rate": 6.638975425188365e-06, "loss": 3.8475, "step": 1006000 }, { "epoch": 4.6, "learning_rate": 6.493249970997628e-06, "loss": 3.8456, "step": 1007000 }, { "epoch": 4.6, "learning_rate": 6.349115258944571e-06, "loss": 3.8407, "step": 1008000 }, { "epoch": 4.6, "learning_rate": 6.2064305863833495e-06, "loss": 3.8426, "step": 1009000 }, { "epoch": 4.61, "learning_rate": 6.065342204771441e-06, "loss": 3.8456, "step": 1010000 }, { "epoch": 4.61, "learning_rate": 5.92585127626355e-06, "loss": 3.8446, "step": 1011000 }, { "epoch": 4.62, "learning_rate": 5.7880960433015715e-06, "loss": 3.8439, "step": 1012000 }, { "epoch": 4.62, "learning_rate": 5.651801854522143e-06, "loss": 3.8432, "step": 1013000 }, { "epoch": 4.63, "learning_rate": 5.517108525207015e-06, "loss": 3.8411, "step": 1014000 }, { "epoch": 4.63, "learning_rate": 5.384017164834387e-06, "loss": 3.8405, "step": 1015000 }, { "epoch": 4.64, "learning_rate": 5.25265955688945e-06, "loss": 3.8406, "step": 1016000 }, { "epoch": 4.64, "learning_rate": 5.122773805360459e-06, "loss": 3.8425, "step": 1017000 }, { "epoch": 4.65, "learning_rate": 4.9946207493118515e-06, "loss": 3.8429, "step": 1018000 }, { "epoch": 4.65, "learning_rate": 4.867944881850673e-06, "loss": 3.84, "step": 1019000 }, { "epoch": 4.65, "learning_rate": 4.743000595890457e-06, "loss": 3.8434, "step": 1020000 }, { "epoch": 4.66, "learning_rate": 4.619538782067134e-06, "loss": 3.8404, "step": 1021000 }, { "epoch": 4.66, "learning_rate": 4.497686330529982e-06, "loss": 3.8382, "step": 1022000 }, { "epoch": 4.67, "learning_rate": 4.377444244986006e-06, "loss": 3.8392, "step": 1023000 }, { "epoch": 4.67, "learning_rate": 4.2589313414077795e-06, "loss": 3.8395, "step": 1024000 }, { "epoch": 4.68, "learning_rate": 4.1420275474132856e-06, "loss": 3.8404, "step": 1025000 }, { "epoch": 4.68, "learning_rate": 4.02661922185521e-06, "loss": 3.841, "step": 1026000 }, { "epoch": 4.69, "learning_rate": 3.912938129952815e-06, "loss": 3.8376, "step": 1027000 }, { "epoch": 4.69, "learning_rate": 3.8007576184877935e-06, "loss": 3.8383, "step": 1028000 }, { "epoch": 4.7, "learning_rate": 3.6901932136656604e-06, "loss": 3.837, "step": 1029000 }, { "epoch": 4.7, "learning_rate": 3.5812458262129755e-06, "loss": 3.8348, "step": 1030000 }, { "epoch": 4.71, "learning_rate": 3.4740228745658187e-06, "loss": 3.8393, "step": 1031000 }, { "epoch": 4.71, "learning_rate": 3.368310581510614e-06, "loss": 3.8335, "step": 1032000 }, { "epoch": 4.71, "learning_rate": 3.2643212405075284e-06, "loss": 3.833, "step": 1033000 }, { "epoch": 4.72, "learning_rate": 3.161949185382773e-06, "loss": 3.8357, "step": 1034000 }, { "epoch": 4.72, "learning_rate": 3.0610952138760753e-06, "loss": 3.8334, "step": 1035000 }, { "epoch": 4.73, "learning_rate": 2.9618634416622936e-06, "loss": 3.8355, "step": 1036000 }, { "epoch": 4.73, "learning_rate": 2.864351483910399e-06, "loss": 3.8369, "step": 1037000 }, { "epoch": 4.74, "learning_rate": 2.768364924832545e-06, "loss": 3.8369, "step": 1038000 }, { "epoch": 4.74, "learning_rate": 2.6740029762871932e-06, "loss": 3.8313, "step": 1039000 }, { "epoch": 4.75, "learning_rate": 2.581358339964313e-06, "loss": 3.8306, "step": 1040000 }, { "epoch": 4.75, "learning_rate": 2.4902463043641854e-06, "loss": 3.8327, "step": 1041000 }, { "epoch": 4.76, "learning_rate": 2.4007611701787116e-06, "loss": 3.834, "step": 1042000 }, { "epoch": 4.76, "learning_rate": 2.3129036745030752e-06, "loss": 3.8316, "step": 1043000 }, { "epoch": 4.76, "learning_rate": 2.226759956554547e-06, "loss": 3.8326, "step": 1044000 }, { "epoch": 4.77, "learning_rate": 2.1421582661275585e-06, "loss": 3.8354, "step": 1045000 }, { "epoch": 4.77, "learning_rate": 2.0592685019640958e-06, "loss": 3.8307, "step": 1046000 }, { "epoch": 4.78, "learning_rate": 1.977925401473013e-06, "loss": 3.8299, "step": 1047000 }, { "epoch": 4.78, "learning_rate": 1.8982923194333036e-06, "loss": 3.8297, "step": 1048000 }, { "epoch": 4.79, "learning_rate": 1.8202104863079827e-06, "loss": 3.8298, "step": 1049000 }, { "epoch": 4.79, "learning_rate": 1.7438367075362172e-06, "loss": 3.8303, "step": 1050000 }, { "epoch": 4.8, "learning_rate": 1.6690927139422218e-06, "loss": 3.8317, "step": 1051000 }, { "epoch": 4.8, "learning_rate": 1.5959067502205883e-06, "loss": 3.8296, "step": 1052000 }, { "epoch": 4.81, "learning_rate": 1.5243550546499618e-06, "loss": 3.8309, "step": 1053000 }, { "epoch": 4.81, "learning_rate": 1.4544382166065795e-06, "loss": 3.8339, "step": 1054000 }, { "epoch": 4.81, "learning_rate": 1.3861568120002276e-06, "loss": 3.831, "step": 1055000 }, { "epoch": 4.82, "learning_rate": 1.3195114032695576e-06, "loss": 3.8265, "step": 1056000 }, { "epoch": 4.82, "learning_rate": 1.2545667306077758e-06, "loss": 3.8309, "step": 1057000 }, { "epoch": 4.83, "learning_rate": 1.1911933096932392e-06, "loss": 3.8254, "step": 1058000 }, { "epoch": 4.83, "learning_rate": 1.1294574905821087e-06, "loss": 3.8288, "step": 1059000 }, { "epoch": 4.84, "learning_rate": 1.0694190611034273e-06, "loss": 3.8261, "step": 1060000 }, { "epoch": 4.84, "learning_rate": 1.0109583188243843e-06, "loss": 3.8253, "step": 1061000 }, { "epoch": 4.85, "learning_rate": 9.541366629567838e-07, "loss": 3.8274, "step": 1062000 }, { "epoch": 4.85, "learning_rate": 8.989545615444961e-07, "loss": 3.8264, "step": 1063000 }, { "epoch": 4.86, "learning_rate": 8.454651918863299e-07, "loss": 3.8278, "step": 1064000 }, { "epoch": 4.86, "learning_rate": 7.935619088263124e-07, "loss": 3.8267, "step": 1065000 }, { "epoch": 4.86, "learning_rate": 7.433489455357823e-07, "loss": 3.8298, "step": 1066000 }, { "epoch": 4.87, "learning_rate": 6.94726189238426e-07, "loss": 3.8305, "step": 1067000 }, { "epoch": 4.87, "learning_rate": 6.477912850886725e-07, "loss": 3.8305, "step": 1068000 }, { "epoch": 4.88, "learning_rate": 6.024951762708009e-07, "loss": 3.8248, "step": 1069000 }, { "epoch": 4.88, "learning_rate": 5.588382351461308e-07, "loss": 3.826, "step": 1070000 }, { "epoch": 4.89, "learning_rate": 5.16779582648863e-07, "loss": 3.8261, "step": 1071000 }, { "epoch": 4.89, "learning_rate": 4.76364087147263e-07, "loss": 3.8238, "step": 1072000 }, { "epoch": 4.9, "learning_rate": 4.375920815465229e-07, "loss": 3.8293, "step": 1073000 }, { "epoch": 4.9, "learning_rate": 4.004638852143083e-07, "loss": 3.8244, "step": 1074000 }, { "epoch": 4.91, "learning_rate": 3.6497980397816043e-07, "loss": 3.8275, "step": 1075000 }, { "epoch": 4.91, "learning_rate": 3.3117314832133985e-07, "loss": 3.8278, "step": 1076000 }, { "epoch": 4.92, "learning_rate": 2.989765157657809e-07, "loss": 3.824, "step": 1077000 }, { "epoch": 4.92, "learning_rate": 2.684545642082537e-07, "loss": 3.8268, "step": 1078000 }, { "epoch": 4.92, "learning_rate": 2.395464400940739e-07, "loss": 3.8244, "step": 1079000 }, { "epoch": 4.93, "learning_rate": 2.1228375656396903e-07, "loss": 3.8236, "step": 1080000 }, { "epoch": 4.93, "learning_rate": 1.8666673818257262e-07, "loss": 3.8255, "step": 1081000 }, { "epoch": 4.94, "learning_rate": 1.6271874491924355e-07, "loss": 3.828, "step": 1082000 }, { "epoch": 4.94, "learning_rate": 1.4041353457650008e-07, "loss": 3.8259, "step": 1083000 }, { "epoch": 4.95, "learning_rate": 1.1973143077612658e-07, "loss": 3.8265, "step": 1084000 }, { "epoch": 4.95, "learning_rate": 1.0069575448430346e-07, "loss": 3.826, "step": 1085000 }, { "epoch": 4.96, "learning_rate": 8.330666249920515e-08, "loss": 3.8267, "step": 1086000 }, { "epoch": 4.96, "learning_rate": 6.759413926236135e-08, "loss": 3.8269, "step": 1087000 }, { "epoch": 4.97, "learning_rate": 5.349533819716257e-08, "loss": 3.8255, "step": 1088000 }, { "epoch": 4.97, "learning_rate": 4.1043510231775216e-08, "loss": 3.8275, "step": 1089000 }, { "epoch": 4.97, "learning_rate": 3.0248739940019756e-08, "loss": 3.8277, "step": 1090000 } ], "max_steps": 1095620, "num_train_epochs": 5, "total_flos": 7.86932880566174e+19, "trial_name": null, "trial_params": null }