diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6640 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.976631448884397, + "global_step": 1090500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 9.6608, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 8e-05, + "loss": 8.6223, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012, + "loss": 8.3175, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016, + "loss": 7.9745, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002, + "loss": 7.6776, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 0.00024, + "loss": 7.4451, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 0.00028, + "loss": 7.2587, + "step": 700 + }, + { + "epoch": 0.0, + "learning_rate": 0.00032, + "loss": 7.0977, + "step": 800 + }, + { + "epoch": 0.0, + "learning_rate": 0.00036, + "loss": 6.9377, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004, + "loss": 6.8182, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999999998815762, + "loss": 6.6945, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999999995263047, + "loss": 6.5851, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039999999893418564, + "loss": 6.476, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999999981052189, + "loss": 6.3753, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039999999703940455, + "loss": 6.2997, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039999933291862616, + "loss": 5.9559, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999994734068435, + "loss": 6.1649, + "step": 3000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999988151660478, + "loss": 5.8819, + "step": 4000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000399997893630147, + "loss": 5.8437, + "step": 5000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999967088003543, + "loss": 5.857, + "step": 6000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039999526067822954, + "loss": 5.7574, + "step": 7000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039999354926567907, + "loss": 5.6647, + "step": 8000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039999157456495604, + "loss": 5.598, + "step": 9000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039998933657865997, + "loss": 5.528, + "step": 10000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039998683530973725, + "loss": 5.4848, + "step": 11000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003999840707614807, + "loss": 5.4314, + "step": 12000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003999810429375299, + "loss": 5.3931, + "step": 13000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039997775526446917, + "loss": 5.3531, + "step": 14000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039997420116469963, + "loss": 5.3126, + "step": 15000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999703877510894, + "loss": 5.2782, + "step": 16000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000399966307394198, + "loss": 5.2575, + "step": 17000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999619682600994, + "loss": 5.2297, + "step": 18000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999573616675516, + "loss": 5.1976, + "step": 19000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039995249683579117, + "loss": 5.182, + "step": 20000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039994736403182074, + "loss": 5.1576, + "step": 21000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039994197352799087, + "loss": 5.1435, + "step": 22000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003999363145395998, + "loss": 5.1286, + "step": 23000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003999303983920581, + "loss": 5.1065, + "step": 24000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999242132490164, + "loss": 5.0946, + "step": 25000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999177714888857, + "loss": 5.0748, + "step": 26000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039991106022373136, + "loss": 5.0674, + "step": 27000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999040928848998, + "loss": 5.0493, + "step": 28000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003998968555329385, + "loss": 5.039, + "step": 29000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003998893626520587, + "loss": 5.0348, + "step": 30000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003998815992513638, + "loss": 5.0201, + "step": 31000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039987357270987667, + "loss": 5.0161, + "step": 32000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003998652914592657, + "loss": 4.9991, + "step": 33000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039985673893135445, + "loss": 4.9971, + "step": 34000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003998479232953792, + "loss": 4.9871, + "step": 35000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003998388445629455, + "loss": 4.9771, + "step": 36000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003998295122192289, + "loss": 4.9726, + "step": 37000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003998199075931465, + "loss": 4.9669, + "step": 38000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003998100499065675, + "loss": 4.9583, + "step": 39000 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039979992969921984, + "loss": 4.9556, + "step": 40000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997895364597799, + "loss": 4.942, + "step": 41000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003997788909883795, + "loss": 4.9406, + "step": 42000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039976797198678043, + "loss": 4.9323, + "step": 43000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039975680130732954, + "loss": 4.9277, + "step": 44000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003997453566010126, + "loss": 4.923, + "step": 45000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003997336489332646, + "loss": 4.9197, + "step": 46000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003997216904214485, + "loss": 4.9051, + "step": 47000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039970945714034553, + "loss": 4.9077, + "step": 48000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996969609450725, + "loss": 4.9002, + "step": 49000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996842147424852, + "loss": 4.9013, + "step": 50000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039967119303144363, + "loss": 4.8946, + "step": 51000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039965792187247553, + "loss": 4.8882, + "step": 52000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039964437471416833, + "loss": 4.8894, + "step": 53000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003996305786686345, + "loss": 4.8764, + "step": 54000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003996165061343288, + "loss": 4.8782, + "step": 55000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003996021852748057, + "loss": 4.8759, + "step": 56000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039958758743853225, + "loss": 4.8727, + "step": 57000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995727418403572, + "loss": 4.8669, + "step": 58000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995576187789104, + "loss": 4.8694, + "step": 59000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039954224852018107, + "loss": 4.8688, + "step": 60000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995266003131184, + "loss": 4.86, + "step": 61000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039951070547469266, + "loss": 4.8559, + "step": 62000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039949453220433417, + "loss": 4.8543, + "step": 63000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039947811286982935, + "loss": 4.8515, + "step": 64000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003994614146212571, + "loss": 4.8498, + "step": 65000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039944447087704996, + "loss": 4.8443, + "step": 66000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003994272477381079, + "loss": 4.8399, + "step": 67000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003994097796733338, + "loss": 4.8381, + "step": 68000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039939203173462723, + "loss": 4.8381, + "step": 69000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039937403944117984, + "loss": 4.8354, + "step": 70000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039935576679607466, + "loss": 4.834, + "step": 71000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003993372503686054, + "loss": 4.8337, + "step": 72000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003993184531132279, + "loss": 4.8304, + "step": 73000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039929939345843064, + "loss": 4.8254, + "step": 74000 + }, + { + "epoch": 0.14, + "learning_rate": 0.000399280090882382, + "loss": 4.8248, + "step": 75000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003992605067667017, + "loss": 4.8248, + "step": 76000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003992406803053476, + "loss": 4.8246, + "step": 77000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039922057183181, + "loss": 4.8173, + "step": 78000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039920024209092803, + "loss": 4.8128, + "step": 79000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039917960962754717, + "loss": 4.818, + "step": 80000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039915871494753167, + "loss": 4.8107, + "step": 81000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039913755807838893, + "loss": 4.8121, + "step": 82000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039911613904797174, + "loss": 4.8116, + "step": 83000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003990944796965674, + "loss": 4.8057, + "step": 84000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003990725366906298, + "loss": 4.8055, + "step": 85000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003990503762807127, + "loss": 4.8028, + "step": 86000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039902790967672147, + "loss": 4.7969, + "step": 87000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003990052039152944, + "loss": 4.8025, + "step": 88000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039898221356934855, + "loss": 4.8017, + "step": 89000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039895896126663653, + "loss": 4.7986, + "step": 90000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003989354470377698, + "loss": 4.7991, + "step": 91000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039891169482063473, + "loss": 4.7965, + "step": 92000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039888765709451975, + "loss": 4.792, + "step": 93000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039886338196645364, + "loss": 4.7862, + "step": 94000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039883882086954475, + "loss": 4.7916, + "step": 95000 + }, + { + "epoch": 0.18, + "learning_rate": 0.000398814022958251, + "loss": 4.7883, + "step": 96000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00039878893861975594, + "loss": 4.7908, + "step": 97000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00039876359257893807, + "loss": 4.7877, + "step": 98000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003987379848691651, + "loss": 4.7873, + "step": 99000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00039871214152416957, + "loss": 4.7876, + "step": 100000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00039868601083955114, + "loss": 4.7883, + "step": 101000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039865964511100514, + "loss": 4.7893, + "step": 102000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003986329915890061, + "loss": 4.7789, + "step": 103000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039860610361561096, + "loss": 4.7815, + "step": 104000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003985789273964466, + "loss": 4.7738, + "step": 105000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003985515173196509, + "loss": 4.7753, + "step": 106000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039852381854628627, + "loss": 4.7724, + "step": 107000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039849588651028544, + "loss": 4.7726, + "step": 108000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003984676653284346, + "loss": 4.7685, + "step": 109000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003984392114801697, + "loss": 4.7715, + "step": 110000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003984104680382948, + "loss": 4.7713, + "step": 111000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039838149252745204, + "loss": 4.7698, + "step": 112000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003983522269767629, + "loss": 4.7753, + "step": 113000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039832272995577275, + "loss": 4.7652, + "step": 114000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003982929424502255, + "loss": 4.7664, + "step": 115000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039826292407426207, + "loss": 4.7713, + "step": 116000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039823264521022384, + "loss": 4.7628, + "step": 117000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003982020751975389, + "loss": 4.7682, + "step": 118000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039817124425512714, + "loss": 4.7644, + "step": 119000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003981401524235768, + "loss": 4.758, + "step": 120000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039810883122677967, + "loss": 4.7622, + "step": 121000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003980772180008777, + "loss": 4.762, + "step": 122000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003980453760138509, + "loss": 4.7571, + "step": 123000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039801324155990393, + "loss": 4.7619, + "step": 124000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039798091147522796, + "loss": 4.7618, + "step": 125000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003979482562229017, + "loss": 4.762, + "step": 126000 + }, + { + "epoch": 0.23, + "learning_rate": 0.000397915340374997, + "loss": 4.7562, + "step": 127000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039788216397484706, + "loss": 4.7528, + "step": 128000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039784876063314606, + "loss": 4.7567, + "step": 129000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039781506352031947, + "loss": 4.7554, + "step": 130000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039778114007485855, + "loss": 4.7494, + "step": 131000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039774692242662465, + "loss": 4.7591, + "step": 132000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039771244444786484, + "loss": 4.7605, + "step": 133000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003976777061839689, + "loss": 4.7469, + "step": 134000 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039764274280914674, + "loss": 4.7506, + "step": 135000 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039760748437268835, + "loss": 4.7506, + "step": 136000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003975720014377832, + "loss": 4.7509, + "step": 137000 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039753622301424524, + "loss": 4.7488, + "step": 138000 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039750022070592105, + "loss": 4.7544, + "step": 139000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003974639224835218, + "loss": 4.7502, + "step": 140000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003974274009911748, + "loss": 4.7433, + "step": 141000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00039739058316086716, + "loss": 4.7466, + "step": 142000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003973535798838411, + "loss": 4.7469, + "step": 143000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003973162428990996, + "loss": 4.7414, + "step": 144000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00039727864615081464, + "loss": 4.7418, + "step": 145000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003972407896884818, + "loss": 4.7484, + "step": 146000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039720271180775053, + "loss": 4.7454, + "step": 147000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003971643363267646, + "loss": 4.744, + "step": 148000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039712577881131754, + "loss": 4.7369, + "step": 149000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039708688477304655, + "loss": 4.7375, + "step": 150000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003970477312731783, + "loss": 4.7414, + "step": 151000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003970083183632576, + "loss": 4.7389, + "step": 152000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003969686858969712, + "loss": 4.7378, + "step": 153000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003969287545822263, + "loss": 4.7372, + "step": 154000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039688860433410763, + "loss": 4.7393, + "step": 155000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039684815482460387, + "loss": 4.7315, + "step": 156000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00039680748700586993, + "loss": 4.7371, + "step": 157000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00039676651951439873, + "loss": 4.7353, + "step": 158000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003967253343389894, + "loss": 4.7315, + "step": 159000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00039668384908106706, + "loss": 4.7358, + "step": 160000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00039664210489213713, + "loss": 4.7339, + "step": 161000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039660010182715526, + "loss": 4.737, + "step": 162000 + }, + { + "epoch": 0.3, + "learning_rate": 0.000396557882332566, + "loss": 4.7261, + "step": 163000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003965153619404471, + "loss": 4.7342, + "step": 164000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003964726257474391, + "loss": 4.7293, + "step": 165000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003964295882518688, + "loss": 4.7301, + "step": 166000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039638633558526285, + "loss": 4.7316, + "step": 167000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039634278121264703, + "loss": 4.7295, + "step": 168000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003962990122999811, + "loss": 4.7332, + "step": 169000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003962549412794449, + "loss": 4.732, + "step": 170000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003962106563509727, + "loss": 4.7321, + "step": 171000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039616606891435896, + "loss": 4.7276, + "step": 172000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039612122322838677, + "loss": 4.7245, + "step": 173000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003960761645849172, + "loss": 4.7286, + "step": 174000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003960308028357847, + "loss": 4.7239, + "step": 175000 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003840903997775841, + "loss": 4.6145, + "step": 176000 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003839104648613638, + "loss": 4.5905, + "step": 177000 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003837297421617577, + "loss": 4.5891, + "step": 178000 + }, + { + "epoch": 0.65, + "learning_rate": 0.000383548053178735, + "loss": 4.5817, + "step": 179000 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003833652155473882, + "loss": 4.5765, + "step": 180000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00038318141161813824, + "loss": 4.574, + "step": 181000 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003829966423595951, + "loss": 4.5725, + "step": 182000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00038281109496044006, + "loss": 4.5666, + "step": 183000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00038262439893236937, + "loss": 4.5631, + "step": 184000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00038243692864915963, + "loss": 4.5591, + "step": 185000 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003822483097830243, + "loss": 4.5552, + "step": 186000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00038205873050485524, + "loss": 4.5543, + "step": 187000 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003818683828312813, + "loss": 4.5512, + "step": 188000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00038167688668914063, + "loss": 4.5484, + "step": 189000 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003814844331462512, + "loss": 4.5501, + "step": 190000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003812912171041104, + "loss": 4.5431, + "step": 191000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003810968527621949, + "loss": 4.5418, + "step": 192000 + }, + { + "epoch": 0.7, + "learning_rate": 0.00038090153407619305, + "loss": 4.5379, + "step": 193000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00038070526207539536, + "loss": 4.538, + "step": 194000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003805082354937156, + "loss": 4.5377, + "step": 195000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003803100609220069, + "loss": 4.5354, + "step": 196000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003801111357514916, + "loss": 4.5321, + "step": 197000 + }, + { + "epoch": 0.72, + "learning_rate": 0.000379911062782051, + "loss": 4.5327, + "step": 198000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00037971004171739956, + "loss": 4.5342, + "step": 199000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00037950827605766894, + "loss": 4.527, + "step": 200000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00037930536293104657, + "loss": 4.5297, + "step": 201000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00037910170923078203, + "loss": 4.5252, + "step": 202000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00037889690831515295, + "loss": 4.5228, + "step": 203000 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003786911646487036, + "loss": 4.5211, + "step": 204000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00037848447931566176, + "loss": 4.521, + "step": 205000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00037827685340521773, + "loss": 4.5257, + "step": 206000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003780684970458185, + "loss": 4.5204, + "step": 207000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003778589942057952, + "loss": 4.5209, + "step": 208000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003776487649924752, + "loss": 4.5167, + "step": 209000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003774373896346034, + "loss": 4.5142, + "step": 210000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00037722507921728195, + "loss": 4.5166, + "step": 211000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003770122622793867, + "loss": 4.5127, + "step": 212000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00037679808696909655, + "loss": 4.5163, + "step": 213000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00037658297996835357, + "loss": 4.513, + "step": 214000 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003763669424107285, + "loss": 4.5078, + "step": 215000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00037614997543469595, + "loss": 4.5114, + "step": 216000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00037593208018362834, + "loss": 4.5097, + "step": 217000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00037571369637505247, + "loss": 4.5072, + "step": 218000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00037549394987438647, + "loss": 4.5084, + "step": 219000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00037527327855580843, + "loss": 4.5071, + "step": 220000 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003750519056381631, + "loss": 4.5061, + "step": 221000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00037482938909921175, + "loss": 4.5075, + "step": 222000 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003746059512444505, + "loss": 4.5079, + "step": 223000 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003743815932513518, + "loss": 4.5071, + "step": 224000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00037415631630223755, + "loss": 4.5033, + "step": 225000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00037393012158427186, + "loss": 4.505, + "step": 226000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00037370323785818266, + "loss": 4.5032, + "step": 227000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00037347521209812743, + "loss": 4.5017, + "step": 228000 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003732465015546745, + "loss": 4.502, + "step": 229000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00037301664955431804, + "loss": 4.4998, + "step": 230000 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003727858857909254, + "loss": 4.4994, + "step": 231000 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003725544436092979, + "loss": 4.4985, + "step": 232000 + }, + { + "epoch": 0.85, + "learning_rate": 0.000372321860881582, + "loss": 4.499, + "step": 233000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00037208837005222694, + "loss": 4.4919, + "step": 234000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003718542072019544, + "loss": 4.4965, + "step": 235000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00037161890477046666, + "loss": 4.4972, + "step": 236000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00037138293459993847, + "loss": 4.4988, + "step": 237000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039974046056824423, + "loss": 5.0173, + "step": 238000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003997382653105697, + "loss": 5.1254, + "step": 239000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003997360630230883, + "loss": 5.137, + "step": 240000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003997338515152591, + "loss": 5.1396, + "step": 241000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003997316285596137, + "loss": 5.1539, + "step": 242000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039972939860216607, + "loss": 5.1836, + "step": 243000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039972715717864, + "loss": 5.1907, + "step": 244000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039972490651670964, + "loss": 5.2177, + "step": 245000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039972264888099373, + "loss": 5.2218, + "step": 246000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003955398710520662, + "loss": 4.9553, + "step": 247000 + }, + { + "epoch": 0.34, + "learning_rate": 0.000395503657852559, + "loss": 4.8679, + "step": 248000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039546729990487664, + "loss": 4.8395, + "step": 249000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003954307972359379, + "loss": 4.8217, + "step": 250000 + }, + { + "epoch": 0.34, + "learning_rate": 0.000395394149872769, + "loss": 4.8152, + "step": 251000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003953573947067854, + "loss": 4.8026, + "step": 252000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003953204581812889, + "loss": 4.8017, + "step": 253000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003952834141966186, + "loss": 4.7977, + "step": 254000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039524618861807426, + "loss": 4.7963, + "step": 255000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003952088184819814, + "loss": 4.79, + "step": 256000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003951713414028577, + "loss": 4.7877, + "step": 257000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003951336823792677, + "loss": 4.7854, + "step": 258000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003950959167570807, + "loss": 4.7945, + "step": 259000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039505796895741114, + "loss": 4.7845, + "step": 260000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039501991490389356, + "loss": 4.7821, + "step": 261000 + }, + { + "epoch": 0.36, + "learning_rate": 0.000394981678440416, + "loss": 4.7798, + "step": 262000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039494333606815397, + "loss": 4.7892, + "step": 263000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039490481105399416, + "loss": 4.7885, + "step": 264000 + }, + { + "epoch": 0.36, + "learning_rate": 0.000394866141735037, + "loss": 4.7838, + "step": 265000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003948273670255641, + "loss": 4.7812, + "step": 266000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039478840932724265, + "loss": 4.7749, + "step": 267000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039474934658425046, + "loss": 4.7823, + "step": 268000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039471010062182423, + "loss": 4.7809, + "step": 269000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039467074996088307, + "loss": 4.785, + "step": 270000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003946312158504645, + "loss": 4.7753, + "step": 271000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039459157738799654, + "loss": 4.775, + "step": 272000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003945517552465506, + "loss": 4.7755, + "step": 273000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003945118290998296, + "loss": 4.7849, + "step": 274000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039447175922715307, + "loss": 4.7806, + "step": 275000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039443150533232405, + "loss": 4.7791, + "step": 276000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039439110748312647, + "loss": 4.7798, + "step": 277000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039435056570947044, + "loss": 4.7794, + "step": 278000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003943099207989059, + "loss": 4.7821, + "step": 279000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003942690914103384, + "loss": 4.7815, + "step": 280000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039422811818765134, + "loss": 4.7713, + "step": 281000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039418704235002724, + "loss": 4.7707, + "step": 282000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039414582302643454, + "loss": 4.7764, + "step": 283000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003941044187712859, + "loss": 4.7864, + "step": 284000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039406287080393925, + "loss": 4.774, + "step": 285000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003940211791551559, + "loss": 4.7698, + "step": 286000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039397938576284634, + "loss": 4.7754, + "step": 287000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039393740698750394, + "loss": 4.7764, + "step": 288000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003938952846236165, + "loss": 4.7764, + "step": 289000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039385301870237103, + "loss": 4.7747, + "step": 290000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039381065173618853, + "loss": 4.7784, + "step": 291000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039376809893769117, + "loss": 4.7792, + "step": 292000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039372544544391313, + "loss": 4.7726, + "step": 293000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003936826488052433, + "loss": 4.7736, + "step": 294000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003936396659988803, + "loss": 4.7759, + "step": 295000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039359653982441555, + "loss": 4.7719, + "step": 296000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039355327031377916, + "loss": 4.7775, + "step": 297000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003935099009833917, + "loss": 4.7814, + "step": 298000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039346634503988233, + "loss": 4.7722, + "step": 299000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003934226458565957, + "loss": 4.7745, + "step": 300000 + }, + { + "epoch": 0.41, + "learning_rate": 0.000393378847379798, + "loss": 4.7748, + "step": 301000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039333486195728426, + "loss": 4.7774, + "step": 302000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039329077759239523, + "loss": 4.7777, + "step": 303000 + }, + { + "epoch": 0.42, + "learning_rate": 0.000393246506060789, + "loss": 4.7707, + "step": 304000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003932021359382358, + "loss": 4.7792, + "step": 305000 + }, + { + "epoch": 0.42, + "learning_rate": 0.000393157578428518, + "loss": 4.7711, + "step": 306000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003931128779076294, + "loss": 4.7712, + "step": 307000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003930680793235711, + "loss": 4.7732, + "step": 308000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039302309302266194, + "loss": 4.7753, + "step": 309000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039297800901073876, + "loss": 4.7747, + "step": 310000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003929327370629047, + "loss": 4.7756, + "step": 311000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003928873222703692, + "loss": 4.7733, + "step": 312000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003928418102956833, + "loss": 4.7714, + "step": 313000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003927961100574846, + "loss": 4.773, + "step": 314000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003927503129900122, + "loss": 4.7742, + "step": 315000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003927043274413583, + "loss": 4.7757, + "step": 316000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003926582454165936, + "loss": 4.7738, + "step": 317000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039261202103549754, + "loss": 4.7675, + "step": 318000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003925656078478171, + "loss": 4.7782, + "step": 319000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039251905208725256, + "loss": 4.7703, + "step": 320000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039247235378827314, + "loss": 4.7726, + "step": 321000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003924255598974257, + "loss": 4.7679, + "step": 322000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039237857676789823, + "loss": 4.767, + "step": 323000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003923314984006603, + "loss": 4.7621, + "step": 324000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003922842305795883, + "loss": 4.7717, + "step": 325000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039223686787524505, + "loss": 4.7682, + "step": 326000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003921893155024742, + "loss": 4.783, + "step": 327000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003921416686011523, + "loss": 4.7705, + "step": 328000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003920938318173703, + "loss": 4.7678, + "step": 329000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003920459008600368, + "loss": 4.7697, + "step": 330000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003919977798067727, + "loss": 4.7749, + "step": 331000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039194956493523547, + "loss": 4.7797, + "step": 332000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039190115975485935, + "loss": 4.7678, + "step": 333000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003918526124935473, + "loss": 4.7674, + "step": 334000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003918039719474887, + "loss": 4.7711, + "step": 335000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003917551407742319, + "loss": 4.7686, + "step": 336000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039170621667219887, + "loss": 4.7708, + "step": 337000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039165710173146836, + "loss": 4.7681, + "step": 338000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003916078942182069, + "loss": 4.7697, + "step": 339000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003915584956553133, + "loss": 4.7665, + "step": 340000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039150900487640804, + "loss": 4.7757, + "step": 341000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039145932283750107, + "loss": 4.7653, + "step": 342000 + }, + { + "epoch": 0.7, + "learning_rate": 0.00038084013217180266, + "loss": 4.7047, + "step": 343000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003807297139737221, + "loss": 4.6748, + "step": 344000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003806191054905468, + "loss": 4.6694, + "step": 345000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00038050808546821253, + "loss": 4.6753, + "step": 346000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003803967646603707, + "loss": 4.6691, + "step": 347000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003802851432525181, + "loss": 4.6667, + "step": 348000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003801733335024691, + "loss": 4.6662, + "step": 349000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003800611117532231, + "loss": 4.6614, + "step": 350000 + }, + { + "epoch": 0.72, + "learning_rate": 0.000379948589963274, + "loss": 4.6566, + "step": 351000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037983588129147694, + "loss": 4.6586, + "step": 352000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003797227602826864, + "loss": 4.6598, + "step": 353000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00037960933979699685, + "loss": 4.6561, + "step": 354000 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003794956200234039, + "loss": 4.649, + "step": 355000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00037938171531961043, + "loss": 4.6508, + "step": 356000 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003792673978380055, + "loss": 4.6515, + "step": 357000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00037915301116867755, + "loss": 4.6502, + "step": 358000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00037903821210187236, + "loss": 4.6446, + "step": 359000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00037892299993410043, + "loss": 4.6457, + "step": 360000 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003788074896220918, + "loss": 4.6399, + "step": 361000 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003786916813583244, + "loss": 4.6416, + "step": 362000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003785755753357728, + "loss": 4.6394, + "step": 363000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00037845917174790744, + "loss": 4.644, + "step": 364000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003783425876381264, + "loss": 4.6455, + "step": 365000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003782255897991082, + "loss": 4.6427, + "step": 366000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00037810841242106534, + "loss": 4.6383, + "step": 367000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003779908211099408, + "loss": 4.6398, + "step": 368000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003778729332078945, + "loss": 4.639, + "step": 369000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00037775474891136603, + "loss": 4.642, + "step": 370000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003776363870456683, + "loss": 4.6378, + "step": 371000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00037751761084737167, + "loss": 4.6251, + "step": 372000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003773986580663642, + "loss": 4.6378, + "step": 373000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003772792907571875, + "loss": 4.638, + "step": 374000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003771598676628421, + "loss": 4.6329, + "step": 375000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003770399103327158, + "loss": 4.6331, + "step": 376000 + }, + { + "epoch": 0.77, + "learning_rate": 0.000376919657996196, + "loss": 4.6307, + "step": 377000 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003767992315479937, + "loss": 4.6366, + "step": 378000 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003766783900948219, + "loss": 4.6312, + "step": 379000 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003765572542376675, + "loss": 4.6322, + "step": 380000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00037643582417838255, + "loss": 4.6272, + "step": 381000 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003763142219901536, + "loss": 4.6261, + "step": 382000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003761922044278193, + "loss": 4.6332, + "step": 383000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003760698932716468, + "loss": 4.6285, + "step": 384000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003759472887254464, + "loss": 4.6315, + "step": 385000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00037582451403762754, + "loss": 4.6252, + "step": 386000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00037570132361763626, + "loss": 4.6238, + "step": 387000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00037557808768022013, + "loss": 4.6309, + "step": 388000 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003754543124991863, + "loss": 4.6227, + "step": 389000 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003753302449538835, + "loss": 4.6264, + "step": 390000 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003752058852510489, + "loss": 4.6297, + "step": 391000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00037508135839531953, + "loss": 4.6229, + "step": 392000 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003749564152912182, + "loss": 4.6277, + "step": 393000 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003748313060326983, + "loss": 4.6258, + "step": 394000 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003747057803592816, + "loss": 4.6298, + "step": 395000 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003745799635688954, + "loss": 4.6275, + "step": 396000 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003744538558711915, + "loss": 4.6305, + "step": 397000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00037432758401983454, + "loss": 4.6254, + "step": 398000 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003742008954287709, + "loss": 4.6232, + "step": 399000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00037407404368583003, + "loss": 4.6243, + "step": 400000 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003739467750449806, + "loss": 4.6271, + "step": 401000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00037381921655191264, + "loss": 4.625, + "step": 402000 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003736914964119172, + "loss": 4.6207, + "step": 403000 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003735633591418774, + "loss": 4.6222, + "step": 404000 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003734349326585155, + "loss": 4.6274, + "step": 405000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00037330621717583185, + "loss": 4.6215, + "step": 406000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00037317734205675264, + "loss": 4.6239, + "step": 407000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00037304817894443345, + "loss": 4.6213, + "step": 408000 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003729185983290953, + "loss": 4.6217, + "step": 409000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00037278872957481737, + "loss": 4.6203, + "step": 410000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00037265870319842543, + "loss": 4.6233, + "step": 411000 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003725282591035563, + "loss": 4.6189, + "step": 412000 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003723976583952915, + "loss": 4.6208, + "step": 413000 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003722666398284116, + "loss": 4.6228, + "step": 414000 + }, + { + "epoch": 0.85, + "learning_rate": 0.000372135334208968, + "loss": 4.6152, + "step": 415000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00037200374175575874, + "loss": 4.6127, + "step": 416000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00037187199471021856, + "loss": 4.6182, + "step": 417000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003717399618422258, + "loss": 4.6196, + "step": 418000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003716075107774151, + "loss": 4.6225, + "step": 419000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00037147477375836516, + "loss": 4.6181, + "step": 420000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003713418841716614, + "loss": 4.6207, + "step": 421000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00037120857619355976, + "loss": 4.6168, + "step": 422000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00037107511666167, + "loss": 4.6148, + "step": 423000 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003709412386121666, + "loss": 4.6241, + "step": 424000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00037080707571865136, + "loss": 4.6167, + "step": 425000 + }, + { + "epoch": 0.87, + "learning_rate": 0.000370672762794291, + "loss": 4.618, + "step": 426000 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003705380311681886, + "loss": 4.6185, + "step": 427000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00037040301536994983, + "loss": 4.6159, + "step": 428000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00037026771562455524, + "loss": 4.6172, + "step": 429000 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003701322678825694, + "loss": 4.6185, + "step": 430000 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003699964012030795, + "loss": 4.6142, + "step": 431000 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003698605238364365, + "loss": 4.619, + "step": 432000 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003697240914104684, + "loss": 4.6125, + "step": 433000 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003695873761686538, + "loss": 4.613, + "step": 434000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00036945037833880495, + "loss": 4.6193, + "step": 435000 + }, + { + "epoch": 0.9, + "learning_rate": 0.00036931337299122744, + "loss": 4.6195, + "step": 436000 + }, + { + "epoch": 0.9, + "learning_rate": 0.00036917581123466377, + "loss": 4.6155, + "step": 437000 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003690379675758677, + "loss": 4.6124, + "step": 438000 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003688998422445319, + "loss": 4.6118, + "step": 439000 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003687617125650919, + "loss": 4.6118, + "step": 440000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00036862302514182444, + "loss": 4.6115, + "step": 441000 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003684841958461244, + "loss": 4.6145, + "step": 442000 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003683449469728375, + "loss": 4.6107, + "step": 443000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00036820541758180987, + "loss": 4.6125, + "step": 444000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00036806574785514423, + "loss": 4.6076, + "step": 445000 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003679256584065426, + "loss": 4.6135, + "step": 446000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00036778528913887205, + "loss": 4.611, + "step": 447000 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003676447810744613, + "loss": 4.6169, + "step": 448000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00036750385315005585, + "loss": 4.6124, + "step": 449000 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003673626461094468, + "loss": 4.6091, + "step": 450000 + }, + { + "epoch": 0.93, + "learning_rate": 0.00036722130181307566, + "loss": 4.6084, + "step": 451000 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003670795375249432, + "loss": 4.6093, + "step": 452000 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003669376370093399, + "loss": 4.6098, + "step": 453000 + }, + { + "epoch": 0.93, + "learning_rate": 0.00036679531641764155, + "loss": 4.6088, + "step": 454000 + }, + { + "epoch": 0.93, + "learning_rate": 0.00036665271789039375, + "loss": 4.6135, + "step": 455000 + }, + { + "epoch": 0.94, + "learning_rate": 0.00036650984166521224, + "loss": 4.6111, + "step": 456000 + }, + { + "epoch": 0.94, + "learning_rate": 0.00036636697456429214, + "loss": 4.6087, + "step": 457000 + }, + { + "epoch": 0.94, + "learning_rate": 0.00036622354421214545, + "loss": 4.6105, + "step": 458000 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003660798368772088, + "loss": 4.6142, + "step": 459000 + }, + { + "epoch": 0.94, + "learning_rate": 0.00036593599692117735, + "loss": 4.6036, + "step": 460000 + }, + { + "epoch": 0.95, + "learning_rate": 0.00036579173661589563, + "loss": 4.6129, + "step": 461000 + }, + { + "epoch": 0.95, + "learning_rate": 0.00036564720004735664, + "loss": 4.6066, + "step": 462000 + }, + { + "epoch": 0.95, + "learning_rate": 0.00036550253240678936, + "loss": 4.6065, + "step": 463000 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003653574443103918, + "loss": 4.6065, + "step": 464000 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003652122261755973, + "loss": 4.6055, + "step": 465000 + }, + { + "epoch": 0.96, + "learning_rate": 0.00036506658751743075, + "loss": 4.6077, + "step": 466000 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003649206738043425, + "loss": 4.6113, + "step": 467000 + }, + { + "epoch": 0.96, + "learning_rate": 0.00036477463160518477, + "loss": 4.6056, + "step": 468000 + }, + { + "epoch": 0.96, + "learning_rate": 0.00036462816878657725, + "loss": 4.608, + "step": 469000 + }, + { + "epoch": 0.97, + "learning_rate": 0.000364481431643597, + "loss": 4.6023, + "step": 470000 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003643345675688004, + "loss": 4.6027, + "step": 471000 + }, + { + "epoch": 0.97, + "learning_rate": 0.00036418728278478005, + "loss": 4.6068, + "step": 472000 + }, + { + "epoch": 0.97, + "learning_rate": 0.00036403972441104724, + "loss": 4.6072, + "step": 473000 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003638920406616534, + "loss": 4.6038, + "step": 474000 + }, + { + "epoch": 0.98, + "learning_rate": 0.00036374393611956704, + "loss": 4.6014, + "step": 475000 + }, + { + "epoch": 0.98, + "learning_rate": 0.00036359555872652883, + "loss": 4.6013, + "step": 476000 + }, + { + "epoch": 0.98, + "learning_rate": 0.00036344705751586385, + "loss": 4.606, + "step": 477000 + }, + { + "epoch": 2.18, + "learning_rate": 0.00024019467959966674, + "loss": 4.4837, + "step": 478000 + }, + { + "epoch": 2.19, + "learning_rate": 0.00023963222037118084, + "loss": 4.4223, + "step": 479000 + }, + { + "epoch": 2.19, + "learning_rate": 0.00023906943468937218, + "loss": 4.39, + "step": 480000 + }, + { + "epoch": 2.2, + "learning_rate": 0.00023850689045664867, + "loss": 4.3772, + "step": 481000 + }, + { + "epoch": 2.2, + "learning_rate": 0.00023794346609281965, + "loss": 4.3726, + "step": 482000 + }, + { + "epoch": 2.2, + "learning_rate": 0.00023737972918605284, + "loss": 4.3631, + "step": 483000 + }, + { + "epoch": 2.21, + "learning_rate": 0.00023681624857694363, + "loss": 4.3497, + "step": 484000 + }, + { + "epoch": 2.21, + "learning_rate": 0.00023625190081838816, + "loss": 4.3482, + "step": 485000 + }, + { + "epoch": 2.22, + "learning_rate": 0.00023568781924440977, + "loss": 4.3443, + "step": 486000 + }, + { + "epoch": 2.22, + "learning_rate": 0.0002351228792097228, + "loss": 4.3383, + "step": 487000 + }, + { + "epoch": 2.23, + "learning_rate": 0.000234558215237771, + "loss": 4.3393, + "step": 488000 + }, + { + "epoch": 2.23, + "learning_rate": 0.0002339927015221048, + "loss": 4.3349, + "step": 489000 + }, + { + "epoch": 2.24, + "learning_rate": 0.00023342690780622, + "loss": 4.3287, + "step": 490000 + }, + { + "epoch": 2.24, + "learning_rate": 0.00023286083875059848, + "loss": 4.3263, + "step": 491000 + }, + { + "epoch": 2.25, + "learning_rate": 0.0002322950654913731, + "loss": 4.3202, + "step": 492000 + }, + { + "epoch": 2.25, + "learning_rate": 0.0002317284600104378, + "loss": 4.3198, + "step": 493000 + }, + { + "epoch": 2.25, + "learning_rate": 0.00023116216017581755, + "loss": 4.3163, + "step": 494000 + }, + { + "epoch": 2.26, + "learning_rate": 0.00023059503691953928, + "loss": 4.3163, + "step": 495000 + }, + { + "epoch": 2.26, + "learning_rate": 0.00023002766164983935, + "loss": 4.3123, + "step": 496000 + }, + { + "epoch": 2.27, + "learning_rate": 0.00022946060678482666, + "loss": 4.3091, + "step": 497000 + }, + { + "epoch": 2.27, + "learning_rate": 0.00022889274175117623, + "loss": 4.3067, + "step": 498000 + }, + { + "epoch": 2.28, + "learning_rate": 0.00022832463872602635, + "loss": 4.3056, + "step": 499000 + }, + { + "epoch": 2.28, + "learning_rate": 0.00022775687084019932, + "loss": 4.3042, + "step": 500000 + }, + { + "epoch": 2.29, + "learning_rate": 0.00022718887477616112, + "loss": 4.306, + "step": 501000 + }, + { + "epoch": 2.29, + "learning_rate": 0.00022662008630440305, + "loss": 4.3007, + "step": 502000 + }, + { + "epoch": 2.3, + "learning_rate": 0.0002260510785611647, + "loss": 4.2996, + "step": 503000 + }, + { + "epoch": 2.3, + "learning_rate": 0.00022548185623340192, + "loss": 4.2993, + "step": 504000 + }, + { + "epoch": 2.3, + "learning_rate": 0.00022491299354534364, + "loss": 4.2962, + "step": 505000 + }, + { + "epoch": 2.31, + "learning_rate": 0.0002243433563192932, + "loss": 4.2954, + "step": 506000 + }, + { + "epoch": 2.31, + "learning_rate": 0.00022377408851168427, + "loss": 4.2964, + "step": 507000 + }, + { + "epoch": 2.32, + "learning_rate": 0.00022320405513710757, + "loss": 4.2958, + "step": 508000 + }, + { + "epoch": 2.32, + "learning_rate": 0.00022263440094754997, + "loss": 4.2934, + "step": 509000 + }, + { + "epoch": 2.33, + "learning_rate": 0.0002220639901872479, + "loss": 4.2935, + "step": 510000 + }, + { + "epoch": 2.33, + "learning_rate": 0.00022149396836606137, + "loss": 4.2906, + "step": 511000 + }, + { + "epoch": 2.34, + "learning_rate": 0.00022092319899525643, + "loss": 4.2897, + "step": 512000 + }, + { + "epoch": 2.34, + "learning_rate": 0.00022035282830486165, + "loss": 4.2916, + "step": 513000 + }, + { + "epoch": 2.35, + "learning_rate": 0.00021978171911058022, + "loss": 4.2891, + "step": 514000 + }, + { + "epoch": 2.35, + "learning_rate": 0.00021921101832488073, + "loss": 4.2867, + "step": 515000 + }, + { + "epoch": 2.35, + "learning_rate": 0.00021863958810533452, + "loss": 4.288, + "step": 516000 + }, + { + "epoch": 2.36, + "learning_rate": 0.00021806800435022003, + "loss": 4.2857, + "step": 517000 + }, + { + "epoch": 2.36, + "learning_rate": 0.00021749684357306648, + "loss": 4.285, + "step": 518000 + }, + { + "epoch": 2.37, + "learning_rate": 0.00021692496701433082, + "loss": 4.2832, + "step": 519000 + }, + { + "epoch": 2.37, + "learning_rate": 0.00021635352312751783, + "loss": 4.2824, + "step": 520000 + }, + { + "epoch": 2.38, + "learning_rate": 0.0002157819448048862, + "loss": 4.2806, + "step": 521000 + }, + { + "epoch": 2.38, + "learning_rate": 0.0002152096644013863, + "loss": 4.2772, + "step": 522000 + }, + { + "epoch": 2.39, + "learning_rate": 0.00021463725871483544, + "loss": 4.2798, + "step": 523000 + }, + { + "epoch": 2.39, + "learning_rate": 0.0002140647324601787, + "loss": 4.2798, + "step": 524000 + }, + { + "epoch": 2.4, + "learning_rate": 0.00021349266305175916, + "loss": 4.2802, + "step": 525000 + }, + { + "epoch": 2.4, + "learning_rate": 0.00021291990991843793, + "loss": 4.2786, + "step": 526000 + }, + { + "epoch": 2.41, + "learning_rate": 0.0002123476232740738, + "loss": 4.2791, + "step": 527000 + }, + { + "epoch": 2.41, + "learning_rate": 0.00021177466211441055, + "loss": 4.274, + "step": 528000 + }, + { + "epoch": 2.41, + "learning_rate": 0.0002112021770710695, + "loss": 4.2765, + "step": 529000 + }, + { + "epoch": 2.42, + "learning_rate": 0.00021062959993907988, + "loss": 4.2751, + "step": 530000 + }, + { + "epoch": 2.42, + "learning_rate": 0.00021005636214541413, + "loss": 4.2751, + "step": 531000 + }, + { + "epoch": 2.43, + "learning_rate": 0.00020948304151680226, + "loss": 4.2744, + "step": 532000 + }, + { + "epoch": 2.43, + "learning_rate": 0.00020891021621191204, + "loss": 4.273, + "step": 533000 + }, + { + "epoch": 2.44, + "learning_rate": 0.00020833674415252564, + "loss": 4.2769, + "step": 534000 + }, + { + "epoch": 2.44, + "learning_rate": 0.00020776320342280467, + "loss": 4.2695, + "step": 535000 + }, + { + "epoch": 2.45, + "learning_rate": 0.00020718959874704363, + "loss": 4.2689, + "step": 536000 + }, + { + "epoch": 2.45, + "learning_rate": 0.00020661650854196894, + "loss": 4.2671, + "step": 537000 + }, + { + "epoch": 2.46, + "learning_rate": 0.0002060427902012143, + "loss": 4.2677, + "step": 538000 + }, + { + "epoch": 2.46, + "learning_rate": 0.0002054701696683469, + "loss": 4.2713, + "step": 539000 + }, + { + "epoch": 2.46, + "learning_rate": 0.00020489635658938387, + "loss": 4.2659, + "step": 540000 + }, + { + "epoch": 2.47, + "learning_rate": 0.0002043225031787951, + "loss": 4.2666, + "step": 541000 + }, + { + "epoch": 2.47, + "learning_rate": 0.00020374861416345058, + "loss": 4.2609, + "step": 542000 + }, + { + "epoch": 2.48, + "learning_rate": 0.0002031746942705136, + "loss": 4.2631, + "step": 543000 + }, + { + "epoch": 2.48, + "learning_rate": 0.0002026013221849334, + "loss": 4.2656, + "step": 544000 + }, + { + "epoch": 2.49, + "learning_rate": 0.0002020273547383406, + "loss": 4.2688, + "step": 545000 + }, + { + "epoch": 2.49, + "learning_rate": 0.00020145394458320146, + "loss": 4.2592, + "step": 546000 + }, + { + "epoch": 2.5, + "learning_rate": 0.00020087994847524482, + "loss": 4.2617, + "step": 547000 + }, + { + "epoch": 2.5, + "learning_rate": 0.00020030651912449513, + "loss": 4.2613, + "step": 548000 + }, + { + "epoch": 2.51, + "learning_rate": 0.00019973251324840986, + "loss": 4.2557, + "step": 549000 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001991590835766299, + "loss": 4.2604, + "step": 550000 + }, + { + "epoch": 2.51, + "learning_rate": 0.00019858508682597277, + "loss": 4.2614, + "step": 551000 + }, + { + "epoch": 2.52, + "learning_rate": 0.00019801167570775345, + "loss": 4.2587, + "step": 552000 + }, + { + "epoch": 2.52, + "learning_rate": 0.0001974377069757808, + "loss": 4.2567, + "step": 553000 + }, + { + "epoch": 2.53, + "learning_rate": 0.000196863759349592, + "loss": 4.2542, + "step": 554000 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001962904114641484, + "loss": 4.2528, + "step": 555000 + }, + { + "epoch": 2.54, + "learning_rate": 0.00019571652019933017, + "loss": 4.2529, + "step": 556000 + }, + { + "epoch": 2.54, + "learning_rate": 0.00019514323805461362, + "loss": 4.2504, + "step": 557000 + }, + { + "epoch": 2.55, + "learning_rate": 0.00019456999583540802, + "loss": 4.2557, + "step": 558000 + }, + { + "epoch": 2.55, + "learning_rate": 0.00019399622450669583, + "loss": 4.2527, + "step": 559000 + }, + { + "epoch": 2.56, + "learning_rate": 0.00019342250263149486, + "loss": 4.2493, + "step": 560000 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001928488349355918, + "loss": 4.2533, + "step": 561000 + }, + { + "epoch": 2.56, + "learning_rate": 0.00019227579972212256, + "loss": 4.251, + "step": 562000 + }, + { + "epoch": 2.57, + "learning_rate": 0.00019170225449436132, + "loss": 4.2442, + "step": 563000 + }, + { + "epoch": 2.57, + "learning_rate": 0.00019112935105686604, + "loss": 4.2508, + "step": 564000 + }, + { + "epoch": 2.58, + "learning_rate": 0.00019055652054145262, + "loss": 4.2482, + "step": 565000 + }, + { + "epoch": 2.58, + "learning_rate": 0.00018998319437138936, + "loss": 4.2453, + "step": 566000 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001894099507104425, + "loss": 4.2467, + "step": 567000 + }, + { + "epoch": 2.59, + "learning_rate": 0.00018883679428045936, + "loss": 4.2429, + "step": 568000 + }, + { + "epoch": 2.6, + "learning_rate": 0.00018826430281954561, + "loss": 4.2436, + "step": 569000 + }, + { + "epoch": 2.6, + "learning_rate": 0.00018769190783313742, + "loss": 4.2462, + "step": 570000 + }, + { + "epoch": 2.61, + "learning_rate": 0.00018711904121225677, + "loss": 4.2429, + "step": 571000 + }, + { + "epoch": 2.61, + "learning_rate": 0.000186546280692719, + "loss": 4.2415, + "step": 572000 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001859736309923917, + "loss": 4.242, + "step": 573000 + }, + { + "epoch": 2.62, + "learning_rate": 0.00018540166930311399, + "loss": 4.2415, + "step": 574000 + }, + { + "epoch": 2.62, + "learning_rate": 0.00018482925526851332, + "loss": 4.2394, + "step": 575000 + }, + { + "epoch": 2.63, + "learning_rate": 0.00018425696619637965, + "loss": 4.2393, + "step": 576000 + }, + { + "epoch": 2.63, + "learning_rate": 0.00018368537889375085, + "loss": 4.2374, + "step": 577000 + }, + { + "epoch": 2.64, + "learning_rate": 0.00018311335375069304, + "loss": 4.2376, + "step": 578000 + }, + { + "epoch": 2.64, + "learning_rate": 0.00018254203951910075, + "loss": 4.2361, + "step": 579000 + }, + { + "epoch": 2.65, + "learning_rate": 0.00018197029713347917, + "loss": 4.2363, + "step": 580000 + }, + { + "epoch": 2.65, + "learning_rate": 0.000181399274777884, + "loss": 4.2322, + "step": 581000 + }, + { + "epoch": 2.66, + "learning_rate": 0.00018082783396875207, + "loss": 4.235, + "step": 582000 + }, + { + "epoch": 2.66, + "learning_rate": 0.00018025655108206925, + "loss": 4.2327, + "step": 583000 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001796854308235321, + "loss": 4.2323, + "step": 584000 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001791150487652753, + "loss": 4.2297, + "step": 585000 + }, + { + "epoch": 2.67, + "learning_rate": 0.00017854426770033718, + "loss": 4.2339, + "step": 586000 + }, + { + "epoch": 2.68, + "learning_rate": 0.00017797423388223084, + "loss": 4.2315, + "step": 587000 + }, + { + "epoch": 2.68, + "learning_rate": 0.00017740381079830306, + "loss": 4.2289, + "step": 588000 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001768341439831626, + "loss": 4.2285, + "step": 589000 + }, + { + "epoch": 2.69, + "learning_rate": 0.00017626409765587338, + "loss": 4.2273, + "step": 590000 + }, + { + "epoch": 2.7, + "learning_rate": 0.000175694246842843, + "loss": 4.2272, + "step": 591000 + }, + { + "epoch": 2.7, + "learning_rate": 0.00017512459623797167, + "loss": 4.2267, + "step": 592000 + }, + { + "epoch": 2.71, + "learning_rate": 0.00017455571987530613, + "loss": 4.2242, + "step": 593000 + }, + { + "epoch": 2.71, + "learning_rate": 0.00017398648354988546, + "loss": 4.2238, + "step": 594000 + }, + { + "epoch": 2.72, + "learning_rate": 0.00017341803041304732, + "loss": 4.2245, + "step": 595000 + }, + { + "epoch": 2.72, + "learning_rate": 0.00017284922710364303, + "loss": 4.2219, + "step": 596000 + }, + { + "epoch": 2.72, + "learning_rate": 0.00017228121590341918, + "loss": 4.2215, + "step": 597000 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001717128643323442, + "loss": 4.2196, + "step": 598000 + }, + { + "epoch": 2.73, + "learning_rate": 0.00017114474576434977, + "loss": 4.2186, + "step": 599000 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017057686487906743, + "loss": 4.2218, + "step": 600000 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017000922635417116, + "loss": 4.2175, + "step": 601000 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016944183486533842, + "loss": 4.2174, + "step": 602000 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016887582911145858, + "loss": 4.2208, + "step": 603000 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016830894519618436, + "loss": 4.2176, + "step": 604000 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016774232232230643, + "loss": 4.2131, + "step": 605000 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016717596515713635, + "loss": 4.2148, + "step": 606000 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016661044431598456, + "loss": 4.2163, + "step": 607000 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001660446322840068, + "loss": 4.2121, + "step": 608000 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001654796653358085, + "loss": 4.2126, + "step": 609000 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001649144170608772, + "loss": 4.2111, + "step": 610000 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016435002260167044, + "loss": 4.2093, + "step": 611000 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001637853566890836, + "loss": 4.2104, + "step": 612000 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016322155329606282, + "loss": 4.2104, + "step": 613000 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016265748833194975, + "loss": 4.2095, + "step": 614000 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016209373096067142, + "loss": 4.2061, + "step": 615000 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016153141240150847, + "loss": 4.2059, + "step": 616000 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001609682835060673, + "loss": 4.2093, + "step": 617000 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016040547611755718, + "loss": 4.2025, + "step": 618000 + }, + { + "epoch": 2.82, + "learning_rate": 0.00015984299487186134, + "loss": 4.2069, + "step": 619000 + }, + { + "epoch": 2.83, + "learning_rate": 0.00015928140638588216, + "loss": 4.2031, + "step": 620000 + }, + { + "epoch": 2.83, + "learning_rate": 0.00015872015263128903, + "loss": 4.2021, + "step": 621000 + }, + { + "epoch": 2.84, + "learning_rate": 0.00015815867691759442, + "loss": 4.2014, + "step": 622000 + }, + { + "epoch": 2.84, + "learning_rate": 0.00015759754585375357, + "loss": 4.2014, + "step": 623000 + }, + { + "epoch": 2.85, + "learning_rate": 0.00015703676406184148, + "loss": 4.2015, + "step": 624000 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001564768964106519, + "loss": 4.2018, + "step": 625000 + }, + { + "epoch": 2.86, + "learning_rate": 0.000155917386545611, + "loss": 4.2003, + "step": 626000 + }, + { + "epoch": 2.86, + "learning_rate": 0.00015535767954213264, + "loss": 4.1976, + "step": 627000 + }, + { + "epoch": 2.87, + "learning_rate": 0.00015479834026051583, + "loss": 4.1972, + "step": 628000 + }, + { + "epoch": 2.87, + "learning_rate": 0.00015423937330807675, + "loss": 4.1957, + "step": 629000 + }, + { + "epoch": 2.88, + "learning_rate": 0.00015368134168927352, + "loss": 4.1951, + "step": 630000 + }, + { + "epoch": 2.88, + "learning_rate": 0.00015312313282100077, + "loss": 4.1952, + "step": 631000 + }, + { + "epoch": 2.88, + "learning_rate": 0.00015256586770904422, + "loss": 4.1928, + "step": 632000 + }, + { + "epoch": 2.89, + "learning_rate": 0.00015200843529853173, + "loss": 4.1941, + "step": 633000 + }, + { + "epoch": 2.89, + "learning_rate": 0.00015145195503595184, + "loss": 4.1938, + "step": 634000 + }, + { + "epoch": 2.9, + "learning_rate": 0.00015089531743123636, + "loss": 4.1933, + "step": 635000 + }, + { + "epoch": 2.9, + "learning_rate": 0.00015033964033472967, + "loss": 4.1919, + "step": 636000 + }, + { + "epoch": 2.91, + "learning_rate": 0.00014978381585768676, + "loss": 4.191, + "step": 637000 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001492289602175133, + "loss": 4.1907, + "step": 638000 + }, + { + "epoch": 2.92, + "learning_rate": 0.00014867396716325404, + "loss": 4.1906, + "step": 639000 + }, + { + "epoch": 2.92, + "learning_rate": 0.00014811995124263547, + "loss": 4.19, + "step": 640000 + }, + { + "epoch": 2.93, + "learning_rate": 0.00014756580787890456, + "loss": 4.1854, + "step": 641000 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001470126499134229, + "loss": 4.1862, + "step": 642000 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001464593744799972, + "loss": 4.1804, + "step": 643000 + }, + { + "epoch": 2.94, + "learning_rate": 0.00014590709267699477, + "loss": 4.1812, + "step": 644000 + }, + { + "epoch": 2.94, + "learning_rate": 0.00014535470338508303, + "loss": 4.1811, + "step": 645000 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001448033159230627, + "loss": 4.1812, + "step": 646000 + }, + { + "epoch": 2.95, + "learning_rate": 0.00014425238221106002, + "loss": 4.1827, + "step": 647000 + }, + { + "epoch": 2.96, + "learning_rate": 0.00014370135598273356, + "loss": 4.1792, + "step": 648000 + }, + { + "epoch": 2.96, + "learning_rate": 0.00014315079349020695, + "loss": 4.1829, + "step": 649000 + }, + { + "epoch": 2.97, + "learning_rate": 0.00014260069926850117, + "loss": 4.1798, + "step": 650000 + }, + { + "epoch": 2.97, + "learning_rate": 0.00014205162723252818, + "loss": 4.1813, + "step": 651000 + }, + { + "epoch": 2.98, + "learning_rate": 0.00014150248266247203, + "loss": 4.1771, + "step": 652000 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001409543683610207, + "loss": 4.1744, + "step": 653000 + }, + { + "epoch": 2.98, + "learning_rate": 0.00014040673945025616, + "loss": 4.1791, + "step": 654000 + }, + { + "epoch": 2.99, + "learning_rate": 0.00013985905299225343, + "loss": 4.1795, + "step": 655000 + }, + { + "epoch": 2.99, + "learning_rate": 0.00013931186191936434, + "loss": 4.1764, + "step": 656000 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001387651707388392, + "loss": 4.1717, + "step": 657000 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001382195298871527, + "loss": 4.1585, + "step": 658000 + }, + { + "epoch": 3.01, + "learning_rate": 0.00013767385148545907, + "loss": 4.1503, + "step": 659000 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001371292313756203, + "loss": 4.1517, + "step": 660000 + }, + { + "epoch": 3.02, + "learning_rate": 0.00013658458371390849, + "loss": 4.1508, + "step": 661000 + }, + { + "epoch": 3.02, + "learning_rate": 0.00013604100227223385, + "loss": 4.1545, + "step": 662000 + }, + { + "epoch": 3.03, + "learning_rate": 0.00013549740327772723, + "loss": 4.1505, + "step": 663000 + }, + { + "epoch": 3.03, + "learning_rate": 0.00013495542119768334, + "loss": 4.1496, + "step": 664000 + }, + { + "epoch": 3.03, + "learning_rate": 0.00013441288822507396, + "loss": 4.1504, + "step": 665000 + }, + { + "epoch": 3.04, + "learning_rate": 0.0001338708954980116, + "loss": 4.1514, + "step": 666000 + }, + { + "epoch": 3.04, + "learning_rate": 0.0001333299886553773, + "loss": 4.1504, + "step": 667000 + }, + { + "epoch": 3.05, + "learning_rate": 0.00013278908925682, + "loss": 4.1553, + "step": 668000 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001322487434791535, + "loss": 4.154, + "step": 669000 + }, + { + "epoch": 3.06, + "learning_rate": 0.00013170895577324293, + "loss": 4.1501, + "step": 670000 + }, + { + "epoch": 3.06, + "learning_rate": 0.00013117026952808839, + "loss": 4.1481, + "step": 671000 + }, + { + "epoch": 3.07, + "learning_rate": 0.00013063161073068494, + "loss": 4.153, + "step": 672000 + }, + { + "epoch": 3.07, + "learning_rate": 0.00013009406112599048, + "loss": 4.148, + "step": 673000 + }, + { + "epoch": 3.08, + "learning_rate": 0.00012955708619025508, + "loss": 4.1457, + "step": 674000 + }, + { + "epoch": 3.08, + "learning_rate": 0.00012902015369654687, + "loss": 4.1496, + "step": 675000 + }, + { + "epoch": 3.09, + "learning_rate": 0.00012848434192302686, + "loss": 4.1481, + "step": 676000 + }, + { + "epoch": 3.09, + "learning_rate": 0.00012794858258770753, + "loss": 4.148, + "step": 677000 + }, + { + "epoch": 3.09, + "learning_rate": 0.00012741341674486485, + "loss": 4.1484, + "step": 678000 + }, + { + "epoch": 3.1, + "learning_rate": 0.00012687884880269694, + "loss": 4.1446, + "step": 679000 + }, + { + "epoch": 3.1, + "learning_rate": 0.00012634541682779958, + "loss": 4.1428, + "step": 680000 + }, + { + "epoch": 3.11, + "learning_rate": 0.00012581205728294073, + "loss": 4.1455, + "step": 681000 + }, + { + "epoch": 3.11, + "learning_rate": 0.00012527984127101713, + "loss": 4.1422, + "step": 682000 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001247477076791393, + "loss": 4.1427, + "step": 683000 + }, + { + "epoch": 3.12, + "learning_rate": 0.00012421672514822168, + "loss": 4.1434, + "step": 684000 + }, + { + "epoch": 3.13, + "learning_rate": 0.00012368583502464424, + "loss": 4.1414, + "step": 685000 + }, + { + "epoch": 3.13, + "learning_rate": 0.00012315610345216445, + "loss": 4.1437, + "step": 686000 + }, + { + "epoch": 3.14, + "learning_rate": 0.00012262647427127763, + "loss": 4.1419, + "step": 687000 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001220980110934919, + "loss": 4.1379, + "step": 688000 + }, + { + "epoch": 3.14, + "learning_rate": 0.00012156966028818173, + "loss": 4.1382, + "step": 689000 + }, + { + "epoch": 3.15, + "learning_rate": 0.00012104248289959676, + "loss": 4.1365, + "step": 690000 + }, + { + "epoch": 3.15, + "learning_rate": 0.00012051542786067112, + "loss": 4.1394, + "step": 691000 + }, + { + "epoch": 3.16, + "learning_rate": 0.00011998955361347148, + "loss": 4.1366, + "step": 692000 + }, + { + "epoch": 3.16, + "learning_rate": 0.00011946381168908787, + "loss": 4.1347, + "step": 693000 + }, + { + "epoch": 3.17, + "learning_rate": 0.00011893873314682198, + "loss": 4.1357, + "step": 694000 + }, + { + "epoch": 3.17, + "learning_rate": 0.00011841432231178195, + "loss": 4.1337, + "step": 695000 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001178911069052703, + "loss": 4.1347, + "step": 696000 + }, + { + "epoch": 3.18, + "learning_rate": 0.00011736804375947676, + "loss": 4.1351, + "step": 697000 + }, + { + "epoch": 3.19, + "learning_rate": 0.00011684618329987129, + "loss": 4.1297, + "step": 698000 + }, + { + "epoch": 3.19, + "learning_rate": 0.00011632448506008744, + "loss": 4.1351, + "step": 699000 + }, + { + "epoch": 3.19, + "learning_rate": 0.00011580399672456457, + "loss": 4.1329, + "step": 700000 + }, + { + "epoch": 3.2, + "learning_rate": 0.00011528368056262728, + "loss": 4.1313, + "step": 701000 + }, + { + "epoch": 3.2, + "learning_rate": 0.00011476458148319966, + "loss": 4.1265, + "step": 702000 + }, + { + "epoch": 3.21, + "learning_rate": 0.00011424566452545455, + "loss": 4.129, + "step": 703000 + }, + { + "epoch": 3.21, + "learning_rate": 0.00011372797178840713, + "loss": 4.1299, + "step": 704000 + }, + { + "epoch": 3.22, + "learning_rate": 0.00011321047111514422, + "loss": 4.1257, + "step": 705000 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001126942017604717, + "loss": 4.1281, + "step": 706000 + }, + { + "epoch": 3.23, + "learning_rate": 0.00011217813440536418, + "loss": 4.1266, + "step": 707000 + }, + { + "epoch": 3.23, + "learning_rate": 0.00011166279044499894, + "loss": 4.1249, + "step": 708000 + }, + { + "epoch": 3.24, + "learning_rate": 0.00011114817412429949, + "loss": 4.1247, + "step": 709000 + }, + { + "epoch": 3.24, + "learning_rate": 0.00011063428968219605, + "loss": 4.1229, + "step": 710000 + }, + { + "epoch": 3.24, + "learning_rate": 0.00011012114135158998, + "loss": 4.1245, + "step": 711000 + }, + { + "epoch": 3.25, + "learning_rate": 0.00010960924539610728, + "loss": 4.1261, + "step": 712000 + }, + { + "epoch": 3.25, + "learning_rate": 0.00010909758121624652, + "loss": 4.1228, + "step": 713000 + }, + { + "epoch": 3.26, + "learning_rate": 0.00010858717634585534, + "loss": 4.1197, + "step": 714000 + }, + { + "epoch": 3.26, + "learning_rate": 0.00010807701315830314, + "loss": 4.1174, + "step": 715000 + }, + { + "epoch": 3.27, + "learning_rate": 0.00010756862520028245, + "loss": 4.1188, + "step": 716000 + }, + { + "epoch": 3.27, + "learning_rate": 0.00010705997903373485, + "loss": 4.1191, + "step": 717000 + }, + { + "epoch": 3.28, + "learning_rate": 0.00010655209842052723, + "loss": 4.114, + "step": 718000 + }, + { + "epoch": 3.28, + "learning_rate": 0.00010604549426910888, + "loss": 4.1175, + "step": 719000 + }, + { + "epoch": 3.29, + "learning_rate": 0.00010553915653058473, + "loss": 4.1166, + "step": 720000 + }, + { + "epoch": 3.29, + "learning_rate": 0.00010503359687251983, + "loss": 4.1143, + "step": 721000 + }, + { + "epoch": 3.29, + "learning_rate": 0.00010452881945924391, + "loss": 4.1152, + "step": 722000 + }, + { + "epoch": 3.3, + "learning_rate": 0.00010402533204546334, + "loss": 4.1116, + "step": 723000 + }, + { + "epoch": 3.3, + "learning_rate": 0.00010352213079632074, + "loss": 4.1111, + "step": 724000 + }, + { + "epoch": 3.31, + "learning_rate": 0.00010301972424201705, + "loss": 4.1103, + "step": 725000 + }, + { + "epoch": 3.31, + "learning_rate": 0.00010251861772823774, + "loss": 4.1111, + "step": 726000 + }, + { + "epoch": 3.32, + "learning_rate": 0.00010201781216707713, + "loss": 4.1106, + "step": 727000 + }, + { + "epoch": 3.32, + "learning_rate": 0.00010151831328589558, + "loss": 4.1069, + "step": 728000 + }, + { + "epoch": 3.33, + "learning_rate": 0.00010101962398354699, + "loss": 4.1067, + "step": 729000 + }, + { + "epoch": 3.33, + "learning_rate": 0.00010052174835955799, + "loss": 4.1035, + "step": 730000 + }, + { + "epoch": 3.34, + "learning_rate": 0.00010002419336242872, + "loss": 4.1095, + "step": 731000 + }, + { + "epoch": 3.34, + "learning_rate": 9.952746187288931e-05, + "loss": 4.1049, + "step": 732000 + }, + { + "epoch": 3.35, + "learning_rate": 9.903155798255135e-05, + "loss": 4.0988, + "step": 733000 + }, + { + "epoch": 3.35, + "learning_rate": 9.853648577620898e-05, + "loss": 4.1043, + "step": 734000 + }, + { + "epoch": 3.35, + "learning_rate": 9.804274314943199e-05, + "loss": 4.1043, + "step": 735000 + }, + { + "epoch": 3.36, + "learning_rate": 9.754934569616405e-05, + "loss": 4.1018, + "step": 736000 + }, + { + "epoch": 3.36, + "learning_rate": 9.705777639819362e-05, + "loss": 4.099, + "step": 737000 + }, + { + "epoch": 3.37, + "learning_rate": 9.656606908833878e-05, + "loss": 4.1011, + "step": 738000 + }, + { + "epoch": 3.37, + "learning_rate": 9.6075213770881e-05, + "loss": 4.1025, + "step": 739000 + }, + { + "epoch": 3.38, + "learning_rate": 9.558570405937759e-05, + "loss": 4.1005, + "step": 740000 + }, + { + "epoch": 3.38, + "learning_rate": 9.509656398720454e-05, + "loss": 4.0979, + "step": 741000 + }, + { + "epoch": 3.39, + "learning_rate": 9.46082880118432e-05, + "loss": 4.0973, + "step": 742000 + }, + { + "epoch": 3.39, + "learning_rate": 9.412088015525628e-05, + "loss": 4.0912, + "step": 743000 + }, + { + "epoch": 3.4, + "learning_rate": 9.363434443225589e-05, + "loss": 4.0913, + "step": 744000 + }, + { + "epoch": 3.4, + "learning_rate": 9.31486848504702e-05, + "loss": 4.0922, + "step": 745000 + }, + { + "epoch": 3.4, + "learning_rate": 9.266390541031052e-05, + "loss": 4.095, + "step": 746000 + }, + { + "epoch": 3.41, + "learning_rate": 9.218049355729118e-05, + "loss": 4.0916, + "step": 747000 + }, + { + "epoch": 3.41, + "learning_rate": 9.169748548247643e-05, + "loss": 4.0896, + "step": 748000 + }, + { + "epoch": 3.42, + "learning_rate": 9.121585117197211e-05, + "loss": 4.0896, + "step": 749000 + }, + { + "epoch": 3.42, + "learning_rate": 9.073463036084202e-05, + "loss": 4.0849, + "step": 750000 + }, + { + "epoch": 3.43, + "learning_rate": 9.025430957607068e-05, + "loss": 4.0861, + "step": 751000 + }, + { + "epoch": 3.43, + "learning_rate": 8.977489277409341e-05, + "loss": 4.0873, + "step": 752000 + }, + { + "epoch": 3.44, + "learning_rate": 8.929686195794506e-05, + "loss": 4.083, + "step": 753000 + }, + { + "epoch": 3.44, + "learning_rate": 8.882021833036489e-05, + "loss": 4.0873, + "step": 754000 + }, + { + "epoch": 3.45, + "learning_rate": 8.834353438745977e-05, + "loss": 4.083, + "step": 755000 + }, + { + "epoch": 3.45, + "learning_rate": 8.786824547005008e-05, + "loss": 4.0832, + "step": 756000 + }, + { + "epoch": 3.45, + "learning_rate": 8.739340396441291e-05, + "loss": 4.0828, + "step": 757000 + }, + { + "epoch": 3.46, + "learning_rate": 8.691949000704588e-05, + "loss": 4.087, + "step": 758000 + }, + { + "epoch": 3.46, + "learning_rate": 8.644650750161096e-05, + "loss": 4.0797, + "step": 759000 + }, + { + "epoch": 3.47, + "learning_rate": 8.597446034409749e-05, + "loss": 4.0808, + "step": 760000 + }, + { + "epoch": 3.47, + "learning_rate": 8.55033524227903e-05, + "loss": 4.0762, + "step": 761000 + }, + { + "epoch": 3.48, + "learning_rate": 8.503365731066581e-05, + "loss": 4.0769, + "step": 762000 + }, + { + "epoch": 3.48, + "learning_rate": 8.456443854672643e-05, + "loss": 4.0744, + "step": 763000 + }, + { + "epoch": 3.49, + "learning_rate": 8.409617063343962e-05, + "loss": 4.0785, + "step": 764000 + }, + { + "epoch": 3.49, + "learning_rate": 8.362885742796067e-05, + "loss": 4.074, + "step": 765000 + }, + { + "epoch": 3.5, + "learning_rate": 8.316296865415034e-05, + "loss": 4.0752, + "step": 766000 + }, + { + "epoch": 3.5, + "learning_rate": 8.269757543994949e-05, + "loss": 4.0721, + "step": 767000 + }, + { + "epoch": 3.5, + "learning_rate": 8.223314845388103e-05, + "loss": 4.0745, + "step": 768000 + }, + { + "epoch": 3.51, + "learning_rate": 8.176969152146221e-05, + "loss": 4.0721, + "step": 769000 + }, + { + "epoch": 3.51, + "learning_rate": 8.130767045556329e-05, + "loss": 4.0721, + "step": 770000 + }, + { + "epoch": 3.52, + "learning_rate": 8.084616409542043e-05, + "loss": 4.0681, + "step": 771000 + }, + { + "epoch": 3.52, + "learning_rate": 8.038609924698259e-05, + "loss": 4.0728, + "step": 772000 + }, + { + "epoch": 3.53, + "learning_rate": 7.992701769691633e-05, + "loss": 4.0687, + "step": 773000 + }, + { + "epoch": 3.53, + "learning_rate": 7.946846516190165e-05, + "loss": 4.0675, + "step": 774000 + }, + { + "epoch": 3.54, + "learning_rate": 7.90109054534227e-05, + "loss": 4.0665, + "step": 775000 + }, + { + "epoch": 3.54, + "learning_rate": 7.855434234043022e-05, + "loss": 4.0655, + "step": 776000 + }, + { + "epoch": 3.55, + "learning_rate": 7.80992346454953e-05, + "loss": 4.0671, + "step": 777000 + }, + { + "epoch": 3.55, + "learning_rate": 7.764512904833741e-05, + "loss": 4.0638, + "step": 778000 + }, + { + "epoch": 3.56, + "learning_rate": 7.71915762338268e-05, + "loss": 4.0613, + "step": 779000 + }, + { + "epoch": 3.56, + "learning_rate": 7.673948703544935e-05, + "loss": 4.0572, + "step": 780000 + }, + { + "epoch": 3.56, + "learning_rate": 7.628796009423646e-05, + "loss": 4.0639, + "step": 781000 + }, + { + "epoch": 3.57, + "learning_rate": 7.583745217759814e-05, + "loss": 4.0613, + "step": 782000 + }, + { + "epoch": 3.57, + "learning_rate": 7.538841596949084e-05, + "loss": 4.0572, + "step": 783000 + }, + { + "epoch": 3.58, + "learning_rate": 7.493995619788687e-05, + "loss": 4.0566, + "step": 784000 + }, + { + "epoch": 3.58, + "learning_rate": 7.44925265544582e-05, + "loss": 4.0562, + "step": 785000 + }, + { + "epoch": 3.59, + "learning_rate": 7.404613072471351e-05, + "loss": 4.056, + "step": 786000 + }, + { + "epoch": 3.59, + "learning_rate": 7.360077238564593e-05, + "loss": 4.0507, + "step": 787000 + }, + { + "epoch": 3.6, + "learning_rate": 7.315645520570287e-05, + "loss": 4.0505, + "step": 788000 + }, + { + "epoch": 3.6, + "learning_rate": 7.271362559401307e-05, + "loss": 4.0534, + "step": 789000 + }, + { + "epoch": 3.61, + "learning_rate": 7.22714006530347e-05, + "loss": 4.0539, + "step": 790000 + }, + { + "epoch": 3.61, + "learning_rate": 7.183066846739989e-05, + "loss": 4.0536, + "step": 791000 + }, + { + "epoch": 3.61, + "learning_rate": 7.139055032133843e-05, + "loss": 4.0522, + "step": 792000 + }, + { + "epoch": 3.62, + "learning_rate": 7.095236859806331e-05, + "loss": 4.05, + "step": 793000 + }, + { + "epoch": 3.62, + "learning_rate": 7.051437066874354e-05, + "loss": 4.0474, + "step": 794000 + }, + { + "epoch": 3.63, + "learning_rate": 7.007743932145127e-05, + "loss": 4.0424, + "step": 795000 + }, + { + "epoch": 3.63, + "learning_rate": 6.964157815522e-05, + "loss": 4.0445, + "step": 796000 + }, + { + "epoch": 3.64, + "learning_rate": 6.920679076026799e-05, + "loss": 4.0437, + "step": 797000 + }, + { + "epoch": 3.64, + "learning_rate": 6.877308071796904e-05, + "loss": 4.0428, + "step": 798000 + }, + { + "epoch": 3.65, + "learning_rate": 6.834088368883074e-05, + "loss": 4.0413, + "step": 799000 + }, + { + "epoch": 3.65, + "learning_rate": 6.790933797416663e-05, + "loss": 4.0423, + "step": 800000 + }, + { + "epoch": 3.66, + "learning_rate": 6.747888029936322e-05, + "loss": 4.0412, + "step": 801000 + }, + { + "epoch": 3.66, + "learning_rate": 6.704994302979443e-05, + "loss": 4.0394, + "step": 802000 + }, + { + "epoch": 3.66, + "learning_rate": 6.66216709659637e-05, + "loss": 4.0399, + "step": 803000 + }, + { + "epoch": 3.67, + "learning_rate": 6.619449754859523e-05, + "loss": 4.0385, + "step": 804000 + }, + { + "epoch": 3.67, + "learning_rate": 6.576885181589794e-05, + "loss": 4.0375, + "step": 805000 + }, + { + "epoch": 3.68, + "learning_rate": 6.534388513092143e-05, + "loss": 4.0376, + "step": 806000 + }, + { + "epoch": 3.68, + "learning_rate": 6.492002761761704e-05, + "loss": 4.0367, + "step": 807000 + }, + { + "epoch": 3.69, + "learning_rate": 6.44972827673282e-05, + "loss": 4.0361, + "step": 808000 + }, + { + "epoch": 3.69, + "learning_rate": 6.40760751322673e-05, + "loss": 4.0294, + "step": 809000 + }, + { + "epoch": 3.7, + "learning_rate": 6.365556492400127e-05, + "loss": 4.0324, + "step": 810000 + }, + { + "epoch": 3.7, + "learning_rate": 6.323659661921848e-05, + "loss": 4.0314, + "step": 811000 + }, + { + "epoch": 3.71, + "learning_rate": 6.281833489418096e-05, + "loss": 4.0306, + "step": 812000 + }, + { + "epoch": 3.71, + "learning_rate": 6.240161971012996e-05, + "loss": 4.0301, + "step": 813000 + }, + { + "epoch": 3.71, + "learning_rate": 6.198562023551751e-05, + "loss": 4.0285, + "step": 814000 + }, + { + "epoch": 3.72, + "learning_rate": 6.157158618328416e-05, + "loss": 4.0266, + "step": 815000 + }, + { + "epoch": 3.72, + "learning_rate": 6.115786150987899e-05, + "loss": 4.0292, + "step": 816000 + }, + { + "epoch": 3.73, + "learning_rate": 6.0745280488710155e-05, + "loss": 4.0234, + "step": 817000 + }, + { + "epoch": 3.73, + "learning_rate": 6.0333846518236035e-05, + "loss": 4.0236, + "step": 818000 + }, + { + "epoch": 3.74, + "learning_rate": 5.9923562987466307e-05, + "loss": 4.0237, + "step": 819000 + }, + { + "epoch": 3.74, + "learning_rate": 5.951484182819116e-05, + "loss": 4.021, + "step": 820000 + }, + { + "epoch": 3.75, + "learning_rate": 5.910727554160531e-05, + "loss": 4.0171, + "step": 821000 + }, + { + "epoch": 3.75, + "learning_rate": 5.8700461244659956e-05, + "loss": 4.0197, + "step": 822000 + }, + { + "epoch": 3.76, + "learning_rate": 5.829481084172575e-05, + "loss": 4.0153, + "step": 823000 + }, + { + "epoch": 3.76, + "learning_rate": 5.789032767417306e-05, + "loss": 4.0209, + "step": 824000 + }, + { + "epoch": 3.76, + "learning_rate": 5.748701507375753e-05, + "loss": 4.0169, + "step": 825000 + }, + { + "epoch": 3.77, + "learning_rate": 5.708487636259276e-05, + "loss": 4.0142, + "step": 826000 + }, + { + "epoch": 3.77, + "learning_rate": 5.6684315225520025e-05, + "loss": 4.0168, + "step": 827000 + }, + { + "epoch": 3.78, + "learning_rate": 5.628453303834178e-05, + "loss": 4.0179, + "step": 828000 + }, + { + "epoch": 3.78, + "learning_rate": 5.588633265133554e-05, + "loss": 4.0114, + "step": 829000 + }, + { + "epoch": 3.79, + "learning_rate": 5.5489316965551574e-05, + "loss": 4.0113, + "step": 830000 + }, + { + "epoch": 3.79, + "learning_rate": 5.5093093617013605e-05, + "loss": 4.0101, + "step": 831000 + }, + { + "epoch": 3.8, + "learning_rate": 5.469806387662206e-05, + "loss": 4.012, + "step": 832000 + }, + { + "epoch": 3.8, + "learning_rate": 5.4304230998263825e-05, + "loss": 4.011, + "step": 833000 + }, + { + "epoch": 3.81, + "learning_rate": 5.391199025820963e-05, + "loss": 4.0085, + "step": 834000 + }, + { + "epoch": 3.81, + "learning_rate": 5.352055962116598e-05, + "loss": 4.0103, + "step": 835000 + }, + { + "epoch": 3.82, + "learning_rate": 5.313033554533935e-05, + "loss": 4.007, + "step": 836000 + }, + { + "epoch": 3.82, + "learning_rate": 5.2741321245032015e-05, + "loss": 4.0057, + "step": 837000 + }, + { + "epoch": 3.82, + "learning_rate": 5.235429431454388e-05, + "loss": 4.0023, + "step": 838000 + }, + { + "epoch": 3.83, + "learning_rate": 5.196770673276694e-05, + "loss": 4.0024, + "step": 839000 + }, + { + "epoch": 3.83, + "learning_rate": 5.158233850316285e-05, + "loss": 4.0052, + "step": 840000 + }, + { + "epoch": 3.84, + "learning_rate": 5.119895986925622e-05, + "loss": 3.9984, + "step": 841000 + }, + { + "epoch": 3.84, + "learning_rate": 5.0816037402308914e-05, + "loss": 4.0024, + "step": 842000 + }, + { + "epoch": 3.85, + "learning_rate": 5.0434343773913936e-05, + "loss": 3.9997, + "step": 843000 + }, + { + "epoch": 3.85, + "learning_rate": 5.005388212810789e-05, + "loss": 4.0001, + "step": 844000 + }, + { + "epoch": 3.86, + "learning_rate": 4.967465559877949e-05, + "loss": 3.9953, + "step": 845000 + }, + { + "epoch": 3.86, + "learning_rate": 4.929666730964366e-05, + "loss": 3.9974, + "step": 846000 + }, + { + "epoch": 3.87, + "learning_rate": 4.8920296500061624e-05, + "loss": 3.9978, + "step": 847000 + }, + { + "epoch": 3.87, + "learning_rate": 4.854479277562882e-05, + "loss": 3.9965, + "step": 848000 + }, + { + "epoch": 3.87, + "learning_rate": 4.8170910230147306e-05, + "loss": 3.9938, + "step": 849000 + }, + { + "epoch": 3.88, + "learning_rate": 4.77979034302229e-05, + "loss": 3.9955, + "step": 850000 + }, + { + "epoch": 3.88, + "learning_rate": 4.7426521455285876e-05, + "loss": 3.9919, + "step": 851000 + }, + { + "epoch": 3.89, + "learning_rate": 4.705602385748844e-05, + "loss": 3.9902, + "step": 852000 + }, + { + "epoch": 3.89, + "learning_rate": 4.668678606973318e-05, + "loss": 3.9888, + "step": 853000 + }, + { + "epoch": 3.9, + "learning_rate": 4.631881113345728e-05, + "loss": 3.9864, + "step": 854000 + }, + { + "epoch": 3.9, + "learning_rate": 4.5952834232442806e-05, + "loss": 3.9886, + "step": 855000 + }, + { + "epoch": 3.91, + "learning_rate": 4.5587391540988944e-05, + "loss": 3.9877, + "step": 856000 + }, + { + "epoch": 3.91, + "learning_rate": 4.5223220756802585e-05, + "loss": 3.9858, + "step": 857000 + }, + { + "epoch": 3.92, + "learning_rate": 4.4860324879583624e-05, + "loss": 3.9823, + "step": 858000 + }, + { + "epoch": 3.92, + "learning_rate": 4.4498706898530285e-05, + "loss": 3.9823, + "step": 859000 + }, + { + "epoch": 3.92, + "learning_rate": 4.413836979231471e-05, + "loss": 3.9826, + "step": 860000 + }, + { + "epoch": 3.93, + "learning_rate": 4.3779674940056856e-05, + "loss": 3.9845, + "step": 861000 + }, + { + "epoch": 3.93, + "learning_rate": 4.342190718903205e-05, + "loss": 3.9797, + "step": 862000 + }, + { + "epoch": 3.94, + "learning_rate": 4.3066140849412765e-05, + "loss": 3.9826, + "step": 863000 + }, + { + "epoch": 3.94, + "learning_rate": 4.271095293545859e-05, + "loss": 3.9786, + "step": 864000 + }, + { + "epoch": 3.95, + "learning_rate": 4.235706062219449e-05, + "loss": 3.9789, + "step": 865000 + }, + { + "epoch": 3.95, + "learning_rate": 4.200481876887719e-05, + "loss": 3.9804, + "step": 866000 + }, + { + "epoch": 3.96, + "learning_rate": 4.165352508853595e-05, + "loss": 3.9759, + "step": 867000 + }, + { + "epoch": 3.96, + "learning_rate": 4.13038850558964e-05, + "loss": 3.9766, + "step": 868000 + }, + { + "epoch": 3.97, + "learning_rate": 4.095520157140329e-05, + "loss": 3.9754, + "step": 869000 + }, + { + "epoch": 3.97, + "learning_rate": 4.0608174867936735e-05, + "loss": 3.9726, + "step": 870000 + }, + { + "epoch": 3.97, + "learning_rate": 4.026211305630183e-05, + "loss": 3.9728, + "step": 871000 + }, + { + "epoch": 3.98, + "learning_rate": 3.9917367016619276e-05, + "loss": 3.9719, + "step": 872000 + }, + { + "epoch": 3.98, + "learning_rate": 3.9573939588586015e-05, + "loss": 3.9689, + "step": 873000 + }, + { + "epoch": 3.99, + "learning_rate": 3.923183360103733e-05, + "loss": 3.9669, + "step": 874000 + }, + { + "epoch": 3.99, + "learning_rate": 3.88910518719237e-05, + "loss": 3.9705, + "step": 875000 + }, + { + "epoch": 4.0, + "learning_rate": 3.8551935999150546e-05, + "loss": 3.969, + "step": 876000 + }, + { + "epoch": 4.0, + "learning_rate": 3.821414732678987e-05, + "loss": 3.9472, + "step": 877000 + }, + { + "epoch": 4.01, + "learning_rate": 3.7877352503423325e-05, + "loss": 3.9284, + "step": 878000 + }, + { + "epoch": 4.01, + "learning_rate": 3.7541893095445734e-05, + "loss": 3.9269, + "step": 879000 + }, + { + "epoch": 4.02, + "learning_rate": 3.720810531795154e-05, + "loss": 3.9298, + "step": 880000 + }, + { + "epoch": 4.02, + "learning_rate": 3.687532367703408e-05, + "loss": 3.9306, + "step": 881000 + }, + { + "epoch": 4.03, + "learning_rate": 3.65438857052858e-05, + "loss": 3.9292, + "step": 882000 + }, + { + "epoch": 4.03, + "learning_rate": 3.6213794132784204e-05, + "loss": 3.9297, + "step": 883000 + }, + { + "epoch": 4.03, + "learning_rate": 3.588537974618371e-05, + "loss": 3.9289, + "step": 884000 + }, + { + "epoch": 4.04, + "learning_rate": 3.555798776484851e-05, + "loss": 3.9268, + "step": 885000 + }, + { + "epoch": 4.04, + "learning_rate": 3.5232601024993396e-05, + "loss": 3.933, + "step": 886000 + }, + { + "epoch": 4.05, + "learning_rate": 3.490791805247826e-05, + "loss": 3.9279, + "step": 887000 + }, + { + "epoch": 4.05, + "learning_rate": 3.458459495478781e-05, + "loss": 3.9265, + "step": 888000 + }, + { + "epoch": 4.06, + "learning_rate": 3.4262634395156536e-05, + "loss": 3.9279, + "step": 889000 + }, + { + "epoch": 4.06, + "learning_rate": 3.394235893817297e-05, + "loss": 3.9273, + "step": 890000 + }, + { + "epoch": 4.07, + "learning_rate": 3.3623130030302484e-05, + "loss": 3.924, + "step": 891000 + }, + { + "epoch": 4.07, + "learning_rate": 3.330527158014394e-05, + "loss": 3.9239, + "step": 892000 + }, + { + "epoch": 4.08, + "learning_rate": 3.298910200457324e-05, + "loss": 3.9256, + "step": 893000 + }, + { + "epoch": 4.08, + "learning_rate": 3.267399093621268e-05, + "loss": 3.9225, + "step": 894000 + }, + { + "epoch": 4.08, + "learning_rate": 3.2360258143687926e-05, + "loss": 3.9213, + "step": 895000 + }, + { + "epoch": 4.09, + "learning_rate": 3.204821787257311e-05, + "loss": 3.923, + "step": 896000 + }, + { + "epoch": 4.09, + "learning_rate": 3.173724798834707e-05, + "loss": 3.9165, + "step": 897000 + }, + { + "epoch": 4.1, + "learning_rate": 3.142797298671269e-05, + "loss": 3.9243, + "step": 898000 + }, + { + "epoch": 4.1, + "learning_rate": 3.111977624644229e-05, + "loss": 3.9195, + "step": 899000 + }, + { + "epoch": 4.11, + "learning_rate": 3.081297058418091e-05, + "loss": 3.9208, + "step": 900000 + }, + { + "epoch": 4.11, + "learning_rate": 3.0507558527107828e-05, + "loss": 3.92, + "step": 901000 + }, + { + "epoch": 4.12, + "learning_rate": 3.0203845908662563e-05, + "loss": 3.9204, + "step": 902000 + }, + { + "epoch": 4.12, + "learning_rate": 2.9901227197694415e-05, + "loss": 3.9196, + "step": 903000 + }, + { + "epoch": 4.13, + "learning_rate": 2.9600310118919393e-05, + "loss": 3.9197, + "step": 904000 + }, + { + "epoch": 4.13, + "learning_rate": 2.9300494714831896e-05, + "loss": 3.9205, + "step": 905000 + }, + { + "epoch": 4.13, + "learning_rate": 2.9002383080493055e-05, + "loss": 3.9151, + "step": 906000 + }, + { + "epoch": 4.14, + "learning_rate": 2.8705380851790375e-05, + "loss": 3.9172, + "step": 907000 + }, + { + "epoch": 4.14, + "learning_rate": 2.8409789588637402e-05, + "loss": 3.9158, + "step": 908000 + }, + { + "epoch": 4.15, + "learning_rate": 2.8115611725839808e-05, + "loss": 3.9135, + "step": 909000 + }, + { + "epoch": 4.15, + "learning_rate": 2.7823141740592663e-05, + "loss": 3.9149, + "step": 910000 + }, + { + "epoch": 4.16, + "learning_rate": 2.7531796516897657e-05, + "loss": 3.9118, + "step": 911000 + }, + { + "epoch": 4.16, + "learning_rate": 2.7242161139836732e-05, + "loss": 3.9082, + "step": 912000 + }, + { + "epoch": 4.17, + "learning_rate": 2.6953658144950188e-05, + "loss": 3.9131, + "step": 913000 + }, + { + "epoch": 4.17, + "learning_rate": 2.666686690950142e-05, + "loss": 3.9105, + "step": 914000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6381215639576494e-05, + "loss": 3.9091, + "step": 915000 + }, + { + "epoch": 4.18, + "learning_rate": 2.6097277985549907e-05, + "loss": 3.9072, + "step": 916000 + }, + { + "epoch": 4.18, + "learning_rate": 2.581476991673275e-05, + "loss": 3.9109, + "step": 917000 + }, + { + "epoch": 4.19, + "learning_rate": 2.553341311615387e-05, + "loss": 3.9079, + "step": 918000 + }, + { + "epoch": 4.19, + "learning_rate": 2.5253493408841024e-05, + "loss": 3.9061, + "step": 919000 + }, + { + "epoch": 4.2, + "learning_rate": 2.4975290861076127e-05, + "loss": 3.9058, + "step": 920000 + }, + { + "epoch": 4.2, + "learning_rate": 2.469825080275776e-05, + "loss": 3.9049, + "step": 921000 + }, + { + "epoch": 4.21, + "learning_rate": 2.4422929591059718e-05, + "loss": 3.9037, + "step": 922000 + }, + { + "epoch": 4.21, + "learning_rate": 2.4148778300583463e-05, + "loss": 3.9052, + "step": 923000 + }, + { + "epoch": 4.22, + "learning_rate": 2.3876075508705364e-05, + "loss": 3.906, + "step": 924000 + }, + { + "epoch": 4.22, + "learning_rate": 2.3605364516460604e-05, + "loss": 3.9016, + "step": 925000 + }, + { + "epoch": 4.23, + "learning_rate": 2.3335562540463497e-05, + "loss": 3.902, + "step": 926000 + }, + { + "epoch": 4.23, + "learning_rate": 2.3067215761578686e-05, + "loss": 3.901, + "step": 927000 + }, + { + "epoch": 4.24, + "learning_rate": 2.2800326390197003e-05, + "loss": 3.9034, + "step": 928000 + }, + { + "epoch": 4.24, + "learning_rate": 2.2535161324668153e-05, + "loss": 3.8986, + "step": 929000 + }, + { + "epoch": 4.24, + "learning_rate": 2.227119188854776e-05, + "loss": 3.9001, + "step": 930000 + }, + { + "epoch": 4.25, + "learning_rate": 2.200868641683378e-05, + "loss": 3.896, + "step": 931000 + }, + { + "epoch": 4.25, + "learning_rate": 2.1747647071801923e-05, + "loss": 3.8955, + "step": 932000 + }, + { + "epoch": 4.26, + "learning_rate": 2.1488593679023983e-05, + "loss": 3.896, + "step": 933000 + }, + { + "epoch": 4.26, + "learning_rate": 2.1230490082903298e-05, + "loss": 3.8937, + "step": 934000 + }, + { + "epoch": 4.27, + "learning_rate": 2.0973859023521336e-05, + "loss": 3.893, + "step": 935000 + }, + { + "epoch": 4.27, + "learning_rate": 2.0718957033886022e-05, + "loss": 3.8921, + "step": 936000 + }, + { + "epoch": 4.28, + "learning_rate": 2.0465275899699664e-05, + "loss": 3.8936, + "step": 937000 + }, + { + "epoch": 4.28, + "learning_rate": 2.021307360537388e-05, + "loss": 3.8894, + "step": 938000 + }, + { + "epoch": 4.29, + "learning_rate": 1.9962352228316283e-05, + "loss": 3.8913, + "step": 939000 + }, + { + "epoch": 4.29, + "learning_rate": 1.9713362330696583e-05, + "loss": 3.8938, + "step": 940000 + }, + { + "epoch": 4.29, + "learning_rate": 1.946560748553077e-05, + "loss": 3.8904, + "step": 941000 + }, + { + "epoch": 4.3, + "learning_rate": 1.921958523886409e-05, + "loss": 3.8881, + "step": 942000 + }, + { + "epoch": 4.3, + "learning_rate": 1.8974805080506908e-05, + "loss": 3.8859, + "step": 943000 + }, + { + "epoch": 4.31, + "learning_rate": 1.8732001127734854e-05, + "loss": 3.8898, + "step": 944000 + }, + { + "epoch": 4.31, + "learning_rate": 1.8490202219074714e-05, + "loss": 3.888, + "step": 945000 + }, + { + "epoch": 4.32, + "learning_rate": 1.8250137975426186e-05, + "loss": 3.8873, + "step": 946000 + }, + { + "epoch": 4.32, + "learning_rate": 1.8011329765448747e-05, + "loss": 3.8839, + "step": 947000 + }, + { + "epoch": 4.33, + "learning_rate": 1.7774020608654827e-05, + "loss": 3.8851, + "step": 948000 + }, + { + "epoch": 4.33, + "learning_rate": 1.753821245977625e-05, + "loss": 3.8815, + "step": 949000 + }, + { + "epoch": 4.34, + "learning_rate": 1.730414081501248e-05, + "loss": 3.8797, + "step": 950000 + }, + { + "epoch": 4.34, + "learning_rate": 1.7071338990848274e-05, + "loss": 3.8825, + "step": 951000 + }, + { + "epoch": 4.34, + "learning_rate": 1.6840274504384723e-05, + "loss": 3.8792, + "step": 952000 + }, + { + "epoch": 4.35, + "learning_rate": 1.6610715701279632e-05, + "loss": 3.8822, + "step": 953000 + }, + { + "epoch": 4.35, + "learning_rate": 1.6382436945055167e-05, + "loss": 3.8788, + "step": 954000 + }, + { + "epoch": 4.36, + "learning_rate": 1.615567065931629e-05, + "loss": 3.8825, + "step": 955000 + }, + { + "epoch": 4.36, + "learning_rate": 1.5930643206869322e-05, + "loss": 3.8789, + "step": 956000 + }, + { + "epoch": 4.37, + "learning_rate": 1.5706905936180028e-05, + "loss": 3.8776, + "step": 957000 + }, + { + "epoch": 4.37, + "learning_rate": 1.5484908160738844e-05, + "loss": 3.88, + "step": 958000 + }, + { + "epoch": 4.38, + "learning_rate": 1.5264207269471153e-05, + "loss": 3.8763, + "step": 959000 + }, + { + "epoch": 4.38, + "learning_rate": 1.5045028059623756e-05, + "loss": 3.8744, + "step": 960000 + }, + { + "epoch": 4.39, + "learning_rate": 1.4827372336590928e-05, + "loss": 3.8755, + "step": 961000 + }, + { + "epoch": 4.39, + "learning_rate": 1.4611457261190308e-05, + "loss": 3.8775, + "step": 962000 + }, + { + "epoch": 4.39, + "learning_rate": 1.4397066191369536e-05, + "loss": 3.8748, + "step": 963000 + }, + { + "epoch": 4.4, + "learning_rate": 1.4183988576170026e-05, + "loss": 3.8712, + "step": 964000 + }, + { + "epoch": 4.4, + "learning_rate": 1.3972441540226522e-05, + "loss": 3.8697, + "step": 965000 + }, + { + "epoch": 4.41, + "learning_rate": 1.3762426826066322e-05, + "loss": 3.8722, + "step": 966000 + }, + { + "epoch": 4.41, + "learning_rate": 1.3554153877426224e-05, + "loss": 3.8705, + "step": 967000 + }, + { + "epoch": 4.42, + "learning_rate": 1.3347207447291144e-05, + "loss": 3.8681, + "step": 968000 + }, + { + "epoch": 4.42, + "learning_rate": 1.314179848903565e-05, + "loss": 3.8657, + "step": 969000 + }, + { + "epoch": 4.43, + "learning_rate": 1.2938131795049502e-05, + "loss": 3.8674, + "step": 970000 + }, + { + "epoch": 4.43, + "learning_rate": 1.2735801302100369e-05, + "loss": 3.8668, + "step": 971000 + }, + { + "epoch": 4.44, + "learning_rate": 1.2535213334175821e-05, + "loss": 3.8706, + "step": 972000 + }, + { + "epoch": 4.44, + "learning_rate": 1.2335967966295303e-05, + "loss": 3.8628, + "step": 973000 + }, + { + "epoch": 4.44, + "learning_rate": 1.2138268399943431e-05, + "loss": 3.8666, + "step": 974000 + }, + { + "epoch": 4.45, + "learning_rate": 1.1942116263585212e-05, + "loss": 3.8637, + "step": 975000 + }, + { + "epoch": 4.45, + "learning_rate": 1.1747707001746943e-05, + "loss": 3.8626, + "step": 976000 + }, + { + "epoch": 4.46, + "learning_rate": 1.1554653008327055e-05, + "loss": 3.8644, + "step": 977000 + }, + { + "epoch": 4.46, + "learning_rate": 1.1363532705509805e-05, + "loss": 3.8628, + "step": 978000 + }, + { + "epoch": 4.47, + "learning_rate": 1.1173581654855314e-05, + "loss": 3.864, + "step": 979000 + }, + { + "epoch": 4.47, + "learning_rate": 1.0985185980385471e-05, + "loss": 3.8634, + "step": 980000 + }, + { + "epoch": 4.48, + "learning_rate": 1.079834723392832e-05, + "loss": 3.8611, + "step": 981000 + }, + { + "epoch": 4.48, + "learning_rate": 1.0613066954487539e-05, + "loss": 3.8614, + "step": 982000 + }, + { + "epoch": 4.49, + "learning_rate": 1.0429529608794375e-05, + "loss": 3.8612, + "step": 983000 + }, + { + "epoch": 4.49, + "learning_rate": 1.024736926677754e-05, + "loss": 3.8581, + "step": 984000 + }, + { + "epoch": 4.5, + "learning_rate": 1.0066951746339515e-05, + "loss": 3.8562, + "step": 985000 + }, + { + "epoch": 4.5, + "learning_rate": 9.887917337602925e-06, + "loss": 3.8566, + "step": 986000 + }, + { + "epoch": 4.5, + "learning_rate": 9.71044889515631e-06, + "loss": 3.8534, + "step": 987000 + }, + { + "epoch": 4.51, + "learning_rate": 9.534722998420087e-06, + "loss": 3.8577, + "step": 988000 + }, + { + "epoch": 4.51, + "learning_rate": 9.360389291505156e-06, + "loss": 3.8599, + "step": 989000 + }, + { + "epoch": 4.52, + "learning_rate": 9.187625896164997e-06, + "loss": 3.8554, + "step": 990000 + }, + { + "epoch": 4.52, + "learning_rate": 9.016434235463455e-06, + "loss": 3.8554, + "step": 991000 + }, + { + "epoch": 4.53, + "learning_rate": 8.846984551782144e-06, + "loss": 3.8531, + "step": 992000 + }, + { + "epoch": 4.53, + "learning_rate": 8.678939002516817e-06, + "loss": 3.8537, + "step": 993000 + }, + { + "epoch": 4.54, + "learning_rate": 8.512635059971796e-06, + "loss": 3.8556, + "step": 994000 + }, + { + "epoch": 4.54, + "learning_rate": 8.34774115340684e-06, + "loss": 3.8552, + "step": 995000 + }, + { + "epoch": 4.55, + "learning_rate": 8.184588425936723e-06, + "loss": 3.8518, + "step": 996000 + }, + { + "epoch": 4.55, + "learning_rate": 8.022851589599123e-06, + "loss": 3.8519, + "step": 997000 + }, + { + "epoch": 4.55, + "learning_rate": 7.862855447419604e-06, + "loss": 3.8497, + "step": 998000 + }, + { + "epoch": 4.56, + "learning_rate": 7.70428100492051e-06, + "loss": 3.8534, + "step": 999000 + }, + { + "epoch": 4.56, + "learning_rate": 7.54729051547387e-06, + "loss": 3.8516, + "step": 1000000 + }, + { + "epoch": 4.57, + "learning_rate": 7.392039885206847e-06, + "loss": 3.8476, + "step": 1001000 + }, + { + "epoch": 4.57, + "learning_rate": 7.238219581070471e-06, + "loss": 3.8473, + "step": 1002000 + }, + { + "epoch": 4.58, + "learning_rate": 7.085987068966549e-06, + "loss": 3.8497, + "step": 1003000 + }, + { + "epoch": 4.58, + "learning_rate": 6.935343602844757e-06, + "loss": 3.8455, + "step": 1004000 + }, + { + "epoch": 4.59, + "learning_rate": 6.786438681986962e-06, + "loss": 3.8503, + "step": 1005000 + }, + { + "epoch": 4.59, + "learning_rate": 6.638975425188365e-06, + "loss": 3.8475, + "step": 1006000 + }, + { + "epoch": 4.6, + "learning_rate": 6.493249970997628e-06, + "loss": 3.8456, + "step": 1007000 + }, + { + "epoch": 4.6, + "learning_rate": 6.349115258944571e-06, + "loss": 3.8407, + "step": 1008000 + }, + { + "epoch": 4.6, + "learning_rate": 6.2064305863833495e-06, + "loss": 3.8426, + "step": 1009000 + }, + { + "epoch": 4.61, + "learning_rate": 6.065342204771441e-06, + "loss": 3.8456, + "step": 1010000 + }, + { + "epoch": 4.61, + "learning_rate": 5.92585127626355e-06, + "loss": 3.8446, + "step": 1011000 + }, + { + "epoch": 4.62, + "learning_rate": 5.7880960433015715e-06, + "loss": 3.8439, + "step": 1012000 + }, + { + "epoch": 4.62, + "learning_rate": 5.651801854522143e-06, + "loss": 3.8432, + "step": 1013000 + }, + { + "epoch": 4.63, + "learning_rate": 5.517108525207015e-06, + "loss": 3.8411, + "step": 1014000 + }, + { + "epoch": 4.63, + "learning_rate": 5.384017164834387e-06, + "loss": 3.8405, + "step": 1015000 + }, + { + "epoch": 4.64, + "learning_rate": 5.25265955688945e-06, + "loss": 3.8406, + "step": 1016000 + }, + { + "epoch": 4.64, + "learning_rate": 5.122773805360459e-06, + "loss": 3.8425, + "step": 1017000 + }, + { + "epoch": 4.65, + "learning_rate": 4.9946207493118515e-06, + "loss": 3.8429, + "step": 1018000 + }, + { + "epoch": 4.65, + "learning_rate": 4.867944881850673e-06, + "loss": 3.84, + "step": 1019000 + }, + { + "epoch": 4.65, + "learning_rate": 4.743000595890457e-06, + "loss": 3.8434, + "step": 1020000 + }, + { + "epoch": 4.66, + "learning_rate": 4.619538782067134e-06, + "loss": 3.8404, + "step": 1021000 + }, + { + "epoch": 4.66, + "learning_rate": 4.497686330529982e-06, + "loss": 3.8382, + "step": 1022000 + }, + { + "epoch": 4.67, + "learning_rate": 4.377444244986006e-06, + "loss": 3.8392, + "step": 1023000 + }, + { + "epoch": 4.67, + "learning_rate": 4.2589313414077795e-06, + "loss": 3.8395, + "step": 1024000 + }, + { + "epoch": 4.68, + "learning_rate": 4.1420275474132856e-06, + "loss": 3.8404, + "step": 1025000 + }, + { + "epoch": 4.68, + "learning_rate": 4.02661922185521e-06, + "loss": 3.841, + "step": 1026000 + }, + { + "epoch": 4.69, + "learning_rate": 3.912938129952815e-06, + "loss": 3.8376, + "step": 1027000 + }, + { + "epoch": 4.69, + "learning_rate": 3.8007576184877935e-06, + "loss": 3.8383, + "step": 1028000 + }, + { + "epoch": 4.7, + "learning_rate": 3.6901932136656604e-06, + "loss": 3.837, + "step": 1029000 + }, + { + "epoch": 4.7, + "learning_rate": 3.5812458262129755e-06, + "loss": 3.8348, + "step": 1030000 + }, + { + "epoch": 4.71, + "learning_rate": 3.4740228745658187e-06, + "loss": 3.8393, + "step": 1031000 + }, + { + "epoch": 4.71, + "learning_rate": 3.368310581510614e-06, + "loss": 3.8335, + "step": 1032000 + }, + { + "epoch": 4.71, + "learning_rate": 3.2643212405075284e-06, + "loss": 3.833, + "step": 1033000 + }, + { + "epoch": 4.72, + "learning_rate": 3.161949185382773e-06, + "loss": 3.8357, + "step": 1034000 + }, + { + "epoch": 4.72, + "learning_rate": 3.0610952138760753e-06, + "loss": 3.8334, + "step": 1035000 + }, + { + "epoch": 4.73, + "learning_rate": 2.9618634416622936e-06, + "loss": 3.8355, + "step": 1036000 + }, + { + "epoch": 4.73, + "learning_rate": 2.864351483910399e-06, + "loss": 3.8369, + "step": 1037000 + }, + { + "epoch": 4.74, + "learning_rate": 2.768364924832545e-06, + "loss": 3.8369, + "step": 1038000 + }, + { + "epoch": 4.74, + "learning_rate": 2.6740029762871932e-06, + "loss": 3.8313, + "step": 1039000 + }, + { + "epoch": 4.75, + "learning_rate": 2.581358339964313e-06, + "loss": 3.8306, + "step": 1040000 + }, + { + "epoch": 4.75, + "learning_rate": 2.4902463043641854e-06, + "loss": 3.8327, + "step": 1041000 + }, + { + "epoch": 4.76, + "learning_rate": 2.4007611701787116e-06, + "loss": 3.834, + "step": 1042000 + }, + { + "epoch": 4.76, + "learning_rate": 2.3129036745030752e-06, + "loss": 3.8316, + "step": 1043000 + }, + { + "epoch": 4.76, + "learning_rate": 2.226759956554547e-06, + "loss": 3.8326, + "step": 1044000 + }, + { + "epoch": 4.77, + "learning_rate": 2.1421582661275585e-06, + "loss": 3.8354, + "step": 1045000 + }, + { + "epoch": 4.77, + "learning_rate": 2.0592685019640958e-06, + "loss": 3.8307, + "step": 1046000 + }, + { + "epoch": 4.78, + "learning_rate": 1.977925401473013e-06, + "loss": 3.8299, + "step": 1047000 + }, + { + "epoch": 4.78, + "learning_rate": 1.8982923194333036e-06, + "loss": 3.8297, + "step": 1048000 + }, + { + "epoch": 4.79, + "learning_rate": 1.8202104863079827e-06, + "loss": 3.8298, + "step": 1049000 + }, + { + "epoch": 4.79, + "learning_rate": 1.7438367075362172e-06, + "loss": 3.8303, + "step": 1050000 + }, + { + "epoch": 4.8, + "learning_rate": 1.6690927139422218e-06, + "loss": 3.8317, + "step": 1051000 + }, + { + "epoch": 4.8, + "learning_rate": 1.5959067502205883e-06, + "loss": 3.8296, + "step": 1052000 + }, + { + "epoch": 4.81, + "learning_rate": 1.5243550546499618e-06, + "loss": 3.8309, + "step": 1053000 + }, + { + "epoch": 4.81, + "learning_rate": 1.4544382166065795e-06, + "loss": 3.8339, + "step": 1054000 + }, + { + "epoch": 4.81, + "learning_rate": 1.3861568120002276e-06, + "loss": 3.831, + "step": 1055000 + }, + { + "epoch": 4.82, + "learning_rate": 1.3195114032695576e-06, + "loss": 3.8265, + "step": 1056000 + }, + { + "epoch": 4.82, + "learning_rate": 1.2545667306077758e-06, + "loss": 3.8309, + "step": 1057000 + }, + { + "epoch": 4.83, + "learning_rate": 1.1911933096932392e-06, + "loss": 3.8254, + "step": 1058000 + }, + { + "epoch": 4.83, + "learning_rate": 1.1294574905821087e-06, + "loss": 3.8288, + "step": 1059000 + }, + { + "epoch": 4.84, + "learning_rate": 1.0694190611034273e-06, + "loss": 3.8261, + "step": 1060000 + }, + { + "epoch": 4.84, + "learning_rate": 1.0109583188243843e-06, + "loss": 3.8253, + "step": 1061000 + }, + { + "epoch": 4.85, + "learning_rate": 9.541366629567838e-07, + "loss": 3.8274, + "step": 1062000 + }, + { + "epoch": 4.85, + "learning_rate": 8.989545615444961e-07, + "loss": 3.8264, + "step": 1063000 + }, + { + "epoch": 4.86, + "learning_rate": 8.454651918863299e-07, + "loss": 3.8278, + "step": 1064000 + }, + { + "epoch": 4.86, + "learning_rate": 7.935619088263124e-07, + "loss": 3.8267, + "step": 1065000 + }, + { + "epoch": 4.86, + "learning_rate": 7.433489455357823e-07, + "loss": 3.8298, + "step": 1066000 + }, + { + "epoch": 4.87, + "learning_rate": 6.94726189238426e-07, + "loss": 3.8305, + "step": 1067000 + }, + { + "epoch": 4.87, + "learning_rate": 6.477912850886725e-07, + "loss": 3.8305, + "step": 1068000 + }, + { + "epoch": 4.88, + "learning_rate": 6.024951762708009e-07, + "loss": 3.8248, + "step": 1069000 + }, + { + "epoch": 4.88, + "learning_rate": 5.588382351461308e-07, + "loss": 3.826, + "step": 1070000 + }, + { + "epoch": 4.89, + "learning_rate": 5.16779582648863e-07, + "loss": 3.8261, + "step": 1071000 + }, + { + "epoch": 4.89, + "learning_rate": 4.76364087147263e-07, + "loss": 3.8238, + "step": 1072000 + }, + { + "epoch": 4.9, + "learning_rate": 4.375920815465229e-07, + "loss": 3.8293, + "step": 1073000 + }, + { + "epoch": 4.9, + "learning_rate": 4.004638852143083e-07, + "loss": 3.8244, + "step": 1074000 + }, + { + "epoch": 4.91, + "learning_rate": 3.6497980397816043e-07, + "loss": 3.8275, + "step": 1075000 + }, + { + "epoch": 4.91, + "learning_rate": 3.3117314832133985e-07, + "loss": 3.8278, + "step": 1076000 + }, + { + "epoch": 4.92, + "learning_rate": 2.989765157657809e-07, + "loss": 3.824, + "step": 1077000 + }, + { + "epoch": 4.92, + "learning_rate": 2.684545642082537e-07, + "loss": 3.8268, + "step": 1078000 + }, + { + "epoch": 4.92, + "learning_rate": 2.395464400940739e-07, + "loss": 3.8244, + "step": 1079000 + }, + { + "epoch": 4.93, + "learning_rate": 2.1228375656396903e-07, + "loss": 3.8236, + "step": 1080000 + }, + { + "epoch": 4.93, + "learning_rate": 1.8666673818257262e-07, + "loss": 3.8255, + "step": 1081000 + }, + { + "epoch": 4.94, + "learning_rate": 1.6271874491924355e-07, + "loss": 3.828, + "step": 1082000 + }, + { + "epoch": 4.94, + "learning_rate": 1.4041353457650008e-07, + "loss": 3.8259, + "step": 1083000 + }, + { + "epoch": 4.95, + "learning_rate": 1.1973143077612658e-07, + "loss": 3.8265, + "step": 1084000 + }, + { + "epoch": 4.95, + "learning_rate": 1.0069575448430346e-07, + "loss": 3.826, + "step": 1085000 + }, + { + "epoch": 4.96, + "learning_rate": 8.330666249920515e-08, + "loss": 3.8267, + "step": 1086000 + }, + { + "epoch": 4.96, + "learning_rate": 6.759413926236135e-08, + "loss": 3.8269, + "step": 1087000 + }, + { + "epoch": 4.97, + "learning_rate": 5.349533819716257e-08, + "loss": 3.8255, + "step": 1088000 + }, + { + "epoch": 4.97, + "learning_rate": 4.1043510231775216e-08, + "loss": 3.8275, + "step": 1089000 + }, + { + "epoch": 4.97, + "learning_rate": 3.0248739940019756e-08, + "loss": 3.8277, + "step": 1090000 + } + ], + "max_steps": 1095620, + "num_train_epochs": 5, + "total_flos": 7.86932880566174e+19, + "trial_name": null, + "trial_params": null +}