{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.51566991968441, "global_step": 120000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0004000105276443632, "loss": 10.1199, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.00040004211053127486, "loss": 9.997, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.000400094748522194, "loss": 9.9386, "step": 600 }, { "epoch": 0.0, "learning_rate": 0.00040016844138622554, "loss": 9.8988, "step": 800 }, { "epoch": 0.0, "learning_rate": 0.0004002631888001141, "loss": 9.8579, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.0004003789903482477, "loss": 9.8159, "step": 1200 }, { "epoch": 0.01, "learning_rate": 0.0004005158455226594, "loss": 9.7867, "step": 1400 }, { "epoch": 0.01, "learning_rate": 0.0004006737537230326, "loss": 9.7605, "step": 1600 }, { "epoch": 0.01, "learning_rate": 0.0004008527142566991, "loss": 9.7357, "step": 1800 }, { "epoch": 0.01, "learning_rate": 0.0004010527263386479, "loss": 9.7138, "step": 2000 }, { "epoch": 0.01, "learning_rate": 0.00040127378909152016, "loss": 9.6894, "step": 2200 }, { "epoch": 0.01, "learning_rate": 0.000401515901545621, "loss": 9.6634, "step": 2400 }, { "epoch": 0.01, "learning_rate": 0.00040177906263891804, "loss": 9.6451, "step": 2600 }, { "epoch": 0.01, "learning_rate": 0.00040206327121705167, "loss": 9.6279, "step": 2800 }, { "epoch": 0.01, "learning_rate": 0.00040236852603333685, "loss": 9.6038, "step": 3000 }, { "epoch": 0.01, "learning_rate": 0.0004026948257487631, "loss": 9.5874, "step": 3200 }, { "epoch": 0.01, "learning_rate": 0.00040304216893201697, "loss": 9.5729, "step": 3400 }, { "epoch": 0.02, "learning_rate": 0.0004034105540594666, "loss": 9.547, "step": 3600 }, { "epoch": 0.02, "learning_rate": 0.0004037999795151858, "loss": 9.5348, "step": 3800 }, { "epoch": 0.02, "learning_rate": 0.0004042104435909525, "loss": 9.5207, "step": 4000 }, { "epoch": 0.02, "learning_rate": 0.0004046419444862573, "loss": 9.5061, "step": 4200 }, { "epoch": 0.02, "learning_rate": 0.0004050944803083139, "loss": 9.493, "step": 4400 }, { "epoch": 0.02, "learning_rate": 0.0004055680490720661, "loss": 9.4782, "step": 4600 }, { "epoch": 0.02, "learning_rate": 0.0004060626487001964, "loss": 9.4636, "step": 4800 }, { "epoch": 0.02, "learning_rate": 0.0004065782770231313, "loss": 9.4546, "step": 5000 }, { "epoch": 0.02, "learning_rate": 0.000407114931779062, "loss": 9.4453, "step": 5200 }, { "epoch": 0.02, "learning_rate": 0.00040767261061393917, "loss": 9.4174, "step": 5400 }, { "epoch": 0.02, "learning_rate": 0.00040825131108149573, "loss": 9.4159, "step": 5600 }, { "epoch": 0.02, "learning_rate": 0.00040885103064325357, "loss": 9.3993, "step": 5800 }, { "epoch": 0.03, "learning_rate": 0.00040947176666852707, "loss": 9.3953, "step": 6000 }, { "epoch": 0.03, "learning_rate": 0.00041011351643444917, "loss": 9.3854, "step": 6200 }, { "epoch": 0.03, "learning_rate": 0.0004107762771259713, "loss": 9.3679, "step": 6400 }, { "epoch": 0.03, "learning_rate": 0.0004114600458358809, "loss": 9.3595, "step": 6600 }, { "epoch": 0.03, "learning_rate": 0.00041216481956481664, "loss": 9.3504, "step": 6800 }, { "epoch": 0.03, "learning_rate": 0.00041289059522127414, "loss": 9.3417, "step": 7000 }, { "epoch": 0.03, "learning_rate": 0.0004136373696216229, "loss": 9.3275, "step": 7200 }, { "epoch": 0.03, "learning_rate": 0.0004144051394901274, "loss": 9.3201, "step": 7400 }, { "epoch": 0.03, "learning_rate": 0.0004151939014589469, "loss": 9.3123, "step": 7600 }, { "epoch": 0.03, "learning_rate": 0.0004160036520681667, "loss": 9.3084, "step": 7800 }, { "epoch": 0.03, "learning_rate": 0.0004168343877657965, "loss": 9.2954, "step": 8000 }, { "epoch": 0.04, "learning_rate": 0.00041768179413688954, "loss": 9.2862, "step": 8200 }, { "epoch": 0.04, "learning_rate": 0.00041855438410810103, "loss": 9.283, "step": 8400 }, { "epoch": 0.04, "learning_rate": 0.00041944794797888797, "loss": 9.2711, "step": 8600 }, { "epoch": 0.04, "learning_rate": 0.00042036248182962185, "loss": 9.2726, "step": 8800 }, { "epoch": 0.04, "learning_rate": 0.0004212979816486783, "loss": 9.2621, "step": 9000 }, { "epoch": 0.04, "learning_rate": 0.00042225444333247354, "loss": 9.2527, "step": 9200 }, { "epoch": 0.04, "learning_rate": 0.0004232318626854678, "loss": 9.2453, "step": 9400 }, { "epoch": 0.04, "learning_rate": 0.0004242302354201949, "loss": 9.2314, "step": 9600 }, { "epoch": 0.04, "learning_rate": 0.000425249557157276, "loss": 9.2337, "step": 9800 }, { "epoch": 0.04, "learning_rate": 0.00042628982342543184, "loss": 9.2276, "step": 10000 }, { "epoch": 0.04, "learning_rate": 0.0004273456715498305, "loss": 9.2181, "step": 10200 }, { "epoch": 0.04, "learning_rate": 0.00042842770843401837, "loss": 9.2142, "step": 10400 }, { "epoch": 0.05, "learning_rate": 0.0004295306759082608, "loss": 9.2052, "step": 10600 }, { "epoch": 0.05, "learning_rate": 0.00043065456913437584, "loss": 9.1994, "step": 10800 }, { "epoch": 0.05, "learning_rate": 0.00043179938318238693, "loss": 9.2017, "step": 11000 }, { "epoch": 0.05, "learning_rate": 0.0004329651130305402, "loss": 9.1991, "step": 11200 }, { "epoch": 0.05, "learning_rate": 0.0004341517535653445, "loss": 9.1921, "step": 11400 }, { "epoch": 0.05, "learning_rate": 0.00043535929958157804, "loss": 9.1786, "step": 11600 }, { "epoch": 0.05, "learning_rate": 0.0004365877457823183, "loss": 9.1766, "step": 11800 }, { "epoch": 0.05, "learning_rate": 0.00043783708677896244, "loss": 9.1614, "step": 12000 }, { "epoch": 0.05, "learning_rate": 0.0004391073170912519, "loss": 9.1717, "step": 12200 }, { "epoch": 0.05, "learning_rate": 0.0004403984311473017, "loss": 9.1551, "step": 12400 }, { "epoch": 0.05, "learning_rate": 0.0004417104232836127, "loss": 9.1542, "step": 12600 }, { "epoch": 0.06, "learning_rate": 0.00044304328774510786, "loss": 9.1525, "step": 12800 }, { "epoch": 0.06, "learning_rate": 0.000444397018685155, "loss": 9.1443, "step": 13000 }, { "epoch": 0.06, "learning_rate": 0.00044577161016558405, "loss": 9.1301, "step": 13200 }, { "epoch": 0.06, "learning_rate": 0.0004471670561567286, "loss": 9.1343, "step": 13400 }, { "epoch": 0.06, "learning_rate": 0.00044858335053743655, "loss": 9.1287, "step": 13600 }, { "epoch": 0.06, "learning_rate": 0.0004500204870951062, "loss": 9.1189, "step": 13800 }, { "epoch": 0.06, "learning_rate": 0.00045147845952571257, "loss": 9.1171, "step": 14000 }, { "epoch": 0.06, "learning_rate": 0.0004529498156216581, "loss": 9.1105, "step": 14200 }, { "epoch": 0.06, "learning_rate": 0.0004544493364218305, "loss": 9.0969, "step": 14400 }, { "epoch": 0.06, "learning_rate": 0.00045596967366771067, "loss": 9.1014, "step": 14600 }, { "epoch": 0.06, "learning_rate": 0.00045751082069031036, "loss": 9.0951, "step": 14800 }, { "epoch": 0.06, "learning_rate": 0.00045907277072936015, "loss": 9.0867, "step": 15000 }, { "epoch": 0.07, "learning_rate": 0.00046065551693333547, "loss": 9.0872, "step": 15200 }, { "epoch": 0.07, "learning_rate": 0.00046225905235949306, "loss": 9.0708, "step": 15400 }, { "epoch": 0.07, "learning_rate": 0.0004638833699738953, "loss": 9.0716, "step": 15600 }, { "epoch": 0.07, "learning_rate": 0.00046552846265143777, "loss": 9.071, "step": 15800 }, { "epoch": 0.07, "learning_rate": 0.00046719432317589814, "loss": 9.0618, "step": 16000 }, { "epoch": 0.07, "learning_rate": 0.0004688724595049813, "loss": 9.0518, "step": 16200 }, { "epoch": 0.07, "learning_rate": 0.0004705797299630679, "loss": 9.0442, "step": 16400 }, { "epoch": 0.07, "learning_rate": 0.0004723077461105934, "loss": 9.0477, "step": 16600 }, { "epoch": 0.07, "learning_rate": 0.0004740565003675777, "loss": 9.0397, "step": 16800 }, { "epoch": 0.07, "learning_rate": 0.0004758259850630858, "loss": 9.0355, "step": 17000 }, { "epoch": 0.07, "learning_rate": 0.00047761619243523283, "loss": 9.0248, "step": 17200 }, { "epoch": 0.07, "learning_rate": 0.0004794271146312465, "loss": 9.0137, "step": 17400 }, { "epoch": 0.08, "learning_rate": 0.00048125874370748105, "loss": 9.0205, "step": 17600 }, { "epoch": 0.08, "learning_rate": 0.00048311107162946065, "loss": 9.0008, "step": 17800 }, { "epoch": 0.08, "learning_rate": 0.00048498409027191575, "loss": 8.9975, "step": 18000 }, { "epoch": 0.08, "learning_rate": 0.0004868682714790542, "loss": 8.9992, "step": 18200 }, { "epoch": 0.08, "learning_rate": 0.0004887825434734695, "loss": 8.9777, "step": 18400 }, { "epoch": 0.08, "learning_rate": 0.0004907174813103439, "loss": 8.9871, "step": 18600 }, { "epoch": 0.08, "learning_rate": 0.0004926730765020346, "loss": 8.9765, "step": 18800 }, { "epoch": 0.08, "learning_rate": 0.0004946393879009196, "loss": 8.9754, "step": 19000 }, { "epoch": 0.08, "learning_rate": 0.0004966361687980866, "loss": 8.9678, "step": 19200 }, { "epoch": 0.08, "learning_rate": 0.000498653581087638, "loss": 8.9677, "step": 19400 }, { "epoch": 0.08, "learning_rate": 0.0005006916159201579, "loss": 8.9644, "step": 19600 }, { "epoch": 0.09, "learning_rate": 0.0005027502643557748, "loss": 8.9642, "step": 19800 }, { "epoch": 0.09, "learning_rate": 0.0005048295173641828, "loss": 8.9569, "step": 20000 }, { "epoch": 0.09, "learning_rate": 0.0005069293658247036, "loss": 8.9605, "step": 20200 }, { "epoch": 0.09, "learning_rate": 0.0005090498005263129, "loss": 8.9431, "step": 20400 }, { "epoch": 0.09, "learning_rate": 0.000511190812167682, "loss": 8.9431, "step": 20600 }, { "epoch": 0.09, "learning_rate": 0.000513352391357226, "loss": 8.9342, "step": 20800 }, { "epoch": 0.09, "learning_rate": 0.0005155345286131357, "loss": 8.9324, "step": 21000 }, { "epoch": 0.09, "learning_rate": 0.0005177372143634305, "loss": 8.9382, "step": 21200 }, { "epoch": 0.09, "learning_rate": 0.0005199604389459836, "loss": 8.9424, "step": 21400 }, { "epoch": 0.09, "learning_rate": 0.0005222041926085837, "loss": 8.9157, "step": 21600 }, { "epoch": 0.09, "learning_rate": 0.0005244684655089597, "loss": 8.9236, "step": 21800 }, { "epoch": 0.09, "learning_rate": 0.0005267532477148378, "loss": 8.9246, "step": 22000 }, { "epoch": 0.1, "learning_rate": 0.0005290585292039816, "loss": 8.9268, "step": 22200 }, { "epoch": 0.1, "learning_rate": 0.0005313842998642265, "loss": 8.9203, "step": 22400 }, { "epoch": 0.1, "learning_rate": 0.0005337305494935388, "loss": 8.9095, "step": 22600 }, { "epoch": 0.1, "learning_rate": 0.0005360972678000522, "loss": 8.9061, "step": 22800 }, { "epoch": 0.1, "learning_rate": 0.0005384724576463773, "loss": 8.9117, "step": 23000 }, { "epoch": 0.1, "learning_rate": 0.0005408799798596632, "loss": 8.9043, "step": 23200 }, { "epoch": 0.1, "learning_rate": 0.0005433079393890421, "loss": 8.9012, "step": 23400 }, { "epoch": 0.1, "learning_rate": 0.0005457563255842242, "loss": 8.8969, "step": 23600 }, { "epoch": 0.1, "learning_rate": 0.0005482251277053145, "loss": 8.9013, "step": 23800 }, { "epoch": 0.1, "learning_rate": 0.0005507143349228714, "loss": 8.8912, "step": 24000 }, { "epoch": 0.1, "learning_rate": 0.0005532239363179401, "loss": 8.8896, "step": 24200 }, { "epoch": 0.1, "learning_rate": 0.0005557539208821075, "loss": 8.8838, "step": 24400 }, { "epoch": 0.11, "learning_rate": 0.0005583042775175479, "loss": 8.889, "step": 24600 }, { "epoch": 0.11, "learning_rate": 0.0005608749950370764, "loss": 8.888, "step": 24800 }, { "epoch": 0.11, "learning_rate": 0.0005634530562276738, "loss": 8.8814, "step": 25000 }, { "epoch": 0.11, "learning_rate": 0.0005660643599338256, "loss": 8.8755, "step": 25200 }, { "epoch": 0.11, "learning_rate": 0.0005686959904843206, "loss": 8.8667, "step": 25400 }, { "epoch": 0.11, "learning_rate": 0.0005713479363354621, "loss": 8.8748, "step": 25600 }, { "epoch": 0.11, "learning_rate": 0.000574020185854441, "loss": 8.862, "step": 25800 }, { "epoch": 0.11, "learning_rate": 0.0005767127273193853, "loss": 8.8534, "step": 26000 }, { "epoch": 0.11, "learning_rate": 0.0005794255489194114, "loss": 8.8655, "step": 26200 }, { "epoch": 0.11, "learning_rate": 0.0005821586387546804, "loss": 8.8574, "step": 26400 }, { "epoch": 0.11, "learning_rate": 0.0005849119848364386, "loss": 8.8531, "step": 26600 }, { "epoch": 0.12, "learning_rate": 0.0005876855750870848, "loss": 8.8479, "step": 26800 }, { "epoch": 0.12, "learning_rate": 0.0005904653779220791, "loss": 8.8405, "step": 27000 }, { "epoch": 0.12, "learning_rate": 0.0005932793188544346, "loss": 8.8435, "step": 27200 }, { "epoch": 0.12, "learning_rate": 0.0005961134672522114, "loss": 8.8425, "step": 27400 }, { "epoch": 0.12, "learning_rate": 0.0005989678106833648, "loss": 8.8389, "step": 27600 }, { "epoch": 0.12, "learning_rate": 0.0006018423366272695, "loss": 8.8525, "step": 27800 }, { "epoch": 0.12, "learning_rate": 0.0006047370324747583, "loss": 8.8273, "step": 28000 }, { "epoch": 0.12, "learning_rate": 0.0006076518855281984, "loss": 8.8306, "step": 28200 }, { "epoch": 0.12, "learning_rate": 0.000610586883001531, "loss": 8.8437, "step": 28400 }, { "epoch": 0.12, "learning_rate": 0.000613542012020336, "loss": 8.8236, "step": 28600 }, { "epoch": 0.12, "learning_rate": 0.0006165172596218869, "loss": 8.8274, "step": 28800 }, { "epoch": 0.12, "learning_rate": 0.0006194975859987236, "loss": 8.8275, "step": 29000 }, { "epoch": 0.13, "learning_rate": 0.0006225129310954997, "loss": 8.8211, "step": 29200 }, { "epoch": 0.13, "learning_rate": 0.0006255483554239195, "loss": 8.8177, "step": 29400 }, { "epoch": 0.13, "learning_rate": 0.000628603845669035, "loss": 8.8223, "step": 29600 }, { "epoch": 0.13, "learning_rate": 0.0006316793884278832, "loss": 8.8123, "step": 29800 }, { "epoch": 0.13, "learning_rate": 0.0006347749702095389, "loss": 8.8107, "step": 30000 }, { "epoch": 0.13, "learning_rate": 0.0006378905774351747, "loss": 8.8122, "step": 30200 }, { "epoch": 0.13, "learning_rate": 0.0006410261964381238, "loss": 8.811, "step": 30400 }, { "epoch": 0.13, "learning_rate": 0.000644181813463934, "loss": 8.813, "step": 30600 }, { "epoch": 0.13, "learning_rate": 0.0006473574146704329, "loss": 8.8057, "step": 30800 }, { "epoch": 0.13, "learning_rate": 0.0006505369586176524, "loss": 8.8033, "step": 31000 }, { "epoch": 0.13, "learning_rate": 0.0006537523865622775, "loss": 8.795, "step": 31200 }, { "epoch": 0.13, "learning_rate": 0.0006569877567060931, "loss": 8.7938, "step": 31400 }, { "epoch": 0.14, "learning_rate": 0.0006602430548570907, "loss": 8.7969, "step": 31600 }, { "epoch": 0.14, "learning_rate": 0.000663518266735847, "loss": 8.7966, "step": 31800 }, { "epoch": 0.14, "learning_rate": 0.0006668133779755819, "loss": 8.7936, "step": 32000 }, { "epoch": 0.14, "learning_rate": 0.0006701283741222287, "loss": 8.7888, "step": 32200 }, { "epoch": 0.14, "learning_rate": 0.0006734632406344993, "loss": 8.7829, "step": 32400 }, { "epoch": 0.14, "learning_rate": 0.0006768179628839337, "loss": 8.7789, "step": 32600 }, { "epoch": 0.14, "learning_rate": 0.0006801925261549872, "loss": 8.778, "step": 32800 }, { "epoch": 0.14, "learning_rate": 0.0006835698944044951, "loss": 8.7897, "step": 33000 }, { "epoch": 0.14, "learning_rate": 0.0006869839962045932, "loss": 8.779, "step": 33200 }, { "epoch": 0.14, "learning_rate": 0.0006904178944328165, "loss": 8.7697, "step": 33400 }, { "epoch": 0.14, "learning_rate": 0.0006938715740263026, "loss": 8.7818, "step": 33600 }, { "epoch": 0.15, "learning_rate": 0.0006973450198354252, "loss": 8.7667, "step": 33800 }, { "epoch": 0.15, "learning_rate": 0.0007008382166238496, "loss": 8.7759, "step": 34000 }, { "epoch": 0.15, "learning_rate": 0.0007043511490686036, "loss": 8.7797, "step": 34200 }, { "epoch": 0.15, "learning_rate": 0.0007078838017601421, "loss": 8.7644, "step": 34400 }, { "epoch": 0.15, "learning_rate": 0.0007114361592024231, "loss": 8.7678, "step": 34600 }, { "epoch": 0.15, "learning_rate": 0.0007150082058129618, "loss": 8.7672, "step": 34800 }, { "epoch": 0.15, "learning_rate": 0.0007185819184105553, "loss": 8.7672, "step": 35000 }, { "epoch": 0.15, "learning_rate": 0.0007221750927446872, "loss": 8.7573, "step": 35200 }, { "epoch": 0.15, "learning_rate": 0.0007258059161614535, "loss": 8.7584, "step": 35400 }, { "epoch": 0.15, "learning_rate": 0.0007294563657132755, "loss": 8.7442, "step": 35600 }, { "epoch": 0.15, "learning_rate": 0.0007331264253873856, "loss": 8.7595, "step": 35800 }, { "epoch": 0.15, "learning_rate": 0.0007368160790850002, "loss": 8.7564, "step": 36000 }, { "epoch": 0.16, "learning_rate": 0.0007405253106213833, "loss": 8.7517, "step": 36200 }, { "epoch": 0.16, "learning_rate": 0.0007442541037259286, "loss": 8.7583, "step": 36400 }, { "epoch": 0.16, "learning_rate": 0.0007480024420422077, "loss": 8.7426, "step": 36600 }, { "epoch": 0.16, "learning_rate": 0.0007517703091280727, "loss": 8.7519, "step": 36800 }, { "epoch": 0.16, "learning_rate": 0.0007555576884556992, "loss": 8.7393, "step": 37000 }, { "epoch": 0.16, "learning_rate": 0.0007593645634116821, "loss": 8.7262, "step": 37200 }, { "epoch": 0.16, "learning_rate": 0.0007631717371015569, "loss": 8.7383, "step": 37400 }, { "epoch": 0.16, "learning_rate": 0.0007670174558631893, "loss": 8.7365, "step": 37600 }, { "epoch": 0.16, "learning_rate": 0.0007708826199846926, "loss": 8.7385, "step": 37800 }, { "epoch": 0.16, "learning_rate": 0.0007747672125114589, "loss": 8.7385, "step": 38000 }, { "epoch": 0.16, "learning_rate": 0.0007786712164036449, "loss": 8.7164, "step": 38200 }, { "epoch": 0.17, "learning_rate": 0.0007825946145362667, "loss": 8.7262, "step": 38400 }, { "epoch": 0.17, "learning_rate": 0.0007865373896992697, "loss": 8.728, "step": 38600 }, { "epoch": 0.17, "learning_rate": 0.0007904995245975929, "loss": 8.7281, "step": 38800 }, { "epoch": 0.17, "learning_rate": 0.0007944810018512619, "loss": 8.7179, "step": 39000 }, { "epoch": 0.17, "learning_rate": 0.000798481803995452, "loss": 8.7264, "step": 39200 }, { "epoch": 0.17, "learning_rate": 0.0008025019134805696, "loss": 8.7229, "step": 39400 }, { "epoch": 0.17, "learning_rate": 0.0008065210677225022, "loss": 8.7246, "step": 39600 }, { "epoch": 0.17, "learning_rate": 0.000810579642586285, "loss": 8.7199, "step": 39800 }, { "epoch": 0.17, "learning_rate": 0.0008146574717236045, "loss": 8.7209, "step": 40000 }, { "epoch": 0.17, "learning_rate": 0.0008187545372469861, "loss": 8.7075, "step": 40200 }, { "epoch": 0.17, "learning_rate": 0.0008228708211845768, "loss": 8.7101, "step": 40400 }, { "epoch": 0.17, "learning_rate": 0.0008270063054802209, "loss": 8.7144, "step": 40600 }, { "epoch": 0.18, "learning_rate": 0.0008311609719935404, "loss": 8.7173, "step": 40800 }, { "epoch": 0.18, "learning_rate": 0.0008353348025000144, "loss": 8.7183, "step": 41000 }, { "epoch": 0.18, "learning_rate": 0.0008395277786910574, "loss": 8.7107, "step": 41200 }, { "epoch": 0.18, "learning_rate": 0.0008437398821741025, "loss": 8.7113, "step": 41400 }, { "epoch": 0.18, "learning_rate": 0.0008479710944726774, "loss": 8.7085, "step": 41600 }, { "epoch": 0.18, "learning_rate": 0.0008522000980575213, "loss": 8.7115, "step": 41800 }, { "epoch": 0.18, "learning_rate": 0.0008564693769110079, "loss": 8.7055, "step": 42000 }, { "epoch": 0.18, "learning_rate": 0.0008607577087418623, "loss": 8.6935, "step": 42200 }, { "epoch": 0.18, "learning_rate": 0.0008650650747392373, "loss": 8.7042, "step": 42400 }, { "epoch": 0.18, "learning_rate": 0.0008693914560087938, "loss": 8.6849, "step": 42600 }, { "epoch": 0.18, "learning_rate": 0.0008737368335727785, "loss": 8.6876, "step": 42800 }, { "epoch": 0.18, "learning_rate": 0.0008781011883701138, "loss": 8.6922, "step": 43000 }, { "epoch": 0.19, "learning_rate": 0.0008824845012564749, "loss": 8.6922, "step": 43200 }, { "epoch": 0.19, "learning_rate": 0.000886886753004381, "loss": 8.6853, "step": 43400 }, { "epoch": 0.19, "learning_rate": 0.000891307924303272, "loss": 8.6936, "step": 43600 }, { "epoch": 0.19, "learning_rate": 0.0008957257484203587, "loss": 8.6995, "step": 43800 }, { "epoch": 0.19, "learning_rate": 0.0009001846062028449, "loss": 8.6841, "step": 44000 }, { "epoch": 0.19, "learning_rate": 0.0009046623252050388, "loss": 8.6735, "step": 44200 }, { "epoch": 0.19, "learning_rate": 0.0009091588857853411, "loss": 8.6888, "step": 44400 }, { "epoch": 0.19, "learning_rate": 0.0009136742682195071, "loss": 8.6788, "step": 44600 }, { "epoch": 0.19, "learning_rate": 0.0009182084527007278, "loss": 8.6817, "step": 44800 }, { "epoch": 0.19, "learning_rate": 0.0009227614193397203, "loss": 8.671, "step": 45000 }, { "epoch": 0.19, "learning_rate": 0.0009273331481648092, "loss": 8.6738, "step": 45200 }, { "epoch": 0.2, "learning_rate": 0.0009319236191220222, "loss": 8.6657, "step": 45400 }, { "epoch": 0.2, "learning_rate": 0.0009365328120751783, "loss": 8.6599, "step": 45600 }, { "epoch": 0.2, "learning_rate": 0.0009411375208451828, "loss": 8.6747, "step": 45800 }, { "epoch": 0.2, "learning_rate": 0.0009457840036964961, "loss": 8.6698, "step": 46000 }, { "epoch": 0.2, "learning_rate": 0.0009504491477449178, "loss": 8.6765, "step": 46200 }, { "epoch": 0.2, "learning_rate": 0.0009551329325267026, "loss": 8.6732, "step": 46400 }, { "epoch": 0.2, "learning_rate": 0.0009598353374963477, "loss": 8.6654, "step": 46600 }, { "epoch": 0.2, "learning_rate": 0.0009645563420266623, "loss": 8.6614, "step": 46800 }, { "epoch": 0.2, "learning_rate": 0.0009692959254088748, "loss": 8.6672, "step": 47000 }, { "epoch": 0.2, "learning_rate": 0.0009740540668527146, "loss": 8.6508, "step": 47200 }, { "epoch": 0.2, "learning_rate": 0.0009788307454865058, "loss": 8.6533, "step": 47400 }, { "epoch": 0.2, "learning_rate": 0.0009836259403572592, "loss": 8.656, "step": 47600 }, { "epoch": 0.21, "learning_rate": 0.0009884155160084767, "loss": 8.65, "step": 47800 }, { "epoch": 0.21, "learning_rate": 0.0009932475878516138, "loss": 8.6559, "step": 48000 }, { "epoch": 0.21, "learning_rate": 0.0009980981126919714, "loss": 8.646, "step": 48200 }, { "epoch": 0.21, "learning_rate": 0.0010029670692526266, "loss": 8.6504, "step": 48400 }, { "epoch": 0.21, "learning_rate": 0.001007854436175815, "loss": 8.6437, "step": 48600 }, { "epoch": 0.21, "learning_rate": 0.00101276019202301, "loss": 8.6483, "step": 48800 }, { "epoch": 0.21, "learning_rate": 0.0010176843152750244, "loss": 8.6381, "step": 49000 }, { "epoch": 0.21, "learning_rate": 0.001022602026387454, "loss": 8.6419, "step": 49200 }, { "epoch": 0.21, "learning_rate": 0.0010275627280027944, "loss": 8.6381, "step": 49400 }, { "epoch": 0.21, "learning_rate": 0.0010325417320913577, "loss": 8.6274, "step": 49600 }, { "epoch": 0.21, "learning_rate": 0.0010375390168126473, "loss": 8.6264, "step": 49800 }, { "epoch": 0.21, "learning_rate": 0.0010425545602459826, "loss": 8.6279, "step": 50000 }, { "epoch": 0.22, "learning_rate": 0.0010475883403905893, "loss": 8.636, "step": 50200 }, { "epoch": 0.22, "learning_rate": 0.001052640335165696, "loss": 8.6305, "step": 50400 }, { "epoch": 0.22, "learning_rate": 0.001057710522410639, "loss": 8.6259, "step": 50600 }, { "epoch": 0.22, "learning_rate": 0.001062798879884943, "loss": 8.6288, "step": 50800 }, { "epoch": 0.22, "learning_rate": 0.0010679053852684361, "loss": 8.6286, "step": 51000 }, { "epoch": 0.22, "learning_rate": 0.0010730300161613388, "loss": 8.6203, "step": 51200 }, { "epoch": 0.22, "learning_rate": 0.0010781469914207427, "loss": 8.618, "step": 51400 }, { "epoch": 0.22, "learning_rate": 0.0010833077154690767, "loss": 8.6183, "step": 51600 }, { "epoch": 0.22, "learning_rate": 0.0010884864974642153, "loss": 8.6244, "step": 51800 }, { "epoch": 0.22, "learning_rate": 0.0010936833146893334, "loss": 8.6129, "step": 52000 }, { "epoch": 0.22, "learning_rate": 0.001098898144348496, "loss": 8.622, "step": 52200 }, { "epoch": 0.23, "learning_rate": 0.001104130963566756, "loss": 8.6104, "step": 52400 }, { "epoch": 0.23, "learning_rate": 0.001109381749390256, "loss": 8.603, "step": 52600 }, { "epoch": 0.23, "learning_rate": 0.00111465047878633, "loss": 8.6093, "step": 52800 }, { "epoch": 0.23, "learning_rate": 0.001119937128643592, "loss": 8.5969, "step": 53000 }, { "epoch": 0.23, "learning_rate": 0.0011252416757720606, "loss": 8.5992, "step": 53200 }, { "epoch": 0.23, "learning_rate": 0.0011305374403745901, "loss": 8.6047, "step": 53400 }, { "epoch": 0.23, "learning_rate": 0.001135877622966507, "loss": 8.5958, "step": 53600 }, { "epoch": 0.23, "learning_rate": 0.001141235632906355, "loss": 8.5948, "step": 53800 }, { "epoch": 0.23, "learning_rate": 0.0011466114466911256, "loss": 8.5896, "step": 54000 }, { "epoch": 0.23, "learning_rate": 0.001152005040739713, "loss": 8.5887, "step": 54200 }, { "epoch": 0.23, "learning_rate": 0.0011574163913930131, "loss": 8.5862, "step": 54400 }, { "epoch": 0.23, "learning_rate": 0.0011628454749140395, "loss": 8.5949, "step": 54600 }, { "epoch": 0.24, "learning_rate": 0.0011682922674880192, "loss": 8.588, "step": 54800 }, { "epoch": 0.24, "learning_rate": 0.0011737567452224911, "loss": 8.5918, "step": 55000 }, { "epoch": 0.24, "learning_rate": 0.0011792388841474245, "loss": 8.5904, "step": 55200 }, { "epoch": 0.24, "learning_rate": 0.0011847111175024606, "loss": 8.5739, "step": 55400 }, { "epoch": 0.24, "learning_rate": 0.0011902284185834888, "loss": 8.5756, "step": 55600 }, { "epoch": 0.24, "learning_rate": 0.0011957633086016797, "loss": 8.568, "step": 55800 }, { "epoch": 0.24, "learning_rate": 0.0012013157632781366, "loss": 8.5696, "step": 56000 }, { "epoch": 0.24, "learning_rate": 0.00120688575825691, "loss": 8.5768, "step": 56200 }, { "epoch": 0.24, "learning_rate": 0.0012124732691051188, "loss": 8.5696, "step": 56400 }, { "epoch": 0.24, "learning_rate": 0.0012180782713130424, "loss": 8.5687, "step": 56600 }, { "epoch": 0.24, "learning_rate": 0.0012237007402942333, "loss": 8.56, "step": 56800 }, { "epoch": 0.24, "learning_rate": 0.0012293406513856284, "loss": 8.56, "step": 57000 }, { "epoch": 0.25, "learning_rate": 0.0012349979798476525, "loss": 8.5602, "step": 57200 }, { "epoch": 0.25, "learning_rate": 0.0012406442840364133, "loss": 8.5551, "step": 57400 }, { "epoch": 0.25, "learning_rate": 0.0012463362859392122, "loss": 8.5556, "step": 57600 }, { "epoch": 0.25, "learning_rate": 0.0012520456306609733, "loss": 8.5508, "step": 57800 }, { "epoch": 0.25, "learning_rate": 0.0012577722931575563, "loss": 8.549, "step": 58000 }, { "epoch": 0.25, "learning_rate": 0.0012634874855592566, "loss": 8.5582, "step": 58200 }, { "epoch": 0.25, "learning_rate": 0.001269248621894795, "loss": 8.554, "step": 58400 }, { "epoch": 0.25, "learning_rate": 0.0012750270005439136, "loss": 8.5453, "step": 58600 }, { "epoch": 0.25, "learning_rate": 0.0012808225961596451, "loss": 8.5545, "step": 58800 }, { "epoch": 0.25, "learning_rate": 0.0012866353833195041, "loss": 8.543, "step": 59000 }, { "epoch": 0.25, "learning_rate": 0.0012924653365255934, "loss": 8.5454, "step": 59200 }, { "epoch": 0.26, "learning_rate": 0.001298312430204715, "loss": 8.5496, "step": 59400 }, { "epoch": 0.26, "learning_rate": 0.0013041766387084808, "loss": 8.5348, "step": 59600 }, { "epoch": 0.26, "learning_rate": 0.0013100579363134381, "loss": 8.5311, "step": 59800 }, { "epoch": 0.26, "learning_rate": 0.001315956297221161, "loss": 8.5378, "step": 60000 }, { "epoch": 0.26, "learning_rate": 0.0013218716955583822, "loss": 8.5304, "step": 60200 }, { "epoch": 0.26, "learning_rate": 0.0013278041053770978, "loss": 8.5166, "step": 60400 }, { "epoch": 0.26, "learning_rate": 0.0013337237114701053, "loss": 8.5324, "step": 60600 }, { "epoch": 0.26, "learning_rate": 0.001339689981377656, "loss": 8.5196, "step": 60800 }, { "epoch": 0.26, "learning_rate": 0.0013456731846064624, "loss": 8.5191, "step": 61000 }, { "epoch": 0.26, "learning_rate": 0.0013516732949110932, "loss": 8.5285, "step": 61200 }, { "epoch": 0.26, "learning_rate": 0.0013576902859719474, "loss": 8.5143, "step": 61400 }, { "epoch": 0.26, "learning_rate": 0.0013637241313953895, "loss": 8.519, "step": 61600 }, { "epoch": 0.27, "learning_rate": 0.0013697748047138431, "loss": 8.4964, "step": 61800 }, { "epoch": 0.27, "learning_rate": 0.0013758422793859176, "loss": 8.5216, "step": 62000 }, { "epoch": 0.27, "learning_rate": 0.001381926528796519, "loss": 8.5098, "step": 62200 }, { "epoch": 0.27, "learning_rate": 0.0013880275262569807, "loss": 8.511, "step": 62400 }, { "epoch": 0.27, "learning_rate": 0.0013941452450051631, "loss": 8.5124, "step": 62600 }, { "epoch": 0.27, "learning_rate": 0.001400248944656608, "loss": 8.5054, "step": 62800 }, { "epoch": 0.27, "learning_rate": 0.0014063999421298785, "loss": 8.5016, "step": 63000 }, { "epoch": 0.27, "learning_rate": 0.0014125675802999262, "loss": 8.501, "step": 63200 }, { "epoch": 0.27, "learning_rate": 0.001418751832112295, "loss": 8.505, "step": 63400 }, { "epoch": 0.27, "learning_rate": 0.0014249526704396467, "loss": 8.4953, "step": 63600 }, { "epoch": 0.27, "learning_rate": 0.0014311700680818915, "loss": 8.5074, "step": 63800 }, { "epoch": 0.28, "learning_rate": 0.0014374039977662987, "loss": 8.4991, "step": 64000 }, { "epoch": 0.28, "learning_rate": 0.0014436544321476206, "loss": 8.5087, "step": 64200 }, { "epoch": 0.28, "learning_rate": 0.0014499213438082127, "loss": 8.4871, "step": 64400 }, { "epoch": 0.28, "learning_rate": 0.0014562047052581514, "loss": 8.4954, "step": 64600 }, { "epoch": 0.28, "learning_rate": 0.0014624729492123557, "loss": 8.4791, "step": 64800 }, { "epoch": 0.28, "learning_rate": 0.0014687890455785963, "loss": 8.4853, "step": 65000 }, { "epoch": 0.28, "learning_rate": 0.0014751215089706584, "loss": 8.4855, "step": 65200 }, { "epoch": 0.28, "learning_rate": 0.0014814703116110776, "loss": 8.4707, "step": 65400 }, { "epoch": 0.28, "learning_rate": 0.001487835425650709, "loss": 8.4743, "step": 65600 }, { "epoch": 0.28, "learning_rate": 0.001494216823168866, "loss": 8.4717, "step": 65800 }, { "epoch": 0.28, "learning_rate": 0.0015006144761734279, "loss": 8.4823, "step": 66000 }, { "epoch": 0.28, "learning_rate": 0.001507028356600975, "loss": 8.4708, "step": 66200 }, { "epoch": 0.29, "learning_rate": 0.0015134584363168998, "loss": 8.4649, "step": 66400 }, { "epoch": 0.29, "learning_rate": 0.001519904687115537, "loss": 8.4695, "step": 66600 }, { "epoch": 0.29, "learning_rate": 0.0015263347286438994, "loss": 8.4759, "step": 66800 }, { "epoch": 0.29, "learning_rate": 0.0015328131562056986, "loss": 8.4655, "step": 67000 }, { "epoch": 0.29, "learning_rate": 0.0015393076699503766, "loss": 8.4752, "step": 67200 }, { "epoch": 0.29, "learning_rate": 0.0015458182413896245, "loss": 8.4535, "step": 67400 }, { "epoch": 0.29, "learning_rate": 0.001552344841964707, "loss": 8.4535, "step": 67600 }, { "epoch": 0.29, "learning_rate": 0.0015588874430465648, "loss": 8.4519, "step": 67800 }, { "epoch": 0.29, "learning_rate": 0.001565446015935959, "loss": 8.4568, "step": 68000 }, { "epoch": 0.29, "learning_rate": 0.00157202053186359, "loss": 8.4524, "step": 68200 }, { "epoch": 0.29, "learning_rate": 0.0015786109619902212, "loss": 8.4589, "step": 68400 }, { "epoch": 0.29, "learning_rate": 0.0015852172774068075, "loss": 8.4559, "step": 68600 }, { "epoch": 0.3, "learning_rate": 0.0015918062988814347, "loss": 8.446, "step": 68800 }, { "epoch": 0.3, "learning_rate": 0.0015984442188082624, "loss": 8.439, "step": 69000 }, { "epoch": 0.3, "learning_rate": 0.0016050979370261006, "loss": 8.4504, "step": 69200 }, { "epoch": 0.3, "learning_rate": 0.0016117674243482875, "loss": 8.4487, "step": 69400 }, { "epoch": 0.3, "learning_rate": 0.0016184526515189961, "loss": 8.4472, "step": 69600 }, { "epoch": 0.3, "learning_rate": 0.0016251535892133542, "loss": 8.433, "step": 69800 }, { "epoch": 0.3, "learning_rate": 0.001631870208037572, "loss": 8.426, "step": 70000 }, { "epoch": 0.3, "learning_rate": 0.0016386024785290804, "loss": 8.4311, "step": 70200 }, { "epoch": 0.3, "learning_rate": 0.0016453503711566474, "loss": 8.432, "step": 70400 }, { "epoch": 0.3, "learning_rate": 0.0016520800001573153, "loss": 8.4282, "step": 70600 }, { "epoch": 0.3, "learning_rate": 0.0016588589704489114, "loss": 8.4386, "step": 70800 }, { "epoch": 0.31, "learning_rate": 0.0016656534740210893, "loss": 8.4158, "step": 71000 }, { "epoch": 0.31, "learning_rate": 0.0016724634810696363, "loss": 8.4242, "step": 71200 }, { "epoch": 0.31, "learning_rate": 0.0016792889617223312, "loss": 8.4279, "step": 71400 }, { "epoch": 0.31, "learning_rate": 0.0016861298860390735, "loss": 8.4242, "step": 71600 }, { "epoch": 0.31, "learning_rate": 0.0016929862240120247, "loss": 8.4271, "step": 71800 }, { "epoch": 0.31, "learning_rate": 0.0016998579455657307, "loss": 8.4265, "step": 72000 }, { "epoch": 0.31, "learning_rate": 0.0017067450205572581, "loss": 8.421, "step": 72200 }, { "epoch": 0.31, "learning_rate": 0.0017136474187763266, "loss": 8.4156, "step": 72400 }, { "epoch": 0.31, "learning_rate": 0.0017205304834985446, "loss": 8.421, "step": 72600 }, { "epoch": 0.31, "learning_rate": 0.0017274633610356825, "loss": 8.4256, "step": 72800 }, { "epoch": 0.31, "learning_rate": 0.0017344114709189774, "loss": 8.4191, "step": 73000 }, { "epoch": 0.31, "learning_rate": 0.0017413747826704132, "loss": 8.4015, "step": 73200 }, { "epoch": 0.32, "learning_rate": 0.001748318335641869, "loss": 8.4129, "step": 73400 }, { "epoch": 0.32, "learning_rate": 0.0017553118838016506, "loss": 8.4179, "step": 73600 }, { "epoch": 0.32, "learning_rate": 0.0017623205421495314, "loss": 8.4142, "step": 73800 }, { "epoch": 0.32, "learning_rate": 0.0017693442799418986, "loss": 8.4005, "step": 74000 }, { "epoch": 0.32, "learning_rate": 0.0017763830663689965, "loss": 8.41, "step": 74200 }, { "epoch": 0.32, "learning_rate": 0.0017834368705550597, "loss": 8.4162, "step": 74400 }, { "epoch": 0.32, "learning_rate": 0.001790505661558443, "loss": 8.4081, "step": 74600 }, { "epoch": 0.32, "learning_rate": 0.0017975894083717692, "loss": 8.4027, "step": 74800 }, { "epoch": 0.32, "learning_rate": 0.0018046880799220469, "loss": 8.4097, "step": 75000 }, { "epoch": 0.32, "learning_rate": 0.0018118016450708232, "loss": 8.4077, "step": 75200 }, { "epoch": 0.32, "learning_rate": 0.0018189300726143137, "loss": 8.4086, "step": 75400 }, { "epoch": 0.32, "learning_rate": 0.0018260375781495742, "loss": 8.4084, "step": 75600 }, { "epoch": 0.33, "learning_rate": 0.001833195562689592, "loss": 8.4166, "step": 75800 }, { "epoch": 0.33, "learning_rate": 0.0018403683157795104, "loss": 8.4121, "step": 76000 }, { "epoch": 0.33, "learning_rate": 0.0018475558059559121, "loss": 8.404, "step": 76200 }, { "epoch": 0.33, "learning_rate": 0.001854758001690741, "loss": 8.4035, "step": 76400 }, { "epoch": 0.33, "learning_rate": 0.0018619748713914318, "loss": 8.4044, "step": 76600 }, { "epoch": 0.33, "learning_rate": 0.0018692063834010522, "loss": 8.3948, "step": 76800 }, { "epoch": 0.33, "learning_rate": 0.0018764525059984417, "loss": 8.3935, "step": 77000 }, { "epoch": 0.33, "learning_rate": 0.001883713207398349, "loss": 8.4028, "step": 77200 }, { "epoch": 0.33, "learning_rate": 0.0018909884557515733, "loss": 8.401, "step": 77400 }, { "epoch": 0.33, "learning_rate": 0.0018982417342748425, "loss": 8.4085, "step": 77600 }, { "epoch": 0.33, "learning_rate": 0.0019055459083963232, "loss": 8.3979, "step": 77800 }, { "epoch": 0.34, "learning_rate": 0.00191282790468081, "loss": 8.4027, "step": 78000 }, { "epoch": 0.34, "learning_rate": 0.0019201608770511077, "loss": 8.401, "step": 78200 }, { "epoch": 0.34, "learning_rate": 0.001927508236496343, "loss": 8.4054, "step": 78400 }, { "epoch": 0.34, "learning_rate": 0.0019348699507871943, "loss": 8.408, "step": 78600 }, { "epoch": 0.34, "learning_rate": 0.0019422459876313608, "loss": 8.4064, "step": 78800 }, { "epoch": 0.34, "learning_rate": 0.0019496363146737205, "loss": 8.4066, "step": 79000 }, { "epoch": 0.34, "learning_rate": 0.001957040899496469, "loss": 8.4061, "step": 79200 }, { "epoch": 0.34, "learning_rate": 0.0019644597096192574, "loss": 8.411, "step": 79400 }, { "epoch": 0.34, "learning_rate": 0.00197189271249934, "loss": 8.3999, "step": 79600 }, { "epoch": 0.34, "learning_rate": 0.001979339875531708, "loss": 8.4002, "step": 79800 }, { "epoch": 0.34, "learning_rate": 0.001986801166049247, "loss": 8.4058, "step": 80000 }, { "epoch": 0.34, "learning_rate": 0.0019942391393900083, "loss": 8.4172, "step": 80200 }, { "epoch": 0.35, "learning_rate": 0.002001728516400637, "loss": 8.4089, "step": 80400 }, { "epoch": 0.35, "learning_rate": 0.002009231922688247, "loss": 8.4087, "step": 80600 }, { "epoch": 0.35, "learning_rate": 0.002016749325339009, "loss": 8.4115, "step": 80800 }, { "epoch": 0.35, "learning_rate": 0.0020242806913776997, "loss": 8.4317, "step": 81000 }, { "epoch": 0.35, "learning_rate": 0.0020318259877678373, "loss": 8.4049, "step": 81200 }, { "epoch": 0.35, "learning_rate": 0.002039385181411845, "loss": 8.4129, "step": 81400 }, { "epoch": 0.35, "learning_rate": 0.002046958239151178, "loss": 8.424, "step": 81600 }, { "epoch": 0.35, "learning_rate": 0.0020545451277664776, "loss": 8.4146, "step": 81800 }, { "epoch": 0.35, "learning_rate": 0.0020621458139777164, "loss": 8.4233, "step": 82000 }, { "epoch": 0.35, "learning_rate": 0.002069722158008656, "loss": 8.4199, "step": 82200 }, { "epoch": 0.35, "learning_rate": 0.0020773502707586607, "loss": 8.4134, "step": 82400 }, { "epoch": 0.35, "learning_rate": 0.0020849920810694245, "loss": 8.415, "step": 82600 }, { "epoch": 0.36, "learning_rate": 0.0020926475554200047, "loss": 8.4301, "step": 82800 }, { "epoch": 0.36, "learning_rate": 0.0021003166602295217, "loss": 8.4128, "step": 83000 }, { "epoch": 0.36, "learning_rate": 0.002107999361857309, "loss": 8.4284, "step": 83200 }, { "epoch": 0.36, "learning_rate": 0.002115695626603048, "loss": 8.422, "step": 83400 }, { "epoch": 0.36, "learning_rate": 0.002123405420706933, "loss": 8.4173, "step": 83600 }, { "epoch": 0.36, "learning_rate": 0.002131128710349813, "loss": 8.4245, "step": 83800 }, { "epoch": 0.36, "learning_rate": 0.002138865461653332, "loss": 8.427, "step": 84000 }, { "epoch": 0.36, "learning_rate": 0.0021465768564397046, "loss": 8.4287, "step": 84200 }, { "epoch": 0.36, "learning_rate": 0.002154340362309423, "loss": 8.4361, "step": 84400 }, { "epoch": 0.36, "learning_rate": 0.002162078310516678, "loss": 8.4299, "step": 84600 }, { "epoch": 0.36, "learning_rate": 0.0021698684354139377, "loss": 8.4447, "step": 84800 }, { "epoch": 0.37, "learning_rate": 0.0021776718520393184, "loss": 8.4399, "step": 85000 }, { "epoch": 0.37, "learning_rate": 0.0021854885261629875, "loss": 8.4469, "step": 85200 }, { "epoch": 0.37, "learning_rate": 0.0021933184234969594, "loss": 8.4328, "step": 85400 }, { "epoch": 0.37, "learning_rate": 0.0022011615096952444, "loss": 8.4504, "step": 85600 }, { "epoch": 0.37, "learning_rate": 0.002209017750354, "loss": 8.4383, "step": 85800 }, { "epoch": 0.37, "learning_rate": 0.0022168871110116815, "loss": 8.4472, "step": 86000 }, { "epoch": 0.37, "learning_rate": 0.0022247695571491945, "loss": 8.448, "step": 86200 }, { "epoch": 0.37, "learning_rate": 0.0022326650541900405, "loss": 8.4451, "step": 86400 }, { "epoch": 0.37, "learning_rate": 0.0022405339926133165, "loss": 8.4415, "step": 86600 }, { "epoch": 0.37, "learning_rate": 0.0022484554226809986, "loss": 8.4439, "step": 86800 }, { "epoch": 0.37, "learning_rate": 0.0022563897997535266, "loss": 8.4497, "step": 87000 }, { "epoch": 0.37, "learning_rate": 0.0022643370890266133, "loss": 8.452, "step": 87200 }, { "epoch": 0.38, "learning_rate": 0.0022722972556393217, "loss": 8.4677, "step": 87400 }, { "epoch": 0.38, "learning_rate": 0.0022802702646742383, "loss": 8.4595, "step": 87600 }, { "epoch": 0.38, "learning_rate": 0.002288256081157608, "loss": 8.4595, "step": 87800 }, { "epoch": 0.38, "learning_rate": 0.002296254670059502, "loss": 8.4642, "step": 88000 }, { "epoch": 0.38, "learning_rate": 0.0023042659962939603, "loss": 8.4709, "step": 88200 }, { "epoch": 0.38, "learning_rate": 0.0023122900247191545, "loss": 8.4679, "step": 88400 }, { "epoch": 0.38, "learning_rate": 0.002320286505209589, "loss": 8.4603, "step": 88600 }, { "epoch": 0.38, "learning_rate": 0.0023283357692971242, "loss": 8.4662, "step": 88800 }, { "epoch": 0.38, "learning_rate": 0.002336397629992889, "loss": 8.4653, "step": 89000 }, { "epoch": 0.38, "learning_rate": 0.002344472051933384, "loss": 8.4833, "step": 89200 }, { "epoch": 0.38, "learning_rate": 0.002352558999700007, "loss": 8.4974, "step": 89400 }, { "epoch": 0.39, "learning_rate": 0.002360658437819213, "loss": 8.4881, "step": 89600 }, { "epoch": 0.39, "learning_rate": 0.0023687703307626647, "loss": 8.4878, "step": 89800 }, { "epoch": 0.39, "learning_rate": 0.0023768946429473976, "loss": 8.4846, "step": 90000 }, { "epoch": 0.39, "learning_rate": 0.002385031338735963, "loss": 8.4866, "step": 90200 }, { "epoch": 0.39, "learning_rate": 0.0023931803824365962, "loss": 8.4847, "step": 90400 }, { "epoch": 0.39, "learning_rate": 0.002401300900956714, "loss": 8.4934, "step": 90600 }, { "epoch": 0.39, "learning_rate": 0.002409474471896992, "loss": 8.4872, "step": 90800 }, { "epoch": 0.39, "learning_rate": 0.0024176602835290807, "loss": 8.4977, "step": 91000 }, { "epoch": 0.39, "learning_rate": 0.0024258582999457665, "loss": 8.4967, "step": 91200 }, { "epoch": 0.39, "learning_rate": 0.0024340684851863, "loss": 8.505, "step": 91400 }, { "epoch": 0.39, "learning_rate": 0.002442290803236551, "loss": 8.5126, "step": 91600 }, { "epoch": 0.39, "learning_rate": 0.0024505252180291688, "loss": 8.5033, "step": 91800 }, { "epoch": 0.4, "learning_rate": 0.0024587304311256865, "loss": 8.513, "step": 92000 }, { "epoch": 0.4, "learning_rate": 0.0024669888709567232, "loss": 8.5082, "step": 92200 }, { "epoch": 0.4, "learning_rate": 0.0024752592991915973, "loss": 8.517, "step": 92400 }, { "epoch": 0.4, "learning_rate": 0.0024835416795519205, "loss": 8.5293, "step": 92600 }, { "epoch": 0.4, "learning_rate": 0.002491835975706881, "loss": 8.5094, "step": 92800 }, { "epoch": 0.4, "learning_rate": 0.0025001421512733943, "loss": 8.5139, "step": 93000 }, { "epoch": 0.4, "learning_rate": 0.0025084601698162666, "loss": 8.5099, "step": 93200 }, { "epoch": 0.4, "learning_rate": 0.0025167899948483575, "loss": 8.5185, "step": 93400 }, { "epoch": 0.4, "learning_rate": 0.0025251315898307336, "loss": 8.5143, "step": 93600 }, { "epoch": 0.4, "learning_rate": 0.002533484918172837, "loss": 8.5277, "step": 93800 }, { "epoch": 0.4, "learning_rate": 0.0025418499432326358, "loss": 8.5231, "step": 94000 }, { "epoch": 0.4, "learning_rate": 0.002550184715947826, "loss": 8.5436, "step": 94200 }, { "epoch": 0.41, "learning_rate": 0.0025585729662869474, "loss": 8.5373, "step": 94400 }, { "epoch": 0.41, "learning_rate": 0.002566972803294579, "loss": 8.5347, "step": 94600 }, { "epoch": 0.41, "learning_rate": 0.00257538419012468, "loss": 8.5544, "step": 94800 }, { "epoch": 0.41, "learning_rate": 0.0025838070898805453, "loss": 8.5339, "step": 95000 }, { "epoch": 0.41, "learning_rate": 0.002592241465614974, "loss": 8.5405, "step": 95200 }, { "epoch": 0.41, "learning_rate": 0.002600687280330416, "loss": 8.5501, "step": 95400 }, { "epoch": 0.41, "learning_rate": 0.0026091444969791513, "loss": 8.5344, "step": 95600 }, { "epoch": 0.41, "learning_rate": 0.002617613078463441, "loss": 8.5477, "step": 95800 }, { "epoch": 0.41, "learning_rate": 0.002626092987635699, "loss": 8.5443, "step": 96000 }, { "epoch": 0.41, "learning_rate": 0.002634541703276827, "loss": 8.5398, "step": 96200 }, { "epoch": 0.41, "learning_rate": 0.002643044100010169, "loss": 8.5523, "step": 96400 }, { "epoch": 0.42, "learning_rate": 0.002651557712877833, "loss": 8.5562, "step": 96600 }, { "epoch": 0.42, "learning_rate": 0.0026600825045346955, "loss": 8.5525, "step": 96800 }, { "epoch": 0.42, "learning_rate": 0.0026686184375866043, "loss": 8.5728, "step": 97000 }, { "epoch": 0.42, "learning_rate": 0.002677165474590528, "loss": 8.5631, "step": 97200 }, { "epoch": 0.42, "learning_rate": 0.002685723578054729, "loss": 8.5658, "step": 97400 }, { "epoch": 0.42, "learning_rate": 0.0026942927104389334, "loss": 8.566, "step": 97600 }, { "epoch": 0.42, "learning_rate": 0.002702872834154482, "loss": 8.5716, "step": 97800 }, { "epoch": 0.42, "learning_rate": 0.0027114639115645017, "loss": 8.5697, "step": 98000 }, { "epoch": 0.42, "learning_rate": 0.002720022867925799, "loss": 8.5726, "step": 98200 }, { "epoch": 0.42, "learning_rate": 0.0027286356853246747, "loss": 8.5718, "step": 98400 }, { "epoch": 0.42, "learning_rate": 0.0027372593434088002, "loss": 8.5716, "step": 98600 }, { "epoch": 0.42, "learning_rate": 0.002745893804350339, "loss": 8.5767, "step": 98800 }, { "epoch": 0.43, "learning_rate": 0.00275453903027407, "loss": 8.5957, "step": 99000 }, { "epoch": 0.43, "learning_rate": 0.0027631949832575475, "loss": 8.5881, "step": 99200 }, { "epoch": 0.43, "learning_rate": 0.002771861625331276, "loss": 8.5835, "step": 99400 }, { "epoch": 0.43, "learning_rate": 0.002780495505581529, "loss": 8.5905, "step": 99600 }, { "epoch": 0.43, "learning_rate": 0.002789183358769584, "loss": 8.5938, "step": 99800 }, { "epoch": 0.43, "learning_rate": 0.0027978817870494, "loss": 8.5906, "step": 100000 }, { "epoch": 0.43, "learning_rate": 0.0028065907522651585, "loss": 8.5938, "step": 100200 }, { "epoch": 0.43, "learning_rate": 0.002815310216214826, "loss": 8.5887, "step": 100400 }, { "epoch": 0.43, "learning_rate": 0.00282404014065031, "loss": 8.5922, "step": 100600 }, { "epoch": 0.43, "learning_rate": 0.0028327804872776367, "loss": 8.5926, "step": 100800 }, { "epoch": 0.43, "learning_rate": 0.002841531217757113, "loss": 8.5978, "step": 101000 }, { "epoch": 0.43, "learning_rate": 0.0028502922937035, "loss": 8.5984, "step": 101200 }, { "epoch": 0.44, "learning_rate": 0.0028590636766861726, "loss": 8.6046, "step": 101400 }, { "epoch": 0.44, "learning_rate": 0.0028678453282293013, "loss": 8.6093, "step": 101600 }, { "epoch": 0.44, "learning_rate": 0.0028766372098120076, "loss": 8.6083, "step": 101800 }, { "epoch": 0.44, "learning_rate": 0.0028854392828685377, "loss": 8.6057, "step": 102000 }, { "epoch": 0.44, "learning_rate": 0.0028942515087884407, "loss": 8.6146, "step": 102200 }, { "epoch": 0.44, "learning_rate": 0.00290307384891672, "loss": 8.608, "step": 102400 }, { "epoch": 0.44, "learning_rate": 0.00291190626455402, "loss": 8.6081, "step": 102600 }, { "epoch": 0.44, "learning_rate": 0.0029207044797924615, "loss": 8.6164, "step": 102800 }, { "epoch": 0.44, "learning_rate": 0.0029295568802797795, "loss": 8.6008, "step": 103000 }, { "epoch": 0.44, "learning_rate": 0.0029384192401078115, "loss": 8.6166, "step": 103200 }, { "epoch": 0.44, "learning_rate": 0.00294729152040165, "loss": 8.5962, "step": 103400 }, { "epoch": 0.45, "learning_rate": 0.002956173682242877, "loss": 8.6129, "step": 103600 }, { "epoch": 0.45, "learning_rate": 0.002965065686669722, "loss": 8.6092, "step": 103800 }, { "epoch": 0.45, "learning_rate": 0.0029739674946772463, "loss": 8.6189, "step": 104000 }, { "epoch": 0.45, "learning_rate": 0.002982879067217503, "loss": 8.612, "step": 104200 }, { "epoch": 0.45, "learning_rate": 0.0029918003651997144, "loss": 8.6135, "step": 104400 }, { "epoch": 0.45, "learning_rate": 0.003000731349490442, "loss": 8.6182, "step": 104600 }, { "epoch": 0.45, "learning_rate": 0.0030096719809137584, "loss": 8.6423, "step": 104800 }, { "epoch": 0.45, "learning_rate": 0.003018622220251419, "loss": 8.6145, "step": 105000 }, { "epoch": 0.45, "learning_rate": 0.0030275372054660438, "loss": 8.6249, "step": 105200 }, { "epoch": 0.45, "learning_rate": 0.0030365064952603237, "loss": 8.6265, "step": 105400 }, { "epoch": 0.45, "learning_rate": 0.0030454852752588536, "loss": 8.6304, "step": 105600 }, { "epoch": 0.45, "learning_rate": 0.0030544735060760494, "loss": 8.6309, "step": 105800 }, { "epoch": 0.46, "learning_rate": 0.0030634711482848704, "loss": 8.6258, "step": 106000 }, { "epoch": 0.46, "learning_rate": 0.003072478162416994, "loss": 8.6328, "step": 106200 }, { "epoch": 0.46, "learning_rate": 0.003081494508962985, "loss": 8.6298, "step": 106400 }, { "epoch": 0.46, "learning_rate": 0.0030905201483724717, "loss": 8.639, "step": 106600 }, { "epoch": 0.46, "learning_rate": 0.0030995550410543226, "loss": 8.6212, "step": 106800 }, { "epoch": 0.46, "learning_rate": 0.0031085991473768114, "loss": 8.6374, "step": 107000 }, { "epoch": 0.46, "learning_rate": 0.003117652427667799, "loss": 8.6326, "step": 107200 }, { "epoch": 0.46, "learning_rate": 0.0031267148422149046, "loss": 8.6291, "step": 107400 }, { "epoch": 0.46, "learning_rate": 0.003135740971163656, "loss": 8.6375, "step": 107600 }, { "epoch": 0.46, "learning_rate": 0.0031448214897512507, "loss": 8.6226, "step": 107800 }, { "epoch": 0.46, "learning_rate": 0.003153911023417371, "loss": 8.6359, "step": 108000 }, { "epoch": 0.46, "learning_rate": 0.003163009532290608, "loss": 8.6491, "step": 108200 }, { "epoch": 0.47, "learning_rate": 0.0031721169764601844, "loss": 8.6405, "step": 108400 }, { "epoch": 0.47, "learning_rate": 0.0031812333159761293, "loss": 8.632, "step": 108600 }, { "epoch": 0.47, "learning_rate": 0.003190358510849451, "loss": 8.6363, "step": 108800 }, { "epoch": 0.47, "learning_rate": 0.0031994925210523124, "loss": 8.6316, "step": 109000 }, { "epoch": 0.47, "learning_rate": 0.0032086353065182106, "loss": 8.6423, "step": 109200 }, { "epoch": 0.47, "learning_rate": 0.003217786827142146, "loss": 8.6274, "step": 109400 }, { "epoch": 0.47, "learning_rate": 0.003226947042780804, "loss": 8.6366, "step": 109600 }, { "epoch": 0.47, "learning_rate": 0.003236070047437989, "loss": 8.6388, "step": 109800 }, { "epoch": 0.47, "learning_rate": 0.003245247489550804, "loss": 8.6364, "step": 110000 }, { "epoch": 0.47, "learning_rate": 0.0032544335062216403, "loss": 8.6422, "step": 110200 }, { "epoch": 0.47, "learning_rate": 0.0032636280571558636, "loss": 8.618, "step": 110400 }, { "epoch": 0.48, "learning_rate": 0.003272831102021408, "loss": 8.6276, "step": 110600 }, { "epoch": 0.48, "learning_rate": 0.003282042600448948, "loss": 8.6454, "step": 110800 }, { "epoch": 0.48, "learning_rate": 0.0032912625120320753, "loss": 8.6388, "step": 111000 }, { "epoch": 0.48, "learning_rate": 0.0033004907963274733, "loss": 8.6339, "step": 111200 }, { "epoch": 0.48, "learning_rate": 0.003309727412855108, "loss": 8.6243, "step": 111400 }, { "epoch": 0.48, "learning_rate": 0.0033189723210983865, "loss": 8.6264, "step": 111600 }, { "epoch": 0.48, "learning_rate": 0.0033282254805043487, "loss": 8.6401, "step": 111800 }, { "epoch": 0.48, "learning_rate": 0.003337440523277331, "loss": 8.6366, "step": 112000 }, { "epoch": 0.48, "learning_rate": 0.0033467100224565524, "loss": 8.6338, "step": 112200 }, { "epoch": 0.48, "learning_rate": 0.003355987651126521, "loss": 8.6377, "step": 112400 }, { "epoch": 0.48, "learning_rate": 0.0033652733685907424, "loss": 8.6414, "step": 112600 }, { "epoch": 0.48, "learning_rate": 0.0033745671341172496, "loss": 8.6264, "step": 112800 }, { "epoch": 0.49, "learning_rate": 0.0033838689069387654, "loss": 8.6289, "step": 113000 }, { "epoch": 0.49, "learning_rate": 0.00339317864625289, "loss": 8.6244, "step": 113200 }, { "epoch": 0.49, "learning_rate": 0.003402496311222283, "loss": 8.6287, "step": 113400 }, { "epoch": 0.49, "learning_rate": 0.0034118218609748346, "loss": 8.6251, "step": 113600 }, { "epoch": 0.49, "learning_rate": 0.003421155254603846, "loss": 8.6214, "step": 113800 }, { "epoch": 0.49, "learning_rate": 0.0034304964511682147, "loss": 8.6303, "step": 114000 }, { "epoch": 0.49, "learning_rate": 0.0034398454096926092, "loss": 8.6369, "step": 114200 }, { "epoch": 0.49, "learning_rate": 0.003449202089167651, "loss": 8.6236, "step": 114400 }, { "epoch": 0.49, "learning_rate": 0.0034585196077173436, "loss": 8.6251, "step": 114600 }, { "epoch": 0.49, "learning_rate": 0.003467891567838331, "loss": 8.6295, "step": 114800 }, { "epoch": 0.49, "learning_rate": 0.003477271125884973, "loss": 8.6219, "step": 115000 }, { "epoch": 0.5, "learning_rate": 0.0034866582407136653, "loss": 8.6271, "step": 115200 }, { "epoch": 0.5, "learning_rate": 0.003496052871147656, "loss": 8.6372, "step": 115400 }, { "epoch": 0.5, "learning_rate": 0.0035054549759772242, "loss": 8.6238, "step": 115600 }, { "epoch": 0.5, "learning_rate": 0.0035148645139598637, "loss": 8.6207, "step": 115800 }, { "epoch": 0.5, "learning_rate": 0.0035242814438204637, "loss": 8.6099, "step": 116000 }, { "epoch": 0.5, "learning_rate": 0.0035337057242514833, "loss": 8.6142, "step": 116200 }, { "epoch": 0.5, "learning_rate": 0.0035431373139131472, "loss": 8.6033, "step": 116400 }, { "epoch": 0.5, "learning_rate": 0.0035525761714336104, "loss": 8.6178, "step": 116600 }, { "epoch": 0.5, "learning_rate": 0.0035619750070819923, "loss": 8.6138, "step": 116800 }, { "epoch": 0.5, "learning_rate": 0.0035714282402552104, "loss": 8.6143, "step": 117000 }, { "epoch": 0.5, "learning_rate": 0.0035808886171885554, "loss": 8.6034, "step": 117200 }, { "epoch": 0.5, "learning_rate": 0.0035903560963839124, "loss": 8.6156, "step": 117400 }, { "epoch": 0.51, "learning_rate": 0.0035998306363120057, "loss": 8.6148, "step": 117600 }, { "epoch": 0.51, "learning_rate": 0.0036093121954125906, "loss": 8.6039, "step": 117800 }, { "epoch": 0.51, "learning_rate": 0.003618800732094636, "loss": 8.6107, "step": 118000 }, { "epoch": 0.51, "learning_rate": 0.0036282962047364973, "loss": 8.6094, "step": 118200 }, { "epoch": 0.51, "learning_rate": 0.0036377985716861084, "loss": 8.616, "step": 118400 }, { "epoch": 0.51, "learning_rate": 0.003647307791261164, "loss": 8.6135, "step": 118600 }, { "epoch": 0.51, "learning_rate": 0.003656823821749292, "loss": 8.6062, "step": 118800 }, { "epoch": 0.51, "learning_rate": 0.0036662989906407328, "loss": 8.6029, "step": 119000 }, { "epoch": 0.51, "learning_rate": 0.0036758284841655496, "loss": 8.6011, "step": 119200 }, { "epoch": 0.51, "learning_rate": 0.0036853646634968946, "loss": 8.5993, "step": 119400 }, { "epoch": 0.51, "learning_rate": 0.003694907486804143, "loss": 8.6029, "step": 119600 }, { "epoch": 0.51, "learning_rate": 0.00370445691222752, "loss": 8.6018, "step": 119800 }, { "epoch": 0.52, "learning_rate": 0.003714012897878298, "loss": 8.5978, "step": 120000 } ], "max_steps": 1000000, "num_train_epochs": 5, "total_flos": 1.9125959786496e+17, "trial_name": null, "trial_params": null }