{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9992908258655148, "global_step": 93000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9946274686781424e-05, "loss": 7.7636, "step": 100 }, { "epoch": 0.0, "learning_rate": 4.989254937356285e-05, "loss": 6.4663, "step": 200 }, { "epoch": 0.0, "learning_rate": 4.9838824060344274e-05, "loss": 6.0292, "step": 300 }, { "epoch": 0.0, "learning_rate": 4.9785098747125696e-05, "loss": 5.7747, "step": 400 }, { "epoch": 0.01, "learning_rate": 4.9731373433907124e-05, "loss": 5.592, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.9677648120688546e-05, "loss": 5.4925, "step": 600 }, { "epoch": 0.01, "learning_rate": 4.962392280746997e-05, "loss": 5.3733, "step": 700 }, { "epoch": 0.01, "learning_rate": 4.957019749425139e-05, "loss": 5.2595, "step": 800 }, { "epoch": 0.01, "learning_rate": 4.951647218103282e-05, "loss": 5.179, "step": 900 }, { "epoch": 0.01, "learning_rate": 4.946274686781424e-05, "loss": 5.1054, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.940902155459566e-05, "loss": 5.053, "step": 1100 }, { "epoch": 0.01, "learning_rate": 4.935529624137709e-05, "loss": 4.9869, "step": 1200 }, { "epoch": 0.01, "learning_rate": 4.930157092815851e-05, "loss": 4.9311, "step": 1300 }, { "epoch": 0.02, "learning_rate": 4.924784561493994e-05, "loss": 4.859, "step": 1400 }, { "epoch": 0.02, "learning_rate": 4.919412030172136e-05, "loss": 4.8506, "step": 1500 }, { "epoch": 0.02, "learning_rate": 4.914039498850279e-05, "loss": 4.8073, "step": 1600 }, { "epoch": 0.02, "learning_rate": 4.908666967528421e-05, "loss": 4.762, "step": 1700 }, { "epoch": 0.02, "learning_rate": 4.903294436206564e-05, "loss": 4.7197, "step": 1800 }, { "epoch": 0.02, "learning_rate": 4.897921904884706e-05, "loss": 4.6881, "step": 1900 }, { "epoch": 0.02, "learning_rate": 4.892549373562848e-05, "loss": 4.6526, "step": 2000 }, { "epoch": 0.02, "learning_rate": 4.8871768422409904e-05, "loss": 4.6091, "step": 2100 }, { "epoch": 0.02, "learning_rate": 4.881804310919133e-05, "loss": 4.5926, "step": 2200 }, { "epoch": 0.02, "learning_rate": 4.8764317795972754e-05, "loss": 4.557, "step": 2300 }, { "epoch": 0.03, "learning_rate": 4.8710592482754176e-05, "loss": 4.5554, "step": 2400 }, { "epoch": 0.03, "learning_rate": 4.8656867169535604e-05, "loss": 4.5275, "step": 2500 }, { "epoch": 0.03, "learning_rate": 4.8603141856317026e-05, "loss": 4.5023, "step": 2600 }, { "epoch": 0.03, "learning_rate": 4.854941654309845e-05, "loss": 4.471, "step": 2700 }, { "epoch": 0.03, "learning_rate": 4.8495691229879876e-05, "loss": 4.4634, "step": 2800 }, { "epoch": 0.03, "learning_rate": 4.84419659166613e-05, "loss": 4.4476, "step": 2900 }, { "epoch": 0.03, "learning_rate": 4.838824060344272e-05, "loss": 4.425, "step": 3000 }, { "epoch": 0.03, "learning_rate": 4.833451529022414e-05, "loss": 4.4013, "step": 3100 }, { "epoch": 0.03, "learning_rate": 4.828078997700557e-05, "loss": 4.3986, "step": 3200 }, { "epoch": 0.04, "learning_rate": 4.822706466378699e-05, "loss": 4.3495, "step": 3300 }, { "epoch": 0.04, "learning_rate": 4.817333935056841e-05, "loss": 4.3396, "step": 3400 }, { "epoch": 0.04, "learning_rate": 4.811961403734984e-05, "loss": 4.3402, "step": 3500 }, { "epoch": 0.04, "learning_rate": 4.806588872413126e-05, "loss": 4.2908, "step": 3600 }, { "epoch": 0.04, "learning_rate": 4.8012163410912684e-05, "loss": 4.2989, "step": 3700 }, { "epoch": 0.04, "learning_rate": 4.795843809769411e-05, "loss": 4.2971, "step": 3800 }, { "epoch": 0.04, "learning_rate": 4.7904712784475534e-05, "loss": 4.2611, "step": 3900 }, { "epoch": 0.04, "learning_rate": 4.7850987471256956e-05, "loss": 4.2654, "step": 4000 }, { "epoch": 0.04, "learning_rate": 4.7797262158038384e-05, "loss": 4.2545, "step": 4100 }, { "epoch": 0.05, "learning_rate": 4.7743536844819806e-05, "loss": 4.2243, "step": 4200 }, { "epoch": 0.05, "learning_rate": 4.768981153160123e-05, "loss": 4.2148, "step": 4300 }, { "epoch": 0.05, "learning_rate": 4.7636086218382656e-05, "loss": 4.2041, "step": 4400 }, { "epoch": 0.05, "learning_rate": 4.758236090516408e-05, "loss": 4.1845, "step": 4500 }, { "epoch": 0.05, "learning_rate": 4.7528635591945506e-05, "loss": 4.1751, "step": 4600 }, { "epoch": 0.05, "learning_rate": 4.747491027872693e-05, "loss": 4.1826, "step": 4700 }, { "epoch": 0.05, "learning_rate": 4.7421184965508356e-05, "loss": 4.1715, "step": 4800 }, { "epoch": 0.05, "learning_rate": 4.736745965228978e-05, "loss": 4.1371, "step": 4900 }, { "epoch": 0.05, "learning_rate": 4.73137343390712e-05, "loss": 4.1361, "step": 5000 }, { "epoch": 0.05, "learning_rate": 4.726000902585263e-05, "loss": 4.1224, "step": 5100 }, { "epoch": 0.06, "learning_rate": 4.720628371263405e-05, "loss": 4.1226, "step": 5200 }, { "epoch": 0.06, "learning_rate": 4.715255839941547e-05, "loss": 4.0926, "step": 5300 }, { "epoch": 0.06, "learning_rate": 4.70988330861969e-05, "loss": 4.0933, "step": 5400 }, { "epoch": 0.06, "learning_rate": 4.704510777297832e-05, "loss": 4.0768, "step": 5500 }, { "epoch": 0.06, "learning_rate": 4.699138245975974e-05, "loss": 4.0659, "step": 5600 }, { "epoch": 0.06, "learning_rate": 4.6937657146541164e-05, "loss": 4.0727, "step": 5700 }, { "epoch": 0.06, "learning_rate": 4.688393183332259e-05, "loss": 4.0501, "step": 5800 }, { "epoch": 0.06, "learning_rate": 4.6830206520104014e-05, "loss": 4.0401, "step": 5900 }, { "epoch": 0.06, "learning_rate": 4.6776481206885436e-05, "loss": 4.0494, "step": 6000 }, { "epoch": 0.07, "learning_rate": 4.6722755893666864e-05, "loss": 4.012, "step": 6100 }, { "epoch": 0.07, "learning_rate": 4.6669030580448286e-05, "loss": 4.0165, "step": 6200 }, { "epoch": 0.07, "learning_rate": 4.661530526722971e-05, "loss": 4.0203, "step": 6300 }, { "epoch": 0.07, "learning_rate": 4.6561579954011136e-05, "loss": 4.0026, "step": 6400 }, { "epoch": 0.07, "learning_rate": 4.650785464079256e-05, "loss": 4.0005, "step": 6500 }, { "epoch": 0.07, "learning_rate": 4.645412932757398e-05, "loss": 3.9956, "step": 6600 }, { "epoch": 0.07, "learning_rate": 4.640040401435541e-05, "loss": 3.9539, "step": 6700 }, { "epoch": 0.07, "learning_rate": 4.634667870113683e-05, "loss": 3.9839, "step": 6800 }, { "epoch": 0.07, "learning_rate": 4.629295338791825e-05, "loss": 3.9575, "step": 6900 }, { "epoch": 0.08, "learning_rate": 4.623922807469967e-05, "loss": 3.9549, "step": 7000 }, { "epoch": 0.08, "learning_rate": 4.61855027614811e-05, "loss": 3.9721, "step": 7100 }, { "epoch": 0.08, "learning_rate": 4.613177744826252e-05, "loss": 3.9359, "step": 7200 }, { "epoch": 0.08, "learning_rate": 4.607805213504395e-05, "loss": 3.9508, "step": 7300 }, { "epoch": 0.08, "learning_rate": 4.602432682182537e-05, "loss": 3.936, "step": 7400 }, { "epoch": 0.08, "learning_rate": 4.59706015086068e-05, "loss": 3.9156, "step": 7500 }, { "epoch": 0.08, "learning_rate": 4.591687619538822e-05, "loss": 3.8848, "step": 7600 }, { "epoch": 0.08, "learning_rate": 4.586315088216965e-05, "loss": 3.9082, "step": 7700 }, { "epoch": 0.08, "learning_rate": 4.580942556895107e-05, "loss": 3.8896, "step": 7800 }, { "epoch": 0.08, "learning_rate": 4.5755700255732494e-05, "loss": 3.9116, "step": 7900 }, { "epoch": 0.09, "learning_rate": 4.5701974942513916e-05, "loss": 3.8913, "step": 8000 }, { "epoch": 0.09, "learning_rate": 4.5648249629295344e-05, "loss": 3.8855, "step": 8100 }, { "epoch": 0.09, "learning_rate": 4.5594524316076766e-05, "loss": 3.8861, "step": 8200 }, { "epoch": 0.09, "learning_rate": 4.554079900285819e-05, "loss": 3.8898, "step": 8300 }, { "epoch": 0.09, "learning_rate": 4.5487073689639616e-05, "loss": 3.8646, "step": 8400 }, { "epoch": 0.09, "learning_rate": 4.543334837642104e-05, "loss": 3.8709, "step": 8500 }, { "epoch": 0.09, "learning_rate": 4.537962306320246e-05, "loss": 3.8733, "step": 8600 }, { "epoch": 0.09, "learning_rate": 4.532589774998389e-05, "loss": 3.8561, "step": 8700 }, { "epoch": 0.09, "learning_rate": 4.527217243676531e-05, "loss": 3.8441, "step": 8800 }, { "epoch": 0.1, "learning_rate": 4.521844712354673e-05, "loss": 3.8287, "step": 8900 }, { "epoch": 0.1, "learning_rate": 4.516472181032816e-05, "loss": 3.8163, "step": 9000 }, { "epoch": 0.1, "learning_rate": 4.511099649710958e-05, "loss": 3.8502, "step": 9100 }, { "epoch": 0.1, "learning_rate": 4.5057271183891e-05, "loss": 3.8384, "step": 9200 }, { "epoch": 0.1, "learning_rate": 4.5003545870672424e-05, "loss": 3.8434, "step": 9300 }, { "epoch": 0.1, "learning_rate": 4.494982055745385e-05, "loss": 3.8016, "step": 9400 }, { "epoch": 0.1, "learning_rate": 4.4896095244235274e-05, "loss": 3.8089, "step": 9500 }, { "epoch": 0.1, "learning_rate": 4.4842369931016696e-05, "loss": 3.7775, "step": 9600 }, { "epoch": 0.1, "learning_rate": 4.4788644617798124e-05, "loss": 3.8009, "step": 9700 }, { "epoch": 0.11, "learning_rate": 4.4734919304579546e-05, "loss": 3.7925, "step": 9800 }, { "epoch": 0.11, "learning_rate": 4.468119399136097e-05, "loss": 3.781, "step": 9900 }, { "epoch": 0.11, "learning_rate": 4.4627468678142396e-05, "loss": 3.7895, "step": 10000 }, { "epoch": 0.11, "learning_rate": 4.457374336492382e-05, "loss": 3.7623, "step": 10100 }, { "epoch": 0.11, "learning_rate": 4.452001805170524e-05, "loss": 3.771, "step": 10200 }, { "epoch": 0.11, "learning_rate": 4.446629273848667e-05, "loss": 3.777, "step": 10300 }, { "epoch": 0.11, "learning_rate": 4.441256742526809e-05, "loss": 3.7779, "step": 10400 }, { "epoch": 0.11, "learning_rate": 4.435884211204952e-05, "loss": 3.7824, "step": 10500 }, { "epoch": 0.11, "learning_rate": 4.430511679883094e-05, "loss": 3.7539, "step": 10600 }, { "epoch": 0.11, "learning_rate": 4.425139148561237e-05, "loss": 3.7346, "step": 10700 }, { "epoch": 0.12, "learning_rate": 4.419766617239379e-05, "loss": 3.7459, "step": 10800 }, { "epoch": 0.12, "learning_rate": 4.414394085917521e-05, "loss": 3.7569, "step": 10900 }, { "epoch": 0.12, "learning_rate": 4.409021554595664e-05, "loss": 3.7202, "step": 11000 }, { "epoch": 0.12, "learning_rate": 4.403649023273806e-05, "loss": 3.7187, "step": 11100 }, { "epoch": 0.12, "learning_rate": 4.398276491951948e-05, "loss": 3.7233, "step": 11200 }, { "epoch": 0.12, "learning_rate": 4.392903960630091e-05, "loss": 3.7294, "step": 11300 }, { "epoch": 0.12, "learning_rate": 4.387531429308233e-05, "loss": 3.7285, "step": 11400 }, { "epoch": 0.12, "learning_rate": 4.3821588979863754e-05, "loss": 3.7293, "step": 11500 }, { "epoch": 0.12, "learning_rate": 4.376786366664518e-05, "loss": 3.7306, "step": 11600 }, { "epoch": 0.13, "learning_rate": 4.3714138353426604e-05, "loss": 3.6982, "step": 11700 }, { "epoch": 0.13, "learning_rate": 4.3660413040208026e-05, "loss": 3.7008, "step": 11800 }, { "epoch": 0.13, "learning_rate": 4.360668772698945e-05, "loss": 3.7103, "step": 11900 }, { "epoch": 0.13, "learning_rate": 4.3552962413770876e-05, "loss": 3.7016, "step": 12000 }, { "epoch": 0.13, "learning_rate": 4.34992371005523e-05, "loss": 3.7023, "step": 12100 }, { "epoch": 0.13, "learning_rate": 4.344551178733372e-05, "loss": 3.6621, "step": 12200 }, { "epoch": 0.13, "learning_rate": 4.339178647411515e-05, "loss": 3.6578, "step": 12300 }, { "epoch": 0.13, "learning_rate": 4.333806116089657e-05, "loss": 3.6704, "step": 12400 }, { "epoch": 0.13, "learning_rate": 4.328433584767799e-05, "loss": 3.6973, "step": 12500 }, { "epoch": 0.14, "learning_rate": 4.323061053445942e-05, "loss": 3.6702, "step": 12600 }, { "epoch": 0.14, "learning_rate": 4.317688522124084e-05, "loss": 3.6582, "step": 12700 }, { "epoch": 0.14, "learning_rate": 4.312315990802226e-05, "loss": 3.6654, "step": 12800 }, { "epoch": 0.14, "learning_rate": 4.3069434594803684e-05, "loss": 3.6911, "step": 12900 }, { "epoch": 0.14, "learning_rate": 4.301570928158511e-05, "loss": 3.6679, "step": 13000 }, { "epoch": 0.14, "learning_rate": 4.2961983968366534e-05, "loss": 3.6774, "step": 13100 }, { "epoch": 0.14, "learning_rate": 4.290825865514796e-05, "loss": 3.6684, "step": 13200 }, { "epoch": 0.14, "learning_rate": 4.2854533341929384e-05, "loss": 3.6527, "step": 13300 }, { "epoch": 0.14, "learning_rate": 4.280080802871081e-05, "loss": 3.6351, "step": 13400 }, { "epoch": 0.15, "learning_rate": 4.2747082715492234e-05, "loss": 3.6591, "step": 13500 }, { "epoch": 0.15, "learning_rate": 4.269335740227366e-05, "loss": 3.6161, "step": 13600 }, { "epoch": 0.15, "learning_rate": 4.2639632089055084e-05, "loss": 3.649, "step": 13700 }, { "epoch": 0.15, "learning_rate": 4.2585906775836506e-05, "loss": 3.6286, "step": 13800 }, { "epoch": 0.15, "learning_rate": 4.2532181462617934e-05, "loss": 3.6198, "step": 13900 }, { "epoch": 0.15, "learning_rate": 4.2478456149399356e-05, "loss": 3.6225, "step": 14000 }, { "epoch": 0.15, "learning_rate": 4.242473083618078e-05, "loss": 3.6132, "step": 14100 }, { "epoch": 0.15, "learning_rate": 4.23710055229622e-05, "loss": 3.6314, "step": 14200 }, { "epoch": 0.15, "learning_rate": 4.231728020974363e-05, "loss": 3.6117, "step": 14300 }, { "epoch": 0.15, "learning_rate": 4.226355489652505e-05, "loss": 3.6054, "step": 14400 }, { "epoch": 0.16, "learning_rate": 4.220982958330647e-05, "loss": 3.6041, "step": 14500 }, { "epoch": 0.16, "learning_rate": 4.21561042700879e-05, "loss": 3.617, "step": 14600 }, { "epoch": 0.16, "learning_rate": 4.210237895686932e-05, "loss": 3.6008, "step": 14700 }, { "epoch": 0.16, "learning_rate": 4.204865364365074e-05, "loss": 3.6203, "step": 14800 }, { "epoch": 0.16, "learning_rate": 4.199492833043217e-05, "loss": 3.6015, "step": 14900 }, { "epoch": 0.16, "learning_rate": 4.194120301721359e-05, "loss": 3.6095, "step": 15000 }, { "epoch": 0.16, "learning_rate": 4.1887477703995014e-05, "loss": 3.5943, "step": 15100 }, { "epoch": 0.16, "learning_rate": 4.183375239077644e-05, "loss": 3.5944, "step": 15200 }, { "epoch": 0.16, "learning_rate": 4.1780027077557864e-05, "loss": 3.6065, "step": 15300 }, { "epoch": 0.17, "learning_rate": 4.1726301764339286e-05, "loss": 3.5875, "step": 15400 }, { "epoch": 0.17, "learning_rate": 4.167257645112071e-05, "loss": 3.5759, "step": 15500 }, { "epoch": 0.17, "learning_rate": 4.1618851137902136e-05, "loss": 3.5856, "step": 15600 }, { "epoch": 0.17, "learning_rate": 4.156512582468356e-05, "loss": 3.5943, "step": 15700 }, { "epoch": 0.17, "learning_rate": 4.151140051146498e-05, "loss": 3.5796, "step": 15800 }, { "epoch": 0.17, "learning_rate": 4.145767519824641e-05, "loss": 3.5752, "step": 15900 }, { "epoch": 0.17, "learning_rate": 4.140394988502783e-05, "loss": 3.5666, "step": 16000 }, { "epoch": 0.17, "learning_rate": 4.135022457180926e-05, "loss": 3.5624, "step": 16100 }, { "epoch": 0.17, "learning_rate": 4.129649925859068e-05, "loss": 3.5564, "step": 16200 }, { "epoch": 0.18, "learning_rate": 4.124277394537211e-05, "loss": 3.5533, "step": 16300 }, { "epoch": 0.18, "learning_rate": 4.118904863215353e-05, "loss": 3.5688, "step": 16400 }, { "epoch": 0.18, "learning_rate": 4.113532331893496e-05, "loss": 3.5587, "step": 16500 }, { "epoch": 0.18, "learning_rate": 4.108159800571638e-05, "loss": 3.5537, "step": 16600 }, { "epoch": 0.18, "learning_rate": 4.10278726924978e-05, "loss": 3.5594, "step": 16700 }, { "epoch": 0.18, "learning_rate": 4.097414737927922e-05, "loss": 3.551, "step": 16800 }, { "epoch": 0.18, "learning_rate": 4.092042206606065e-05, "loss": 3.5696, "step": 16900 }, { "epoch": 0.18, "learning_rate": 4.086669675284207e-05, "loss": 3.5331, "step": 17000 }, { "epoch": 0.18, "learning_rate": 4.0812971439623494e-05, "loss": 3.5312, "step": 17100 }, { "epoch": 0.18, "learning_rate": 4.075924612640492e-05, "loss": 3.5508, "step": 17200 }, { "epoch": 0.19, "learning_rate": 4.0705520813186344e-05, "loss": 3.5345, "step": 17300 }, { "epoch": 0.19, "learning_rate": 4.0651795499967766e-05, "loss": 3.5264, "step": 17400 }, { "epoch": 0.19, "learning_rate": 4.0598070186749194e-05, "loss": 3.5412, "step": 17500 }, { "epoch": 0.19, "learning_rate": 4.0544344873530616e-05, "loss": 3.5237, "step": 17600 }, { "epoch": 0.19, "learning_rate": 4.049061956031204e-05, "loss": 3.5538, "step": 17700 }, { "epoch": 0.19, "learning_rate": 4.043689424709346e-05, "loss": 3.5171, "step": 17800 }, { "epoch": 0.19, "learning_rate": 4.038316893387489e-05, "loss": 3.525, "step": 17900 }, { "epoch": 0.19, "learning_rate": 4.032944362065631e-05, "loss": 3.5248, "step": 18000 }, { "epoch": 0.19, "learning_rate": 4.027571830743773e-05, "loss": 3.5384, "step": 18100 }, { "epoch": 0.2, "learning_rate": 4.022199299421916e-05, "loss": 3.5208, "step": 18200 }, { "epoch": 0.2, "learning_rate": 4.016826768100058e-05, "loss": 3.509, "step": 18300 }, { "epoch": 0.2, "learning_rate": 4.0114542367782e-05, "loss": 3.4961, "step": 18400 }, { "epoch": 0.2, "learning_rate": 4.006081705456343e-05, "loss": 3.5075, "step": 18500 }, { "epoch": 0.2, "learning_rate": 4.000709174134485e-05, "loss": 3.5087, "step": 18600 }, { "epoch": 0.2, "learning_rate": 3.9953366428126274e-05, "loss": 3.5195, "step": 18700 }, { "epoch": 0.2, "learning_rate": 3.98996411149077e-05, "loss": 3.5037, "step": 18800 }, { "epoch": 0.2, "learning_rate": 3.9845915801689124e-05, "loss": 3.4878, "step": 18900 }, { "epoch": 0.2, "learning_rate": 3.9792190488470546e-05, "loss": 3.4923, "step": 19000 }, { "epoch": 0.21, "learning_rate": 3.9738465175251974e-05, "loss": 3.4896, "step": 19100 }, { "epoch": 0.21, "learning_rate": 3.9684739862033396e-05, "loss": 3.4887, "step": 19200 }, { "epoch": 0.21, "learning_rate": 3.9631014548814824e-05, "loss": 3.4944, "step": 19300 }, { "epoch": 0.21, "learning_rate": 3.9577289235596246e-05, "loss": 3.4762, "step": 19400 }, { "epoch": 0.21, "learning_rate": 3.9523563922377674e-05, "loss": 3.4909, "step": 19500 }, { "epoch": 0.21, "learning_rate": 3.9469838609159096e-05, "loss": 3.4957, "step": 19600 }, { "epoch": 0.21, "learning_rate": 3.941611329594052e-05, "loss": 3.486, "step": 19700 }, { "epoch": 0.21, "learning_rate": 3.9362387982721946e-05, "loss": 3.47, "step": 19800 }, { "epoch": 0.21, "learning_rate": 3.930866266950337e-05, "loss": 3.4813, "step": 19900 }, { "epoch": 0.21, "learning_rate": 3.925493735628479e-05, "loss": 3.4835, "step": 20000 }, { "epoch": 0.22, "learning_rate": 3.920121204306622e-05, "loss": 3.4677, "step": 20100 }, { "epoch": 0.22, "learning_rate": 3.914748672984764e-05, "loss": 3.4771, "step": 20200 }, { "epoch": 0.22, "learning_rate": 3.909376141662906e-05, "loss": 3.4582, "step": 20300 }, { "epoch": 0.22, "learning_rate": 3.904003610341048e-05, "loss": 3.4537, "step": 20400 }, { "epoch": 0.22, "learning_rate": 3.898631079019191e-05, "loss": 3.4821, "step": 20500 }, { "epoch": 0.22, "learning_rate": 3.893258547697333e-05, "loss": 3.4783, "step": 20600 }, { "epoch": 0.22, "learning_rate": 3.8878860163754754e-05, "loss": 3.476, "step": 20700 }, { "epoch": 0.22, "learning_rate": 3.882513485053618e-05, "loss": 3.4741, "step": 20800 }, { "epoch": 0.22, "learning_rate": 3.8771409537317604e-05, "loss": 3.437, "step": 20900 }, { "epoch": 0.23, "learning_rate": 3.8717684224099026e-05, "loss": 3.4547, "step": 21000 }, { "epoch": 0.23, "learning_rate": 3.8663958910880454e-05, "loss": 3.451, "step": 21100 }, { "epoch": 0.23, "learning_rate": 3.8610233597661876e-05, "loss": 3.4419, "step": 21200 }, { "epoch": 0.23, "learning_rate": 3.85565082844433e-05, "loss": 3.4506, "step": 21300 }, { "epoch": 0.23, "learning_rate": 3.8502782971224726e-05, "loss": 3.431, "step": 21400 }, { "epoch": 0.23, "learning_rate": 3.844905765800615e-05, "loss": 3.4346, "step": 21500 }, { "epoch": 0.23, "learning_rate": 3.839533234478757e-05, "loss": 3.4519, "step": 21600 }, { "epoch": 0.23, "learning_rate": 3.834160703156899e-05, "loss": 3.416, "step": 21700 }, { "epoch": 0.23, "learning_rate": 3.828788171835042e-05, "loss": 3.4357, "step": 21800 }, { "epoch": 0.24, "learning_rate": 3.823415640513184e-05, "loss": 3.4319, "step": 21900 }, { "epoch": 0.24, "learning_rate": 3.818043109191327e-05, "loss": 3.4303, "step": 22000 }, { "epoch": 0.24, "learning_rate": 3.812670577869469e-05, "loss": 3.4253, "step": 22100 }, { "epoch": 0.24, "learning_rate": 3.807298046547612e-05, "loss": 3.4216, "step": 22200 }, { "epoch": 0.24, "learning_rate": 3.801925515225754e-05, "loss": 3.4161, "step": 22300 }, { "epoch": 0.24, "learning_rate": 3.796552983903897e-05, "loss": 3.4169, "step": 22400 }, { "epoch": 0.24, "learning_rate": 3.791180452582039e-05, "loss": 3.4159, "step": 22500 }, { "epoch": 0.24, "learning_rate": 3.785807921260181e-05, "loss": 3.4083, "step": 22600 }, { "epoch": 0.24, "learning_rate": 3.7804353899383234e-05, "loss": 3.4377, "step": 22700 }, { "epoch": 0.24, "learning_rate": 3.775062858616466e-05, "loss": 3.4212, "step": 22800 }, { "epoch": 0.25, "learning_rate": 3.7696903272946084e-05, "loss": 3.4037, "step": 22900 }, { "epoch": 0.25, "learning_rate": 3.7643177959727506e-05, "loss": 3.4036, "step": 23000 }, { "epoch": 0.25, "learning_rate": 3.7589452646508934e-05, "loss": 3.4091, "step": 23100 }, { "epoch": 0.25, "learning_rate": 3.7535727333290356e-05, "loss": 3.4189, "step": 23200 }, { "epoch": 0.25, "learning_rate": 3.748200202007178e-05, "loss": 3.3798, "step": 23300 }, { "epoch": 0.25, "learning_rate": 3.7428276706853206e-05, "loss": 3.3914, "step": 23400 }, { "epoch": 0.25, "learning_rate": 3.737455139363463e-05, "loss": 3.3939, "step": 23500 }, { "epoch": 0.25, "learning_rate": 3.732082608041605e-05, "loss": 3.4001, "step": 23600 }, { "epoch": 0.25, "learning_rate": 3.726710076719748e-05, "loss": 3.4028, "step": 23700 }, { "epoch": 0.26, "learning_rate": 3.72133754539789e-05, "loss": 3.379, "step": 23800 }, { "epoch": 0.26, "learning_rate": 3.715965014076032e-05, "loss": 3.3824, "step": 23900 }, { "epoch": 0.26, "learning_rate": 3.710592482754174e-05, "loss": 3.392, "step": 24000 }, { "epoch": 0.26, "learning_rate": 3.705219951432317e-05, "loss": 3.3953, "step": 24100 }, { "epoch": 0.26, "learning_rate": 3.699847420110459e-05, "loss": 3.3776, "step": 24200 }, { "epoch": 0.26, "learning_rate": 3.6944748887886014e-05, "loss": 3.3937, "step": 24300 }, { "epoch": 0.26, "learning_rate": 3.689102357466744e-05, "loss": 3.3954, "step": 24400 }, { "epoch": 0.26, "learning_rate": 3.6837298261448864e-05, "loss": 3.3905, "step": 24500 }, { "epoch": 0.26, "learning_rate": 3.6783572948230286e-05, "loss": 3.3676, "step": 24600 }, { "epoch": 0.27, "learning_rate": 3.6729847635011714e-05, "loss": 3.3678, "step": 24700 }, { "epoch": 0.27, "learning_rate": 3.6676122321793136e-05, "loss": 3.3548, "step": 24800 }, { "epoch": 0.27, "learning_rate": 3.662239700857456e-05, "loss": 3.3502, "step": 24900 }, { "epoch": 0.27, "learning_rate": 3.6568671695355986e-05, "loss": 3.3743, "step": 25000 }, { "epoch": 0.27, "learning_rate": 3.651494638213741e-05, "loss": 3.3593, "step": 25100 }, { "epoch": 0.27, "learning_rate": 3.6461221068918836e-05, "loss": 3.3581, "step": 25200 }, { "epoch": 0.27, "learning_rate": 3.640749575570026e-05, "loss": 3.356, "step": 25300 }, { "epoch": 0.27, "learning_rate": 3.6353770442481686e-05, "loss": 3.3333, "step": 25400 }, { "epoch": 0.27, "learning_rate": 3.630004512926311e-05, "loss": 3.3463, "step": 25500 }, { "epoch": 0.28, "learning_rate": 3.624631981604453e-05, "loss": 3.3273, "step": 25600 }, { "epoch": 0.28, "learning_rate": 3.619259450282596e-05, "loss": 3.354, "step": 25700 }, { "epoch": 0.28, "learning_rate": 3.613886918960738e-05, "loss": 3.3302, "step": 25800 }, { "epoch": 0.28, "learning_rate": 3.60851438763888e-05, "loss": 3.3569, "step": 25900 }, { "epoch": 0.28, "learning_rate": 3.603141856317023e-05, "loss": 3.339, "step": 26000 }, { "epoch": 0.28, "learning_rate": 3.597769324995165e-05, "loss": 3.3359, "step": 26100 }, { "epoch": 0.28, "learning_rate": 3.592396793673307e-05, "loss": 3.3382, "step": 26200 }, { "epoch": 0.28, "learning_rate": 3.58702426235145e-05, "loss": 3.3257, "step": 26300 }, { "epoch": 0.28, "learning_rate": 3.581651731029592e-05, "loss": 3.3159, "step": 26400 }, { "epoch": 0.28, "learning_rate": 3.5762791997077344e-05, "loss": 3.3236, "step": 26500 }, { "epoch": 0.29, "learning_rate": 3.5709066683858766e-05, "loss": 3.3226, "step": 26600 }, { "epoch": 0.29, "learning_rate": 3.5655341370640194e-05, "loss": 3.3157, "step": 26700 }, { "epoch": 0.29, "learning_rate": 3.5601616057421616e-05, "loss": 3.3511, "step": 26800 }, { "epoch": 0.29, "learning_rate": 3.554789074420304e-05, "loss": 3.3082, "step": 26900 }, { "epoch": 0.29, "learning_rate": 3.5494165430984466e-05, "loss": 3.3117, "step": 27000 }, { "epoch": 0.29, "learning_rate": 3.544044011776589e-05, "loss": 3.3165, "step": 27100 }, { "epoch": 0.29, "learning_rate": 3.538671480454731e-05, "loss": 3.3211, "step": 27200 }, { "epoch": 0.29, "learning_rate": 3.533298949132874e-05, "loss": 3.3134, "step": 27300 }, { "epoch": 0.29, "learning_rate": 3.527926417811016e-05, "loss": 3.3036, "step": 27400 }, { "epoch": 0.3, "learning_rate": 3.522553886489158e-05, "loss": 3.3088, "step": 27500 }, { "epoch": 0.3, "learning_rate": 3.5171813551673e-05, "loss": 3.2946, "step": 27600 }, { "epoch": 0.3, "learning_rate": 3.511808823845443e-05, "loss": 3.2971, "step": 27700 }, { "epoch": 0.3, "learning_rate": 3.506436292523585e-05, "loss": 3.2943, "step": 27800 }, { "epoch": 0.3, "learning_rate": 3.501063761201728e-05, "loss": 3.2995, "step": 27900 }, { "epoch": 0.3, "learning_rate": 3.49569122987987e-05, "loss": 3.2877, "step": 28000 }, { "epoch": 0.3, "learning_rate": 3.490318698558013e-05, "loss": 3.2879, "step": 28100 }, { "epoch": 0.3, "learning_rate": 3.484946167236155e-05, "loss": 3.2784, "step": 28200 }, { "epoch": 0.3, "learning_rate": 3.479573635914298e-05, "loss": 3.3089, "step": 28300 }, { "epoch": 0.31, "learning_rate": 3.47420110459244e-05, "loss": 3.2818, "step": 28400 }, { "epoch": 0.31, "learning_rate": 3.4688285732705824e-05, "loss": 3.2698, "step": 28500 }, { "epoch": 0.31, "learning_rate": 3.463456041948725e-05, "loss": 3.2706, "step": 28600 }, { "epoch": 0.31, "learning_rate": 3.4580835106268674e-05, "loss": 3.2884, "step": 28700 }, { "epoch": 0.31, "learning_rate": 3.4527109793050096e-05, "loss": 3.2786, "step": 28800 }, { "epoch": 0.31, "learning_rate": 3.447338447983152e-05, "loss": 3.2662, "step": 28900 }, { "epoch": 0.31, "learning_rate": 3.4419659166612946e-05, "loss": 3.2616, "step": 29000 }, { "epoch": 0.31, "learning_rate": 3.436593385339437e-05, "loss": 3.2569, "step": 29100 }, { "epoch": 0.31, "learning_rate": 3.431220854017579e-05, "loss": 3.2689, "step": 29200 }, { "epoch": 0.31, "learning_rate": 3.425848322695722e-05, "loss": 3.2591, "step": 29300 }, { "epoch": 0.32, "learning_rate": 3.420475791373864e-05, "loss": 3.2453, "step": 29400 }, { "epoch": 0.32, "learning_rate": 3.415103260052006e-05, "loss": 3.2755, "step": 29500 }, { "epoch": 0.32, "learning_rate": 3.409730728730149e-05, "loss": 3.2599, "step": 29600 }, { "epoch": 0.32, "learning_rate": 3.404358197408291e-05, "loss": 3.2462, "step": 29700 }, { "epoch": 0.32, "learning_rate": 3.398985666086433e-05, "loss": 3.2592, "step": 29800 }, { "epoch": 0.32, "learning_rate": 3.393613134764576e-05, "loss": 3.2619, "step": 29900 }, { "epoch": 0.32, "learning_rate": 3.388240603442718e-05, "loss": 3.252, "step": 30000 }, { "epoch": 0.32, "learning_rate": 3.3828680721208604e-05, "loss": 3.2454, "step": 30100 }, { "epoch": 0.32, "learning_rate": 3.3774955407990026e-05, "loss": 3.2344, "step": 30200 }, { "epoch": 0.33, "learning_rate": 3.3721230094771454e-05, "loss": 3.2465, "step": 30300 }, { "epoch": 0.33, "learning_rate": 3.3667504781552876e-05, "loss": 3.2462, "step": 30400 }, { "epoch": 0.33, "learning_rate": 3.36137794683343e-05, "loss": 3.2342, "step": 30500 }, { "epoch": 0.33, "learning_rate": 3.3560054155115726e-05, "loss": 3.2186, "step": 30600 }, { "epoch": 0.33, "learning_rate": 3.350632884189715e-05, "loss": 3.2556, "step": 30700 }, { "epoch": 0.33, "learning_rate": 3.345260352867857e-05, "loss": 3.2477, "step": 30800 }, { "epoch": 0.33, "learning_rate": 3.339887821546e-05, "loss": 3.2139, "step": 30900 }, { "epoch": 0.33, "learning_rate": 3.334515290224142e-05, "loss": 3.2478, "step": 31000 }, { "epoch": 0.33, "learning_rate": 3.329142758902285e-05, "loss": 3.2423, "step": 31100 }, { "epoch": 0.34, "learning_rate": 3.323770227580427e-05, "loss": 3.242, "step": 31200 }, { "epoch": 0.34, "learning_rate": 3.31839769625857e-05, "loss": 3.2452, "step": 31300 }, { "epoch": 0.34, "learning_rate": 3.313025164936712e-05, "loss": 3.217, "step": 31400 }, { "epoch": 0.34, "learning_rate": 3.307652633614854e-05, "loss": 3.2212, "step": 31500 }, { "epoch": 0.34, "learning_rate": 3.302280102292997e-05, "loss": 3.2293, "step": 31600 }, { "epoch": 0.34, "learning_rate": 3.296907570971139e-05, "loss": 3.2257, "step": 31700 }, { "epoch": 0.34, "learning_rate": 3.291535039649281e-05, "loss": 3.1933, "step": 31800 }, { "epoch": 0.34, "learning_rate": 3.286162508327424e-05, "loss": 3.2092, "step": 31900 }, { "epoch": 0.34, "learning_rate": 3.280789977005566e-05, "loss": 3.1993, "step": 32000 }, { "epoch": 0.34, "learning_rate": 3.2754174456837084e-05, "loss": 3.2368, "step": 32100 }, { "epoch": 0.35, "learning_rate": 3.270044914361851e-05, "loss": 3.2216, "step": 32200 }, { "epoch": 0.35, "learning_rate": 3.2646723830399934e-05, "loss": 3.1913, "step": 32300 }, { "epoch": 0.35, "learning_rate": 3.2592998517181356e-05, "loss": 3.2121, "step": 32400 }, { "epoch": 0.35, "learning_rate": 3.253927320396278e-05, "loss": 3.224, "step": 32500 }, { "epoch": 0.35, "learning_rate": 3.2485547890744206e-05, "loss": 3.2091, "step": 32600 }, { "epoch": 0.35, "learning_rate": 3.243182257752563e-05, "loss": 3.1823, "step": 32700 }, { "epoch": 0.35, "learning_rate": 3.237809726430705e-05, "loss": 3.188, "step": 32800 }, { "epoch": 0.35, "learning_rate": 3.232437195108848e-05, "loss": 3.2111, "step": 32900 }, { "epoch": 0.35, "learning_rate": 3.22706466378699e-05, "loss": 3.2252, "step": 33000 }, { "epoch": 0.36, "learning_rate": 3.221692132465132e-05, "loss": 3.1869, "step": 33100 }, { "epoch": 0.36, "learning_rate": 3.216319601143275e-05, "loss": 3.2025, "step": 33200 }, { "epoch": 0.36, "learning_rate": 3.210947069821417e-05, "loss": 3.2068, "step": 33300 }, { "epoch": 0.36, "learning_rate": 3.205574538499559e-05, "loss": 3.1969, "step": 33400 }, { "epoch": 0.36, "learning_rate": 3.200202007177702e-05, "loss": 3.2075, "step": 33500 }, { "epoch": 0.36, "learning_rate": 3.194829475855844e-05, "loss": 3.2, "step": 33600 }, { "epoch": 0.36, "learning_rate": 3.1894569445339864e-05, "loss": 3.1982, "step": 33700 }, { "epoch": 0.36, "learning_rate": 3.184084413212129e-05, "loss": 3.185, "step": 33800 }, { "epoch": 0.36, "learning_rate": 3.1787118818902714e-05, "loss": 3.1821, "step": 33900 }, { "epoch": 0.37, "learning_rate": 3.173339350568414e-05, "loss": 3.1602, "step": 34000 }, { "epoch": 0.37, "learning_rate": 3.1679668192465564e-05, "loss": 3.1737, "step": 34100 }, { "epoch": 0.37, "learning_rate": 3.162594287924699e-05, "loss": 3.1797, "step": 34200 }, { "epoch": 0.37, "learning_rate": 3.1572217566028414e-05, "loss": 3.1765, "step": 34300 }, { "epoch": 0.37, "learning_rate": 3.1518492252809836e-05, "loss": 3.1776, "step": 34400 }, { "epoch": 0.37, "learning_rate": 3.1464766939591264e-05, "loss": 3.1902, "step": 34500 }, { "epoch": 0.37, "learning_rate": 3.1411041626372686e-05, "loss": 3.1847, "step": 34600 }, { "epoch": 0.37, "learning_rate": 3.135731631315411e-05, "loss": 3.1871, "step": 34700 }, { "epoch": 0.37, "learning_rate": 3.1303590999935536e-05, "loss": 3.1669, "step": 34800 }, { "epoch": 0.38, "learning_rate": 3.124986568671696e-05, "loss": 3.1794, "step": 34900 }, { "epoch": 0.38, "learning_rate": 3.119614037349838e-05, "loss": 3.1571, "step": 35000 }, { "epoch": 0.38, "learning_rate": 3.11424150602798e-05, "loss": 3.1599, "step": 35100 }, { "epoch": 0.38, "learning_rate": 3.108868974706123e-05, "loss": 3.167, "step": 35200 }, { "epoch": 0.38, "learning_rate": 3.103496443384265e-05, "loss": 3.1612, "step": 35300 }, { "epoch": 0.38, "learning_rate": 3.098123912062407e-05, "loss": 3.1751, "step": 35400 }, { "epoch": 0.38, "learning_rate": 3.09275138074055e-05, "loss": 3.1877, "step": 35500 }, { "epoch": 0.38, "learning_rate": 3.087378849418692e-05, "loss": 3.168, "step": 35600 }, { "epoch": 0.38, "learning_rate": 3.0820063180968344e-05, "loss": 3.1767, "step": 35700 }, { "epoch": 0.38, "learning_rate": 3.076633786774977e-05, "loss": 3.1769, "step": 35800 }, { "epoch": 0.39, "learning_rate": 3.0712612554531194e-05, "loss": 3.1486, "step": 35900 }, { "epoch": 0.39, "learning_rate": 3.0658887241312616e-05, "loss": 3.164, "step": 36000 }, { "epoch": 0.39, "learning_rate": 3.0605161928094044e-05, "loss": 3.1753, "step": 36100 }, { "epoch": 0.39, "learning_rate": 3.0551436614875466e-05, "loss": 3.1644, "step": 36200 }, { "epoch": 0.39, "learning_rate": 3.049771130165689e-05, "loss": 3.1607, "step": 36300 }, { "epoch": 0.39, "learning_rate": 3.0443985988438312e-05, "loss": 3.1605, "step": 36400 }, { "epoch": 0.39, "learning_rate": 3.039026067521974e-05, "loss": 3.148, "step": 36500 }, { "epoch": 0.39, "learning_rate": 3.0336535362001162e-05, "loss": 3.1428, "step": 36600 }, { "epoch": 0.39, "learning_rate": 3.0282810048782584e-05, "loss": 3.1677, "step": 36700 }, { "epoch": 0.4, "learning_rate": 3.0229084735564012e-05, "loss": 3.1525, "step": 36800 }, { "epoch": 0.4, "learning_rate": 3.0175359422345434e-05, "loss": 3.1598, "step": 36900 }, { "epoch": 0.4, "learning_rate": 3.0121634109126856e-05, "loss": 3.1578, "step": 37000 }, { "epoch": 0.4, "learning_rate": 3.0067908795908284e-05, "loss": 3.1406, "step": 37100 }, { "epoch": 0.4, "learning_rate": 3.0014183482689706e-05, "loss": 3.1457, "step": 37200 }, { "epoch": 0.4, "learning_rate": 2.9960458169471127e-05, "loss": 3.1567, "step": 37300 }, { "epoch": 0.4, "learning_rate": 2.9906732856252552e-05, "loss": 3.1567, "step": 37400 }, { "epoch": 0.4, "learning_rate": 2.9853007543033977e-05, "loss": 3.1289, "step": 37500 }, { "epoch": 0.4, "learning_rate": 2.9799282229815402e-05, "loss": 3.1511, "step": 37600 }, { "epoch": 0.41, "learning_rate": 2.9745556916596824e-05, "loss": 3.1567, "step": 37700 }, { "epoch": 0.41, "learning_rate": 2.9691831603378252e-05, "loss": 3.1316, "step": 37800 }, { "epoch": 0.41, "learning_rate": 2.9638106290159674e-05, "loss": 3.1228, "step": 37900 }, { "epoch": 0.41, "learning_rate": 2.9584380976941096e-05, "loss": 3.1531, "step": 38000 }, { "epoch": 0.41, "learning_rate": 2.9530655663722524e-05, "loss": 3.1342, "step": 38100 }, { "epoch": 0.41, "learning_rate": 2.9476930350503946e-05, "loss": 3.15, "step": 38200 }, { "epoch": 0.41, "learning_rate": 2.9423205037285367e-05, "loss": 3.1372, "step": 38300 }, { "epoch": 0.41, "learning_rate": 2.9369479724066796e-05, "loss": 3.1432, "step": 38400 }, { "epoch": 0.41, "learning_rate": 2.9315754410848217e-05, "loss": 3.1214, "step": 38500 }, { "epoch": 0.41, "learning_rate": 2.926202909762964e-05, "loss": 3.1377, "step": 38600 }, { "epoch": 0.42, "learning_rate": 2.920830378441106e-05, "loss": 3.1357, "step": 38700 }, { "epoch": 0.42, "learning_rate": 2.915457847119249e-05, "loss": 3.1488, "step": 38800 }, { "epoch": 0.42, "learning_rate": 2.910085315797391e-05, "loss": 3.1583, "step": 38900 }, { "epoch": 0.42, "learning_rate": 2.9047127844755336e-05, "loss": 3.1319, "step": 39000 }, { "epoch": 0.42, "learning_rate": 2.899340253153676e-05, "loss": 3.1085, "step": 39100 }, { "epoch": 0.42, "learning_rate": 2.8939677218318186e-05, "loss": 3.1086, "step": 39200 }, { "epoch": 0.42, "learning_rate": 2.8885951905099607e-05, "loss": 3.1263, "step": 39300 }, { "epoch": 0.42, "learning_rate": 2.8832226591881036e-05, "loss": 3.1347, "step": 39400 }, { "epoch": 0.42, "learning_rate": 2.8778501278662457e-05, "loss": 3.1416, "step": 39500 }, { "epoch": 0.43, "learning_rate": 2.872477596544388e-05, "loss": 3.1167, "step": 39600 }, { "epoch": 0.43, "learning_rate": 2.8671050652225307e-05, "loss": 3.1124, "step": 39700 }, { "epoch": 0.43, "learning_rate": 2.861732533900673e-05, "loss": 3.1183, "step": 39800 }, { "epoch": 0.43, "learning_rate": 2.856360002578815e-05, "loss": 3.1373, "step": 39900 }, { "epoch": 0.43, "learning_rate": 2.8509874712569572e-05, "loss": 3.1123, "step": 40000 }, { "epoch": 0.43, "learning_rate": 2.8456149399351e-05, "loss": 3.1323, "step": 40100 }, { "epoch": 0.43, "learning_rate": 2.8402424086132422e-05, "loss": 3.1216, "step": 40200 }, { "epoch": 0.43, "learning_rate": 2.8348698772913844e-05, "loss": 3.1145, "step": 40300 }, { "epoch": 0.43, "learning_rate": 2.8294973459695272e-05, "loss": 3.1367, "step": 40400 }, { "epoch": 0.44, "learning_rate": 2.8241248146476694e-05, "loss": 3.1081, "step": 40500 }, { "epoch": 0.44, "learning_rate": 2.818752283325812e-05, "loss": 3.1199, "step": 40600 }, { "epoch": 0.44, "learning_rate": 2.8133797520039544e-05, "loss": 3.1251, "step": 40700 }, { "epoch": 0.44, "learning_rate": 2.808007220682097e-05, "loss": 3.1267, "step": 40800 }, { "epoch": 0.44, "learning_rate": 2.802634689360239e-05, "loss": 3.1151, "step": 40900 }, { "epoch": 0.44, "learning_rate": 2.797262158038382e-05, "loss": 3.1247, "step": 41000 }, { "epoch": 0.44, "learning_rate": 2.791889626716524e-05, "loss": 3.1093, "step": 41100 }, { "epoch": 0.44, "learning_rate": 2.7865170953946662e-05, "loss": 3.1166, "step": 41200 }, { "epoch": 0.44, "learning_rate": 2.7811445640728084e-05, "loss": 3.1109, "step": 41300 }, { "epoch": 0.44, "learning_rate": 2.7757720327509512e-05, "loss": 3.1196, "step": 41400 }, { "epoch": 0.45, "learning_rate": 2.7703995014290934e-05, "loss": 3.1193, "step": 41500 }, { "epoch": 0.45, "learning_rate": 2.7650269701072356e-05, "loss": 3.1105, "step": 41600 }, { "epoch": 0.45, "learning_rate": 2.7596544387853784e-05, "loss": 3.136, "step": 41700 }, { "epoch": 0.45, "learning_rate": 2.7542819074635206e-05, "loss": 3.1115, "step": 41800 }, { "epoch": 0.45, "learning_rate": 2.7489093761416627e-05, "loss": 3.0942, "step": 41900 }, { "epoch": 0.45, "learning_rate": 2.7435368448198056e-05, "loss": 3.1059, "step": 42000 }, { "epoch": 0.45, "learning_rate": 2.7381643134979477e-05, "loss": 3.1198, "step": 42100 }, { "epoch": 0.45, "learning_rate": 2.7327917821760902e-05, "loss": 3.0933, "step": 42200 }, { "epoch": 0.45, "learning_rate": 2.7274192508542327e-05, "loss": 3.0963, "step": 42300 }, { "epoch": 0.46, "learning_rate": 2.7220467195323752e-05, "loss": 3.1158, "step": 42400 }, { "epoch": 0.46, "learning_rate": 2.7166741882105174e-05, "loss": 3.098, "step": 42500 }, { "epoch": 0.46, "learning_rate": 2.7113016568886596e-05, "loss": 3.0857, "step": 42600 }, { "epoch": 0.46, "learning_rate": 2.7059291255668024e-05, "loss": 3.0835, "step": 42700 }, { "epoch": 0.46, "learning_rate": 2.7005565942449446e-05, "loss": 3.1004, "step": 42800 }, { "epoch": 0.46, "learning_rate": 2.6951840629230867e-05, "loss": 3.0934, "step": 42900 }, { "epoch": 0.46, "learning_rate": 2.6898115316012296e-05, "loss": 3.1017, "step": 43000 }, { "epoch": 0.46, "learning_rate": 2.6844390002793717e-05, "loss": 3.1098, "step": 43100 }, { "epoch": 0.46, "learning_rate": 2.679066468957514e-05, "loss": 3.1095, "step": 43200 }, { "epoch": 0.47, "learning_rate": 2.6736939376356567e-05, "loss": 3.0855, "step": 43300 }, { "epoch": 0.47, "learning_rate": 2.668321406313799e-05, "loss": 3.0745, "step": 43400 }, { "epoch": 0.47, "learning_rate": 2.6629488749919414e-05, "loss": 3.0847, "step": 43500 }, { "epoch": 0.47, "learning_rate": 2.6575763436700836e-05, "loss": 3.1013, "step": 43600 }, { "epoch": 0.47, "learning_rate": 2.6522038123482264e-05, "loss": 3.0905, "step": 43700 }, { "epoch": 0.47, "learning_rate": 2.6468312810263686e-05, "loss": 3.0946, "step": 43800 }, { "epoch": 0.47, "learning_rate": 2.6414587497045107e-05, "loss": 3.0838, "step": 43900 }, { "epoch": 0.47, "learning_rate": 2.6360862183826536e-05, "loss": 3.0921, "step": 44000 }, { "epoch": 0.47, "learning_rate": 2.6307136870607957e-05, "loss": 3.0972, "step": 44100 }, { "epoch": 0.47, "learning_rate": 2.625341155738938e-05, "loss": 3.0854, "step": 44200 }, { "epoch": 0.48, "learning_rate": 2.6199686244170807e-05, "loss": 3.0998, "step": 44300 }, { "epoch": 0.48, "learning_rate": 2.614596093095223e-05, "loss": 3.0921, "step": 44400 }, { "epoch": 0.48, "learning_rate": 2.609223561773365e-05, "loss": 3.0999, "step": 44500 }, { "epoch": 0.48, "learning_rate": 2.603851030451508e-05, "loss": 3.0988, "step": 44600 }, { "epoch": 0.48, "learning_rate": 2.59847849912965e-05, "loss": 3.0667, "step": 44700 }, { "epoch": 0.48, "learning_rate": 2.5931059678077922e-05, "loss": 3.0695, "step": 44800 }, { "epoch": 0.48, "learning_rate": 2.5877334364859347e-05, "loss": 3.0685, "step": 44900 }, { "epoch": 0.48, "learning_rate": 2.5823609051640772e-05, "loss": 3.0959, "step": 45000 }, { "epoch": 0.48, "learning_rate": 2.5769883738422197e-05, "loss": 3.0912, "step": 45100 }, { "epoch": 0.49, "learning_rate": 2.571615842520362e-05, "loss": 3.0751, "step": 45200 }, { "epoch": 0.49, "learning_rate": 2.5662433111985047e-05, "loss": 3.0864, "step": 45300 }, { "epoch": 0.49, "learning_rate": 2.560870779876647e-05, "loss": 3.0713, "step": 45400 }, { "epoch": 0.49, "learning_rate": 2.555498248554789e-05, "loss": 3.069, "step": 45500 }, { "epoch": 0.49, "learning_rate": 2.550125717232932e-05, "loss": 3.0644, "step": 45600 }, { "epoch": 0.49, "learning_rate": 2.544753185911074e-05, "loss": 3.061, "step": 45700 }, { "epoch": 0.49, "learning_rate": 2.5393806545892162e-05, "loss": 3.0784, "step": 45800 }, { "epoch": 0.49, "learning_rate": 2.534008123267359e-05, "loss": 3.0646, "step": 45900 }, { "epoch": 0.49, "learning_rate": 2.5286355919455012e-05, "loss": 3.0699, "step": 46000 }, { "epoch": 0.5, "learning_rate": 2.5232630606236434e-05, "loss": 3.083, "step": 46100 }, { "epoch": 0.5, "learning_rate": 2.5178905293017856e-05, "loss": 3.0713, "step": 46200 }, { "epoch": 0.5, "learning_rate": 2.5125179979799284e-05, "loss": 3.0824, "step": 46300 }, { "epoch": 0.5, "learning_rate": 2.5071454666580706e-05, "loss": 3.0586, "step": 46400 }, { "epoch": 0.5, "learning_rate": 2.501772935336213e-05, "loss": 3.062, "step": 46500 }, { "epoch": 0.5, "learning_rate": 2.4964004040143556e-05, "loss": 3.0625, "step": 46600 }, { "epoch": 0.5, "learning_rate": 2.491027872692498e-05, "loss": 3.0978, "step": 46700 }, { "epoch": 0.5, "learning_rate": 2.4856553413706406e-05, "loss": 3.0756, "step": 46800 }, { "epoch": 0.5, "learning_rate": 2.4802828100487827e-05, "loss": 3.0696, "step": 46900 }, { "epoch": 0.51, "learning_rate": 2.4749102787269252e-05, "loss": 3.0827, "step": 47000 }, { "epoch": 0.51, "learning_rate": 2.4695377474050674e-05, "loss": 3.0644, "step": 47100 }, { "epoch": 0.51, "learning_rate": 2.46416521608321e-05, "loss": 3.0676, "step": 47200 }, { "epoch": 0.51, "learning_rate": 2.4587926847613524e-05, "loss": 3.0826, "step": 47300 }, { "epoch": 0.51, "learning_rate": 2.4534201534394946e-05, "loss": 3.0575, "step": 47400 }, { "epoch": 0.51, "learning_rate": 2.448047622117637e-05, "loss": 3.0869, "step": 47500 }, { "epoch": 0.51, "learning_rate": 2.4426750907957792e-05, "loss": 3.0646, "step": 47600 }, { "epoch": 0.51, "learning_rate": 2.4373025594739217e-05, "loss": 3.0734, "step": 47700 }, { "epoch": 0.51, "learning_rate": 2.4319300281520642e-05, "loss": 3.0642, "step": 47800 }, { "epoch": 0.51, "learning_rate": 2.4265574968302064e-05, "loss": 3.0662, "step": 47900 }, { "epoch": 0.52, "learning_rate": 2.421184965508349e-05, "loss": 3.054, "step": 48000 }, { "epoch": 0.52, "learning_rate": 2.4158124341864914e-05, "loss": 3.0639, "step": 48100 }, { "epoch": 0.52, "learning_rate": 2.410439902864634e-05, "loss": 3.0488, "step": 48200 }, { "epoch": 0.52, "learning_rate": 2.4050673715427764e-05, "loss": 3.064, "step": 48300 }, { "epoch": 0.52, "learning_rate": 2.3996948402209186e-05, "loss": 3.0576, "step": 48400 }, { "epoch": 0.52, "learning_rate": 2.394322308899061e-05, "loss": 3.0664, "step": 48500 }, { "epoch": 0.52, "learning_rate": 2.3889497775772036e-05, "loss": 3.0598, "step": 48600 }, { "epoch": 0.52, "learning_rate": 2.3835772462553457e-05, "loss": 3.0482, "step": 48700 }, { "epoch": 0.52, "learning_rate": 2.3782047149334882e-05, "loss": 3.0439, "step": 48800 }, { "epoch": 0.53, "learning_rate": 2.3728321836116304e-05, "loss": 3.0662, "step": 48900 }, { "epoch": 0.53, "learning_rate": 2.367459652289773e-05, "loss": 3.0659, "step": 49000 }, { "epoch": 0.53, "learning_rate": 2.3620871209679154e-05, "loss": 3.043, "step": 49100 }, { "epoch": 0.53, "learning_rate": 2.3567145896460576e-05, "loss": 3.0675, "step": 49200 }, { "epoch": 0.53, "learning_rate": 2.3513420583242e-05, "loss": 3.0336, "step": 49300 }, { "epoch": 0.53, "learning_rate": 2.3459695270023426e-05, "loss": 3.0522, "step": 49400 }, { "epoch": 0.53, "learning_rate": 2.340596995680485e-05, "loss": 3.0555, "step": 49500 }, { "epoch": 0.53, "learning_rate": 2.3352244643586276e-05, "loss": 3.0536, "step": 49600 }, { "epoch": 0.53, "learning_rate": 2.3298519330367697e-05, "loss": 3.0615, "step": 49700 }, { "epoch": 0.54, "learning_rate": 2.3244794017149122e-05, "loss": 3.0615, "step": 49800 }, { "epoch": 0.54, "learning_rate": 2.3191068703930547e-05, "loss": 3.0581, "step": 49900 }, { "epoch": 0.54, "learning_rate": 2.313734339071197e-05, "loss": 3.0552, "step": 50000 }, { "epoch": 0.54, "learning_rate": 2.3083618077493394e-05, "loss": 3.0419, "step": 50100 }, { "epoch": 0.54, "learning_rate": 2.3029892764274816e-05, "loss": 3.0583, "step": 50200 }, { "epoch": 0.54, "learning_rate": 2.297616745105624e-05, "loss": 3.0504, "step": 50300 }, { "epoch": 0.54, "learning_rate": 2.2922442137837666e-05, "loss": 3.0505, "step": 50400 }, { "epoch": 0.54, "learning_rate": 2.2868716824619087e-05, "loss": 3.0702, "step": 50500 }, { "epoch": 0.54, "learning_rate": 2.2814991511400512e-05, "loss": 3.0522, "step": 50600 }, { "epoch": 0.54, "learning_rate": 2.2761266198181934e-05, "loss": 3.0587, "step": 50700 }, { "epoch": 0.55, "learning_rate": 2.270754088496336e-05, "loss": 3.0547, "step": 50800 }, { "epoch": 0.55, "learning_rate": 2.2653815571744784e-05, "loss": 3.0581, "step": 50900 }, { "epoch": 0.55, "learning_rate": 2.260009025852621e-05, "loss": 3.0313, "step": 51000 }, { "epoch": 0.55, "learning_rate": 2.2546364945307634e-05, "loss": 3.047, "step": 51100 }, { "epoch": 0.55, "learning_rate": 2.2492639632089056e-05, "loss": 3.0245, "step": 51200 }, { "epoch": 0.55, "learning_rate": 2.243891431887048e-05, "loss": 3.0535, "step": 51300 }, { "epoch": 0.55, "learning_rate": 2.2385189005651906e-05, "loss": 3.049, "step": 51400 }, { "epoch": 0.55, "learning_rate": 2.2331463692433327e-05, "loss": 3.0531, "step": 51500 }, { "epoch": 0.55, "learning_rate": 2.2277738379214752e-05, "loss": 3.0522, "step": 51600 }, { "epoch": 0.56, "learning_rate": 2.2224013065996177e-05, "loss": 3.0573, "step": 51700 }, { "epoch": 0.56, "learning_rate": 2.21702877527776e-05, "loss": 3.0484, "step": 51800 }, { "epoch": 0.56, "learning_rate": 2.2116562439559024e-05, "loss": 3.0458, "step": 51900 }, { "epoch": 0.56, "learning_rate": 2.2062837126340446e-05, "loss": 3.0582, "step": 52000 }, { "epoch": 0.56, "learning_rate": 2.200911181312187e-05, "loss": 3.0332, "step": 52100 }, { "epoch": 0.56, "learning_rate": 2.1955386499903296e-05, "loss": 3.0337, "step": 52200 }, { "epoch": 0.56, "learning_rate": 2.1901661186684717e-05, "loss": 3.0453, "step": 52300 }, { "epoch": 0.56, "learning_rate": 2.1847935873466142e-05, "loss": 3.06, "step": 52400 }, { "epoch": 0.56, "learning_rate": 2.1794210560247567e-05, "loss": 3.0498, "step": 52500 }, { "epoch": 0.57, "learning_rate": 2.1740485247028992e-05, "loss": 3.0439, "step": 52600 }, { "epoch": 0.57, "learning_rate": 2.1686759933810417e-05, "loss": 3.0293, "step": 52700 }, { "epoch": 0.57, "learning_rate": 2.163303462059184e-05, "loss": 3.0305, "step": 52800 }, { "epoch": 0.57, "learning_rate": 2.1579309307373264e-05, "loss": 3.0425, "step": 52900 }, { "epoch": 0.57, "learning_rate": 2.152558399415469e-05, "loss": 3.0513, "step": 53000 }, { "epoch": 0.57, "learning_rate": 2.147185868093611e-05, "loss": 3.029, "step": 53100 }, { "epoch": 0.57, "learning_rate": 2.1418133367717536e-05, "loss": 3.0513, "step": 53200 }, { "epoch": 0.57, "learning_rate": 2.1364408054498957e-05, "loss": 3.0481, "step": 53300 }, { "epoch": 0.57, "learning_rate": 2.1310682741280382e-05, "loss": 3.0453, "step": 53400 }, { "epoch": 0.57, "learning_rate": 2.1256957428061807e-05, "loss": 3.0295, "step": 53500 }, { "epoch": 0.58, "learning_rate": 2.120323211484323e-05, "loss": 3.0375, "step": 53600 }, { "epoch": 0.58, "learning_rate": 2.1149506801624654e-05, "loss": 3.0304, "step": 53700 }, { "epoch": 0.58, "learning_rate": 2.109578148840608e-05, "loss": 3.0349, "step": 53800 }, { "epoch": 0.58, "learning_rate": 2.1042056175187504e-05, "loss": 3.0427, "step": 53900 }, { "epoch": 0.58, "learning_rate": 2.098833086196893e-05, "loss": 3.0211, "step": 54000 }, { "epoch": 0.58, "learning_rate": 2.093460554875035e-05, "loss": 3.0192, "step": 54100 }, { "epoch": 0.58, "learning_rate": 2.0880880235531776e-05, "loss": 3.0284, "step": 54200 }, { "epoch": 0.58, "learning_rate": 2.0827154922313197e-05, "loss": 3.0343, "step": 54300 }, { "epoch": 0.58, "learning_rate": 2.0773429609094622e-05, "loss": 3.0187, "step": 54400 }, { "epoch": 0.59, "learning_rate": 2.0719704295876047e-05, "loss": 3.046, "step": 54500 }, { "epoch": 0.59, "learning_rate": 2.066597898265747e-05, "loss": 3.0448, "step": 54600 }, { "epoch": 0.59, "learning_rate": 2.0612253669438894e-05, "loss": 3.0487, "step": 54700 }, { "epoch": 0.59, "learning_rate": 2.055852835622032e-05, "loss": 3.0403, "step": 54800 }, { "epoch": 0.59, "learning_rate": 2.050480304300174e-05, "loss": 3.0143, "step": 54900 }, { "epoch": 0.59, "learning_rate": 2.0451077729783166e-05, "loss": 3.0194, "step": 55000 }, { "epoch": 0.59, "learning_rate": 2.0397352416564587e-05, "loss": 3.0362, "step": 55100 }, { "epoch": 0.59, "learning_rate": 2.0343627103346012e-05, "loss": 3.0367, "step": 55200 }, { "epoch": 0.59, "learning_rate": 2.0289901790127437e-05, "loss": 3.0162, "step": 55300 }, { "epoch": 0.6, "learning_rate": 2.0236176476908862e-05, "loss": 3.0031, "step": 55400 }, { "epoch": 0.6, "learning_rate": 2.0182451163690287e-05, "loss": 3.0176, "step": 55500 }, { "epoch": 0.6, "learning_rate": 2.012872585047171e-05, "loss": 3.0395, "step": 55600 }, { "epoch": 0.6, "learning_rate": 2.0075000537253134e-05, "loss": 3.0336, "step": 55700 }, { "epoch": 0.6, "learning_rate": 2.002127522403456e-05, "loss": 3.0195, "step": 55800 }, { "epoch": 0.6, "learning_rate": 1.996754991081598e-05, "loss": 3.0234, "step": 55900 }, { "epoch": 0.6, "learning_rate": 1.9913824597597406e-05, "loss": 3.0178, "step": 56000 }, { "epoch": 0.6, "learning_rate": 1.9860099284378827e-05, "loss": 3.0128, "step": 56100 }, { "epoch": 0.6, "learning_rate": 1.9806373971160252e-05, "loss": 3.0201, "step": 56200 }, { "epoch": 0.6, "learning_rate": 1.9752648657941677e-05, "loss": 3.0197, "step": 56300 }, { "epoch": 0.61, "learning_rate": 1.96989233447231e-05, "loss": 3.0305, "step": 56400 }, { "epoch": 0.61, "learning_rate": 1.9645198031504524e-05, "loss": 3.0272, "step": 56500 }, { "epoch": 0.61, "learning_rate": 1.959147271828595e-05, "loss": 3.02, "step": 56600 }, { "epoch": 0.61, "learning_rate": 1.953774740506737e-05, "loss": 3.0387, "step": 56700 }, { "epoch": 0.61, "learning_rate": 1.9484022091848796e-05, "loss": 3.0086, "step": 56800 }, { "epoch": 0.61, "learning_rate": 1.943029677863022e-05, "loss": 3.0139, "step": 56900 }, { "epoch": 0.61, "learning_rate": 1.9376571465411646e-05, "loss": 3.0279, "step": 57000 }, { "epoch": 0.61, "learning_rate": 1.932284615219307e-05, "loss": 3.0129, "step": 57100 }, { "epoch": 0.61, "learning_rate": 1.9269120838974492e-05, "loss": 3.0109, "step": 57200 }, { "epoch": 0.62, "learning_rate": 1.9215395525755917e-05, "loss": 3.0356, "step": 57300 }, { "epoch": 0.62, "learning_rate": 1.916167021253734e-05, "loss": 3.0204, "step": 57400 }, { "epoch": 0.62, "learning_rate": 1.9107944899318764e-05, "loss": 3.0166, "step": 57500 }, { "epoch": 0.62, "learning_rate": 1.905421958610019e-05, "loss": 3.0198, "step": 57600 }, { "epoch": 0.62, "learning_rate": 1.900049427288161e-05, "loss": 3.0122, "step": 57700 }, { "epoch": 0.62, "learning_rate": 1.8946768959663036e-05, "loss": 3.0142, "step": 57800 }, { "epoch": 0.62, "learning_rate": 1.889304364644446e-05, "loss": 3.0273, "step": 57900 }, { "epoch": 0.62, "learning_rate": 1.8839318333225882e-05, "loss": 3.0013, "step": 58000 }, { "epoch": 0.62, "learning_rate": 1.8785593020007307e-05, "loss": 3.0138, "step": 58100 }, { "epoch": 0.63, "learning_rate": 1.873186770678873e-05, "loss": 3.0252, "step": 58200 }, { "epoch": 0.63, "learning_rate": 1.8678142393570154e-05, "loss": 3.0066, "step": 58300 }, { "epoch": 0.63, "learning_rate": 1.862441708035158e-05, "loss": 3.009, "step": 58400 }, { "epoch": 0.63, "learning_rate": 1.8570691767133004e-05, "loss": 3.0229, "step": 58500 }, { "epoch": 0.63, "learning_rate": 1.851696645391443e-05, "loss": 3.0152, "step": 58600 }, { "epoch": 0.63, "learning_rate": 1.846324114069585e-05, "loss": 3.0065, "step": 58700 }, { "epoch": 0.63, "learning_rate": 1.8409515827477276e-05, "loss": 3.0281, "step": 58800 }, { "epoch": 0.63, "learning_rate": 1.83557905142587e-05, "loss": 3.0189, "step": 58900 }, { "epoch": 0.63, "learning_rate": 1.8302065201040122e-05, "loss": 3.0405, "step": 59000 }, { "epoch": 0.64, "learning_rate": 1.8248339887821547e-05, "loss": 2.9986, "step": 59100 }, { "epoch": 0.64, "learning_rate": 1.819461457460297e-05, "loss": 3.0205, "step": 59200 }, { "epoch": 0.64, "learning_rate": 1.8140889261384394e-05, "loss": 3.0166, "step": 59300 }, { "epoch": 0.64, "learning_rate": 1.808716394816582e-05, "loss": 3.0273, "step": 59400 }, { "epoch": 0.64, "learning_rate": 1.803343863494724e-05, "loss": 3.0055, "step": 59500 }, { "epoch": 0.64, "learning_rate": 1.7979713321728666e-05, "loss": 3.0037, "step": 59600 }, { "epoch": 0.64, "learning_rate": 1.792598800851009e-05, "loss": 3.0052, "step": 59700 }, { "epoch": 0.64, "learning_rate": 1.7872262695291516e-05, "loss": 2.9829, "step": 59800 }, { "epoch": 0.64, "learning_rate": 1.781853738207294e-05, "loss": 3.023, "step": 59900 }, { "epoch": 0.64, "learning_rate": 1.7764812068854362e-05, "loss": 3.0099, "step": 60000 }, { "epoch": 0.65, "learning_rate": 1.7711086755635787e-05, "loss": 3.0027, "step": 60100 }, { "epoch": 0.65, "learning_rate": 1.7657361442417212e-05, "loss": 3.0003, "step": 60200 }, { "epoch": 0.65, "learning_rate": 1.7603636129198634e-05, "loss": 2.9984, "step": 60300 }, { "epoch": 0.65, "learning_rate": 1.754991081598006e-05, "loss": 3.0119, "step": 60400 }, { "epoch": 0.65, "learning_rate": 1.749618550276148e-05, "loss": 3.0055, "step": 60500 }, { "epoch": 0.65, "learning_rate": 1.7442460189542906e-05, "loss": 2.9947, "step": 60600 }, { "epoch": 0.65, "learning_rate": 1.738873487632433e-05, "loss": 3.0096, "step": 60700 }, { "epoch": 0.65, "learning_rate": 1.7335009563105752e-05, "loss": 3.0118, "step": 60800 }, { "epoch": 0.65, "learning_rate": 1.7281284249887177e-05, "loss": 3.0163, "step": 60900 }, { "epoch": 0.66, "learning_rate": 1.7227558936668602e-05, "loss": 2.9818, "step": 61000 }, { "epoch": 0.66, "learning_rate": 1.7173833623450024e-05, "loss": 3.0006, "step": 61100 }, { "epoch": 0.66, "learning_rate": 1.712010831023145e-05, "loss": 3.0059, "step": 61200 }, { "epoch": 0.66, "learning_rate": 1.7066382997012874e-05, "loss": 3.0002, "step": 61300 }, { "epoch": 0.66, "learning_rate": 1.70126576837943e-05, "loss": 3.0119, "step": 61400 }, { "epoch": 0.66, "learning_rate": 1.6958932370575724e-05, "loss": 3.0018, "step": 61500 }, { "epoch": 0.66, "learning_rate": 1.6905207057357146e-05, "loss": 2.9874, "step": 61600 }, { "epoch": 0.66, "learning_rate": 1.685148174413857e-05, "loss": 3.0156, "step": 61700 }, { "epoch": 0.66, "learning_rate": 1.6797756430919992e-05, "loss": 2.9997, "step": 61800 }, { "epoch": 0.67, "learning_rate": 1.6744031117701417e-05, "loss": 2.9948, "step": 61900 }, { "epoch": 0.67, "learning_rate": 1.6690305804482842e-05, "loss": 2.9873, "step": 62000 }, { "epoch": 0.67, "learning_rate": 1.6636580491264264e-05, "loss": 2.9876, "step": 62100 }, { "epoch": 0.67, "learning_rate": 1.658285517804569e-05, "loss": 2.9849, "step": 62200 }, { "epoch": 0.67, "learning_rate": 1.652912986482711e-05, "loss": 2.9775, "step": 62300 }, { "epoch": 0.67, "learning_rate": 1.6475404551608536e-05, "loss": 3.0072, "step": 62400 }, { "epoch": 0.67, "learning_rate": 1.642167923838996e-05, "loss": 2.998, "step": 62500 }, { "epoch": 0.67, "learning_rate": 1.6367953925171382e-05, "loss": 3.013, "step": 62600 }, { "epoch": 0.67, "learning_rate": 1.6314228611952807e-05, "loss": 2.9793, "step": 62700 }, { "epoch": 0.67, "learning_rate": 1.6260503298734232e-05, "loss": 3.0064, "step": 62800 }, { "epoch": 0.68, "learning_rate": 1.6206777985515657e-05, "loss": 2.9916, "step": 62900 }, { "epoch": 0.68, "learning_rate": 1.6153052672297082e-05, "loss": 2.9871, "step": 63000 }, { "epoch": 0.68, "learning_rate": 1.6099327359078504e-05, "loss": 3.0033, "step": 63100 }, { "epoch": 0.68, "learning_rate": 1.604560204585993e-05, "loss": 3.006, "step": 63200 }, { "epoch": 0.68, "learning_rate": 1.5991876732641354e-05, "loss": 2.9815, "step": 63300 }, { "epoch": 0.68, "learning_rate": 1.5938151419422776e-05, "loss": 3.0001, "step": 63400 }, { "epoch": 0.68, "learning_rate": 1.58844261062042e-05, "loss": 2.9908, "step": 63500 }, { "epoch": 0.68, "learning_rate": 1.5830700792985622e-05, "loss": 2.9877, "step": 63600 }, { "epoch": 0.68, "learning_rate": 1.5776975479767047e-05, "loss": 2.9779, "step": 63700 }, { "epoch": 0.69, "learning_rate": 1.5723250166548472e-05, "loss": 3.009, "step": 63800 }, { "epoch": 0.69, "learning_rate": 1.5669524853329894e-05, "loss": 3.0003, "step": 63900 }, { "epoch": 0.69, "learning_rate": 1.561579954011132e-05, "loss": 2.9948, "step": 64000 }, { "epoch": 0.69, "learning_rate": 1.5562074226892744e-05, "loss": 2.9978, "step": 64100 }, { "epoch": 0.69, "learning_rate": 1.550834891367417e-05, "loss": 3.0091, "step": 64200 }, { "epoch": 0.69, "learning_rate": 1.5454623600455594e-05, "loss": 2.9987, "step": 64300 }, { "epoch": 0.69, "learning_rate": 1.5400898287237016e-05, "loss": 3.0084, "step": 64400 }, { "epoch": 0.69, "learning_rate": 1.534717297401844e-05, "loss": 2.9817, "step": 64500 }, { "epoch": 0.69, "learning_rate": 1.5293447660799866e-05, "loss": 2.986, "step": 64600 }, { "epoch": 0.7, "learning_rate": 1.5239722347581287e-05, "loss": 2.9616, "step": 64700 }, { "epoch": 0.7, "learning_rate": 1.5185997034362712e-05, "loss": 3.0026, "step": 64800 }, { "epoch": 0.7, "learning_rate": 1.5132271721144134e-05, "loss": 3.0036, "step": 64900 }, { "epoch": 0.7, "learning_rate": 1.5078546407925559e-05, "loss": 2.9779, "step": 65000 }, { "epoch": 0.7, "learning_rate": 1.5024821094706984e-05, "loss": 2.9908, "step": 65100 }, { "epoch": 0.7, "learning_rate": 1.4971095781488406e-05, "loss": 2.9599, "step": 65200 }, { "epoch": 0.7, "learning_rate": 1.491737046826983e-05, "loss": 2.983, "step": 65300 }, { "epoch": 0.7, "learning_rate": 1.4863645155051254e-05, "loss": 2.9879, "step": 65400 }, { "epoch": 0.7, "learning_rate": 1.4809919841832679e-05, "loss": 2.9967, "step": 65500 }, { "epoch": 0.7, "learning_rate": 1.4756194528614104e-05, "loss": 2.9856, "step": 65600 }, { "epoch": 0.71, "learning_rate": 1.4702469215395526e-05, "loss": 2.9935, "step": 65700 }, { "epoch": 0.71, "learning_rate": 1.464874390217695e-05, "loss": 3.0014, "step": 65800 }, { "epoch": 0.71, "learning_rate": 1.4595018588958376e-05, "loss": 2.9788, "step": 65900 }, { "epoch": 0.71, "learning_rate": 1.4541293275739797e-05, "loss": 2.9903, "step": 66000 }, { "epoch": 0.71, "learning_rate": 1.4487567962521222e-05, "loss": 2.9852, "step": 66100 }, { "epoch": 0.71, "learning_rate": 1.4433842649302646e-05, "loss": 2.9745, "step": 66200 }, { "epoch": 0.71, "learning_rate": 1.438011733608407e-05, "loss": 2.9798, "step": 66300 }, { "epoch": 0.71, "learning_rate": 1.4326392022865496e-05, "loss": 2.9742, "step": 66400 }, { "epoch": 0.71, "learning_rate": 1.4272666709646917e-05, "loss": 2.9823, "step": 66500 }, { "epoch": 0.72, "learning_rate": 1.4218941396428342e-05, "loss": 3.0184, "step": 66600 }, { "epoch": 0.72, "learning_rate": 1.4165216083209766e-05, "loss": 2.9815, "step": 66700 }, { "epoch": 0.72, "learning_rate": 1.411149076999119e-05, "loss": 2.9928, "step": 66800 }, { "epoch": 0.72, "learning_rate": 1.4057765456772614e-05, "loss": 2.9789, "step": 66900 }, { "epoch": 0.72, "learning_rate": 1.4004040143554037e-05, "loss": 2.9855, "step": 67000 }, { "epoch": 0.72, "learning_rate": 1.3950314830335462e-05, "loss": 2.9864, "step": 67100 }, { "epoch": 0.72, "learning_rate": 1.3896589517116884e-05, "loss": 2.9785, "step": 67200 }, { "epoch": 0.72, "learning_rate": 1.3842864203898309e-05, "loss": 2.9839, "step": 67300 }, { "epoch": 0.72, "learning_rate": 1.3789138890679734e-05, "loss": 2.9921, "step": 67400 }, { "epoch": 0.73, "learning_rate": 1.3735413577461157e-05, "loss": 2.9869, "step": 67500 }, { "epoch": 0.73, "learning_rate": 1.3681688264242582e-05, "loss": 2.9844, "step": 67600 }, { "epoch": 0.73, "learning_rate": 1.3627962951024007e-05, "loss": 2.9994, "step": 67700 }, { "epoch": 0.73, "learning_rate": 1.3574237637805429e-05, "loss": 3.0018, "step": 67800 }, { "epoch": 0.73, "learning_rate": 1.3520512324586854e-05, "loss": 2.9944, "step": 67900 }, { "epoch": 0.73, "learning_rate": 1.3466787011368275e-05, "loss": 2.9901, "step": 68000 }, { "epoch": 0.73, "learning_rate": 1.34130616981497e-05, "loss": 2.9732, "step": 68100 }, { "epoch": 0.73, "learning_rate": 1.3359336384931126e-05, "loss": 2.9796, "step": 68200 }, { "epoch": 0.73, "learning_rate": 1.3305611071712549e-05, "loss": 2.9846, "step": 68300 }, { "epoch": 0.73, "learning_rate": 1.3251885758493974e-05, "loss": 2.9687, "step": 68400 }, { "epoch": 0.74, "learning_rate": 1.3198160445275395e-05, "loss": 2.9571, "step": 68500 }, { "epoch": 0.74, "learning_rate": 1.314443513205682e-05, "loss": 2.9861, "step": 68600 }, { "epoch": 0.74, "learning_rate": 1.3090709818838246e-05, "loss": 2.9738, "step": 68700 }, { "epoch": 0.74, "learning_rate": 1.3036984505619667e-05, "loss": 2.9821, "step": 68800 }, { "epoch": 0.74, "learning_rate": 1.2983259192401092e-05, "loss": 2.9789, "step": 68900 }, { "epoch": 0.74, "learning_rate": 1.2929533879182515e-05, "loss": 2.974, "step": 69000 }, { "epoch": 0.74, "learning_rate": 1.287580856596394e-05, "loss": 2.9888, "step": 69100 }, { "epoch": 0.74, "learning_rate": 1.2822083252745366e-05, "loss": 2.9694, "step": 69200 }, { "epoch": 0.74, "learning_rate": 1.2768357939526787e-05, "loss": 2.9983, "step": 69300 }, { "epoch": 0.75, "learning_rate": 1.2714632626308212e-05, "loss": 2.9751, "step": 69400 }, { "epoch": 0.75, "learning_rate": 1.2660907313089637e-05, "loss": 2.9692, "step": 69500 }, { "epoch": 0.75, "learning_rate": 1.2607181999871059e-05, "loss": 2.984, "step": 69600 }, { "epoch": 0.75, "learning_rate": 1.2553456686652484e-05, "loss": 2.9872, "step": 69700 }, { "epoch": 0.75, "learning_rate": 1.2499731373433909e-05, "loss": 2.9707, "step": 69800 }, { "epoch": 0.75, "learning_rate": 1.2446006060215332e-05, "loss": 2.9773, "step": 69900 }, { "epoch": 0.75, "learning_rate": 1.2392280746996756e-05, "loss": 2.9541, "step": 70000 }, { "epoch": 0.75, "learning_rate": 1.2338555433778179e-05, "loss": 2.9744, "step": 70100 }, { "epoch": 0.75, "learning_rate": 1.2284830120559604e-05, "loss": 2.9663, "step": 70200 }, { "epoch": 0.76, "learning_rate": 1.2231104807341027e-05, "loss": 2.9829, "step": 70300 }, { "epoch": 0.76, "learning_rate": 1.217737949412245e-05, "loss": 2.9608, "step": 70400 }, { "epoch": 0.76, "learning_rate": 1.2123654180903876e-05, "loss": 2.9643, "step": 70500 }, { "epoch": 0.76, "learning_rate": 1.20699288676853e-05, "loss": 2.98, "step": 70600 }, { "epoch": 0.76, "learning_rate": 1.2016203554466724e-05, "loss": 2.9789, "step": 70700 }, { "epoch": 0.76, "learning_rate": 1.1962478241248147e-05, "loss": 2.9822, "step": 70800 }, { "epoch": 0.76, "learning_rate": 1.190875292802957e-05, "loss": 2.9851, "step": 70900 }, { "epoch": 0.76, "learning_rate": 1.1855027614810994e-05, "loss": 2.984, "step": 71000 }, { "epoch": 0.76, "learning_rate": 1.1801302301592419e-05, "loss": 2.965, "step": 71100 }, { "epoch": 0.77, "learning_rate": 1.1747576988373842e-05, "loss": 2.9655, "step": 71200 }, { "epoch": 0.77, "learning_rate": 1.1693851675155267e-05, "loss": 2.9702, "step": 71300 }, { "epoch": 0.77, "learning_rate": 1.164012636193669e-05, "loss": 2.955, "step": 71400 }, { "epoch": 0.77, "learning_rate": 1.1586401048718116e-05, "loss": 2.9786, "step": 71500 }, { "epoch": 0.77, "learning_rate": 1.1532675735499539e-05, "loss": 2.9561, "step": 71600 }, { "epoch": 0.77, "learning_rate": 1.1478950422280962e-05, "loss": 2.9682, "step": 71700 }, { "epoch": 0.77, "learning_rate": 1.1425225109062385e-05, "loss": 2.9724, "step": 71800 }, { "epoch": 0.77, "learning_rate": 1.1371499795843809e-05, "loss": 2.9945, "step": 71900 }, { "epoch": 0.77, "learning_rate": 1.1317774482625234e-05, "loss": 2.9724, "step": 72000 }, { "epoch": 0.77, "learning_rate": 1.1264049169406659e-05, "loss": 2.9741, "step": 72100 }, { "epoch": 0.78, "learning_rate": 1.1210323856188082e-05, "loss": 2.9821, "step": 72200 }, { "epoch": 0.78, "learning_rate": 1.1156598542969505e-05, "loss": 2.9709, "step": 72300 }, { "epoch": 0.78, "learning_rate": 1.110287322975093e-05, "loss": 2.9856, "step": 72400 }, { "epoch": 0.78, "learning_rate": 1.1049147916532354e-05, "loss": 2.9851, "step": 72500 }, { "epoch": 0.78, "learning_rate": 1.0995422603313777e-05, "loss": 2.9593, "step": 72600 }, { "epoch": 0.78, "learning_rate": 1.0941697290095202e-05, "loss": 2.9824, "step": 72700 }, { "epoch": 0.78, "learning_rate": 1.0887971976876627e-05, "loss": 2.9747, "step": 72800 }, { "epoch": 0.78, "learning_rate": 1.083424666365805e-05, "loss": 2.9932, "step": 72900 }, { "epoch": 0.78, "learning_rate": 1.0780521350439474e-05, "loss": 2.9805, "step": 73000 }, { "epoch": 0.79, "learning_rate": 1.0726796037220897e-05, "loss": 2.9625, "step": 73100 }, { "epoch": 0.79, "learning_rate": 1.067307072400232e-05, "loss": 2.9838, "step": 73200 }, { "epoch": 0.79, "learning_rate": 1.0619345410783745e-05, "loss": 2.9478, "step": 73300 }, { "epoch": 0.79, "learning_rate": 1.0565620097565169e-05, "loss": 2.9683, "step": 73400 }, { "epoch": 0.79, "learning_rate": 1.0511894784346594e-05, "loss": 2.9608, "step": 73500 }, { "epoch": 0.79, "learning_rate": 1.0458169471128017e-05, "loss": 2.978, "step": 73600 }, { "epoch": 0.79, "learning_rate": 1.0404444157909442e-05, "loss": 2.968, "step": 73700 }, { "epoch": 0.79, "learning_rate": 1.0350718844690866e-05, "loss": 2.9686, "step": 73800 }, { "epoch": 0.79, "learning_rate": 1.0296993531472289e-05, "loss": 2.9813, "step": 73900 }, { "epoch": 0.8, "learning_rate": 1.0243268218253712e-05, "loss": 2.9761, "step": 74000 }, { "epoch": 0.8, "learning_rate": 1.0189542905035135e-05, "loss": 2.9858, "step": 74100 }, { "epoch": 0.8, "learning_rate": 1.013581759181656e-05, "loss": 2.9616, "step": 74200 }, { "epoch": 0.8, "learning_rate": 1.0082092278597986e-05, "loss": 2.9711, "step": 74300 }, { "epoch": 0.8, "learning_rate": 1.0028366965379409e-05, "loss": 2.9809, "step": 74400 }, { "epoch": 0.8, "learning_rate": 9.974641652160832e-06, "loss": 2.9619, "step": 74500 }, { "epoch": 0.8, "learning_rate": 9.920916338942257e-06, "loss": 2.9628, "step": 74600 }, { "epoch": 0.8, "learning_rate": 9.86719102572368e-06, "loss": 2.9793, "step": 74700 }, { "epoch": 0.8, "learning_rate": 9.813465712505104e-06, "loss": 2.9596, "step": 74800 }, { "epoch": 0.8, "learning_rate": 9.759740399286529e-06, "loss": 2.9769, "step": 74900 }, { "epoch": 0.81, "learning_rate": 9.706015086067952e-06, "loss": 2.9778, "step": 75000 }, { "epoch": 0.81, "learning_rate": 9.652289772849377e-06, "loss": 2.9649, "step": 75100 }, { "epoch": 0.81, "learning_rate": 9.5985644596308e-06, "loss": 2.9731, "step": 75200 }, { "epoch": 0.81, "learning_rate": 9.544839146412224e-06, "loss": 2.9628, "step": 75300 }, { "epoch": 0.81, "learning_rate": 9.491113833193647e-06, "loss": 2.9702, "step": 75400 }, { "epoch": 0.81, "learning_rate": 9.437388519975072e-06, "loss": 2.9645, "step": 75500 }, { "epoch": 0.81, "learning_rate": 9.383663206756495e-06, "loss": 2.9748, "step": 75600 }, { "epoch": 0.81, "learning_rate": 9.32993789353792e-06, "loss": 2.9696, "step": 75700 }, { "epoch": 0.81, "learning_rate": 9.276212580319344e-06, "loss": 2.9631, "step": 75800 }, { "epoch": 0.82, "learning_rate": 9.222487267100767e-06, "loss": 2.959, "step": 75900 }, { "epoch": 0.82, "learning_rate": 9.168761953882192e-06, "loss": 2.9405, "step": 76000 }, { "epoch": 0.82, "learning_rate": 9.115036640663615e-06, "loss": 2.9729, "step": 76100 }, { "epoch": 0.82, "learning_rate": 9.061311327445039e-06, "loss": 2.9618, "step": 76200 }, { "epoch": 0.82, "learning_rate": 9.007586014226462e-06, "loss": 2.9566, "step": 76300 }, { "epoch": 0.82, "learning_rate": 8.953860701007887e-06, "loss": 2.9647, "step": 76400 }, { "epoch": 0.82, "learning_rate": 8.900135387789312e-06, "loss": 2.9536, "step": 76500 }, { "epoch": 0.82, "learning_rate": 8.846410074570735e-06, "loss": 2.9571, "step": 76600 }, { "epoch": 0.82, "learning_rate": 8.792684761352159e-06, "loss": 2.9656, "step": 76700 }, { "epoch": 0.83, "learning_rate": 8.738959448133582e-06, "loss": 2.9677, "step": 76800 }, { "epoch": 0.83, "learning_rate": 8.685234134915007e-06, "loss": 2.9682, "step": 76900 }, { "epoch": 0.83, "learning_rate": 8.63150882169643e-06, "loss": 2.9556, "step": 77000 }, { "epoch": 0.83, "learning_rate": 8.577783508477854e-06, "loss": 2.9492, "step": 77100 }, { "epoch": 0.83, "learning_rate": 8.524058195259279e-06, "loss": 2.9708, "step": 77200 }, { "epoch": 0.83, "learning_rate": 8.470332882040704e-06, "loss": 2.9656, "step": 77300 }, { "epoch": 0.83, "learning_rate": 8.416607568822127e-06, "loss": 2.9421, "step": 77400 }, { "epoch": 0.83, "learning_rate": 8.36288225560355e-06, "loss": 2.9586, "step": 77500 }, { "epoch": 0.83, "learning_rate": 8.309156942384974e-06, "loss": 2.9567, "step": 77600 }, { "epoch": 0.83, "learning_rate": 8.255431629166399e-06, "loss": 2.9479, "step": 77700 }, { "epoch": 0.84, "learning_rate": 8.201706315947822e-06, "loss": 2.9617, "step": 77800 }, { "epoch": 0.84, "learning_rate": 8.147981002729247e-06, "loss": 2.9502, "step": 77900 }, { "epoch": 0.84, "learning_rate": 8.09425568951067e-06, "loss": 2.9544, "step": 78000 }, { "epoch": 0.84, "learning_rate": 8.040530376292094e-06, "loss": 2.9817, "step": 78100 }, { "epoch": 0.84, "learning_rate": 7.986805063073519e-06, "loss": 2.9541, "step": 78200 }, { "epoch": 0.84, "learning_rate": 7.933079749854942e-06, "loss": 2.9334, "step": 78300 }, { "epoch": 0.84, "learning_rate": 7.879354436636365e-06, "loss": 2.961, "step": 78400 }, { "epoch": 0.84, "learning_rate": 7.825629123417789e-06, "loss": 2.9474, "step": 78500 }, { "epoch": 0.84, "learning_rate": 7.771903810199214e-06, "loss": 2.9542, "step": 78600 }, { "epoch": 0.85, "learning_rate": 7.718178496980639e-06, "loss": 2.9471, "step": 78700 }, { "epoch": 0.85, "learning_rate": 7.664453183762062e-06, "loss": 2.9687, "step": 78800 }, { "epoch": 0.85, "learning_rate": 7.6107278705434855e-06, "loss": 2.9729, "step": 78900 }, { "epoch": 0.85, "learning_rate": 7.557002557324909e-06, "loss": 2.9371, "step": 79000 }, { "epoch": 0.85, "learning_rate": 7.503277244106334e-06, "loss": 2.9538, "step": 79100 }, { "epoch": 0.85, "learning_rate": 7.449551930887758e-06, "loss": 2.9722, "step": 79200 }, { "epoch": 0.85, "learning_rate": 7.395826617669181e-06, "loss": 2.9506, "step": 79300 }, { "epoch": 0.85, "learning_rate": 7.342101304450605e-06, "loss": 2.9599, "step": 79400 }, { "epoch": 0.85, "learning_rate": 7.28837599123203e-06, "loss": 2.9455, "step": 79500 }, { "epoch": 0.86, "learning_rate": 7.234650678013454e-06, "loss": 2.9531, "step": 79600 }, { "epoch": 0.86, "learning_rate": 7.180925364794877e-06, "loss": 2.9683, "step": 79700 }, { "epoch": 0.86, "learning_rate": 7.1272000515763005e-06, "loss": 2.9588, "step": 79800 }, { "epoch": 0.86, "learning_rate": 7.073474738357725e-06, "loss": 2.9451, "step": 79900 }, { "epoch": 0.86, "learning_rate": 7.01974942513915e-06, "loss": 2.9675, "step": 80000 }, { "epoch": 0.86, "learning_rate": 6.966024111920573e-06, "loss": 2.977, "step": 80100 }, { "epoch": 0.86, "learning_rate": 6.912298798701996e-06, "loss": 2.9605, "step": 80200 }, { "epoch": 0.86, "learning_rate": 6.8585734854834205e-06, "loss": 2.9543, "step": 80300 }, { "epoch": 0.86, "learning_rate": 6.8048481722648455e-06, "loss": 2.9674, "step": 80400 }, { "epoch": 0.86, "learning_rate": 6.751122859046269e-06, "loss": 2.9502, "step": 80500 }, { "epoch": 0.87, "learning_rate": 6.697397545827692e-06, "loss": 2.9688, "step": 80600 }, { "epoch": 0.87, "learning_rate": 6.643672232609116e-06, "loss": 2.953, "step": 80700 }, { "epoch": 0.87, "learning_rate": 6.58994691939054e-06, "loss": 2.9611, "step": 80800 }, { "epoch": 0.87, "learning_rate": 6.536221606171965e-06, "loss": 2.9709, "step": 80900 }, { "epoch": 0.87, "learning_rate": 6.482496292953388e-06, "loss": 2.9602, "step": 81000 }, { "epoch": 0.87, "learning_rate": 6.428770979734812e-06, "loss": 2.9573, "step": 81100 }, { "epoch": 0.87, "learning_rate": 6.3750456665162355e-06, "loss": 2.9386, "step": 81200 }, { "epoch": 0.87, "learning_rate": 6.3213203532976605e-06, "loss": 2.9374, "step": 81300 }, { "epoch": 0.87, "learning_rate": 6.267595040079084e-06, "loss": 2.9537, "step": 81400 }, { "epoch": 0.88, "learning_rate": 6.213869726860508e-06, "loss": 2.9391, "step": 81500 }, { "epoch": 0.88, "learning_rate": 6.160144413641932e-06, "loss": 2.9754, "step": 81600 }, { "epoch": 0.88, "learning_rate": 6.1064191004233555e-06, "loss": 2.9434, "step": 81700 }, { "epoch": 0.88, "learning_rate": 6.05269378720478e-06, "loss": 2.9432, "step": 81800 }, { "epoch": 0.88, "learning_rate": 5.998968473986204e-06, "loss": 2.9243, "step": 81900 }, { "epoch": 0.88, "learning_rate": 5.945243160767627e-06, "loss": 2.9679, "step": 82000 }, { "epoch": 0.88, "learning_rate": 5.891517847549051e-06, "loss": 2.9509, "step": 82100 }, { "epoch": 0.88, "learning_rate": 5.8377925343304755e-06, "loss": 2.9465, "step": 82200 }, { "epoch": 0.88, "learning_rate": 5.7840672211119e-06, "loss": 2.962, "step": 82300 }, { "epoch": 0.89, "learning_rate": 5.730341907893323e-06, "loss": 2.9654, "step": 82400 }, { "epoch": 0.89, "learning_rate": 5.676616594674747e-06, "loss": 2.9357, "step": 82500 }, { "epoch": 0.89, "learning_rate": 5.622891281456171e-06, "loss": 2.9672, "step": 82600 }, { "epoch": 0.89, "learning_rate": 5.569165968237595e-06, "loss": 2.9374, "step": 82700 }, { "epoch": 0.89, "learning_rate": 5.515440655019019e-06, "loss": 2.954, "step": 82800 }, { "epoch": 0.89, "learning_rate": 5.461715341800443e-06, "loss": 2.9793, "step": 82900 }, { "epoch": 0.89, "learning_rate": 5.407990028581867e-06, "loss": 2.9376, "step": 83000 }, { "epoch": 0.89, "learning_rate": 5.3542647153632905e-06, "loss": 2.9496, "step": 83100 }, { "epoch": 0.89, "learning_rate": 5.300539402144715e-06, "loss": 2.958, "step": 83200 }, { "epoch": 0.9, "learning_rate": 5.246814088926139e-06, "loss": 2.9588, "step": 83300 }, { "epoch": 0.9, "learning_rate": 5.193088775707563e-06, "loss": 2.9388, "step": 83400 }, { "epoch": 0.9, "learning_rate": 5.139363462488986e-06, "loss": 2.9555, "step": 83500 }, { "epoch": 0.9, "learning_rate": 5.0856381492704105e-06, "loss": 2.9699, "step": 83600 }, { "epoch": 0.9, "learning_rate": 5.031912836051835e-06, "loss": 2.9568, "step": 83700 }, { "epoch": 0.9, "learning_rate": 4.978187522833258e-06, "loss": 2.9578, "step": 83800 }, { "epoch": 0.9, "learning_rate": 4.924462209614682e-06, "loss": 2.955, "step": 83900 }, { "epoch": 0.9, "learning_rate": 4.870736896396106e-06, "loss": 2.9452, "step": 84000 }, { "epoch": 0.9, "learning_rate": 4.8170115831775305e-06, "loss": 2.9506, "step": 84100 }, { "epoch": 0.9, "learning_rate": 4.763286269958954e-06, "loss": 2.954, "step": 84200 }, { "epoch": 0.91, "learning_rate": 4.709560956740378e-06, "loss": 2.9648, "step": 84300 }, { "epoch": 0.91, "learning_rate": 4.655835643521802e-06, "loss": 2.9492, "step": 84400 }, { "epoch": 0.91, "learning_rate": 4.602110330303226e-06, "loss": 2.9439, "step": 84500 }, { "epoch": 0.91, "learning_rate": 4.54838501708465e-06, "loss": 2.9686, "step": 84600 }, { "epoch": 0.91, "learning_rate": 4.494659703866074e-06, "loss": 2.9298, "step": 84700 }, { "epoch": 0.91, "learning_rate": 4.440934390647498e-06, "loss": 2.9509, "step": 84800 }, { "epoch": 0.91, "learning_rate": 4.387209077428921e-06, "loss": 2.9489, "step": 84900 }, { "epoch": 0.91, "learning_rate": 4.3334837642103455e-06, "loss": 2.9448, "step": 85000 }, { "epoch": 0.91, "learning_rate": 4.27975845099177e-06, "loss": 2.9309, "step": 85100 }, { "epoch": 0.92, "learning_rate": 4.226033137773194e-06, "loss": 2.9514, "step": 85200 }, { "epoch": 0.92, "learning_rate": 4.172307824554617e-06, "loss": 2.9408, "step": 85300 }, { "epoch": 0.92, "learning_rate": 4.118582511336041e-06, "loss": 2.9532, "step": 85400 }, { "epoch": 0.92, "learning_rate": 4.0648571981174655e-06, "loss": 2.9538, "step": 85500 }, { "epoch": 0.92, "learning_rate": 4.01113188489889e-06, "loss": 2.9652, "step": 85600 }, { "epoch": 0.92, "learning_rate": 3.957406571680313e-06, "loss": 2.96, "step": 85700 }, { "epoch": 0.92, "learning_rate": 3.903681258461737e-06, "loss": 2.9516, "step": 85800 }, { "epoch": 0.92, "learning_rate": 3.849955945243161e-06, "loss": 2.9592, "step": 85900 }, { "epoch": 0.92, "learning_rate": 3.7962306320245846e-06, "loss": 2.9412, "step": 86000 }, { "epoch": 0.93, "learning_rate": 3.742505318806009e-06, "loss": 2.9633, "step": 86100 }, { "epoch": 0.93, "learning_rate": 3.6887800055874325e-06, "loss": 2.9539, "step": 86200 }, { "epoch": 0.93, "learning_rate": 3.635054692368857e-06, "loss": 2.9439, "step": 86300 }, { "epoch": 0.93, "learning_rate": 3.5813293791502805e-06, "loss": 2.9333, "step": 86400 }, { "epoch": 0.93, "learning_rate": 3.527604065931705e-06, "loss": 2.9486, "step": 86500 }, { "epoch": 0.93, "learning_rate": 3.4738787527131284e-06, "loss": 2.9526, "step": 86600 }, { "epoch": 0.93, "learning_rate": 3.420153439494552e-06, "loss": 2.9414, "step": 86700 }, { "epoch": 0.93, "learning_rate": 3.3664281262759763e-06, "loss": 2.9411, "step": 86800 }, { "epoch": 0.93, "learning_rate": 3.3127028130574e-06, "loss": 2.9524, "step": 86900 }, { "epoch": 0.93, "learning_rate": 3.258977499838824e-06, "loss": 2.935, "step": 87000 }, { "epoch": 0.94, "learning_rate": 3.205252186620248e-06, "loss": 2.9573, "step": 87100 }, { "epoch": 0.94, "learning_rate": 3.151526873401672e-06, "loss": 2.9461, "step": 87200 }, { "epoch": 0.94, "learning_rate": 3.097801560183096e-06, "loss": 2.9542, "step": 87300 }, { "epoch": 0.94, "learning_rate": 3.04407624696452e-06, "loss": 2.9429, "step": 87400 }, { "epoch": 0.94, "learning_rate": 2.9903509337459438e-06, "loss": 2.9306, "step": 87500 }, { "epoch": 0.94, "learning_rate": 2.936625620527368e-06, "loss": 2.9299, "step": 87600 }, { "epoch": 0.94, "learning_rate": 2.8829003073087917e-06, "loss": 2.9419, "step": 87700 }, { "epoch": 0.94, "learning_rate": 2.829174994090216e-06, "loss": 2.9431, "step": 87800 }, { "epoch": 0.94, "learning_rate": 2.7754496808716396e-06, "loss": 2.9366, "step": 87900 }, { "epoch": 0.95, "learning_rate": 2.7217243676530638e-06, "loss": 2.9337, "step": 88000 }, { "epoch": 0.95, "learning_rate": 2.667999054434487e-06, "loss": 2.9475, "step": 88100 }, { "epoch": 0.95, "learning_rate": 2.6142737412159113e-06, "loss": 2.9555, "step": 88200 }, { "epoch": 0.95, "learning_rate": 2.5605484279973355e-06, "loss": 2.9391, "step": 88300 }, { "epoch": 0.95, "learning_rate": 2.506823114778759e-06, "loss": 2.9554, "step": 88400 }, { "epoch": 0.95, "learning_rate": 2.4530978015601834e-06, "loss": 2.9429, "step": 88500 }, { "epoch": 0.95, "learning_rate": 2.399372488341607e-06, "loss": 2.9365, "step": 88600 }, { "epoch": 0.95, "learning_rate": 2.3456471751230313e-06, "loss": 2.9501, "step": 88700 }, { "epoch": 0.95, "learning_rate": 2.291921861904455e-06, "loss": 2.9633, "step": 88800 }, { "epoch": 0.96, "learning_rate": 2.238196548685879e-06, "loss": 2.9606, "step": 88900 }, { "epoch": 0.96, "learning_rate": 2.184471235467303e-06, "loss": 2.9359, "step": 89000 }, { "epoch": 0.96, "learning_rate": 2.1307459222487267e-06, "loss": 2.9599, "step": 89100 }, { "epoch": 0.96, "learning_rate": 2.0770206090301504e-06, "loss": 2.9549, "step": 89200 }, { "epoch": 0.96, "learning_rate": 2.0232952958115746e-06, "loss": 2.9354, "step": 89300 }, { "epoch": 0.96, "learning_rate": 1.9695699825929984e-06, "loss": 2.9683, "step": 89400 }, { "epoch": 0.96, "learning_rate": 1.9158446693744225e-06, "loss": 2.9424, "step": 89500 }, { "epoch": 0.96, "learning_rate": 1.8621193561558465e-06, "loss": 2.9396, "step": 89600 }, { "epoch": 0.96, "learning_rate": 1.8083940429372704e-06, "loss": 2.9411, "step": 89700 }, { "epoch": 0.96, "learning_rate": 1.7546687297186944e-06, "loss": 2.9527, "step": 89800 }, { "epoch": 0.97, "learning_rate": 1.7009434165001184e-06, "loss": 2.9359, "step": 89900 }, { "epoch": 0.97, "learning_rate": 1.6472181032815423e-06, "loss": 2.9402, "step": 90000 }, { "epoch": 0.97, "learning_rate": 1.593492790062966e-06, "loss": 2.9416, "step": 90100 }, { "epoch": 0.97, "learning_rate": 1.5397674768443902e-06, "loss": 2.9483, "step": 90200 }, { "epoch": 0.97, "learning_rate": 1.486042163625814e-06, "loss": 2.948, "step": 90300 }, { "epoch": 0.97, "learning_rate": 1.432316850407238e-06, "loss": 2.9253, "step": 90400 }, { "epoch": 0.97, "learning_rate": 1.378591537188662e-06, "loss": 2.9431, "step": 90500 }, { "epoch": 0.97, "learning_rate": 1.3248662239700859e-06, "loss": 2.9398, "step": 90600 }, { "epoch": 0.97, "learning_rate": 1.2711409107515098e-06, "loss": 2.9433, "step": 90700 }, { "epoch": 0.98, "learning_rate": 1.2174155975329336e-06, "loss": 2.9571, "step": 90800 }, { "epoch": 0.98, "learning_rate": 1.1636902843143575e-06, "loss": 2.9343, "step": 90900 }, { "epoch": 0.98, "learning_rate": 1.1099649710957815e-06, "loss": 2.9358, "step": 91000 }, { "epoch": 0.98, "learning_rate": 1.0562396578772054e-06, "loss": 2.9459, "step": 91100 }, { "epoch": 0.98, "learning_rate": 1.0025143446586294e-06, "loss": 2.9464, "step": 91200 }, { "epoch": 0.98, "learning_rate": 9.487890314400532e-07, "loss": 2.9523, "step": 91300 }, { "epoch": 0.98, "learning_rate": 8.950637182214772e-07, "loss": 2.9304, "step": 91400 }, { "epoch": 0.98, "learning_rate": 8.413384050029012e-07, "loss": 2.9382, "step": 91500 }, { "epoch": 0.98, "learning_rate": 7.876130917843251e-07, "loss": 2.9499, "step": 91600 }, { "epoch": 0.99, "learning_rate": 7.338877785657491e-07, "loss": 2.9389, "step": 91700 }, { "epoch": 0.99, "learning_rate": 6.80162465347173e-07, "loss": 2.9355, "step": 91800 }, { "epoch": 0.99, "learning_rate": 6.264371521285969e-07, "loss": 2.9386, "step": 91900 }, { "epoch": 0.99, "learning_rate": 5.727118389100209e-07, "loss": 2.9285, "step": 92000 }, { "epoch": 0.99, "learning_rate": 5.189865256914448e-07, "loss": 2.9309, "step": 92100 }, { "epoch": 0.99, "learning_rate": 4.652612124728687e-07, "loss": 2.9544, "step": 92200 }, { "epoch": 0.99, "learning_rate": 4.115358992542927e-07, "loss": 2.9408, "step": 92300 }, { "epoch": 0.99, "learning_rate": 3.5781058603571663e-07, "loss": 2.9369, "step": 92400 }, { "epoch": 0.99, "learning_rate": 3.0408527281714054e-07, "loss": 2.9509, "step": 92500 }, { "epoch": 0.99, "learning_rate": 2.5035995959856444e-07, "loss": 2.9322, "step": 92600 }, { "epoch": 1.0, "learning_rate": 1.966346463799884e-07, "loss": 2.9314, "step": 92700 }, { "epoch": 1.0, "learning_rate": 1.4290933316141234e-07, "loss": 2.9489, "step": 92800 }, { "epoch": 1.0, "learning_rate": 8.918401994283627e-08, "loss": 2.948, "step": 92900 }, { "epoch": 1.0, "learning_rate": 3.5458706724260206e-08, "loss": 2.9466, "step": 93000 } ], "max_steps": 93066, "num_train_epochs": 1, "total_flos": 7.94540378161152e+17, "trial_name": null, "trial_params": null }