{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.796324097211897, "global_step": 8000000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.7664e-06, "loss": 7.1306, "step": 500 }, { "epoch": 0.0, "learning_rate": 5.5664e-06, "loss": 4.7491, "step": 1000 }, { "epoch": 0.0, "learning_rate": 8.3664e-06, "loss": 4.5563, "step": 1500 }, { "epoch": 0.0, "learning_rate": 1.1166399999999999e-05, "loss": 4.4179, "step": 2000 }, { "epoch": 0.0, "learning_rate": 1.39664e-05, "loss": 4.2868, "step": 2500 }, { "epoch": 0.0, "learning_rate": 1.67664e-05, "loss": 4.1711, "step": 3000 }, { "epoch": 0.0, "learning_rate": 1.95664e-05, "loss": 4.1089, "step": 3500 }, { "epoch": 0.0, "learning_rate": 2.23664e-05, "loss": 4.0387, "step": 4000 }, { "epoch": 0.0, "learning_rate": 2.51664e-05, "loss": 3.9442, "step": 4500 }, { "epoch": 0.0, "learning_rate": 2.79664e-05, "loss": 3.8552, "step": 5000 }, { "epoch": 0.0, "learning_rate": 3.07664e-05, "loss": 3.8383, "step": 5500 }, { "epoch": 0.0, "learning_rate": 3.3566400000000004e-05, "loss": 3.7433, "step": 6000 }, { "epoch": 0.0, "learning_rate": 3.63664e-05, "loss": 3.672, "step": 6500 }, { "epoch": 0.0, "learning_rate": 3.91664e-05, "loss": 3.5705, "step": 7000 }, { "epoch": 0.0, "learning_rate": 4.19664e-05, "loss": 3.4977, "step": 7500 }, { "epoch": 0.0, "learning_rate": 4.47664e-05, "loss": 3.4228, "step": 8000 }, { "epoch": 0.01, "learning_rate": 4.75664e-05, "loss": 3.3206, "step": 8500 }, { "epoch": 0.01, "learning_rate": 5.03664e-05, "loss": 3.3024, "step": 9000 }, { "epoch": 0.01, "learning_rate": 5.31608e-05, "loss": 3.238, "step": 9500 }, { "epoch": 0.01, "learning_rate": 5.5960799999999995e-05, "loss": 3.1835, "step": 10000 }, { "epoch": 0.01, "learning_rate": 5.599792943395728e-05, "loss": 3.1321, "step": 10500 }, { "epoch": 0.01, "learning_rate": 5.599582946839672e-05, "loss": 2.9498, "step": 11000 }, { "epoch": 0.01, "learning_rate": 5.5993733702767274e-05, "loss": 2.9067, "step": 11500 }, { "epoch": 0.01, "learning_rate": 5.599163373720671e-05, "loss": 2.8201, "step": 12000 }, { "epoch": 0.01, "learning_rate": 5.598953377164614e-05, "loss": 2.7697, "step": 12500 }, { "epoch": 0.01, "learning_rate": 5.598743380608558e-05, "loss": 2.6956, "step": 13000 }, { "epoch": 0.01, "learning_rate": 5.5985333840525015e-05, "loss": 2.6767, "step": 13500 }, { "epoch": 0.01, "learning_rate": 5.598323387496445e-05, "loss": 2.6511, "step": 14000 }, { "epoch": 0.01, "learning_rate": 5.598113390940389e-05, "loss": 2.6005, "step": 14500 }, { "epoch": 0.01, "learning_rate": 5.597903394384332e-05, "loss": 2.5646, "step": 15000 }, { "epoch": 0.01, "learning_rate": 5.5976933978282755e-05, "loss": 2.5398, "step": 15500 }, { "epoch": 0.01, "learning_rate": 5.5974838212653316e-05, "loss": 2.5406, "step": 16000 }, { "epoch": 0.01, "learning_rate": 5.597273824709275e-05, "loss": 2.5244, "step": 16500 }, { "epoch": 0.01, "learning_rate": 5.59706424814633e-05, "loss": 2.4838, "step": 17000 }, { "epoch": 0.01, "learning_rate": 5.5968542515902736e-05, "loss": 2.4476, "step": 17500 }, { "epoch": 0.01, "learning_rate": 5.5966442550342176e-05, "loss": 2.3907, "step": 18000 }, { "epoch": 0.01, "learning_rate": 5.596434258478161e-05, "loss": 2.4071, "step": 18500 }, { "epoch": 0.01, "learning_rate": 5.596224261922104e-05, "loss": 2.3964, "step": 19000 }, { "epoch": 0.01, "learning_rate": 5.5960142653660483e-05, "loss": 2.3317, "step": 19500 }, { "epoch": 0.01, "learning_rate": 5.595804268809992e-05, "loss": 2.3686, "step": 20000 }, { "epoch": 0.01, "learning_rate": 5.5955942722539344e-05, "loss": 2.3225, "step": 20500 }, { "epoch": 0.01, "learning_rate": 5.5953842756978784e-05, "loss": 2.2592, "step": 21000 }, { "epoch": 0.01, "learning_rate": 5.595174279141822e-05, "loss": 2.2759, "step": 21500 }, { "epoch": 0.01, "learning_rate": 5.594964282585766e-05, "loss": 2.2664, "step": 22000 }, { "epoch": 0.01, "learning_rate": 5.594754286029709e-05, "loss": 2.281, "step": 22500 }, { "epoch": 0.01, "learning_rate": 5.594545129459877e-05, "loss": 2.239, "step": 23000 }, { "epoch": 0.01, "learning_rate": 5.5943351329038205e-05, "loss": 2.2078, "step": 23500 }, { "epoch": 0.01, "learning_rate": 5.594125556340876e-05, "loss": 2.2347, "step": 24000 }, { "epoch": 0.01, "learning_rate": 5.593915559784819e-05, "loss": 2.1976, "step": 24500 }, { "epoch": 0.01, "learning_rate": 5.593705563228763e-05, "loss": 2.1759, "step": 25000 }, { "epoch": 0.02, "learning_rate": 5.5934955666727066e-05, "loss": 2.1614, "step": 25500 }, { "epoch": 0.02, "learning_rate": 5.593285990109762e-05, "loss": 2.2002, "step": 26000 }, { "epoch": 0.02, "learning_rate": 5.593075993553705e-05, "loss": 2.167, "step": 26500 }, { "epoch": 0.02, "learning_rate": 5.592865996997649e-05, "loss": 2.1416, "step": 27000 }, { "epoch": 0.02, "learning_rate": 5.5926560004415926e-05, "loss": 2.1846, "step": 27500 }, { "epoch": 0.02, "learning_rate": 5.592446003885537e-05, "loss": 2.1457, "step": 28000 }, { "epoch": 0.02, "learning_rate": 5.59223600732948e-05, "loss": 2.1489, "step": 28500 }, { "epoch": 0.02, "learning_rate": 5.5920260107734234e-05, "loss": 2.0987, "step": 29000 }, { "epoch": 0.02, "learning_rate": 5.5918160142173674e-05, "loss": 2.102, "step": 29500 }, { "epoch": 0.02, "learning_rate": 5.59160601766131e-05, "loss": 2.1108, "step": 30000 }, { "epoch": 0.02, "learning_rate": 5.5913960211052534e-05, "loss": 2.1382, "step": 30500 }, { "epoch": 0.02, "learning_rate": 5.5911864445423094e-05, "loss": 2.0619, "step": 31000 }, { "epoch": 0.02, "learning_rate": 5.590976867979365e-05, "loss": 2.0802, "step": 31500 }, { "epoch": 0.02, "learning_rate": 5.590766871423309e-05, "loss": 2.0627, "step": 32000 }, { "epoch": 0.02, "learning_rate": 5.590556874867252e-05, "loss": 2.0826, "step": 32500 }, { "epoch": 0.02, "learning_rate": 5.5903468783111955e-05, "loss": 2.0486, "step": 33000 }, { "epoch": 0.02, "learning_rate": 5.5901368817551395e-05, "loss": 2.0652, "step": 33500 }, { "epoch": 0.02, "learning_rate": 5.589926885199083e-05, "loss": 2.048, "step": 34000 }, { "epoch": 0.02, "learning_rate": 5.589716888643026e-05, "loss": 2.0727, "step": 34500 }, { "epoch": 0.02, "learning_rate": 5.5895068920869696e-05, "loss": 2.0037, "step": 35000 }, { "epoch": 0.02, "learning_rate": 5.589296895530913e-05, "loss": 2.0548, "step": 35500 }, { "epoch": 0.02, "learning_rate": 5.589086898974857e-05, "loss": 1.9999, "step": 36000 }, { "epoch": 0.02, "learning_rate": 5.5888769024188e-05, "loss": 2.0511, "step": 36500 }, { "epoch": 0.02, "learning_rate": 5.5886669058627436e-05, "loss": 1.982, "step": 37000 }, { "epoch": 0.02, "learning_rate": 5.588457749292912e-05, "loss": 2.007, "step": 37500 }, { "epoch": 0.02, "learning_rate": 5.588247752736855e-05, "loss": 2.0074, "step": 38000 }, { "epoch": 0.02, "learning_rate": 5.588037756180799e-05, "loss": 2.0068, "step": 38500 }, { "epoch": 0.02, "learning_rate": 5.5878277596247424e-05, "loss": 2.0297, "step": 39000 }, { "epoch": 0.02, "learning_rate": 5.587617763068685e-05, "loss": 1.9801, "step": 39500 }, { "epoch": 0.02, "learning_rate": 5.587408186505741e-05, "loss": 1.968, "step": 40000 }, { "epoch": 0.02, "learning_rate": 5.587198189949685e-05, "loss": 1.9741, "step": 40500 }, { "epoch": 0.02, "learning_rate": 5.5869881933936285e-05, "loss": 2.0031, "step": 41000 }, { "epoch": 0.02, "learning_rate": 5.586778196837572e-05, "loss": 1.9725, "step": 41500 }, { "epoch": 0.03, "learning_rate": 5.586568200281515e-05, "loss": 1.9465, "step": 42000 }, { "epoch": 0.03, "learning_rate": 5.586358623718571e-05, "loss": 1.9521, "step": 42500 }, { "epoch": 0.03, "learning_rate": 5.5861490471556265e-05, "loss": 1.96, "step": 43000 }, { "epoch": 0.03, "learning_rate": 5.58593905059957e-05, "loss": 1.9815, "step": 43500 }, { "epoch": 0.03, "learning_rate": 5.585729054043514e-05, "loss": 1.9682, "step": 44000 }, { "epoch": 0.03, "learning_rate": 5.585519057487457e-05, "loss": 1.9458, "step": 44500 }, { "epoch": 0.03, "learning_rate": 5.5853090609314006e-05, "loss": 1.9562, "step": 45000 }, { "epoch": 0.03, "learning_rate": 5.5850990643753446e-05, "loss": 1.9166, "step": 45500 }, { "epoch": 0.03, "learning_rate": 5.584889067819288e-05, "loss": 1.9057, "step": 46000 }, { "epoch": 0.03, "learning_rate": 5.5846790712632306e-05, "loss": 1.9332, "step": 46500 }, { "epoch": 0.03, "learning_rate": 5.584469074707175e-05, "loss": 1.9453, "step": 47000 }, { "epoch": 0.03, "learning_rate": 5.584259078151118e-05, "loss": 1.9018, "step": 47500 }, { "epoch": 0.03, "learning_rate": 5.5840490815950614e-05, "loss": 1.9193, "step": 48000 }, { "epoch": 0.03, "learning_rate": 5.5838390850390054e-05, "loss": 1.9188, "step": 48500 }, { "epoch": 0.03, "learning_rate": 5.583629088482949e-05, "loss": 1.851, "step": 49000 }, { "epoch": 0.03, "learning_rate": 5.583419511920004e-05, "loss": 1.9075, "step": 49500 }, { "epoch": 0.03, "learning_rate": 5.583209515363948e-05, "loss": 1.8626, "step": 50000 }, { "epoch": 0.03, "learning_rate": 5.5829995188078915e-05, "loss": 1.9197, "step": 50500 }, { "epoch": 0.03, "learning_rate": 5.582789522251835e-05, "loss": 1.8625, "step": 51000 }, { "epoch": 0.03, "learning_rate": 5.582579525695779e-05, "loss": 1.903, "step": 51500 }, { "epoch": 0.03, "learning_rate": 5.582369949132834e-05, "loss": 1.8981, "step": 52000 }, { "epoch": 0.03, "learning_rate": 5.58216037256989e-05, "loss": 1.8916, "step": 52500 }, { "epoch": 0.03, "learning_rate": 5.5819503760138336e-05, "loss": 1.9053, "step": 53000 }, { "epoch": 0.03, "learning_rate": 5.581740379457776e-05, "loss": 1.8785, "step": 53500 }, { "epoch": 0.03, "learning_rate": 5.58153038290172e-05, "loss": 1.8709, "step": 54000 }, { "epoch": 0.03, "learning_rate": 5.5813203863456636e-05, "loss": 1.8518, "step": 54500 }, { "epoch": 0.03, "learning_rate": 5.581110389789607e-05, "loss": 1.8553, "step": 55000 }, { "epoch": 0.03, "learning_rate": 5.580900393233551e-05, "loss": 1.8435, "step": 55500 }, { "epoch": 0.03, "learning_rate": 5.580690396677494e-05, "loss": 1.8547, "step": 56000 }, { "epoch": 0.03, "learning_rate": 5.5804804001214377e-05, "loss": 1.8521, "step": 56500 }, { "epoch": 0.03, "learning_rate": 5.580270403565382e-05, "loss": 1.8389, "step": 57000 }, { "epoch": 0.03, "learning_rate": 5.580060407009325e-05, "loss": 1.8671, "step": 57500 }, { "epoch": 0.03, "learning_rate": 5.5798508304463804e-05, "loss": 1.8519, "step": 58000 }, { "epoch": 0.04, "learning_rate": 5.5796408338903244e-05, "loss": 1.8339, "step": 58500 }, { "epoch": 0.04, "learning_rate": 5.579430837334268e-05, "loss": 1.8504, "step": 59000 }, { "epoch": 0.04, "learning_rate": 5.579220840778211e-05, "loss": 1.8243, "step": 59500 }, { "epoch": 0.04, "learning_rate": 5.579010844222155e-05, "loss": 1.8522, "step": 60000 }, { "epoch": 0.04, "learning_rate": 5.5788012676592105e-05, "loss": 1.8346, "step": 60500 }, { "epoch": 0.04, "learning_rate": 5.578591271103154e-05, "loss": 1.8302, "step": 61000 }, { "epoch": 0.04, "learning_rate": 5.578381694540209e-05, "loss": 1.8138, "step": 61500 }, { "epoch": 0.04, "learning_rate": 5.5781716979841525e-05, "loss": 1.8075, "step": 62000 }, { "epoch": 0.04, "learning_rate": 5.5779621214212086e-05, "loss": 1.8539, "step": 62500 }, { "epoch": 0.04, "learning_rate": 5.577752124865152e-05, "loss": 1.8535, "step": 63000 }, { "epoch": 0.04, "learning_rate": 5.577542128309095e-05, "loss": 1.8144, "step": 63500 }, { "epoch": 0.04, "learning_rate": 5.577332131753039e-05, "loss": 1.8078, "step": 64000 }, { "epoch": 0.04, "learning_rate": 5.5771221351969826e-05, "loss": 1.8121, "step": 64500 }, { "epoch": 0.04, "learning_rate": 5.576912138640926e-05, "loss": 1.819, "step": 65000 }, { "epoch": 0.04, "learning_rate": 5.57670214208487e-05, "loss": 1.7988, "step": 65500 }, { "epoch": 0.04, "learning_rate": 5.5764921455288133e-05, "loss": 1.8449, "step": 66000 }, { "epoch": 0.04, "learning_rate": 5.576282148972757e-05, "loss": 1.8113, "step": 66500 }, { "epoch": 0.04, "learning_rate": 5.576072152416701e-05, "loss": 1.8032, "step": 67000 }, { "epoch": 0.04, "learning_rate": 5.575862155860644e-05, "loss": 1.7805, "step": 67500 }, { "epoch": 0.04, "learning_rate": 5.5756521593045874e-05, "loss": 1.8332, "step": 68000 }, { "epoch": 0.04, "learning_rate": 5.575442582741643e-05, "loss": 1.8081, "step": 68500 }, { "epoch": 0.04, "learning_rate": 5.575232586185587e-05, "loss": 1.7867, "step": 69000 }, { "epoch": 0.04, "learning_rate": 5.57502258962953e-05, "loss": 1.8534, "step": 69500 }, { "epoch": 0.04, "learning_rate": 5.5748125930734735e-05, "loss": 1.7946, "step": 70000 }, { "epoch": 0.04, "learning_rate": 5.574603016510529e-05, "loss": 1.7681, "step": 70500 }, { "epoch": 0.04, "learning_rate": 5.574393439947585e-05, "loss": 1.8173, "step": 71000 }, { "epoch": 0.04, "learning_rate": 5.574183443391528e-05, "loss": 1.7993, "step": 71500 }, { "epoch": 0.04, "learning_rate": 5.5739734468354716e-05, "loss": 1.8121, "step": 72000 }, { "epoch": 0.04, "learning_rate": 5.5737634502794156e-05, "loss": 1.7741, "step": 72500 }, { "epoch": 0.04, "learning_rate": 5.573553453723359e-05, "loss": 1.7632, "step": 73000 }, { "epoch": 0.04, "learning_rate": 5.573343457167302e-05, "loss": 1.7428, "step": 73500 }, { "epoch": 0.04, "learning_rate": 5.573133460611246e-05, "loss": 1.7824, "step": 74000 }, { "epoch": 0.04, "learning_rate": 5.5729234640551896e-05, "loss": 1.7765, "step": 74500 }, { "epoch": 0.04, "learning_rate": 5.572713467499133e-05, "loss": 1.7981, "step": 75000 }, { "epoch": 0.05, "learning_rate": 5.5725038909361883e-05, "loss": 1.7925, "step": 75500 }, { "epoch": 0.05, "learning_rate": 5.5722938943801324e-05, "loss": 1.7825, "step": 76000 }, { "epoch": 0.05, "learning_rate": 5.572083897824076e-05, "loss": 1.7829, "step": 76500 }, { "epoch": 0.05, "learning_rate": 5.571873901268019e-05, "loss": 1.79, "step": 77000 }, { "epoch": 0.05, "learning_rate": 5.571663904711963e-05, "loss": 1.799, "step": 77500 }, { "epoch": 0.05, "learning_rate": 5.5714543281490184e-05, "loss": 1.7995, "step": 78000 }, { "epoch": 0.05, "learning_rate": 5.571244331592962e-05, "loss": 1.806, "step": 78500 }, { "epoch": 0.05, "learning_rate": 5.571034335036905e-05, "loss": 1.7516, "step": 79000 }, { "epoch": 0.05, "learning_rate": 5.570824338480849e-05, "loss": 1.7678, "step": 79500 }, { "epoch": 0.05, "learning_rate": 5.5706143419247925e-05, "loss": 1.7673, "step": 80000 }, { "epoch": 0.05, "learning_rate": 5.570404345368736e-05, "loss": 1.7944, "step": 80500 }, { "epoch": 0.05, "learning_rate": 5.570194348812679e-05, "loss": 1.8062, "step": 81000 }, { "epoch": 0.05, "learning_rate": 5.5699843522566225e-05, "loss": 1.7369, "step": 81500 }, { "epoch": 0.05, "learning_rate": 5.5697747756936786e-05, "loss": 1.7562, "step": 82000 }, { "epoch": 0.05, "learning_rate": 5.5695647791376226e-05, "loss": 1.7348, "step": 82500 }, { "epoch": 0.05, "learning_rate": 5.569355622567789e-05, "loss": 1.8007, "step": 83000 }, { "epoch": 0.05, "learning_rate": 5.569145626011733e-05, "loss": 1.7627, "step": 83500 }, { "epoch": 0.05, "learning_rate": 5.568935629455677e-05, "loss": 1.7367, "step": 84000 }, { "epoch": 0.05, "learning_rate": 5.56872563289962e-05, "loss": 1.7428, "step": 84500 }, { "epoch": 0.05, "learning_rate": 5.568515636343564e-05, "loss": 1.7706, "step": 85000 }, { "epoch": 0.05, "learning_rate": 5.5683056397875074e-05, "loss": 1.7478, "step": 85500 }, { "epoch": 0.05, "learning_rate": 5.568095643231451e-05, "loss": 1.7316, "step": 86000 }, { "epoch": 0.05, "learning_rate": 5.567885646675395e-05, "loss": 1.7544, "step": 86500 }, { "epoch": 0.05, "learning_rate": 5.567675650119338e-05, "loss": 1.7324, "step": 87000 }, { "epoch": 0.05, "learning_rate": 5.5674660735563935e-05, "loss": 1.7232, "step": 87500 }, { "epoch": 0.05, "learning_rate": 5.5672560770003375e-05, "loss": 1.742, "step": 88000 }, { "epoch": 0.05, "learning_rate": 5.567046080444281e-05, "loss": 1.7131, "step": 88500 }, { "epoch": 0.05, "learning_rate": 5.566836083888224e-05, "loss": 1.7744, "step": 89000 }, { "epoch": 0.05, "learning_rate": 5.5666265073252795e-05, "loss": 1.7089, "step": 89500 }, { "epoch": 0.05, "learning_rate": 5.5664165107692235e-05, "loss": 1.7256, "step": 90000 }, { "epoch": 0.05, "learning_rate": 5.566206514213167e-05, "loss": 1.7423, "step": 90500 }, { "epoch": 0.05, "learning_rate": 5.56599651765711e-05, "loss": 1.7465, "step": 91000 }, { "epoch": 0.05, "learning_rate": 5.565786521101054e-05, "loss": 1.7184, "step": 91500 }, { "epoch": 0.06, "learning_rate": 5.5655769445381096e-05, "loss": 1.7434, "step": 92000 }, { "epoch": 0.06, "learning_rate": 5.565366947982053e-05, "loss": 1.7604, "step": 92500 }, { "epoch": 0.06, "learning_rate": 5.565157371419108e-05, "loss": 1.7488, "step": 93000 }, { "epoch": 0.06, "learning_rate": 5.5649473748630523e-05, "loss": 1.697, "step": 93500 }, { "epoch": 0.06, "learning_rate": 5.564737378306996e-05, "loss": 1.7406, "step": 94000 }, { "epoch": 0.06, "learning_rate": 5.564527381750939e-05, "loss": 1.756, "step": 94500 }, { "epoch": 0.06, "learning_rate": 5.564317385194883e-05, "loss": 1.7186, "step": 95000 }, { "epoch": 0.06, "learning_rate": 5.5641073886388264e-05, "loss": 1.7215, "step": 95500 }, { "epoch": 0.06, "learning_rate": 5.56389739208277e-05, "loss": 1.7408, "step": 96000 }, { "epoch": 0.06, "learning_rate": 5.563687395526714e-05, "loss": 1.7173, "step": 96500 }, { "epoch": 0.06, "learning_rate": 5.563477398970657e-05, "loss": 1.7422, "step": 97000 }, { "epoch": 0.06, "learning_rate": 5.5632674024146e-05, "loss": 1.7511, "step": 97500 }, { "epoch": 0.06, "learning_rate": 5.563057405858544e-05, "loss": 1.7128, "step": 98000 }, { "epoch": 0.06, "learning_rate": 5.562847409302487e-05, "loss": 1.7366, "step": 98500 }, { "epoch": 0.06, "learning_rate": 5.562638252732655e-05, "loss": 1.7102, "step": 99000 }, { "epoch": 0.06, "learning_rate": 5.5624282561765986e-05, "loss": 1.6987, "step": 99500 }, { "epoch": 0.06, "learning_rate": 5.5622182596205426e-05, "loss": 1.7162, "step": 100000 }, { "epoch": 0.06, "eval_loss": 1.6113003492355347, "eval_runtime": 1095.3065, "eval_samples_per_second": 480.888, "eval_steps_per_second": 80.148, "step": 100000 }, { "epoch": 0.06, "learning_rate": 5.562008263064486e-05, "loss": 1.693, "step": 100500 }, { "epoch": 0.06, "learning_rate": 5.561798266508429e-05, "loss": 1.7246, "step": 101000 }, { "epoch": 0.06, "learning_rate": 5.561588269952373e-05, "loss": 1.7385, "step": 101500 }, { "epoch": 0.06, "learning_rate": 5.561378273396316e-05, "loss": 1.7152, "step": 102000 }, { "epoch": 0.06, "learning_rate": 5.561168276840259e-05, "loss": 1.6918, "step": 102500 }, { "epoch": 0.06, "learning_rate": 5.5609587002773153e-05, "loss": 1.6783, "step": 103000 }, { "epoch": 0.06, "learning_rate": 5.5607487037212594e-05, "loss": 1.6992, "step": 103500 }, { "epoch": 0.06, "learning_rate": 5.560538707165203e-05, "loss": 1.7065, "step": 104000 }, { "epoch": 0.06, "learning_rate": 5.5603287106091454e-05, "loss": 1.6912, "step": 104500 }, { "epoch": 0.06, "learning_rate": 5.5601187140530894e-05, "loss": 1.7006, "step": 105000 }, { "epoch": 0.06, "learning_rate": 5.5599091374901454e-05, "loss": 1.7121, "step": 105500 }, { "epoch": 0.06, "learning_rate": 5.559699560927201e-05, "loss": 1.6932, "step": 106000 }, { "epoch": 0.06, "learning_rate": 5.559489564371144e-05, "loss": 1.6908, "step": 106500 }, { "epoch": 0.06, "learning_rate": 5.559279567815088e-05, "loss": 1.7198, "step": 107000 }, { "epoch": 0.06, "learning_rate": 5.5590695712590315e-05, "loss": 1.7275, "step": 107500 }, { "epoch": 0.06, "learning_rate": 5.558859574702975e-05, "loss": 1.6924, "step": 108000 }, { "epoch": 0.07, "learning_rate": 5.558649578146919e-05, "loss": 1.6792, "step": 108500 }, { "epoch": 0.07, "learning_rate": 5.5584395815908616e-05, "loss": 1.6821, "step": 109000 }, { "epoch": 0.07, "learning_rate": 5.558229585034805e-05, "loss": 1.6619, "step": 109500 }, { "epoch": 0.07, "learning_rate": 5.558020008471861e-05, "loss": 1.6954, "step": 110000 }, { "epoch": 0.07, "learning_rate": 5.557810011915805e-05, "loss": 1.7016, "step": 110500 }, { "epoch": 0.07, "learning_rate": 5.557600015359748e-05, "loss": 1.6689, "step": 111000 }, { "epoch": 0.07, "learning_rate": 5.557390018803691e-05, "loss": 1.7108, "step": 111500 }, { "epoch": 0.07, "learning_rate": 5.557180022247635e-05, "loss": 1.7, "step": 112000 }, { "epoch": 0.07, "learning_rate": 5.556970445684691e-05, "loss": 1.7109, "step": 112500 }, { "epoch": 0.07, "learning_rate": 5.5567608691217464e-05, "loss": 1.6864, "step": 113000 }, { "epoch": 0.07, "learning_rate": 5.55655087256569e-05, "loss": 1.6809, "step": 113500 }, { "epoch": 0.07, "learning_rate": 5.556341296002745e-05, "loss": 1.7365, "step": 114000 }, { "epoch": 0.07, "learning_rate": 5.556131299446689e-05, "loss": 1.6716, "step": 114500 }, { "epoch": 0.07, "learning_rate": 5.5559213028906325e-05, "loss": 1.7088, "step": 115000 }, { "epoch": 0.07, "learning_rate": 5.555711306334576e-05, "loss": 1.6476, "step": 115500 }, { "epoch": 0.07, "learning_rate": 5.555501729771631e-05, "loss": 1.6933, "step": 116000 }, { "epoch": 0.07, "learning_rate": 5.555291733215575e-05, "loss": 1.6808, "step": 116500 }, { "epoch": 0.07, "learning_rate": 5.5550817366595185e-05, "loss": 1.7114, "step": 117000 }, { "epoch": 0.07, "learning_rate": 5.554871740103462e-05, "loss": 1.6573, "step": 117500 }, { "epoch": 0.07, "learning_rate": 5.554661743547406e-05, "loss": 1.6965, "step": 118000 }, { "epoch": 0.07, "learning_rate": 5.554451746991349e-05, "loss": 1.6986, "step": 118500 }, { "epoch": 0.07, "learning_rate": 5.5542417504352926e-05, "loss": 1.6969, "step": 119000 }, { "epoch": 0.07, "learning_rate": 5.5540317538792366e-05, "loss": 1.6895, "step": 119500 }, { "epoch": 0.07, "learning_rate": 5.55382175732318e-05, "loss": 1.6495, "step": 120000 }, { "epoch": 0.07, "learning_rate": 5.553611760767123e-05, "loss": 1.6495, "step": 120500 }, { "epoch": 0.07, "learning_rate": 5.5534021842041793e-05, "loss": 1.6729, "step": 121000 }, { "epoch": 0.07, "learning_rate": 5.553192607641235e-05, "loss": 1.706, "step": 121500 }, { "epoch": 0.07, "learning_rate": 5.552982611085178e-05, "loss": 1.672, "step": 122000 }, { "epoch": 0.07, "learning_rate": 5.5527726145291214e-05, "loss": 1.6255, "step": 122500 }, { "epoch": 0.07, "learning_rate": 5.5525626179730654e-05, "loss": 1.6458, "step": 123000 }, { "epoch": 0.07, "learning_rate": 5.552352621417009e-05, "loss": 1.6765, "step": 123500 }, { "epoch": 0.07, "learning_rate": 5.552142624860952e-05, "loss": 1.6828, "step": 124000 }, { "epoch": 0.07, "learning_rate": 5.551932628304896e-05, "loss": 1.6512, "step": 124500 }, { "epoch": 0.07, "learning_rate": 5.5517226317488395e-05, "loss": 1.6695, "step": 125000 }, { "epoch": 0.08, "learning_rate": 5.551513055185895e-05, "loss": 1.6789, "step": 125500 }, { "epoch": 0.08, "learning_rate": 5.551303058629838e-05, "loss": 1.6785, "step": 126000 }, { "epoch": 0.08, "learning_rate": 5.551093062073782e-05, "loss": 1.6435, "step": 126500 }, { "epoch": 0.08, "learning_rate": 5.5508830655177255e-05, "loss": 1.6531, "step": 127000 }, { "epoch": 0.08, "learning_rate": 5.550673068961669e-05, "loss": 1.6556, "step": 127500 }, { "epoch": 0.08, "learning_rate": 5.550463072405613e-05, "loss": 1.6563, "step": 128000 }, { "epoch": 0.08, "learning_rate": 5.5502530758495556e-05, "loss": 1.6816, "step": 128500 }, { "epoch": 0.08, "learning_rate": 5.5500430792934996e-05, "loss": 1.67, "step": 129000 }, { "epoch": 0.08, "learning_rate": 5.549833082737443e-05, "loss": 1.6965, "step": 129500 }, { "epoch": 0.08, "learning_rate": 5.549623506174499e-05, "loss": 1.6701, "step": 130000 }, { "epoch": 0.08, "learning_rate": 5.5494139296115543e-05, "loss": 1.6774, "step": 130500 }, { "epoch": 0.08, "learning_rate": 5.549203933055498e-05, "loss": 1.6849, "step": 131000 }, { "epoch": 0.08, "learning_rate": 5.548993936499442e-05, "loss": 1.6633, "step": 131500 }, { "epoch": 0.08, "learning_rate": 5.548783939943385e-05, "loss": 1.6135, "step": 132000 }, { "epoch": 0.08, "learning_rate": 5.5485739433873284e-05, "loss": 1.6785, "step": 132500 }, { "epoch": 0.08, "learning_rate": 5.548363946831272e-05, "loss": 1.6718, "step": 133000 }, { "epoch": 0.08, "learning_rate": 5.548154370268328e-05, "loss": 1.6594, "step": 133500 }, { "epoch": 0.08, "learning_rate": 5.547944373712271e-05, "loss": 1.6686, "step": 134000 }, { "epoch": 0.08, "learning_rate": 5.5477343771562145e-05, "loss": 1.6388, "step": 134500 }, { "epoch": 0.08, "learning_rate": 5.5475243806001585e-05, "loss": 1.6506, "step": 135000 }, { "epoch": 0.08, "learning_rate": 5.547314804037214e-05, "loss": 1.6689, "step": 135500 }, { "epoch": 0.08, "learning_rate": 5.547104807481157e-05, "loss": 1.6153, "step": 136000 }, { "epoch": 0.08, "learning_rate": 5.546894810925101e-05, "loss": 1.6636, "step": 136500 }, { "epoch": 0.08, "learning_rate": 5.5466848143690446e-05, "loss": 1.6691, "step": 137000 }, { "epoch": 0.08, "learning_rate": 5.546474817812988e-05, "loss": 1.6591, "step": 137500 }, { "epoch": 0.08, "learning_rate": 5.546264821256931e-05, "loss": 1.6408, "step": 138000 }, { "epoch": 0.08, "learning_rate": 5.5460548247008746e-05, "loss": 1.6484, "step": 138500 }, { "epoch": 0.08, "learning_rate": 5.545844828144818e-05, "loss": 1.6661, "step": 139000 }, { "epoch": 0.08, "learning_rate": 5.545634831588762e-05, "loss": 1.6503, "step": 139500 }, { "epoch": 0.08, "learning_rate": 5.5454252550258173e-05, "loss": 1.6475, "step": 140000 }, { "epoch": 0.08, "learning_rate": 5.545215258469761e-05, "loss": 1.6633, "step": 140500 }, { "epoch": 0.08, "learning_rate": 5.545005261913704e-05, "loss": 1.6189, "step": 141000 }, { "epoch": 0.08, "learning_rate": 5.544795265357648e-05, "loss": 1.6433, "step": 141500 }, { "epoch": 0.09, "learning_rate": 5.544585688794704e-05, "loss": 1.5953, "step": 142000 }, { "epoch": 0.09, "learning_rate": 5.544375692238647e-05, "loss": 1.6335, "step": 142500 }, { "epoch": 0.09, "learning_rate": 5.544165695682591e-05, "loss": 1.6495, "step": 143000 }, { "epoch": 0.09, "learning_rate": 5.543955699126534e-05, "loss": 1.6592, "step": 143500 }, { "epoch": 0.09, "learning_rate": 5.54374612256359e-05, "loss": 1.6293, "step": 144000 }, { "epoch": 0.09, "learning_rate": 5.5435361260075335e-05, "loss": 1.6412, "step": 144500 }, { "epoch": 0.09, "learning_rate": 5.543326129451477e-05, "loss": 1.655, "step": 145000 }, { "epoch": 0.09, "learning_rate": 5.54311613289542e-05, "loss": 1.654, "step": 145500 }, { "epoch": 0.09, "learning_rate": 5.5429061363393636e-05, "loss": 1.6185, "step": 146000 }, { "epoch": 0.09, "learning_rate": 5.5426965597764196e-05, "loss": 1.6205, "step": 146500 }, { "epoch": 0.09, "learning_rate": 5.5424865632203636e-05, "loss": 1.6426, "step": 147000 }, { "epoch": 0.09, "learning_rate": 5.542276566664306e-05, "loss": 1.6475, "step": 147500 }, { "epoch": 0.09, "learning_rate": 5.5420665701082496e-05, "loss": 1.6265, "step": 148000 }, { "epoch": 0.09, "learning_rate": 5.5418565735521936e-05, "loss": 1.6394, "step": 148500 }, { "epoch": 0.09, "learning_rate": 5.541646576996137e-05, "loss": 1.628, "step": 149000 }, { "epoch": 0.09, "learning_rate": 5.54143658044008e-05, "loss": 1.6245, "step": 149500 }, { "epoch": 0.09, "learning_rate": 5.5412265838840244e-05, "loss": 1.6298, "step": 150000 }, { "epoch": 0.09, "learning_rate": 5.541016587327968e-05, "loss": 1.632, "step": 150500 }, { "epoch": 0.09, "learning_rate": 5.540807010765023e-05, "loss": 1.6021, "step": 151000 }, { "epoch": 0.09, "learning_rate": 5.540597014208967e-05, "loss": 1.6157, "step": 151500 }, { "epoch": 0.09, "learning_rate": 5.5403870176529104e-05, "loss": 1.6264, "step": 152000 }, { "epoch": 0.09, "learning_rate": 5.540177021096854e-05, "loss": 1.6485, "step": 152500 }, { "epoch": 0.09, "learning_rate": 5.539967444533909e-05, "loss": 1.6493, "step": 153000 }, { "epoch": 0.09, "learning_rate": 5.539757447977853e-05, "loss": 1.6467, "step": 153500 }, { "epoch": 0.09, "learning_rate": 5.5395474514217965e-05, "loss": 1.6432, "step": 154000 }, { "epoch": 0.09, "learning_rate": 5.539337874858852e-05, "loss": 1.6397, "step": 154500 }, { "epoch": 0.09, "learning_rate": 5.539127878302795e-05, "loss": 1.6255, "step": 155000 }, { "epoch": 0.09, "learning_rate": 5.538917881746739e-05, "loss": 1.6372, "step": 155500 }, { "epoch": 0.09, "learning_rate": 5.5387078851906826e-05, "loss": 1.6155, "step": 156000 }, { "epoch": 0.09, "learning_rate": 5.538497888634626e-05, "loss": 1.6305, "step": 156500 }, { "epoch": 0.09, "learning_rate": 5.538288312071682e-05, "loss": 1.6082, "step": 157000 }, { "epoch": 0.09, "learning_rate": 5.538078315515625e-05, "loss": 1.6183, "step": 157500 }, { "epoch": 0.09, "learning_rate": 5.5378683189595687e-05, "loss": 1.6143, "step": 158000 }, { "epoch": 0.1, "learning_rate": 5.537658322403513e-05, "loss": 1.6299, "step": 158500 }, { "epoch": 0.1, "learning_rate": 5.537448325847456e-05, "loss": 1.6296, "step": 159000 }, { "epoch": 0.1, "learning_rate": 5.5372383292913994e-05, "loss": 1.6464, "step": 159500 }, { "epoch": 0.1, "learning_rate": 5.5370283327353434e-05, "loss": 1.6393, "step": 160000 }, { "epoch": 0.1, "learning_rate": 5.536818336179287e-05, "loss": 1.6345, "step": 160500 }, { "epoch": 0.1, "learning_rate": 5.536608759616342e-05, "loss": 1.6183, "step": 161000 }, { "epoch": 0.1, "learning_rate": 5.5363987630602854e-05, "loss": 1.62, "step": 161500 }, { "epoch": 0.1, "learning_rate": 5.5361887665042295e-05, "loss": 1.62, "step": 162000 }, { "epoch": 0.1, "learning_rate": 5.535978769948173e-05, "loss": 1.6003, "step": 162500 }, { "epoch": 0.1, "learning_rate": 5.535769193385228e-05, "loss": 1.6346, "step": 163000 }, { "epoch": 0.1, "learning_rate": 5.5355591968291715e-05, "loss": 1.6618, "step": 163500 }, { "epoch": 0.1, "learning_rate": 5.5353492002731155e-05, "loss": 1.5938, "step": 164000 }, { "epoch": 0.1, "learning_rate": 5.535139203717059e-05, "loss": 1.6081, "step": 164500 }, { "epoch": 0.1, "learning_rate": 5.534929207161002e-05, "loss": 1.6232, "step": 165000 }, { "epoch": 0.1, "learning_rate": 5.534719210604946e-05, "loss": 1.6045, "step": 165500 }, { "epoch": 0.1, "learning_rate": 5.5345092140488896e-05, "loss": 1.6061, "step": 166000 }, { "epoch": 0.1, "learning_rate": 5.5342992174928336e-05, "loss": 1.6491, "step": 166500 }, { "epoch": 0.1, "learning_rate": 5.534089220936776e-05, "loss": 1.6291, "step": 167000 }, { "epoch": 0.1, "learning_rate": 5.533879644373832e-05, "loss": 1.6282, "step": 167500 }, { "epoch": 0.1, "learning_rate": 5.533669647817776e-05, "loss": 1.602, "step": 168000 }, { "epoch": 0.1, "learning_rate": 5.53345965126172e-05, "loss": 1.6096, "step": 168500 }, { "epoch": 0.1, "learning_rate": 5.533249654705663e-05, "loss": 1.6116, "step": 169000 }, { "epoch": 0.1, "learning_rate": 5.5330400781427184e-05, "loss": 1.6204, "step": 169500 }, { "epoch": 0.1, "learning_rate": 5.532830081586662e-05, "loss": 1.6099, "step": 170000 }, { "epoch": 0.1, "learning_rate": 5.532620085030606e-05, "loss": 1.644, "step": 170500 }, { "epoch": 0.1, "learning_rate": 5.532410088474549e-05, "loss": 1.6269, "step": 171000 }, { "epoch": 0.1, "learning_rate": 5.5322000919184925e-05, "loss": 1.5776, "step": 171500 }, { "epoch": 0.1, "learning_rate": 5.5319905153555485e-05, "loss": 1.6387, "step": 172000 }, { "epoch": 0.1, "learning_rate": 5.531780938792604e-05, "loss": 1.6198, "step": 172500 }, { "epoch": 0.1, "learning_rate": 5.531570942236547e-05, "loss": 1.5853, "step": 173000 }, { "epoch": 0.1, "learning_rate": 5.5313609456804905e-05, "loss": 1.6199, "step": 173500 }, { "epoch": 0.1, "learning_rate": 5.5311509491244346e-05, "loss": 1.5931, "step": 174000 }, { "epoch": 0.1, "learning_rate": 5.530940952568378e-05, "loss": 1.6317, "step": 174500 }, { "epoch": 0.1, "learning_rate": 5.530730956012321e-05, "loss": 1.6126, "step": 175000 }, { "epoch": 0.11, "learning_rate": 5.530520959456265e-05, "loss": 1.6168, "step": 175500 }, { "epoch": 0.11, "learning_rate": 5.5303113828933206e-05, "loss": 1.5932, "step": 176000 }, { "epoch": 0.11, "learning_rate": 5.530101386337264e-05, "loss": 1.6496, "step": 176500 }, { "epoch": 0.11, "learning_rate": 5.529891389781207e-05, "loss": 1.6044, "step": 177000 }, { "epoch": 0.11, "learning_rate": 5.5296813932251514e-05, "loss": 1.6055, "step": 177500 }, { "epoch": 0.11, "learning_rate": 5.529471816662207e-05, "loss": 1.5994, "step": 178000 }, { "epoch": 0.11, "learning_rate": 5.52926182010615e-05, "loss": 1.5919, "step": 178500 }, { "epoch": 0.11, "learning_rate": 5.529051823550094e-05, "loss": 1.5779, "step": 179000 }, { "epoch": 0.11, "learning_rate": 5.5288418269940374e-05, "loss": 1.6163, "step": 179500 }, { "epoch": 0.11, "learning_rate": 5.528631830437981e-05, "loss": 1.6164, "step": 180000 }, { "epoch": 0.11, "learning_rate": 5.528421833881925e-05, "loss": 1.6308, "step": 180500 }, { "epoch": 0.11, "learning_rate": 5.528211837325868e-05, "loss": 1.6218, "step": 181000 }, { "epoch": 0.11, "learning_rate": 5.528001840769811e-05, "loss": 1.5762, "step": 181500 }, { "epoch": 0.11, "learning_rate": 5.527792264206867e-05, "loss": 1.6014, "step": 182000 }, { "epoch": 0.11, "learning_rate": 5.527582267650811e-05, "loss": 1.6038, "step": 182500 }, { "epoch": 0.11, "learning_rate": 5.527372271094754e-05, "loss": 1.6381, "step": 183000 }, { "epoch": 0.11, "learning_rate": 5.5271622745386976e-05, "loss": 1.6253, "step": 183500 }, { "epoch": 0.11, "learning_rate": 5.526952277982641e-05, "loss": 1.6, "step": 184000 }, { "epoch": 0.11, "learning_rate": 5.526742281426584e-05, "loss": 1.5856, "step": 184500 }, { "epoch": 0.11, "learning_rate": 5.5265322848705276e-05, "loss": 1.6109, "step": 185000 }, { "epoch": 0.11, "learning_rate": 5.5263222883144716e-05, "loss": 1.6121, "step": 185500 }, { "epoch": 0.11, "learning_rate": 5.526112291758415e-05, "loss": 1.6169, "step": 186000 }, { "epoch": 0.11, "learning_rate": 5.52590271519547e-05, "loss": 1.5976, "step": 186500 }, { "epoch": 0.11, "learning_rate": 5.5256927186394144e-05, "loss": 1.6129, "step": 187000 }, { "epoch": 0.11, "learning_rate": 5.525482722083358e-05, "loss": 1.5925, "step": 187500 }, { "epoch": 0.11, "learning_rate": 5.525272725527301e-05, "loss": 1.6216, "step": 188000 }, { "epoch": 0.11, "learning_rate": 5.525062728971245e-05, "loss": 1.6201, "step": 188500 }, { "epoch": 0.11, "learning_rate": 5.5248531524083004e-05, "loss": 1.6284, "step": 189000 }, { "epoch": 0.11, "learning_rate": 5.5246435758453565e-05, "loss": 1.5996, "step": 189500 }, { "epoch": 0.11, "learning_rate": 5.5244335792893e-05, "loss": 1.5812, "step": 190000 }, { "epoch": 0.11, "learning_rate": 5.524224002726355e-05, "loss": 1.586, "step": 190500 }, { "epoch": 0.11, "learning_rate": 5.5240140061702985e-05, "loss": 1.6248, "step": 191000 }, { "epoch": 0.11, "learning_rate": 5.5238040096142425e-05, "loss": 1.6143, "step": 191500 }, { "epoch": 0.12, "learning_rate": 5.523594013058186e-05, "loss": 1.581, "step": 192000 }, { "epoch": 0.12, "learning_rate": 5.523384016502129e-05, "loss": 1.6128, "step": 192500 }, { "epoch": 0.12, "learning_rate": 5.523174019946073e-05, "loss": 1.5824, "step": 193000 }, { "epoch": 0.12, "learning_rate": 5.522964023390016e-05, "loss": 1.6002, "step": 193500 }, { "epoch": 0.12, "learning_rate": 5.52275402683396e-05, "loss": 1.6012, "step": 194000 }, { "epoch": 0.12, "learning_rate": 5.522544030277903e-05, "loss": 1.5855, "step": 194500 }, { "epoch": 0.12, "learning_rate": 5.522334453714959e-05, "loss": 1.6029, "step": 195000 }, { "epoch": 0.12, "learning_rate": 5.522124457158903e-05, "loss": 1.6014, "step": 195500 }, { "epoch": 0.12, "learning_rate": 5.521914460602846e-05, "loss": 1.6003, "step": 196000 }, { "epoch": 0.12, "learning_rate": 5.5217044640467894e-05, "loss": 1.57, "step": 196500 }, { "epoch": 0.12, "learning_rate": 5.521494467490733e-05, "loss": 1.6085, "step": 197000 }, { "epoch": 0.12, "learning_rate": 5.521284470934677e-05, "loss": 1.6124, "step": 197500 }, { "epoch": 0.12, "learning_rate": 5.52107447437862e-05, "loss": 1.5989, "step": 198000 }, { "epoch": 0.12, "learning_rate": 5.5208644778225634e-05, "loss": 1.5842, "step": 198500 }, { "epoch": 0.12, "learning_rate": 5.520654901259619e-05, "loss": 1.6082, "step": 199000 }, { "epoch": 0.12, "learning_rate": 5.520444904703563e-05, "loss": 1.6194, "step": 199500 }, { "epoch": 0.12, "learning_rate": 5.520235748133731e-05, "loss": 1.5846, "step": 200000 }, { "epoch": 0.12, "eval_loss": 1.5057743787765503, "eval_runtime": 1098.6714, "eval_samples_per_second": 479.415, "eval_steps_per_second": 79.903, "step": 200000 }, { "epoch": 0.12, "learning_rate": 5.520025751577674e-05, "loss": 1.6172, "step": 200500 }, { "epoch": 0.12, "learning_rate": 5.5198157550216175e-05, "loss": 1.5929, "step": 201000 }, { "epoch": 0.12, "learning_rate": 5.5196057584655616e-05, "loss": 1.5772, "step": 201500 }, { "epoch": 0.12, "learning_rate": 5.519395761909505e-05, "loss": 1.5737, "step": 202000 }, { "epoch": 0.12, "learning_rate": 5.519185765353448e-05, "loss": 1.575, "step": 202500 }, { "epoch": 0.12, "learning_rate": 5.5189757687973916e-05, "loss": 1.6277, "step": 203000 }, { "epoch": 0.12, "learning_rate": 5.518765772241335e-05, "loss": 1.5874, "step": 203500 }, { "epoch": 0.12, "learning_rate": 5.518555775685278e-05, "loss": 1.5785, "step": 204000 }, { "epoch": 0.12, "learning_rate": 5.518345779129222e-05, "loss": 1.5956, "step": 204500 }, { "epoch": 0.12, "learning_rate": 5.5181357825731657e-05, "loss": 1.5306, "step": 205000 }, { "epoch": 0.12, "learning_rate": 5.517925786017109e-05, "loss": 1.6043, "step": 205500 }, { "epoch": 0.12, "learning_rate": 5.517715789461053e-05, "loss": 1.6062, "step": 206000 }, { "epoch": 0.12, "learning_rate": 5.5175062128981084e-05, "loss": 1.5506, "step": 206500 }, { "epoch": 0.12, "learning_rate": 5.5172966363351644e-05, "loss": 1.6059, "step": 207000 }, { "epoch": 0.12, "learning_rate": 5.517086639779107e-05, "loss": 1.6007, "step": 207500 }, { "epoch": 0.12, "learning_rate": 5.516876643223051e-05, "loss": 1.6055, "step": 208000 }, { "epoch": 0.13, "learning_rate": 5.5166666466669945e-05, "loss": 1.6001, "step": 208500 }, { "epoch": 0.13, "learning_rate": 5.5164570701040505e-05, "loss": 1.5949, "step": 209000 }, { "epoch": 0.13, "learning_rate": 5.516247073547994e-05, "loss": 1.599, "step": 209500 }, { "epoch": 0.13, "learning_rate": 5.516037076991937e-05, "loss": 1.5766, "step": 210000 }, { "epoch": 0.13, "learning_rate": 5.5158270804358805e-05, "loss": 1.6037, "step": 210500 }, { "epoch": 0.13, "learning_rate": 5.515617083879824e-05, "loss": 1.567, "step": 211000 }, { "epoch": 0.13, "learning_rate": 5.515407087323768e-05, "loss": 1.5633, "step": 211500 }, { "epoch": 0.13, "learning_rate": 5.515197510760824e-05, "loss": 1.5883, "step": 212000 }, { "epoch": 0.13, "learning_rate": 5.5149875142047666e-05, "loss": 1.5684, "step": 212500 }, { "epoch": 0.13, "learning_rate": 5.51477751764871e-05, "loss": 1.5685, "step": 213000 }, { "epoch": 0.13, "learning_rate": 5.514567521092654e-05, "loss": 1.5924, "step": 213500 }, { "epoch": 0.13, "learning_rate": 5.514357524536597e-05, "loss": 1.5858, "step": 214000 }, { "epoch": 0.13, "learning_rate": 5.5141479479736534e-05, "loss": 1.5703, "step": 214500 }, { "epoch": 0.13, "learning_rate": 5.513937951417597e-05, "loss": 1.5536, "step": 215000 }, { "epoch": 0.13, "learning_rate": 5.51372795486154e-05, "loss": 1.5849, "step": 215500 }, { "epoch": 0.13, "learning_rate": 5.5135179583054834e-05, "loss": 1.5829, "step": 216000 }, { "epoch": 0.13, "learning_rate": 5.5133079617494274e-05, "loss": 1.579, "step": 216500 }, { "epoch": 0.13, "learning_rate": 5.513097965193371e-05, "loss": 1.5838, "step": 217000 }, { "epoch": 0.13, "learning_rate": 5.512887968637314e-05, "loss": 1.5788, "step": 217500 }, { "epoch": 0.13, "learning_rate": 5.512677972081258e-05, "loss": 1.6009, "step": 218000 }, { "epoch": 0.13, "learning_rate": 5.5124679755252015e-05, "loss": 1.5677, "step": 218500 }, { "epoch": 0.13, "learning_rate": 5.5122588189553695e-05, "loss": 1.5503, "step": 219000 }, { "epoch": 0.13, "learning_rate": 5.512048822399312e-05, "loss": 1.5778, "step": 219500 }, { "epoch": 0.13, "learning_rate": 5.5118388258432555e-05, "loss": 1.5821, "step": 220000 }, { "epoch": 0.13, "learning_rate": 5.5116288292871996e-05, "loss": 1.5648, "step": 220500 }, { "epoch": 0.13, "learning_rate": 5.5114192527242556e-05, "loss": 1.5803, "step": 221000 }, { "epoch": 0.13, "learning_rate": 5.511209256168199e-05, "loss": 1.5933, "step": 221500 }, { "epoch": 0.13, "learning_rate": 5.510999259612142e-05, "loss": 1.6024, "step": 222000 }, { "epoch": 0.13, "learning_rate": 5.510789683049198e-05, "loss": 1.5776, "step": 222500 }, { "epoch": 0.13, "learning_rate": 5.510579686493142e-05, "loss": 1.6148, "step": 223000 }, { "epoch": 0.13, "learning_rate": 5.510369689937085e-05, "loss": 1.5659, "step": 223500 }, { "epoch": 0.13, "learning_rate": 5.510159693381029e-05, "loss": 1.5784, "step": 224000 }, { "epoch": 0.13, "learning_rate": 5.509949696824972e-05, "loss": 1.5568, "step": 224500 }, { "epoch": 0.13, "learning_rate": 5.509739700268915e-05, "loss": 1.5756, "step": 225000 }, { "epoch": 0.14, "learning_rate": 5.509529703712859e-05, "loss": 1.5393, "step": 225500 }, { "epoch": 0.14, "learning_rate": 5.5093197071568024e-05, "loss": 1.5623, "step": 226000 }, { "epoch": 0.14, "learning_rate": 5.509109710600746e-05, "loss": 1.5507, "step": 226500 }, { "epoch": 0.14, "learning_rate": 5.50889971404469e-05, "loss": 1.5647, "step": 227000 }, { "epoch": 0.14, "learning_rate": 5.508689717488633e-05, "loss": 1.5549, "step": 227500 }, { "epoch": 0.14, "learning_rate": 5.5084797209325765e-05, "loss": 1.5854, "step": 228000 }, { "epoch": 0.14, "learning_rate": 5.508270144369632e-05, "loss": 1.556, "step": 228500 }, { "epoch": 0.14, "learning_rate": 5.508060147813576e-05, "loss": 1.5822, "step": 229000 }, { "epoch": 0.14, "learning_rate": 5.507850151257519e-05, "loss": 1.5834, "step": 229500 }, { "epoch": 0.14, "learning_rate": 5.5076401547014626e-05, "loss": 1.6046, "step": 230000 }, { "epoch": 0.14, "learning_rate": 5.5074301581454066e-05, "loss": 1.6046, "step": 230500 }, { "epoch": 0.14, "learning_rate": 5.5072210015755746e-05, "loss": 1.5461, "step": 231000 }, { "epoch": 0.14, "learning_rate": 5.507011005019517e-05, "loss": 1.5611, "step": 231500 }, { "epoch": 0.14, "learning_rate": 5.5068010084634606e-05, "loss": 1.5498, "step": 232000 }, { "epoch": 0.14, "learning_rate": 5.506591011907405e-05, "loss": 1.5501, "step": 232500 }, { "epoch": 0.14, "learning_rate": 5.506381015351348e-05, "loss": 1.548, "step": 233000 }, { "epoch": 0.14, "learning_rate": 5.5061710187952914e-05, "loss": 1.5708, "step": 233500 }, { "epoch": 0.14, "learning_rate": 5.505961442232347e-05, "loss": 1.5545, "step": 234000 }, { "epoch": 0.14, "learning_rate": 5.505751865669403e-05, "loss": 1.555, "step": 234500 }, { "epoch": 0.14, "learning_rate": 5.505541869113347e-05, "loss": 1.5698, "step": 235000 }, { "epoch": 0.14, "learning_rate": 5.50533187255729e-05, "loss": 1.5515, "step": 235500 }, { "epoch": 0.14, "learning_rate": 5.5051218760012335e-05, "loss": 1.5406, "step": 236000 }, { "epoch": 0.14, "learning_rate": 5.504911879445177e-05, "loss": 1.5443, "step": 236500 }, { "epoch": 0.14, "learning_rate": 5.50470188288912e-05, "loss": 1.5536, "step": 237000 }, { "epoch": 0.14, "learning_rate": 5.504491886333064e-05, "loss": 1.5699, "step": 237500 }, { "epoch": 0.14, "learning_rate": 5.5042818897770075e-05, "loss": 1.5656, "step": 238000 }, { "epoch": 0.14, "learning_rate": 5.504072313214063e-05, "loss": 1.5824, "step": 238500 }, { "epoch": 0.14, "learning_rate": 5.503862316658006e-05, "loss": 1.5363, "step": 239000 }, { "epoch": 0.14, "learning_rate": 5.50365232010195e-05, "loss": 1.5354, "step": 239500 }, { "epoch": 0.14, "learning_rate": 5.5034423235458936e-05, "loss": 1.5804, "step": 240000 }, { "epoch": 0.14, "learning_rate": 5.503232326989837e-05, "loss": 1.5398, "step": 240500 }, { "epoch": 0.14, "learning_rate": 5.503023170420005e-05, "loss": 1.5531, "step": 241000 }, { "epoch": 0.14, "learning_rate": 5.502813173863948e-05, "loss": 1.5521, "step": 241500 }, { "epoch": 0.15, "learning_rate": 5.5026031773078924e-05, "loss": 1.5674, "step": 242000 }, { "epoch": 0.15, "learning_rate": 5.502393180751836e-05, "loss": 1.5475, "step": 242500 }, { "epoch": 0.15, "learning_rate": 5.50218318419578e-05, "loss": 1.549, "step": 243000 }, { "epoch": 0.15, "learning_rate": 5.5019731876397224e-05, "loss": 1.5572, "step": 243500 }, { "epoch": 0.15, "learning_rate": 5.501763191083666e-05, "loss": 1.5546, "step": 244000 }, { "epoch": 0.15, "learning_rate": 5.50155319452761e-05, "loss": 1.54, "step": 244500 }, { "epoch": 0.15, "learning_rate": 5.501343197971553e-05, "loss": 1.5534, "step": 245000 }, { "epoch": 0.15, "learning_rate": 5.5011332014154965e-05, "loss": 1.5464, "step": 245500 }, { "epoch": 0.15, "learning_rate": 5.500923624852552e-05, "loss": 1.5766, "step": 246000 }, { "epoch": 0.15, "learning_rate": 5.500713628296496e-05, "loss": 1.5508, "step": 246500 }, { "epoch": 0.15, "learning_rate": 5.500503631740439e-05, "loss": 1.5527, "step": 247000 }, { "epoch": 0.15, "learning_rate": 5.5002936351843825e-05, "loss": 1.5564, "step": 247500 }, { "epoch": 0.15, "learning_rate": 5.5000836386283266e-05, "loss": 1.5638, "step": 248000 }, { "epoch": 0.15, "learning_rate": 5.49987364207227e-05, "loss": 1.5471, "step": 248500 }, { "epoch": 0.15, "learning_rate": 5.499663645516213e-05, "loss": 1.5921, "step": 249000 }, { "epoch": 0.15, "learning_rate": 5.499453648960157e-05, "loss": 1.5616, "step": 249500 }, { "epoch": 0.15, "learning_rate": 5.4992436524041006e-05, "loss": 1.5572, "step": 250000 }, { "epoch": 0.15, "learning_rate": 5.499034075841156e-05, "loss": 1.5648, "step": 250500 }, { "epoch": 0.15, "learning_rate": 5.4988240792851e-05, "loss": 1.5501, "step": 251000 }, { "epoch": 0.15, "learning_rate": 5.4986145027221554e-05, "loss": 1.6062, "step": 251500 }, { "epoch": 0.15, "learning_rate": 5.498404506166099e-05, "loss": 1.5563, "step": 252000 }, { "epoch": 0.15, "learning_rate": 5.498194509610042e-05, "loss": 1.5646, "step": 252500 }, { "epoch": 0.15, "learning_rate": 5.497984513053986e-05, "loss": 1.5585, "step": 253000 }, { "epoch": 0.15, "learning_rate": 5.4977745164979294e-05, "loss": 1.5076, "step": 253500 }, { "epoch": 0.15, "learning_rate": 5.497564519941873e-05, "loss": 1.5712, "step": 254000 }, { "epoch": 0.15, "learning_rate": 5.497354523385817e-05, "loss": 1.5462, "step": 254500 }, { "epoch": 0.15, "learning_rate": 5.49714452682976e-05, "loss": 1.5614, "step": 255000 }, { "epoch": 0.15, "learning_rate": 5.4969349502668155e-05, "loss": 1.5738, "step": 255500 }, { "epoch": 0.15, "learning_rate": 5.496724953710759e-05, "loss": 1.5401, "step": 256000 }, { "epoch": 0.15, "learning_rate": 5.496514957154703e-05, "loss": 1.548, "step": 256500 }, { "epoch": 0.15, "learning_rate": 5.496304960598646e-05, "loss": 1.5519, "step": 257000 }, { "epoch": 0.15, "learning_rate": 5.4960953840357016e-05, "loss": 1.5678, "step": 257500 }, { "epoch": 0.15, "learning_rate": 5.495885807472757e-05, "loss": 1.5166, "step": 258000 }, { "epoch": 0.15, "learning_rate": 5.495675810916701e-05, "loss": 1.5571, "step": 258500 }, { "epoch": 0.16, "learning_rate": 5.495465814360644e-05, "loss": 1.5538, "step": 259000 }, { "epoch": 0.16, "learning_rate": 5.4952558178045876e-05, "loss": 1.5246, "step": 259500 }, { "epoch": 0.16, "learning_rate": 5.4950458212485317e-05, "loss": 1.5855, "step": 260000 }, { "epoch": 0.16, "learning_rate": 5.494835824692475e-05, "loss": 1.5427, "step": 260500 }, { "epoch": 0.16, "learning_rate": 5.4946258281364184e-05, "loss": 1.5513, "step": 261000 }, { "epoch": 0.16, "learning_rate": 5.4944158315803624e-05, "loss": 1.5516, "step": 261500 }, { "epoch": 0.16, "learning_rate": 5.494205835024306e-05, "loss": 1.5543, "step": 262000 }, { "epoch": 0.16, "learning_rate": 5.493995838468249e-05, "loss": 1.5701, "step": 262500 }, { "epoch": 0.16, "learning_rate": 5.4937862619053044e-05, "loss": 1.5495, "step": 263000 }, { "epoch": 0.16, "learning_rate": 5.4935762653492484e-05, "loss": 1.5427, "step": 263500 }, { "epoch": 0.16, "learning_rate": 5.493366268793192e-05, "loss": 1.5414, "step": 264000 }, { "epoch": 0.16, "learning_rate": 5.493156272237135e-05, "loss": 1.5612, "step": 264500 }, { "epoch": 0.16, "learning_rate": 5.492946275681079e-05, "loss": 1.5663, "step": 265000 }, { "epoch": 0.16, "learning_rate": 5.4927366991181345e-05, "loss": 1.5413, "step": 265500 }, { "epoch": 0.16, "learning_rate": 5.492526702562078e-05, "loss": 1.5568, "step": 266000 }, { "epoch": 0.16, "learning_rate": 5.492316706006022e-05, "loss": 1.5595, "step": 266500 }, { "epoch": 0.16, "learning_rate": 5.492106709449965e-05, "loss": 1.5211, "step": 267000 }, { "epoch": 0.16, "learning_rate": 5.4918967128939086e-05, "loss": 1.5112, "step": 267500 }, { "epoch": 0.16, "learning_rate": 5.491686716337852e-05, "loss": 1.5416, "step": 268000 }, { "epoch": 0.16, "learning_rate": 5.491476719781795e-05, "loss": 1.5689, "step": 268500 }, { "epoch": 0.16, "learning_rate": 5.4912667232257386e-05, "loss": 1.5339, "step": 269000 }, { "epoch": 0.16, "learning_rate": 5.4910567266696826e-05, "loss": 1.5484, "step": 269500 }, { "epoch": 0.16, "learning_rate": 5.490847150106739e-05, "loss": 1.5524, "step": 270000 }, { "epoch": 0.16, "learning_rate": 5.490637573543794e-05, "loss": 1.5528, "step": 270500 }, { "epoch": 0.16, "learning_rate": 5.4904279969808494e-05, "loss": 1.5559, "step": 271000 }, { "epoch": 0.16, "learning_rate": 5.490218000424793e-05, "loss": 1.5594, "step": 271500 }, { "epoch": 0.16, "learning_rate": 5.490008003868737e-05, "loss": 1.5377, "step": 272000 }, { "epoch": 0.16, "learning_rate": 5.48979800731268e-05, "loss": 1.537, "step": 272500 }, { "epoch": 0.16, "learning_rate": 5.4895880107566235e-05, "loss": 1.5462, "step": 273000 }, { "epoch": 0.16, "learning_rate": 5.4893780142005675e-05, "loss": 1.5742, "step": 273500 }, { "epoch": 0.16, "learning_rate": 5.489168017644511e-05, "loss": 1.5052, "step": 274000 }, { "epoch": 0.16, "learning_rate": 5.488958021088454e-05, "loss": 1.583, "step": 274500 }, { "epoch": 0.16, "learning_rate": 5.4887480245323975e-05, "loss": 1.5206, "step": 275000 }, { "epoch": 0.17, "learning_rate": 5.4885384479694536e-05, "loss": 1.576, "step": 275500 }, { "epoch": 0.17, "learning_rate": 5.488328451413397e-05, "loss": 1.5534, "step": 276000 }, { "epoch": 0.17, "learning_rate": 5.488118874850452e-05, "loss": 1.5095, "step": 276500 }, { "epoch": 0.17, "learning_rate": 5.4879088782943956e-05, "loss": 1.5479, "step": 277000 }, { "epoch": 0.17, "learning_rate": 5.4876988817383396e-05, "loss": 1.5154, "step": 277500 }, { "epoch": 0.17, "learning_rate": 5.487488885182283e-05, "loss": 1.5269, "step": 278000 }, { "epoch": 0.17, "learning_rate": 5.487278888626226e-05, "loss": 1.5386, "step": 278500 }, { "epoch": 0.17, "learning_rate": 5.4870688920701703e-05, "loss": 1.5105, "step": 279000 }, { "epoch": 0.17, "learning_rate": 5.486858895514114e-05, "loss": 1.5905, "step": 279500 }, { "epoch": 0.17, "learning_rate": 5.486648898958057e-05, "loss": 1.5191, "step": 280000 }, { "epoch": 0.17, "learning_rate": 5.4864389024020004e-05, "loss": 1.5372, "step": 280500 }, { "epoch": 0.17, "learning_rate": 5.486228905845944e-05, "loss": 1.5744, "step": 281000 }, { "epoch": 0.17, "learning_rate": 5.486018909289888e-05, "loss": 1.5372, "step": 281500 }, { "epoch": 0.17, "learning_rate": 5.485809332726944e-05, "loss": 1.5391, "step": 282000 }, { "epoch": 0.17, "learning_rate": 5.4855993361708864e-05, "loss": 1.5146, "step": 282500 }, { "epoch": 0.17, "learning_rate": 5.48538933961483e-05, "loss": 1.522, "step": 283000 }, { "epoch": 0.17, "learning_rate": 5.485179343058774e-05, "loss": 1.5382, "step": 283500 }, { "epoch": 0.17, "learning_rate": 5.48496976649583e-05, "loss": 1.5458, "step": 284000 }, { "epoch": 0.17, "learning_rate": 5.4847597699397725e-05, "loss": 1.5505, "step": 284500 }, { "epoch": 0.17, "learning_rate": 5.484549773383716e-05, "loss": 1.5513, "step": 285000 }, { "epoch": 0.17, "learning_rate": 5.48433977682766e-05, "loss": 1.5208, "step": 285500 }, { "epoch": 0.17, "learning_rate": 5.484129780271603e-05, "loss": 1.5379, "step": 286000 }, { "epoch": 0.17, "learning_rate": 5.4839197837155466e-05, "loss": 1.5663, "step": 286500 }, { "epoch": 0.17, "learning_rate": 5.4837097871594906e-05, "loss": 1.5263, "step": 287000 }, { "epoch": 0.17, "learning_rate": 5.483499790603434e-05, "loss": 1.5318, "step": 287500 }, { "epoch": 0.17, "learning_rate": 5.483289794047377e-05, "loss": 1.5053, "step": 288000 }, { "epoch": 0.17, "learning_rate": 5.4830806374775453e-05, "loss": 1.528, "step": 288500 }, { "epoch": 0.17, "learning_rate": 5.4828706409214894e-05, "loss": 1.5249, "step": 289000 }, { "epoch": 0.17, "learning_rate": 5.482660644365432e-05, "loss": 1.5616, "step": 289500 }, { "epoch": 0.17, "learning_rate": 5.4824506478093754e-05, "loss": 1.5175, "step": 290000 }, { "epoch": 0.17, "learning_rate": 5.4822406512533194e-05, "loss": 1.5088, "step": 290500 }, { "epoch": 0.17, "learning_rate": 5.4820310746903754e-05, "loss": 1.5718, "step": 291000 }, { "epoch": 0.17, "learning_rate": 5.481821078134319e-05, "loss": 1.5678, "step": 291500 }, { "epoch": 0.18, "learning_rate": 5.4816110815782615e-05, "loss": 1.5313, "step": 292000 }, { "epoch": 0.18, "learning_rate": 5.4814010850222055e-05, "loss": 1.5149, "step": 292500 }, { "epoch": 0.18, "learning_rate": 5.481191088466149e-05, "loss": 1.542, "step": 293000 }, { "epoch": 0.18, "learning_rate": 5.480981091910092e-05, "loss": 1.5365, "step": 293500 }, { "epoch": 0.18, "learning_rate": 5.480771095354036e-05, "loss": 1.5118, "step": 294000 }, { "epoch": 0.18, "learning_rate": 5.4805610987979795e-05, "loss": 1.5491, "step": 294500 }, { "epoch": 0.18, "learning_rate": 5.480351102241923e-05, "loss": 1.5336, "step": 295000 }, { "epoch": 0.18, "learning_rate": 5.480141105685867e-05, "loss": 1.5175, "step": 295500 }, { "epoch": 0.18, "learning_rate": 5.479931529122922e-05, "loss": 1.5407, "step": 296000 }, { "epoch": 0.18, "learning_rate": 5.4797215325668656e-05, "loss": 1.5076, "step": 296500 }, { "epoch": 0.18, "learning_rate": 5.479511956003921e-05, "loss": 1.5337, "step": 297000 }, { "epoch": 0.18, "learning_rate": 5.479301959447865e-05, "loss": 1.5312, "step": 297500 }, { "epoch": 0.18, "learning_rate": 5.4790919628918083e-05, "loss": 1.5084, "step": 298000 }, { "epoch": 0.18, "learning_rate": 5.478881966335752e-05, "loss": 1.5111, "step": 298500 }, { "epoch": 0.18, "learning_rate": 5.478671969779696e-05, "loss": 1.5434, "step": 299000 }, { "epoch": 0.18, "learning_rate": 5.478461973223639e-05, "loss": 1.5214, "step": 299500 }, { "epoch": 0.18, "learning_rate": 5.4782519766675824e-05, "loss": 1.54, "step": 300000 }, { "epoch": 0.18, "eval_loss": 1.45011568069458, "eval_runtime": 1097.0232, "eval_samples_per_second": 480.136, "eval_steps_per_second": 80.023, "step": 300000 }, { "epoch": 0.18, "learning_rate": 5.478042400104638e-05, "loss": 1.5421, "step": 300500 }, { "epoch": 0.18, "learning_rate": 5.477832403548582e-05, "loss": 1.5362, "step": 301000 }, { "epoch": 0.18, "learning_rate": 5.477622406992525e-05, "loss": 1.5563, "step": 301500 }, { "epoch": 0.18, "learning_rate": 5.4774124104364685e-05, "loss": 1.5506, "step": 302000 }, { "epoch": 0.18, "learning_rate": 5.4772024138804125e-05, "loss": 1.5259, "step": 302500 }, { "epoch": 0.18, "learning_rate": 5.476992417324356e-05, "loss": 1.5519, "step": 303000 }, { "epoch": 0.18, "learning_rate": 5.476782840761411e-05, "loss": 1.5289, "step": 303500 }, { "epoch": 0.18, "learning_rate": 5.476572844205355e-05, "loss": 1.536, "step": 304000 }, { "epoch": 0.18, "learning_rate": 5.4763628476492986e-05, "loss": 1.5506, "step": 304500 }, { "epoch": 0.18, "learning_rate": 5.476152851093242e-05, "loss": 1.4987, "step": 305000 }, { "epoch": 0.18, "learning_rate": 5.475942854537186e-05, "loss": 1.5034, "step": 305500 }, { "epoch": 0.18, "learning_rate": 5.475732857981129e-05, "loss": 1.5534, "step": 306000 }, { "epoch": 0.18, "learning_rate": 5.4755228614250726e-05, "loss": 1.5103, "step": 306500 }, { "epoch": 0.18, "learning_rate": 5.475312864869016e-05, "loss": 1.5517, "step": 307000 }, { "epoch": 0.18, "learning_rate": 5.475103288306072e-05, "loss": 1.5442, "step": 307500 }, { "epoch": 0.18, "learning_rate": 5.4748937117431274e-05, "loss": 1.5199, "step": 308000 }, { "epoch": 0.18, "learning_rate": 5.474683715187071e-05, "loss": 1.4951, "step": 308500 }, { "epoch": 0.19, "learning_rate": 5.474473718631015e-05, "loss": 1.5294, "step": 309000 }, { "epoch": 0.19, "learning_rate": 5.474263722074958e-05, "loss": 1.5289, "step": 309500 }, { "epoch": 0.19, "learning_rate": 5.4740541455120134e-05, "loss": 1.4918, "step": 310000 }, { "epoch": 0.19, "learning_rate": 5.4738445689490695e-05, "loss": 1.5376, "step": 310500 }, { "epoch": 0.19, "learning_rate": 5.473634572393012e-05, "loss": 1.5349, "step": 311000 }, { "epoch": 0.19, "learning_rate": 5.473424575836956e-05, "loss": 1.4939, "step": 311500 }, { "epoch": 0.19, "learning_rate": 5.4732145792808995e-05, "loss": 1.5329, "step": 312000 }, { "epoch": 0.19, "learning_rate": 5.473004582724843e-05, "loss": 1.5036, "step": 312500 }, { "epoch": 0.19, "learning_rate": 5.472794586168787e-05, "loss": 1.5368, "step": 313000 }, { "epoch": 0.19, "learning_rate": 5.47258458961273e-05, "loss": 1.5455, "step": 313500 }, { "epoch": 0.19, "learning_rate": 5.4723745930566736e-05, "loss": 1.5161, "step": 314000 }, { "epoch": 0.19, "learning_rate": 5.4721645965006176e-05, "loss": 1.4974, "step": 314500 }, { "epoch": 0.19, "learning_rate": 5.471954599944561e-05, "loss": 1.5229, "step": 315000 }, { "epoch": 0.19, "learning_rate": 5.471744603388504e-05, "loss": 1.5048, "step": 315500 }, { "epoch": 0.19, "learning_rate": 5.471534606832448e-05, "loss": 1.551, "step": 316000 }, { "epoch": 0.19, "learning_rate": 5.471324610276391e-05, "loss": 1.5474, "step": 316500 }, { "epoch": 0.19, "learning_rate": 5.471114613720335e-05, "loss": 1.5466, "step": 317000 }, { "epoch": 0.19, "learning_rate": 5.470905037157391e-05, "loss": 1.5042, "step": 317500 }, { "epoch": 0.19, "learning_rate": 5.4706950406013344e-05, "loss": 1.5298, "step": 318000 }, { "epoch": 0.19, "learning_rate": 5.470485044045278e-05, "loss": 1.5123, "step": 318500 }, { "epoch": 0.19, "learning_rate": 5.470275047489221e-05, "loss": 1.5178, "step": 319000 }, { "epoch": 0.19, "learning_rate": 5.4700650509331644e-05, "loss": 1.5563, "step": 319500 }, { "epoch": 0.19, "learning_rate": 5.4698554743702205e-05, "loss": 1.5241, "step": 320000 }, { "epoch": 0.19, "learning_rate": 5.469645477814164e-05, "loss": 1.5211, "step": 320500 }, { "epoch": 0.19, "learning_rate": 5.469435901251219e-05, "loss": 1.5086, "step": 321000 }, { "epoch": 0.19, "learning_rate": 5.469225904695163e-05, "loss": 1.5145, "step": 321500 }, { "epoch": 0.19, "learning_rate": 5.4690159081391065e-05, "loss": 1.5319, "step": 322000 }, { "epoch": 0.19, "learning_rate": 5.46880591158305e-05, "loss": 1.5272, "step": 322500 }, { "epoch": 0.19, "learning_rate": 5.468595915026994e-05, "loss": 1.5315, "step": 323000 }, { "epoch": 0.19, "learning_rate": 5.4683859184709366e-05, "loss": 1.5158, "step": 323500 }, { "epoch": 0.19, "learning_rate": 5.4681759219148806e-05, "loss": 1.5042, "step": 324000 }, { "epoch": 0.19, "learning_rate": 5.467965925358824e-05, "loss": 1.5135, "step": 324500 }, { "epoch": 0.19, "learning_rate": 5.46775634879588e-05, "loss": 1.5164, "step": 325000 }, { "epoch": 0.2, "learning_rate": 5.467546352239823e-05, "loss": 1.578, "step": 325500 }, { "epoch": 0.2, "learning_rate": 5.467336355683767e-05, "loss": 1.5135, "step": 326000 }, { "epoch": 0.2, "learning_rate": 5.46712635912771e-05, "loss": 1.5154, "step": 326500 }, { "epoch": 0.2, "learning_rate": 5.4669163625716534e-05, "loss": 1.5164, "step": 327000 }, { "epoch": 0.2, "learning_rate": 5.4667063660155974e-05, "loss": 1.5277, "step": 327500 }, { "epoch": 0.2, "learning_rate": 5.466496369459541e-05, "loss": 1.5319, "step": 328000 }, { "epoch": 0.2, "learning_rate": 5.466286372903484e-05, "loss": 1.517, "step": 328500 }, { "epoch": 0.2, "learning_rate": 5.4660767963405394e-05, "loss": 1.5135, "step": 329000 }, { "epoch": 0.2, "learning_rate": 5.4658667997844835e-05, "loss": 1.5282, "step": 329500 }, { "epoch": 0.2, "learning_rate": 5.465656803228427e-05, "loss": 1.5398, "step": 330000 }, { "epoch": 0.2, "learning_rate": 5.465447226665483e-05, "loss": 1.5406, "step": 330500 }, { "epoch": 0.2, "learning_rate": 5.465237230109426e-05, "loss": 1.4958, "step": 331000 }, { "epoch": 0.2, "learning_rate": 5.4650272335533695e-05, "loss": 1.5157, "step": 331500 }, { "epoch": 0.2, "learning_rate": 5.464817236997313e-05, "loss": 1.5218, "step": 332000 }, { "epoch": 0.2, "learning_rate": 5.464607660434369e-05, "loss": 1.5262, "step": 332500 }, { "epoch": 0.2, "learning_rate": 5.464397663878312e-05, "loss": 1.5212, "step": 333000 }, { "epoch": 0.2, "learning_rate": 5.4641876673222556e-05, "loss": 1.5404, "step": 333500 }, { "epoch": 0.2, "learning_rate": 5.463977670766199e-05, "loss": 1.5006, "step": 334000 }, { "epoch": 0.2, "learning_rate": 5.463767674210143e-05, "loss": 1.5156, "step": 334500 }, { "epoch": 0.2, "learning_rate": 5.463558097647199e-05, "loss": 1.5274, "step": 335000 }, { "epoch": 0.2, "learning_rate": 5.463348101091142e-05, "loss": 1.5069, "step": 335500 }, { "epoch": 0.2, "learning_rate": 5.463138104535085e-05, "loss": 1.5011, "step": 336000 }, { "epoch": 0.2, "learning_rate": 5.462928107979029e-05, "loss": 1.5012, "step": 336500 }, { "epoch": 0.2, "learning_rate": 5.462718531416085e-05, "loss": 1.4935, "step": 337000 }, { "epoch": 0.2, "learning_rate": 5.4625085348600284e-05, "loss": 1.5293, "step": 337500 }, { "epoch": 0.2, "learning_rate": 5.462298538303972e-05, "loss": 1.4987, "step": 338000 }, { "epoch": 0.2, "learning_rate": 5.462088961741028e-05, "loss": 1.5118, "step": 338500 }, { "epoch": 0.2, "learning_rate": 5.461878965184971e-05, "loss": 1.5351, "step": 339000 }, { "epoch": 0.2, "learning_rate": 5.4616689686289145e-05, "loss": 1.4981, "step": 339500 }, { "epoch": 0.2, "learning_rate": 5.46145939206597e-05, "loss": 1.4874, "step": 340000 }, { "epoch": 0.2, "learning_rate": 5.461249395509914e-05, "loss": 1.5241, "step": 340500 }, { "epoch": 0.2, "learning_rate": 5.461039398953857e-05, "loss": 1.5351, "step": 341000 }, { "epoch": 0.2, "learning_rate": 5.4608294023978006e-05, "loss": 1.5116, "step": 341500 }, { "epoch": 0.21, "learning_rate": 5.4606194058417446e-05, "loss": 1.4991, "step": 342000 }, { "epoch": 0.21, "learning_rate": 5.460409409285687e-05, "loss": 1.5235, "step": 342500 }, { "epoch": 0.21, "learning_rate": 5.4601994127296306e-05, "loss": 1.4995, "step": 343000 }, { "epoch": 0.21, "learning_rate": 5.4599898361666866e-05, "loss": 1.4979, "step": 343500 }, { "epoch": 0.21, "learning_rate": 5.459779839610631e-05, "loss": 1.4825, "step": 344000 }, { "epoch": 0.21, "learning_rate": 5.459569843054574e-05, "loss": 1.527, "step": 344500 }, { "epoch": 0.21, "learning_rate": 5.4593598464985174e-05, "loss": 1.5359, "step": 345000 }, { "epoch": 0.21, "learning_rate": 5.4591502699355734e-05, "loss": 1.4982, "step": 345500 }, { "epoch": 0.21, "learning_rate": 5.458940273379517e-05, "loss": 1.5081, "step": 346000 }, { "epoch": 0.21, "learning_rate": 5.45873027682346e-05, "loss": 1.5767, "step": 346500 }, { "epoch": 0.21, "learning_rate": 5.458520280267404e-05, "loss": 1.5586, "step": 347000 }, { "epoch": 0.21, "learning_rate": 5.458310283711347e-05, "loss": 1.4968, "step": 347500 }, { "epoch": 0.21, "learning_rate": 5.45810028715529e-05, "loss": 1.5282, "step": 348000 }, { "epoch": 0.21, "learning_rate": 5.457890290599234e-05, "loss": 1.5299, "step": 348500 }, { "epoch": 0.21, "learning_rate": 5.4576802940431775e-05, "loss": 1.5139, "step": 349000 }, { "epoch": 0.21, "learning_rate": 5.457470297487121e-05, "loss": 1.5233, "step": 349500 }, { "epoch": 0.21, "learning_rate": 5.457260300931065e-05, "loss": 1.5259, "step": 350000 }, { "epoch": 0.21, "learning_rate": 5.457050304375008e-05, "loss": 1.5051, "step": 350500 }, { "epoch": 0.21, "learning_rate": 5.4568403078189516e-05, "loss": 1.5282, "step": 351000 }, { "epoch": 0.21, "learning_rate": 5.4566303112628956e-05, "loss": 1.4965, "step": 351500 }, { "epoch": 0.21, "learning_rate": 5.456420734699951e-05, "loss": 1.5031, "step": 352000 }, { "epoch": 0.21, "learning_rate": 5.456210738143894e-05, "loss": 1.5164, "step": 352500 }, { "epoch": 0.21, "learning_rate": 5.4560011615809496e-05, "loss": 1.5002, "step": 353000 }, { "epoch": 0.21, "learning_rate": 5.455791165024894e-05, "loss": 1.5069, "step": 353500 }, { "epoch": 0.21, "learning_rate": 5.455581168468837e-05, "loss": 1.551, "step": 354000 }, { "epoch": 0.21, "learning_rate": 5.4553711719127804e-05, "loss": 1.5206, "step": 354500 }, { "epoch": 0.21, "learning_rate": 5.4551611753567244e-05, "loss": 1.505, "step": 355000 }, { "epoch": 0.21, "learning_rate": 5.454951178800668e-05, "loss": 1.4968, "step": 355500 }, { "epoch": 0.21, "learning_rate": 5.454741182244611e-05, "loss": 1.5227, "step": 356000 }, { "epoch": 0.21, "learning_rate": 5.454531185688555e-05, "loss": 1.5417, "step": 356500 }, { "epoch": 0.21, "learning_rate": 5.4543211891324984e-05, "loss": 1.4893, "step": 357000 }, { "epoch": 0.21, "learning_rate": 5.454111612569554e-05, "loss": 1.4669, "step": 357500 }, { "epoch": 0.21, "learning_rate": 5.453901616013497e-05, "loss": 1.4808, "step": 358000 }, { "epoch": 0.21, "learning_rate": 5.453691619457441e-05, "loss": 1.4919, "step": 358500 }, { "epoch": 0.22, "learning_rate": 5.4534816229013845e-05, "loss": 1.5133, "step": 359000 }, { "epoch": 0.22, "learning_rate": 5.453272466331552e-05, "loss": 1.5113, "step": 359500 }, { "epoch": 0.22, "learning_rate": 5.453062889768608e-05, "loss": 1.4929, "step": 360000 }, { "epoch": 0.22, "learning_rate": 5.452852893212551e-05, "loss": 1.5048, "step": 360500 }, { "epoch": 0.22, "learning_rate": 5.452642896656495e-05, "loss": 1.5149, "step": 361000 }, { "epoch": 0.22, "learning_rate": 5.4524329001004386e-05, "loss": 1.523, "step": 361500 }, { "epoch": 0.22, "learning_rate": 5.452222903544381e-05, "loss": 1.5208, "step": 362000 }, { "epoch": 0.22, "learning_rate": 5.452013326981437e-05, "loss": 1.5038, "step": 362500 }, { "epoch": 0.22, "learning_rate": 5.4518033304253814e-05, "loss": 1.5023, "step": 363000 }, { "epoch": 0.22, "learning_rate": 5.451593333869325e-05, "loss": 1.507, "step": 363500 }, { "epoch": 0.22, "learning_rate": 5.4513833373132674e-05, "loss": 1.508, "step": 364000 }, { "epoch": 0.22, "learning_rate": 5.4511733407572114e-05, "loss": 1.506, "step": 364500 }, { "epoch": 0.22, "learning_rate": 5.450963344201155e-05, "loss": 1.5085, "step": 365000 }, { "epoch": 0.22, "learning_rate": 5.450753767638211e-05, "loss": 1.5063, "step": 365500 }, { "epoch": 0.22, "learning_rate": 5.450543771082154e-05, "loss": 1.5293, "step": 366000 }, { "epoch": 0.22, "learning_rate": 5.4503337745260975e-05, "loss": 1.4851, "step": 366500 }, { "epoch": 0.22, "learning_rate": 5.450123777970041e-05, "loss": 1.5298, "step": 367000 }, { "epoch": 0.22, "learning_rate": 5.449913781413985e-05, "loss": 1.4931, "step": 367500 }, { "epoch": 0.22, "learning_rate": 5.449703784857928e-05, "loss": 1.5235, "step": 368000 }, { "epoch": 0.22, "learning_rate": 5.4494937883018715e-05, "loss": 1.5155, "step": 368500 }, { "epoch": 0.22, "learning_rate": 5.4492837917458156e-05, "loss": 1.4876, "step": 369000 }, { "epoch": 0.22, "learning_rate": 5.449073795189759e-05, "loss": 1.5209, "step": 369500 }, { "epoch": 0.22, "learning_rate": 5.448864218626814e-05, "loss": 1.5165, "step": 370000 }, { "epoch": 0.22, "learning_rate": 5.4486542220707576e-05, "loss": 1.5166, "step": 370500 }, { "epoch": 0.22, "learning_rate": 5.4484442255147016e-05, "loss": 1.4801, "step": 371000 }, { "epoch": 0.22, "learning_rate": 5.448234648951757e-05, "loss": 1.4763, "step": 371500 }, { "epoch": 0.22, "learning_rate": 5.4480246523957e-05, "loss": 1.4971, "step": 372000 }, { "epoch": 0.22, "learning_rate": 5.447814655839644e-05, "loss": 1.5724, "step": 372500 }, { "epoch": 0.22, "learning_rate": 5.447604659283588e-05, "loss": 1.5202, "step": 373000 }, { "epoch": 0.22, "learning_rate": 5.447394662727531e-05, "loss": 1.4959, "step": 373500 }, { "epoch": 0.22, "learning_rate": 5.4471846661714744e-05, "loss": 1.525, "step": 374000 }, { "epoch": 0.22, "learning_rate": 5.4469746696154184e-05, "loss": 1.5232, "step": 374500 }, { "epoch": 0.22, "learning_rate": 5.446764673059362e-05, "loss": 1.4812, "step": 375000 }, { "epoch": 0.23, "learning_rate": 5.446554676503306e-05, "loss": 1.5136, "step": 375500 }, { "epoch": 0.23, "learning_rate": 5.446345099940361e-05, "loss": 1.528, "step": 376000 }, { "epoch": 0.23, "learning_rate": 5.4461351033843045e-05, "loss": 1.4843, "step": 376500 }, { "epoch": 0.23, "learning_rate": 5.44592552682136e-05, "loss": 1.5356, "step": 377000 }, { "epoch": 0.23, "learning_rate": 5.445715530265303e-05, "loss": 1.4876, "step": 377500 }, { "epoch": 0.23, "learning_rate": 5.445505533709247e-05, "loss": 1.4893, "step": 378000 }, { "epoch": 0.23, "learning_rate": 5.4452955371531906e-05, "loss": 1.4816, "step": 378500 }, { "epoch": 0.23, "learning_rate": 5.445085540597134e-05, "loss": 1.4766, "step": 379000 }, { "epoch": 0.23, "learning_rate": 5.444875544041078e-05, "loss": 1.5296, "step": 379500 }, { "epoch": 0.23, "learning_rate": 5.444665547485021e-05, "loss": 1.4964, "step": 380000 }, { "epoch": 0.23, "learning_rate": 5.4444555509289646e-05, "loss": 1.4718, "step": 380500 }, { "epoch": 0.23, "learning_rate": 5.4442455543729086e-05, "loss": 1.5037, "step": 381000 }, { "epoch": 0.23, "learning_rate": 5.444035977809964e-05, "loss": 1.5162, "step": 381500 }, { "epoch": 0.23, "learning_rate": 5.4438259812539073e-05, "loss": 1.468, "step": 382000 }, { "epoch": 0.23, "learning_rate": 5.4436159846978514e-05, "loss": 1.5389, "step": 382500 }, { "epoch": 0.23, "learning_rate": 5.443405988141795e-05, "loss": 1.4856, "step": 383000 }, { "epoch": 0.23, "learning_rate": 5.44319641157885e-05, "loss": 1.518, "step": 383500 }, { "epoch": 0.23, "learning_rate": 5.4429864150227934e-05, "loss": 1.5001, "step": 384000 }, { "epoch": 0.23, "learning_rate": 5.4427764184667374e-05, "loss": 1.5253, "step": 384500 }, { "epoch": 0.23, "learning_rate": 5.442566421910681e-05, "loss": 1.5021, "step": 385000 }, { "epoch": 0.23, "learning_rate": 5.442356845347736e-05, "loss": 1.4786, "step": 385500 }, { "epoch": 0.23, "learning_rate": 5.4421468487916795e-05, "loss": 1.4512, "step": 386000 }, { "epoch": 0.23, "learning_rate": 5.4419368522356235e-05, "loss": 1.4892, "step": 386500 }, { "epoch": 0.23, "learning_rate": 5.441726855679567e-05, "loss": 1.4961, "step": 387000 }, { "epoch": 0.23, "learning_rate": 5.44151685912351e-05, "loss": 1.48, "step": 387500 }, { "epoch": 0.23, "learning_rate": 5.441307282560566e-05, "loss": 1.4948, "step": 388000 }, { "epoch": 0.23, "learning_rate": 5.4410972860045096e-05, "loss": 1.4842, "step": 388500 }, { "epoch": 0.23, "learning_rate": 5.440887289448453e-05, "loss": 1.4811, "step": 389000 }, { "epoch": 0.23, "learning_rate": 5.440677292892397e-05, "loss": 1.4699, "step": 389500 }, { "epoch": 0.23, "learning_rate": 5.44046729633634e-05, "loss": 1.5123, "step": 390000 }, { "epoch": 0.23, "learning_rate": 5.4402572997802837e-05, "loss": 1.4864, "step": 390500 }, { "epoch": 0.23, "learning_rate": 5.440047303224227e-05, "loss": 1.5081, "step": 391000 }, { "epoch": 0.23, "learning_rate": 5.4398373066681703e-05, "loss": 1.4964, "step": 391500 }, { "epoch": 0.24, "learning_rate": 5.4396277301052264e-05, "loss": 1.5164, "step": 392000 }, { "epoch": 0.24, "learning_rate": 5.439418153542282e-05, "loss": 1.4802, "step": 392500 }, { "epoch": 0.24, "learning_rate": 5.439208156986225e-05, "loss": 1.5002, "step": 393000 }, { "epoch": 0.24, "learning_rate": 5.438998160430169e-05, "loss": 1.5082, "step": 393500 }, { "epoch": 0.24, "learning_rate": 5.4387881638741125e-05, "loss": 1.5304, "step": 394000 }, { "epoch": 0.24, "learning_rate": 5.438578167318056e-05, "loss": 1.4962, "step": 394500 }, { "epoch": 0.24, "learning_rate": 5.438368590755112e-05, "loss": 1.4784, "step": 395000 }, { "epoch": 0.24, "learning_rate": 5.438158594199055e-05, "loss": 1.4968, "step": 395500 }, { "epoch": 0.24, "learning_rate": 5.4379485976429985e-05, "loss": 1.4957, "step": 396000 }, { "epoch": 0.24, "learning_rate": 5.4377386010869425e-05, "loss": 1.5053, "step": 396500 }, { "epoch": 0.24, "learning_rate": 5.437528604530886e-05, "loss": 1.5131, "step": 397000 }, { "epoch": 0.24, "learning_rate": 5.437318607974829e-05, "loss": 1.4995, "step": 397500 }, { "epoch": 0.24, "learning_rate": 5.437108611418773e-05, "loss": 1.4867, "step": 398000 }, { "epoch": 0.24, "learning_rate": 5.436898614862716e-05, "loss": 1.4864, "step": 398500 }, { "epoch": 0.24, "learning_rate": 5.436688618306659e-05, "loss": 1.485, "step": 399000 }, { "epoch": 0.24, "learning_rate": 5.436478621750603e-05, "loss": 1.5123, "step": 399500 }, { "epoch": 0.24, "learning_rate": 5.436269045187659e-05, "loss": 1.5177, "step": 400000 }, { "epoch": 0.24, "eval_loss": 1.4208892583847046, "eval_runtime": 1102.0508, "eval_samples_per_second": 477.945, "eval_steps_per_second": 79.658, "step": 400000 }, { "epoch": 0.24, "learning_rate": 5.436059048631602e-05, "loss": 1.4802, "step": 400500 }, { "epoch": 0.24, "learning_rate": 5.4358490520755453e-05, "loss": 1.4977, "step": 401000 }, { "epoch": 0.24, "learning_rate": 5.4356394755126014e-05, "loss": 1.5022, "step": 401500 }, { "epoch": 0.24, "learning_rate": 5.4354294789565454e-05, "loss": 1.4652, "step": 402000 }, { "epoch": 0.24, "learning_rate": 5.435219902393601e-05, "loss": 1.4737, "step": 402500 }, { "epoch": 0.24, "learning_rate": 5.435009905837544e-05, "loss": 1.5345, "step": 403000 }, { "epoch": 0.24, "learning_rate": 5.434799909281488e-05, "loss": 1.5055, "step": 403500 }, { "epoch": 0.24, "learning_rate": 5.4345899127254315e-05, "loss": 1.5234, "step": 404000 }, { "epoch": 0.24, "learning_rate": 5.434379916169375e-05, "loss": 1.4716, "step": 404500 }, { "epoch": 0.24, "learning_rate": 5.434169919613319e-05, "loss": 1.4981, "step": 405000 }, { "epoch": 0.24, "learning_rate": 5.4339599230572615e-05, "loss": 1.5184, "step": 405500 }, { "epoch": 0.24, "learning_rate": 5.433749926501205e-05, "loss": 1.4997, "step": 406000 }, { "epoch": 0.24, "learning_rate": 5.433540349938261e-05, "loss": 1.4904, "step": 406500 }, { "epoch": 0.24, "learning_rate": 5.433330353382205e-05, "loss": 1.476, "step": 407000 }, { "epoch": 0.24, "learning_rate": 5.433120356826148e-05, "loss": 1.4736, "step": 407500 }, { "epoch": 0.24, "learning_rate": 5.432910360270091e-05, "loss": 1.5078, "step": 408000 }, { "epoch": 0.24, "learning_rate": 5.432700783707147e-05, "loss": 1.4695, "step": 408500 }, { "epoch": 0.25, "learning_rate": 5.432491207144203e-05, "loss": 1.5021, "step": 409000 }, { "epoch": 0.25, "learning_rate": 5.4322812105881464e-05, "loss": 1.5098, "step": 409500 }, { "epoch": 0.25, "learning_rate": 5.43207121403209e-05, "loss": 1.491, "step": 410000 }, { "epoch": 0.25, "learning_rate": 5.431861217476034e-05, "loss": 1.4809, "step": 410500 }, { "epoch": 0.25, "learning_rate": 5.431651220919977e-05, "loss": 1.4879, "step": 411000 }, { "epoch": 0.25, "learning_rate": 5.4314412243639204e-05, "loss": 1.5112, "step": 411500 }, { "epoch": 0.25, "learning_rate": 5.4312312278078644e-05, "loss": 1.5208, "step": 412000 }, { "epoch": 0.25, "learning_rate": 5.431021231251807e-05, "loss": 1.4875, "step": 412500 }, { "epoch": 0.25, "learning_rate": 5.4308112346957505e-05, "loss": 1.4839, "step": 413000 }, { "epoch": 0.25, "learning_rate": 5.4306012381396945e-05, "loss": 1.5076, "step": 413500 }, { "epoch": 0.25, "learning_rate": 5.430391241583638e-05, "loss": 1.5046, "step": 414000 }, { "epoch": 0.25, "learning_rate": 5.430181245027581e-05, "loss": 1.5053, "step": 414500 }, { "epoch": 0.25, "learning_rate": 5.429971248471525e-05, "loss": 1.479, "step": 415000 }, { "epoch": 0.25, "learning_rate": 5.4297616719085805e-05, "loss": 1.5168, "step": 415500 }, { "epoch": 0.25, "learning_rate": 5.429551675352524e-05, "loss": 1.46, "step": 416000 }, { "epoch": 0.25, "learning_rate": 5.429341678796467e-05, "loss": 1.4813, "step": 416500 }, { "epoch": 0.25, "learning_rate": 5.429131682240411e-05, "loss": 1.4848, "step": 417000 }, { "epoch": 0.25, "learning_rate": 5.4289221056774666e-05, "loss": 1.504, "step": 417500 }, { "epoch": 0.25, "learning_rate": 5.42871210912141e-05, "loss": 1.4767, "step": 418000 }, { "epoch": 0.25, "learning_rate": 5.428502532558466e-05, "loss": 1.4702, "step": 418500 }, { "epoch": 0.25, "learning_rate": 5.42829253600241e-05, "loss": 1.5099, "step": 419000 }, { "epoch": 0.25, "learning_rate": 5.428082539446353e-05, "loss": 1.4795, "step": 419500 }, { "epoch": 0.25, "learning_rate": 5.427872962883409e-05, "loss": 1.5049, "step": 420000 }, { "epoch": 0.25, "learning_rate": 5.427662966327352e-05, "loss": 1.5336, "step": 420500 }, { "epoch": 0.25, "learning_rate": 5.427452969771296e-05, "loss": 1.5044, "step": 421000 }, { "epoch": 0.25, "learning_rate": 5.4272429732152394e-05, "loss": 1.5018, "step": 421500 }, { "epoch": 0.25, "learning_rate": 5.427032976659182e-05, "loss": 1.4736, "step": 422000 }, { "epoch": 0.25, "learning_rate": 5.426822980103126e-05, "loss": 1.4819, "step": 422500 }, { "epoch": 0.25, "learning_rate": 5.4266129835470695e-05, "loss": 1.4975, "step": 423000 }, { "epoch": 0.25, "learning_rate": 5.426402986991013e-05, "loss": 1.497, "step": 423500 }, { "epoch": 0.25, "learning_rate": 5.426192990434957e-05, "loss": 1.4882, "step": 424000 }, { "epoch": 0.25, "learning_rate": 5.4259829938789e-05, "loss": 1.4715, "step": 424500 }, { "epoch": 0.25, "learning_rate": 5.4257729973228435e-05, "loss": 1.4678, "step": 425000 }, { "epoch": 0.26, "learning_rate": 5.4255630007667876e-05, "loss": 1.5004, "step": 425500 }, { "epoch": 0.26, "learning_rate": 5.425353004210731e-05, "loss": 1.4857, "step": 426000 }, { "epoch": 0.26, "learning_rate": 5.425143007654674e-05, "loss": 1.4719, "step": 426500 }, { "epoch": 0.26, "learning_rate": 5.424933011098618e-05, "loss": 1.5231, "step": 427000 }, { "epoch": 0.26, "learning_rate": 5.424723014542561e-05, "loss": 1.51, "step": 427500 }, { "epoch": 0.26, "learning_rate": 5.424513437979617e-05, "loss": 1.4926, "step": 428000 }, { "epoch": 0.26, "learning_rate": 5.424303441423561e-05, "loss": 1.4765, "step": 428500 }, { "epoch": 0.26, "learning_rate": 5.4240934448675044e-05, "loss": 1.4609, "step": 429000 }, { "epoch": 0.26, "learning_rate": 5.42388386830456e-05, "loss": 1.5168, "step": 429500 }, { "epoch": 0.26, "learning_rate": 5.423674291741615e-05, "loss": 1.481, "step": 430000 }, { "epoch": 0.26, "learning_rate": 5.4234642951855584e-05, "loss": 1.4786, "step": 430500 }, { "epoch": 0.26, "learning_rate": 5.4232542986295024e-05, "loss": 1.4847, "step": 431000 }, { "epoch": 0.26, "learning_rate": 5.423044302073446e-05, "loss": 1.4862, "step": 431500 }, { "epoch": 0.26, "learning_rate": 5.422834305517389e-05, "loss": 1.4737, "step": 432000 }, { "epoch": 0.26, "learning_rate": 5.422624308961333e-05, "loss": 1.4638, "step": 432500 }, { "epoch": 0.26, "learning_rate": 5.4224143124052765e-05, "loss": 1.4685, "step": 433000 }, { "epoch": 0.26, "learning_rate": 5.4222043158492205e-05, "loss": 1.5195, "step": 433500 }, { "epoch": 0.26, "learning_rate": 5.421994319293164e-05, "loss": 1.5087, "step": 434000 }, { "epoch": 0.26, "learning_rate": 5.421784322737107e-05, "loss": 1.469, "step": 434500 }, { "epoch": 0.26, "learning_rate": 5.4215743261810506e-05, "loss": 1.4829, "step": 435000 }, { "epoch": 0.26, "learning_rate": 5.421364329624994e-05, "loss": 1.4669, "step": 435500 }, { "epoch": 0.26, "learning_rate": 5.421154333068937e-05, "loss": 1.4557, "step": 436000 }, { "epoch": 0.26, "learning_rate": 5.420945176499105e-05, "loss": 1.5271, "step": 436500 }, { "epoch": 0.26, "learning_rate": 5.4207355999361607e-05, "loss": 1.5119, "step": 437000 }, { "epoch": 0.26, "learning_rate": 5.420525603380104e-05, "loss": 1.4962, "step": 437500 }, { "epoch": 0.26, "learning_rate": 5.420315606824048e-05, "loss": 1.4752, "step": 438000 }, { "epoch": 0.26, "learning_rate": 5.4201056102679914e-05, "loss": 1.4952, "step": 438500 }, { "epoch": 0.26, "learning_rate": 5.419895613711935e-05, "loss": 1.5031, "step": 439000 }, { "epoch": 0.26, "learning_rate": 5.419685617155879e-05, "loss": 1.5013, "step": 439500 }, { "epoch": 0.26, "learning_rate": 5.419476040592934e-05, "loss": 1.4796, "step": 440000 }, { "epoch": 0.26, "learning_rate": 5.4192660440368774e-05, "loss": 1.4715, "step": 440500 }, { "epoch": 0.26, "learning_rate": 5.4190560474808215e-05, "loss": 1.5148, "step": 441000 }, { "epoch": 0.26, "learning_rate": 5.418846050924765e-05, "loss": 1.4945, "step": 441500 }, { "epoch": 0.26, "learning_rate": 5.418636054368708e-05, "loss": 1.4679, "step": 442000 }, { "epoch": 0.27, "learning_rate": 5.418426057812652e-05, "loss": 1.4923, "step": 442500 }, { "epoch": 0.27, "learning_rate": 5.4182160612565955e-05, "loss": 1.4764, "step": 443000 }, { "epoch": 0.27, "learning_rate": 5.418006064700539e-05, "loss": 1.4813, "step": 443500 }, { "epoch": 0.27, "learning_rate": 5.417796488137594e-05, "loss": 1.5003, "step": 444000 }, { "epoch": 0.27, "learning_rate": 5.417586491581538e-05, "loss": 1.4896, "step": 444500 }, { "epoch": 0.27, "learning_rate": 5.4173764950254816e-05, "loss": 1.458, "step": 445000 }, { "epoch": 0.27, "learning_rate": 5.417166498469425e-05, "loss": 1.4845, "step": 445500 }, { "epoch": 0.27, "learning_rate": 5.416956501913369e-05, "loss": 1.4785, "step": 446000 }, { "epoch": 0.27, "learning_rate": 5.416746925350424e-05, "loss": 1.4766, "step": 446500 }, { "epoch": 0.27, "learning_rate": 5.416536928794368e-05, "loss": 1.4629, "step": 447000 }, { "epoch": 0.27, "learning_rate": 5.416327352231423e-05, "loss": 1.4764, "step": 447500 }, { "epoch": 0.27, "learning_rate": 5.416117355675367e-05, "loss": 1.4984, "step": 448000 }, { "epoch": 0.27, "learning_rate": 5.4159073591193104e-05, "loss": 1.5047, "step": 448500 }, { "epoch": 0.27, "learning_rate": 5.415697362563254e-05, "loss": 1.483, "step": 449000 }, { "epoch": 0.27, "learning_rate": 5.415487366007198e-05, "loss": 1.4727, "step": 449500 }, { "epoch": 0.27, "learning_rate": 5.415277369451141e-05, "loss": 1.466, "step": 450000 }, { "epoch": 0.27, "learning_rate": 5.4150673728950845e-05, "loss": 1.4967, "step": 450500 }, { "epoch": 0.27, "learning_rate": 5.4148573763390285e-05, "loss": 1.5263, "step": 451000 }, { "epoch": 0.27, "learning_rate": 5.414647379782971e-05, "loss": 1.4971, "step": 451500 }, { "epoch": 0.27, "learning_rate": 5.4144373832269145e-05, "loss": 1.4895, "step": 452000 }, { "epoch": 0.27, "learning_rate": 5.4142278066639705e-05, "loss": 1.4727, "step": 452500 }, { "epoch": 0.27, "learning_rate": 5.4140178101079146e-05, "loss": 1.5039, "step": 453000 }, { "epoch": 0.27, "learning_rate": 5.41380823354497e-05, "loss": 1.4509, "step": 453500 }, { "epoch": 0.27, "learning_rate": 5.413598236988913e-05, "loss": 1.4789, "step": 454000 }, { "epoch": 0.27, "learning_rate": 5.413388240432857e-05, "loss": 1.4908, "step": 454500 }, { "epoch": 0.27, "learning_rate": 5.4131782438768006e-05, "loss": 1.4847, "step": 455000 }, { "epoch": 0.27, "learning_rate": 5.412968247320744e-05, "loss": 1.4833, "step": 455500 }, { "epoch": 0.27, "learning_rate": 5.412758250764687e-05, "loss": 1.4864, "step": 456000 }, { "epoch": 0.27, "learning_rate": 5.412548254208631e-05, "loss": 1.4836, "step": 456500 }, { "epoch": 0.27, "learning_rate": 5.412338257652574e-05, "loss": 1.4851, "step": 457000 }, { "epoch": 0.27, "learning_rate": 5.412128261096518e-05, "loss": 1.4735, "step": 457500 }, { "epoch": 0.27, "learning_rate": 5.4119182645404614e-05, "loss": 1.4734, "step": 458000 }, { "epoch": 0.27, "learning_rate": 5.411708267984405e-05, "loss": 1.4755, "step": 458500 }, { "epoch": 0.28, "learning_rate": 5.41149869142146e-05, "loss": 1.4784, "step": 459000 }, { "epoch": 0.28, "learning_rate": 5.411288694865404e-05, "loss": 1.4712, "step": 459500 }, { "epoch": 0.28, "learning_rate": 5.4110786983093475e-05, "loss": 1.4509, "step": 460000 }, { "epoch": 0.28, "learning_rate": 5.410868701753291e-05, "loss": 1.4527, "step": 460500 }, { "epoch": 0.28, "learning_rate": 5.410658705197235e-05, "loss": 1.4796, "step": 461000 }, { "epoch": 0.28, "learning_rate": 5.41044912863429e-05, "loss": 1.4916, "step": 461500 }, { "epoch": 0.28, "learning_rate": 5.4102391320782335e-05, "loss": 1.4592, "step": 462000 }, { "epoch": 0.28, "learning_rate": 5.4100291355221776e-05, "loss": 1.5036, "step": 462500 }, { "epoch": 0.28, "learning_rate": 5.409819138966121e-05, "loss": 1.4846, "step": 463000 }, { "epoch": 0.28, "learning_rate": 5.409609142410064e-05, "loss": 1.4955, "step": 463500 }, { "epoch": 0.28, "learning_rate": 5.4093995658471196e-05, "loss": 1.4611, "step": 464000 }, { "epoch": 0.28, "learning_rate": 5.4091895692910636e-05, "loss": 1.4841, "step": 464500 }, { "epoch": 0.28, "learning_rate": 5.408979572735007e-05, "loss": 1.4605, "step": 465000 }, { "epoch": 0.28, "learning_rate": 5.40876957617895e-05, "loss": 1.4769, "step": 465500 }, { "epoch": 0.28, "learning_rate": 5.4085595796228943e-05, "loss": 1.4751, "step": 466000 }, { "epoch": 0.28, "learning_rate": 5.408349583066838e-05, "loss": 1.485, "step": 466500 }, { "epoch": 0.28, "learning_rate": 5.408139586510781e-05, "loss": 1.4905, "step": 467000 }, { "epoch": 0.28, "learning_rate": 5.407929589954725e-05, "loss": 1.4479, "step": 467500 }, { "epoch": 0.28, "learning_rate": 5.4077195933986684e-05, "loss": 1.4639, "step": 468000 }, { "epoch": 0.28, "learning_rate": 5.407509596842612e-05, "loss": 1.5038, "step": 468500 }, { "epoch": 0.28, "learning_rate": 5.407300020279667e-05, "loss": 1.4514, "step": 469000 }, { "epoch": 0.28, "learning_rate": 5.407090443716723e-05, "loss": 1.4981, "step": 469500 }, { "epoch": 0.28, "learning_rate": 5.4068804471606665e-05, "loss": 1.4475, "step": 470000 }, { "epoch": 0.28, "learning_rate": 5.40667045060461e-05, "loss": 1.4781, "step": 470500 }, { "epoch": 0.28, "learning_rate": 5.406460454048554e-05, "loss": 1.4666, "step": 471000 }, { "epoch": 0.28, "learning_rate": 5.406250877485609e-05, "loss": 1.4754, "step": 471500 }, { "epoch": 0.28, "learning_rate": 5.406041300922665e-05, "loss": 1.4858, "step": 472000 }, { "epoch": 0.28, "learning_rate": 5.4058313043666086e-05, "loss": 1.4444, "step": 472500 }, { "epoch": 0.28, "learning_rate": 5.405621307810551e-05, "loss": 1.5036, "step": 473000 }, { "epoch": 0.28, "learning_rate": 5.405411311254495e-05, "loss": 1.4703, "step": 473500 }, { "epoch": 0.28, "learning_rate": 5.4052013146984386e-05, "loss": 1.4395, "step": 474000 }, { "epoch": 0.28, "learning_rate": 5.404991318142382e-05, "loss": 1.458, "step": 474500 }, { "epoch": 0.28, "learning_rate": 5.404781321586326e-05, "loss": 1.4904, "step": 475000 }, { "epoch": 0.29, "learning_rate": 5.4045713250302694e-05, "loss": 1.4722, "step": 475500 }, { "epoch": 0.29, "learning_rate": 5.404361328474213e-05, "loss": 1.4849, "step": 476000 }, { "epoch": 0.29, "learning_rate": 5.404151331918157e-05, "loss": 1.4861, "step": 476500 }, { "epoch": 0.29, "learning_rate": 5.4039413353621e-05, "loss": 1.4941, "step": 477000 }, { "epoch": 0.29, "learning_rate": 5.4037313388060434e-05, "loss": 1.4892, "step": 477500 }, { "epoch": 0.29, "learning_rate": 5.4035213422499874e-05, "loss": 1.4417, "step": 478000 }, { "epoch": 0.29, "learning_rate": 5.40331134569393e-05, "loss": 1.4741, "step": 478500 }, { "epoch": 0.29, "learning_rate": 5.403101349137874e-05, "loss": 1.4821, "step": 479000 }, { "epoch": 0.29, "learning_rate": 5.4028913525818175e-05, "loss": 1.4759, "step": 479500 }, { "epoch": 0.29, "learning_rate": 5.4026817760188735e-05, "loss": 1.4759, "step": 480000 }, { "epoch": 0.29, "learning_rate": 5.402472199455929e-05, "loss": 1.4996, "step": 480500 }, { "epoch": 0.29, "learning_rate": 5.402262202899872e-05, "loss": 1.4596, "step": 481000 }, { "epoch": 0.29, "learning_rate": 5.402052206343816e-05, "loss": 1.4686, "step": 481500 }, { "epoch": 0.29, "learning_rate": 5.4018422097877596e-05, "loss": 1.4766, "step": 482000 }, { "epoch": 0.29, "learning_rate": 5.401632633224815e-05, "loss": 1.4807, "step": 482500 }, { "epoch": 0.29, "learning_rate": 5.401422636668758e-05, "loss": 1.4772, "step": 483000 }, { "epoch": 0.29, "learning_rate": 5.401212640112702e-05, "loss": 1.4749, "step": 483500 }, { "epoch": 0.29, "learning_rate": 5.4010026435566457e-05, "loss": 1.4796, "step": 484000 }, { "epoch": 0.29, "learning_rate": 5.400792647000589e-05, "loss": 1.4645, "step": 484500 }, { "epoch": 0.29, "learning_rate": 5.400582650444533e-05, "loss": 1.486, "step": 485000 }, { "epoch": 0.29, "learning_rate": 5.400372653888476e-05, "loss": 1.4682, "step": 485500 }, { "epoch": 0.29, "learning_rate": 5.40016265733242e-05, "loss": 1.4471, "step": 486000 }, { "epoch": 0.29, "learning_rate": 5.399952660776363e-05, "loss": 1.4638, "step": 486500 }, { "epoch": 0.29, "learning_rate": 5.399743084213419e-05, "loss": 1.4751, "step": 487000 }, { "epoch": 0.29, "learning_rate": 5.3995330876573624e-05, "loss": 1.4749, "step": 487500 }, { "epoch": 0.29, "learning_rate": 5.399323511094418e-05, "loss": 1.4473, "step": 488000 }, { "epoch": 0.29, "learning_rate": 5.399113514538362e-05, "loss": 1.4827, "step": 488500 }, { "epoch": 0.29, "learning_rate": 5.398903517982305e-05, "loss": 1.4956, "step": 489000 }, { "epoch": 0.29, "learning_rate": 5.3986935214262485e-05, "loss": 1.4725, "step": 489500 }, { "epoch": 0.29, "learning_rate": 5.3984835248701925e-05, "loss": 1.4544, "step": 490000 }, { "epoch": 0.29, "learning_rate": 5.398273948307248e-05, "loss": 1.4674, "step": 490500 }, { "epoch": 0.29, "learning_rate": 5.398063951751191e-05, "loss": 1.48, "step": 491000 }, { "epoch": 0.29, "learning_rate": 5.3978539551951346e-05, "loss": 1.4429, "step": 491500 }, { "epoch": 0.29, "learning_rate": 5.3976439586390786e-05, "loss": 1.4687, "step": 492000 }, { "epoch": 0.3, "learning_rate": 5.397433962083021e-05, "loss": 1.4786, "step": 492500 }, { "epoch": 0.3, "learning_rate": 5.397223965526965e-05, "loss": 1.4867, "step": 493000 }, { "epoch": 0.3, "learning_rate": 5.3970139689709086e-05, "loss": 1.5085, "step": 493500 }, { "epoch": 0.3, "learning_rate": 5.396803972414852e-05, "loss": 1.4532, "step": 494000 }, { "epoch": 0.3, "learning_rate": 5.396594395851908e-05, "loss": 1.4871, "step": 494500 }, { "epoch": 0.3, "learning_rate": 5.3963843992958514e-05, "loss": 1.4505, "step": 495000 }, { "epoch": 0.3, "learning_rate": 5.3961748227329074e-05, "loss": 1.485, "step": 495500 }, { "epoch": 0.3, "learning_rate": 5.395964826176851e-05, "loss": 1.4968, "step": 496000 }, { "epoch": 0.3, "learning_rate": 5.395754829620794e-05, "loss": 1.4476, "step": 496500 }, { "epoch": 0.3, "learning_rate": 5.395544833064738e-05, "loss": 1.4873, "step": 497000 }, { "epoch": 0.3, "learning_rate": 5.395334836508681e-05, "loss": 1.4653, "step": 497500 }, { "epoch": 0.3, "learning_rate": 5.395124839952624e-05, "loss": 1.4962, "step": 498000 }, { "epoch": 0.3, "learning_rate": 5.394914843396568e-05, "loss": 1.4837, "step": 498500 }, { "epoch": 0.3, "learning_rate": 5.3947048468405115e-05, "loss": 1.4668, "step": 499000 }, { "epoch": 0.3, "learning_rate": 5.3944956902706796e-05, "loss": 1.4675, "step": 499500 }, { "epoch": 0.3, "learning_rate": 5.394285693714623e-05, "loss": 1.4509, "step": 500000 }, { "epoch": 0.3, "eval_loss": 1.39423406124115, "eval_runtime": 1102.7794, "eval_samples_per_second": 477.63, "eval_steps_per_second": 79.605, "step": 500000 }, { "epoch": 0.3, "learning_rate": 5.394075697158567e-05, "loss": 1.4921, "step": 500500 }, { "epoch": 0.3, "learning_rate": 5.39386570060251e-05, "loss": 1.4708, "step": 501000 }, { "epoch": 0.3, "learning_rate": 5.3936557040464536e-05, "loss": 1.4328, "step": 501500 }, { "epoch": 0.3, "learning_rate": 5.3934457074903976e-05, "loss": 1.4618, "step": 502000 }, { "epoch": 0.3, "learning_rate": 5.393236130927453e-05, "loss": 1.469, "step": 502500 }, { "epoch": 0.3, "learning_rate": 5.3930261343713963e-05, "loss": 1.4599, "step": 503000 }, { "epoch": 0.3, "learning_rate": 5.39281613781534e-05, "loss": 1.4738, "step": 503500 }, { "epoch": 0.3, "learning_rate": 5.392606141259284e-05, "loss": 1.4419, "step": 504000 }, { "epoch": 0.3, "learning_rate": 5.3923961447032264e-05, "loss": 1.481, "step": 504500 }, { "epoch": 0.3, "learning_rate": 5.39218614814717e-05, "loss": 1.4842, "step": 505000 }, { "epoch": 0.3, "learning_rate": 5.391976151591114e-05, "loss": 1.4802, "step": 505500 }, { "epoch": 0.3, "learning_rate": 5.391766155035057e-05, "loss": 1.4596, "step": 506000 }, { "epoch": 0.3, "learning_rate": 5.391556578472113e-05, "loss": 1.4874, "step": 506500 }, { "epoch": 0.3, "learning_rate": 5.3913465819160565e-05, "loss": 1.4527, "step": 507000 }, { "epoch": 0.3, "learning_rate": 5.39113658536e-05, "loss": 1.483, "step": 507500 }, { "epoch": 0.3, "learning_rate": 5.390926588803943e-05, "loss": 1.4635, "step": 508000 }, { "epoch": 0.3, "learning_rate": 5.390716592247887e-05, "loss": 1.4686, "step": 508500 }, { "epoch": 0.31, "learning_rate": 5.3905065956918305e-05, "loss": 1.4861, "step": 509000 }, { "epoch": 0.31, "learning_rate": 5.390296599135774e-05, "loss": 1.4563, "step": 509500 }, { "epoch": 0.31, "learning_rate": 5.390087022572829e-05, "loss": 1.4874, "step": 510000 }, { "epoch": 0.31, "learning_rate": 5.389877026016773e-05, "loss": 1.4642, "step": 510500 }, { "epoch": 0.31, "learning_rate": 5.3896670294607166e-05, "loss": 1.4704, "step": 511000 }, { "epoch": 0.31, "learning_rate": 5.38945703290466e-05, "loss": 1.4708, "step": 511500 }, { "epoch": 0.31, "learning_rate": 5.389247456341716e-05, "loss": 1.464, "step": 512000 }, { "epoch": 0.31, "learning_rate": 5.389037879778772e-05, "loss": 1.459, "step": 512500 }, { "epoch": 0.31, "learning_rate": 5.3888278832227154e-05, "loss": 1.4629, "step": 513000 }, { "epoch": 0.31, "learning_rate": 5.388618306659771e-05, "loss": 1.4731, "step": 513500 }, { "epoch": 0.31, "learning_rate": 5.388408310103714e-05, "loss": 1.4593, "step": 514000 }, { "epoch": 0.31, "learning_rate": 5.388198313547658e-05, "loss": 1.4806, "step": 514500 }, { "epoch": 0.31, "learning_rate": 5.3879883169916014e-05, "loss": 1.4702, "step": 515000 }, { "epoch": 0.31, "learning_rate": 5.387778320435545e-05, "loss": 1.4674, "step": 515500 }, { "epoch": 0.31, "learning_rate": 5.387568323879489e-05, "loss": 1.4989, "step": 516000 }, { "epoch": 0.31, "learning_rate": 5.3873583273234315e-05, "loss": 1.4581, "step": 516500 }, { "epoch": 0.31, "learning_rate": 5.387148330767375e-05, "loss": 1.4312, "step": 517000 }, { "epoch": 0.31, "learning_rate": 5.386938334211319e-05, "loss": 1.4704, "step": 517500 }, { "epoch": 0.31, "learning_rate": 5.386728337655262e-05, "loss": 1.474, "step": 518000 }, { "epoch": 0.31, "learning_rate": 5.3865183410992055e-05, "loss": 1.4743, "step": 518500 }, { "epoch": 0.31, "learning_rate": 5.3863083445431496e-05, "loss": 1.4557, "step": 519000 }, { "epoch": 0.31, "learning_rate": 5.386098347987093e-05, "loss": 1.4193, "step": 519500 }, { "epoch": 0.31, "learning_rate": 5.385888771424148e-05, "loss": 1.4183, "step": 520000 }, { "epoch": 0.31, "learning_rate": 5.385678774868092e-05, "loss": 1.4506, "step": 520500 }, { "epoch": 0.31, "learning_rate": 5.385469198305148e-05, "loss": 1.4805, "step": 521000 }, { "epoch": 0.31, "learning_rate": 5.385259201749091e-05, "loss": 1.4719, "step": 521500 }, { "epoch": 0.31, "learning_rate": 5.3850492051930343e-05, "loss": 1.4597, "step": 522000 }, { "epoch": 0.31, "learning_rate": 5.3848392086369784e-05, "loss": 1.4465, "step": 522500 }, { "epoch": 0.31, "learning_rate": 5.3846296320740344e-05, "loss": 1.4688, "step": 523000 }, { "epoch": 0.31, "learning_rate": 5.384419635517977e-05, "loss": 1.4534, "step": 523500 }, { "epoch": 0.31, "learning_rate": 5.3842096389619204e-05, "loss": 1.4553, "step": 524000 }, { "epoch": 0.31, "learning_rate": 5.3839996424058644e-05, "loss": 1.4524, "step": 524500 }, { "epoch": 0.31, "learning_rate": 5.383789645849808e-05, "loss": 1.4509, "step": 525000 }, { "epoch": 0.32, "learning_rate": 5.383579649293751e-05, "loss": 1.5044, "step": 525500 }, { "epoch": 0.32, "learning_rate": 5.383369652737695e-05, "loss": 1.5006, "step": 526000 }, { "epoch": 0.32, "learning_rate": 5.3831600761747505e-05, "loss": 1.4834, "step": 526500 }, { "epoch": 0.32, "learning_rate": 5.382950079618694e-05, "loss": 1.4554, "step": 527000 }, { "epoch": 0.32, "learning_rate": 5.382740083062638e-05, "loss": 1.4694, "step": 527500 }, { "epoch": 0.32, "learning_rate": 5.382530086506581e-05, "loss": 1.4621, "step": 528000 }, { "epoch": 0.32, "learning_rate": 5.3823200899505246e-05, "loss": 1.4831, "step": 528500 }, { "epoch": 0.32, "learning_rate": 5.3821100933944686e-05, "loss": 1.4699, "step": 529000 }, { "epoch": 0.32, "learning_rate": 5.381900096838412e-05, "loss": 1.4635, "step": 529500 }, { "epoch": 0.32, "learning_rate": 5.381690100282355e-05, "loss": 1.4493, "step": 530000 }, { "epoch": 0.32, "learning_rate": 5.381480103726299e-05, "loss": 1.4588, "step": 530500 }, { "epoch": 0.32, "learning_rate": 5.3812701071702427e-05, "loss": 1.4337, "step": 531000 }, { "epoch": 0.32, "learning_rate": 5.381060110614185e-05, "loss": 1.4663, "step": 531500 }, { "epoch": 0.32, "learning_rate": 5.3808505340512414e-05, "loss": 1.4553, "step": 532000 }, { "epoch": 0.32, "learning_rate": 5.3806405374951854e-05, "loss": 1.4686, "step": 532500 }, { "epoch": 0.32, "learning_rate": 5.380430540939129e-05, "loss": 1.4886, "step": 533000 }, { "epoch": 0.32, "learning_rate": 5.380220544383072e-05, "loss": 1.4579, "step": 533500 }, { "epoch": 0.32, "learning_rate": 5.3800105478270154e-05, "loss": 1.4527, "step": 534000 }, { "epoch": 0.32, "learning_rate": 5.379800551270959e-05, "loss": 1.4672, "step": 534500 }, { "epoch": 0.32, "learning_rate": 5.379590554714902e-05, "loss": 1.4291, "step": 535000 }, { "epoch": 0.32, "learning_rate": 5.379380978151958e-05, "loss": 1.477, "step": 535500 }, { "epoch": 0.32, "learning_rate": 5.379170981595902e-05, "loss": 1.4374, "step": 536000 }, { "epoch": 0.32, "learning_rate": 5.378960985039845e-05, "loss": 1.4743, "step": 536500 }, { "epoch": 0.32, "learning_rate": 5.378750988483789e-05, "loss": 1.4532, "step": 537000 }, { "epoch": 0.32, "learning_rate": 5.378540991927732e-05, "loss": 1.4681, "step": 537500 }, { "epoch": 0.32, "learning_rate": 5.3783309953716756e-05, "loss": 1.4418, "step": 538000 }, { "epoch": 0.32, "learning_rate": 5.3781214188087316e-05, "loss": 1.4785, "step": 538500 }, { "epoch": 0.32, "learning_rate": 5.377911422252675e-05, "loss": 1.4813, "step": 539000 }, { "epoch": 0.32, "learning_rate": 5.377701845689731e-05, "loss": 1.4728, "step": 539500 }, { "epoch": 0.32, "learning_rate": 5.377491849133674e-05, "loss": 1.4488, "step": 540000 }, { "epoch": 0.32, "learning_rate": 5.377281852577618e-05, "loss": 1.4475, "step": 540500 }, { "epoch": 0.32, "learning_rate": 5.377071856021561e-05, "loss": 1.4984, "step": 541000 }, { "epoch": 0.32, "learning_rate": 5.3768618594655044e-05, "loss": 1.4632, "step": 541500 }, { "epoch": 0.32, "learning_rate": 5.376651862909448e-05, "loss": 1.418, "step": 542000 }, { "epoch": 0.33, "learning_rate": 5.376441866353392e-05, "loss": 1.4526, "step": 542500 }, { "epoch": 0.33, "learning_rate": 5.376231869797335e-05, "loss": 1.4693, "step": 543000 }, { "epoch": 0.33, "learning_rate": 5.3760218732412784e-05, "loss": 1.4522, "step": 543500 }, { "epoch": 0.33, "learning_rate": 5.3758118766852224e-05, "loss": 1.4922, "step": 544000 }, { "epoch": 0.33, "learning_rate": 5.375601880129166e-05, "loss": 1.433, "step": 544500 }, { "epoch": 0.33, "learning_rate": 5.375391883573109e-05, "loss": 1.4408, "step": 545000 }, { "epoch": 0.33, "learning_rate": 5.375181887017053e-05, "loss": 1.4421, "step": 545500 }, { "epoch": 0.33, "learning_rate": 5.3749718904609965e-05, "loss": 1.468, "step": 546000 }, { "epoch": 0.33, "learning_rate": 5.37476189390494e-05, "loss": 1.4545, "step": 546500 }, { "epoch": 0.33, "learning_rate": 5.374552737335107e-05, "loss": 1.4541, "step": 547000 }, { "epoch": 0.33, "learning_rate": 5.374342740779051e-05, "loss": 1.4912, "step": 547500 }, { "epoch": 0.33, "learning_rate": 5.3741327442229946e-05, "loss": 1.4484, "step": 548000 }, { "epoch": 0.33, "learning_rate": 5.373922747666938e-05, "loss": 1.4795, "step": 548500 }, { "epoch": 0.33, "learning_rate": 5.373712751110882e-05, "loss": 1.45, "step": 549000 }, { "epoch": 0.33, "learning_rate": 5.373502754554825e-05, "loss": 1.4788, "step": 549500 }, { "epoch": 0.33, "learning_rate": 5.3732927579987687e-05, "loss": 1.4774, "step": 550000 }, { "epoch": 0.33, "learning_rate": 5.373082761442713e-05, "loss": 1.4913, "step": 550500 }, { "epoch": 0.33, "learning_rate": 5.372872764886656e-05, "loss": 1.436, "step": 551000 }, { "epoch": 0.33, "learning_rate": 5.3726636083168234e-05, "loss": 1.4616, "step": 551500 }, { "epoch": 0.33, "learning_rate": 5.372453611760767e-05, "loss": 1.4495, "step": 552000 }, { "epoch": 0.33, "learning_rate": 5.372243615204711e-05, "loss": 1.4402, "step": 552500 }, { "epoch": 0.33, "learning_rate": 5.372033618648654e-05, "loss": 1.4483, "step": 553000 }, { "epoch": 0.33, "learning_rate": 5.3718236220925975e-05, "loss": 1.4489, "step": 553500 }, { "epoch": 0.33, "learning_rate": 5.3716136255365415e-05, "loss": 1.4528, "step": 554000 }, { "epoch": 0.33, "learning_rate": 5.371403628980485e-05, "loss": 1.4725, "step": 554500 }, { "epoch": 0.33, "learning_rate": 5.371193632424428e-05, "loss": 1.4451, "step": 555000 }, { "epoch": 0.33, "learning_rate": 5.370983635868372e-05, "loss": 1.4319, "step": 555500 }, { "epoch": 0.33, "learning_rate": 5.370774479298539e-05, "loss": 1.4718, "step": 556000 }, { "epoch": 0.33, "learning_rate": 5.370564482742483e-05, "loss": 1.4565, "step": 556500 }, { "epoch": 0.33, "learning_rate": 5.370354486186426e-05, "loss": 1.4427, "step": 557000 }, { "epoch": 0.33, "learning_rate": 5.3701444896303696e-05, "loss": 1.4279, "step": 557500 }, { "epoch": 0.33, "learning_rate": 5.3699344930743136e-05, "loss": 1.4681, "step": 558000 }, { "epoch": 0.33, "learning_rate": 5.369724916511369e-05, "loss": 1.4486, "step": 558500 }, { "epoch": 0.34, "learning_rate": 5.369514919955312e-05, "loss": 1.4583, "step": 559000 }, { "epoch": 0.34, "learning_rate": 5.3693049233992563e-05, "loss": 1.4572, "step": 559500 }, { "epoch": 0.34, "learning_rate": 5.3690949268432e-05, "loss": 1.4543, "step": 560000 }, { "epoch": 0.34, "learning_rate": 5.368884930287143e-05, "loss": 1.453, "step": 560500 }, { "epoch": 0.34, "learning_rate": 5.3686753537241984e-05, "loss": 1.4382, "step": 561000 }, { "epoch": 0.34, "learning_rate": 5.3684653571681424e-05, "loss": 1.4591, "step": 561500 }, { "epoch": 0.34, "learning_rate": 5.3682557806051985e-05, "loss": 1.4615, "step": 562000 }, { "epoch": 0.34, "learning_rate": 5.368045784049141e-05, "loss": 1.4834, "step": 562500 }, { "epoch": 0.34, "learning_rate": 5.3678357874930845e-05, "loss": 1.4664, "step": 563000 }, { "epoch": 0.34, "learning_rate": 5.3676257909370285e-05, "loss": 1.4678, "step": 563500 }, { "epoch": 0.34, "learning_rate": 5.367415794380972e-05, "loss": 1.4438, "step": 564000 }, { "epoch": 0.34, "learning_rate": 5.367205797824916e-05, "loss": 1.464, "step": 564500 }, { "epoch": 0.34, "learning_rate": 5.366995801268859e-05, "loss": 1.4623, "step": 565000 }, { "epoch": 0.34, "learning_rate": 5.3667858047128026e-05, "loss": 1.4471, "step": 565500 }, { "epoch": 0.34, "learning_rate": 5.3665758081567466e-05, "loss": 1.4515, "step": 566000 }, { "epoch": 0.34, "learning_rate": 5.36636581160069e-05, "loss": 1.4244, "step": 566500 }, { "epoch": 0.34, "learning_rate": 5.366155815044633e-05, "loss": 1.4558, "step": 567000 }, { "epoch": 0.34, "learning_rate": 5.365945818488577e-05, "loss": 1.4595, "step": 567500 }, { "epoch": 0.34, "learning_rate": 5.36573582193252e-05, "loss": 1.4305, "step": 568000 }, { "epoch": 0.34, "learning_rate": 5.365526245369576e-05, "loss": 1.4406, "step": 568500 }, { "epoch": 0.34, "learning_rate": 5.3653162488135193e-05, "loss": 1.462, "step": 569000 }, { "epoch": 0.34, "learning_rate": 5.3651062522574634e-05, "loss": 1.4595, "step": 569500 }, { "epoch": 0.34, "learning_rate": 5.364896255701407e-05, "loss": 1.4909, "step": 570000 }, { "epoch": 0.34, "learning_rate": 5.364687099131574e-05, "loss": 1.4719, "step": 570500 }, { "epoch": 0.34, "learning_rate": 5.3644771025755174e-05, "loss": 1.4807, "step": 571000 }, { "epoch": 0.34, "learning_rate": 5.3642671060194614e-05, "loss": 1.4544, "step": 571500 }, { "epoch": 0.34, "learning_rate": 5.364057109463405e-05, "loss": 1.4577, "step": 572000 }, { "epoch": 0.34, "learning_rate": 5.363847112907348e-05, "loss": 1.4659, "step": 572500 }, { "epoch": 0.34, "learning_rate": 5.363637116351292e-05, "loss": 1.454, "step": 573000 }, { "epoch": 0.34, "learning_rate": 5.3634271197952355e-05, "loss": 1.4702, "step": 573500 }, { "epoch": 0.34, "learning_rate": 5.363217543232291e-05, "loss": 1.4803, "step": 574000 }, { "epoch": 0.34, "learning_rate": 5.363007546676234e-05, "loss": 1.4818, "step": 574500 }, { "epoch": 0.34, "learning_rate": 5.362797550120178e-05, "loss": 1.4441, "step": 575000 }, { "epoch": 0.35, "learning_rate": 5.3625875535641216e-05, "loss": 1.4358, "step": 575500 }, { "epoch": 0.35, "learning_rate": 5.362377557008065e-05, "loss": 1.4538, "step": 576000 }, { "epoch": 0.35, "learning_rate": 5.362167560452009e-05, "loss": 1.469, "step": 576500 }, { "epoch": 0.35, "learning_rate": 5.361957563895952e-05, "loss": 1.4577, "step": 577000 }, { "epoch": 0.35, "learning_rate": 5.361747567339895e-05, "loss": 1.4498, "step": 577500 }, { "epoch": 0.35, "learning_rate": 5.361537570783839e-05, "loss": 1.4132, "step": 578000 }, { "epoch": 0.35, "learning_rate": 5.361327994220895e-05, "loss": 1.4081, "step": 578500 }, { "epoch": 0.35, "learning_rate": 5.3611179976648384e-05, "loss": 1.4546, "step": 579000 }, { "epoch": 0.35, "learning_rate": 5.360908001108782e-05, "loss": 1.4465, "step": 579500 }, { "epoch": 0.35, "learning_rate": 5.360698004552725e-05, "loss": 1.4338, "step": 580000 }, { "epoch": 0.35, "learning_rate": 5.3604880079966684e-05, "loss": 1.4582, "step": 580500 }, { "epoch": 0.35, "learning_rate": 5.3602784314337244e-05, "loss": 1.4748, "step": 581000 }, { "epoch": 0.35, "learning_rate": 5.3600684348776685e-05, "loss": 1.4371, "step": 581500 }, { "epoch": 0.35, "learning_rate": 5.359858438321612e-05, "loss": 1.4423, "step": 582000 }, { "epoch": 0.35, "learning_rate": 5.359648861758667e-05, "loss": 1.4646, "step": 582500 }, { "epoch": 0.35, "learning_rate": 5.3594388652026105e-05, "loss": 1.4183, "step": 583000 }, { "epoch": 0.35, "learning_rate": 5.3592288686465545e-05, "loss": 1.4732, "step": 583500 }, { "epoch": 0.35, "learning_rate": 5.359018872090498e-05, "loss": 1.4349, "step": 584000 }, { "epoch": 0.35, "learning_rate": 5.358808875534441e-05, "loss": 1.4537, "step": 584500 }, { "epoch": 0.35, "learning_rate": 5.3585992989714966e-05, "loss": 1.4631, "step": 585000 }, { "epoch": 0.35, "learning_rate": 5.3583893024154406e-05, "loss": 1.4499, "step": 585500 }, { "epoch": 0.35, "learning_rate": 5.358179305859384e-05, "loss": 1.4533, "step": 586000 }, { "epoch": 0.35, "learning_rate": 5.357969309303327e-05, "loss": 1.4466, "step": 586500 }, { "epoch": 0.35, "learning_rate": 5.3577593127472707e-05, "loss": 1.4415, "step": 587000 }, { "epoch": 0.35, "learning_rate": 5.357549316191214e-05, "loss": 1.4763, "step": 587500 }, { "epoch": 0.35, "learning_rate": 5.35733973962827e-05, "loss": 1.4567, "step": 588000 }, { "epoch": 0.35, "learning_rate": 5.357129743072214e-05, "loss": 1.4361, "step": 588500 }, { "epoch": 0.35, "learning_rate": 5.3569197465161574e-05, "loss": 1.4296, "step": 589000 }, { "epoch": 0.35, "learning_rate": 5.3567097499601e-05, "loss": 1.496, "step": 589500 }, { "epoch": 0.35, "learning_rate": 5.356499753404044e-05, "loss": 1.4276, "step": 590000 }, { "epoch": 0.35, "learning_rate": 5.3562897568479874e-05, "loss": 1.4369, "step": 590500 }, { "epoch": 0.35, "learning_rate": 5.356079760291931e-05, "loss": 1.4548, "step": 591000 }, { "epoch": 0.35, "learning_rate": 5.355869763735875e-05, "loss": 1.4422, "step": 591500 }, { "epoch": 0.35, "learning_rate": 5.35566018717293e-05, "loss": 1.4806, "step": 592000 }, { "epoch": 0.36, "learning_rate": 5.3554501906168735e-05, "loss": 1.4277, "step": 592500 }, { "epoch": 0.36, "learning_rate": 5.355240194060817e-05, "loss": 1.4337, "step": 593000 }, { "epoch": 0.36, "learning_rate": 5.355030197504761e-05, "loss": 1.4104, "step": 593500 }, { "epoch": 0.36, "learning_rate": 5.354820620941817e-05, "loss": 1.467, "step": 594000 }, { "epoch": 0.36, "learning_rate": 5.354611044378872e-05, "loss": 1.4678, "step": 594500 }, { "epoch": 0.36, "learning_rate": 5.3544010478228156e-05, "loss": 1.4167, "step": 595000 }, { "epoch": 0.36, "learning_rate": 5.3541910512667596e-05, "loss": 1.4224, "step": 595500 }, { "epoch": 0.36, "learning_rate": 5.353981054710703e-05, "loss": 1.4826, "step": 596000 }, { "epoch": 0.36, "learning_rate": 5.3537710581546457e-05, "loss": 1.4465, "step": 596500 }, { "epoch": 0.36, "learning_rate": 5.35356106159859e-05, "loss": 1.4431, "step": 597000 }, { "epoch": 0.36, "learning_rate": 5.353351065042533e-05, "loss": 1.4756, "step": 597500 }, { "epoch": 0.36, "learning_rate": 5.3531410684864764e-05, "loss": 1.4438, "step": 598000 }, { "epoch": 0.36, "learning_rate": 5.3529314919235324e-05, "loss": 1.45, "step": 598500 }, { "epoch": 0.36, "learning_rate": 5.352721495367476e-05, "loss": 1.445, "step": 599000 }, { "epoch": 0.36, "learning_rate": 5.352511498811419e-05, "loss": 1.4542, "step": 599500 }, { "epoch": 0.36, "learning_rate": 5.3523015022553624e-05, "loss": 1.4337, "step": 600000 }, { "epoch": 0.36, "eval_loss": 1.3768744468688965, "eval_runtime": 1111.0659, "eval_samples_per_second": 474.067, "eval_steps_per_second": 79.012, "step": 600000 }, { "epoch": 0.36, "learning_rate": 5.3520915056993065e-05, "loss": 1.4555, "step": 600500 }, { "epoch": 0.36, "learning_rate": 5.35188150914325e-05, "loss": 1.4661, "step": 601000 }, { "epoch": 0.36, "learning_rate": 5.351671932580305e-05, "loss": 1.4178, "step": 601500 }, { "epoch": 0.36, "learning_rate": 5.351461936024249e-05, "loss": 1.4682, "step": 602000 }, { "epoch": 0.36, "learning_rate": 5.351252359461305e-05, "loss": 1.427, "step": 602500 }, { "epoch": 0.36, "learning_rate": 5.3510423629052486e-05, "loss": 1.4369, "step": 603000 }, { "epoch": 0.36, "learning_rate": 5.350832786342304e-05, "loss": 1.4255, "step": 603500 }, { "epoch": 0.36, "learning_rate": 5.350622789786247e-05, "loss": 1.4287, "step": 604000 }, { "epoch": 0.36, "learning_rate": 5.350412793230191e-05, "loss": 1.4319, "step": 604500 }, { "epoch": 0.36, "learning_rate": 5.3502027966741346e-05, "loss": 1.4201, "step": 605000 }, { "epoch": 0.36, "learning_rate": 5.349992800118078e-05, "loss": 1.4515, "step": 605500 }, { "epoch": 0.36, "learning_rate": 5.349782803562022e-05, "loss": 1.4629, "step": 606000 }, { "epoch": 0.36, "learning_rate": 5.349572807005965e-05, "loss": 1.4473, "step": 606500 }, { "epoch": 0.36, "learning_rate": 5.349362810449908e-05, "loss": 1.4352, "step": 607000 }, { "epoch": 0.36, "learning_rate": 5.349153233886964e-05, "loss": 1.471, "step": 607500 }, { "epoch": 0.36, "learning_rate": 5.348943237330908e-05, "loss": 1.4547, "step": 608000 }, { "epoch": 0.36, "learning_rate": 5.348733240774851e-05, "loss": 1.4612, "step": 608500 }, { "epoch": 0.37, "learning_rate": 5.348523244218795e-05, "loss": 1.4406, "step": 609000 }, { "epoch": 0.37, "learning_rate": 5.348313247662738e-05, "loss": 1.4493, "step": 609500 }, { "epoch": 0.37, "learning_rate": 5.3481032511066815e-05, "loss": 1.451, "step": 610000 }, { "epoch": 0.37, "learning_rate": 5.3478932545506255e-05, "loss": 1.4441, "step": 610500 }, { "epoch": 0.37, "learning_rate": 5.347683257994569e-05, "loss": 1.4575, "step": 611000 }, { "epoch": 0.37, "learning_rate": 5.347473261438512e-05, "loss": 1.4635, "step": 611500 }, { "epoch": 0.37, "learning_rate": 5.34726410486868e-05, "loss": 1.4496, "step": 612000 }, { "epoch": 0.37, "learning_rate": 5.3470541083126236e-05, "loss": 1.469, "step": 612500 }, { "epoch": 0.37, "learning_rate": 5.3468441117565676e-05, "loss": 1.448, "step": 613000 }, { "epoch": 0.37, "learning_rate": 5.34663411520051e-05, "loss": 1.4622, "step": 613500 }, { "epoch": 0.37, "learning_rate": 5.3464241186444536e-05, "loss": 1.4492, "step": 614000 }, { "epoch": 0.37, "learning_rate": 5.3462145420815097e-05, "loss": 1.4249, "step": 614500 }, { "epoch": 0.37, "learning_rate": 5.346004545525454e-05, "loss": 1.4879, "step": 615000 }, { "epoch": 0.37, "learning_rate": 5.345794548969397e-05, "loss": 1.4732, "step": 615500 }, { "epoch": 0.37, "learning_rate": 5.3455845524133404e-05, "loss": 1.4502, "step": 616000 }, { "epoch": 0.37, "learning_rate": 5.345374555857284e-05, "loss": 1.423, "step": 616500 }, { "epoch": 0.37, "learning_rate": 5.345164559301227e-05, "loss": 1.4802, "step": 617000 }, { "epoch": 0.37, "learning_rate": 5.344954562745171e-05, "loss": 1.4899, "step": 617500 }, { "epoch": 0.37, "learning_rate": 5.3447449861822264e-05, "loss": 1.4651, "step": 618000 }, { "epoch": 0.37, "learning_rate": 5.34453498962617e-05, "loss": 1.4203, "step": 618500 }, { "epoch": 0.37, "learning_rate": 5.344324993070113e-05, "loss": 1.4236, "step": 619000 }, { "epoch": 0.37, "learning_rate": 5.344115416507169e-05, "loss": 1.4513, "step": 619500 }, { "epoch": 0.37, "learning_rate": 5.343905419951113e-05, "loss": 1.4665, "step": 620000 }, { "epoch": 0.37, "learning_rate": 5.343695423395056e-05, "loss": 1.4464, "step": 620500 }, { "epoch": 0.37, "learning_rate": 5.343485426838999e-05, "loss": 1.43, "step": 621000 }, { "epoch": 0.37, "learning_rate": 5.343275430282943e-05, "loss": 1.4221, "step": 621500 }, { "epoch": 0.37, "learning_rate": 5.3430654337268866e-05, "loss": 1.4269, "step": 622000 }, { "epoch": 0.37, "learning_rate": 5.34285543717083e-05, "loss": 1.4491, "step": 622500 }, { "epoch": 0.37, "learning_rate": 5.342645440614774e-05, "loss": 1.425, "step": 623000 }, { "epoch": 0.37, "learning_rate": 5.342435444058717e-05, "loss": 1.4478, "step": 623500 }, { "epoch": 0.37, "learning_rate": 5.3422258674957727e-05, "loss": 1.4586, "step": 624000 }, { "epoch": 0.37, "learning_rate": 5.342015870939717e-05, "loss": 1.448, "step": 624500 }, { "epoch": 0.37, "learning_rate": 5.34180587438366e-05, "loss": 1.4555, "step": 625000 }, { "epoch": 0.38, "learning_rate": 5.3415958778276034e-05, "loss": 1.4411, "step": 625500 }, { "epoch": 0.38, "learning_rate": 5.3413858812715474e-05, "loss": 1.4469, "step": 626000 }, { "epoch": 0.38, "learning_rate": 5.341175884715491e-05, "loss": 1.4492, "step": 626500 }, { "epoch": 0.38, "learning_rate": 5.340965888159434e-05, "loss": 1.4286, "step": 627000 }, { "epoch": 0.38, "learning_rate": 5.340755891603378e-05, "loss": 1.4145, "step": 627500 }, { "epoch": 0.38, "learning_rate": 5.3405463150404335e-05, "loss": 1.4191, "step": 628000 }, { "epoch": 0.38, "learning_rate": 5.340336318484377e-05, "loss": 1.4256, "step": 628500 }, { "epoch": 0.38, "learning_rate": 5.34012632192832e-05, "loss": 1.4367, "step": 629000 }, { "epoch": 0.38, "learning_rate": 5.3399167453653755e-05, "loss": 1.4325, "step": 629500 }, { "epoch": 0.38, "learning_rate": 5.3397067488093195e-05, "loss": 1.4389, "step": 630000 }, { "epoch": 0.38, "learning_rate": 5.339496752253263e-05, "loss": 1.452, "step": 630500 }, { "epoch": 0.38, "learning_rate": 5.339286755697207e-05, "loss": 1.4202, "step": 631000 }, { "epoch": 0.38, "learning_rate": 5.339077179134262e-05, "loss": 1.4379, "step": 631500 }, { "epoch": 0.38, "learning_rate": 5.3388671825782056e-05, "loss": 1.4422, "step": 632000 }, { "epoch": 0.38, "learning_rate": 5.338657186022149e-05, "loss": 1.4148, "step": 632500 }, { "epoch": 0.38, "learning_rate": 5.338447189466093e-05, "loss": 1.4406, "step": 633000 }, { "epoch": 0.38, "learning_rate": 5.338237192910036e-05, "loss": 1.4397, "step": 633500 }, { "epoch": 0.38, "learning_rate": 5.33802719635398e-05, "loss": 1.4582, "step": 634000 }, { "epoch": 0.38, "learning_rate": 5.337817199797924e-05, "loss": 1.4508, "step": 634500 }, { "epoch": 0.38, "learning_rate": 5.337607203241867e-05, "loss": 1.4748, "step": 635000 }, { "epoch": 0.38, "learning_rate": 5.33739720668581e-05, "loss": 1.4191, "step": 635500 }, { "epoch": 0.38, "learning_rate": 5.337187210129754e-05, "loss": 1.4539, "step": 636000 }, { "epoch": 0.38, "learning_rate": 5.33697763356681e-05, "loss": 1.4653, "step": 636500 }, { "epoch": 0.38, "learning_rate": 5.336767637010753e-05, "loss": 1.4614, "step": 637000 }, { "epoch": 0.38, "learning_rate": 5.3365580604478085e-05, "loss": 1.4223, "step": 637500 }, { "epoch": 0.38, "learning_rate": 5.3363480638917525e-05, "loss": 1.448, "step": 638000 }, { "epoch": 0.38, "learning_rate": 5.336138067335696e-05, "loss": 1.447, "step": 638500 }, { "epoch": 0.38, "learning_rate": 5.335928070779639e-05, "loss": 1.4191, "step": 639000 }, { "epoch": 0.38, "learning_rate": 5.335718074223583e-05, "loss": 1.4401, "step": 639500 }, { "epoch": 0.38, "learning_rate": 5.3355080776675266e-05, "loss": 1.4308, "step": 640000 }, { "epoch": 0.38, "learning_rate": 5.335298081111469e-05, "loss": 1.4421, "step": 640500 }, { "epoch": 0.38, "learning_rate": 5.335088504548525e-05, "loss": 1.4242, "step": 641000 }, { "epoch": 0.38, "learning_rate": 5.3348789279855806e-05, "loss": 1.4478, "step": 641500 }, { "epoch": 0.38, "learning_rate": 5.3346689314295246e-05, "loss": 1.4296, "step": 642000 }, { "epoch": 0.39, "learning_rate": 5.334458934873468e-05, "loss": 1.4458, "step": 642500 }, { "epoch": 0.39, "learning_rate": 5.334248938317411e-05, "loss": 1.4488, "step": 643000 }, { "epoch": 0.39, "learning_rate": 5.3340389417613554e-05, "loss": 1.4374, "step": 643500 }, { "epoch": 0.39, "learning_rate": 5.333828945205299e-05, "loss": 1.4245, "step": 644000 }, { "epoch": 0.39, "learning_rate": 5.333618948649242e-05, "loss": 1.4593, "step": 644500 }, { "epoch": 0.39, "learning_rate": 5.3334089520931854e-05, "loss": 1.431, "step": 645000 }, { "epoch": 0.39, "learning_rate": 5.333198955537129e-05, "loss": 1.4644, "step": 645500 }, { "epoch": 0.39, "learning_rate": 5.332988958981073e-05, "loss": 1.4543, "step": 646000 }, { "epoch": 0.39, "learning_rate": 5.332778962425016e-05, "loss": 1.442, "step": 646500 }, { "epoch": 0.39, "learning_rate": 5.3325689658689595e-05, "loss": 1.4349, "step": 647000 }, { "epoch": 0.39, "learning_rate": 5.332359389306015e-05, "loss": 1.4316, "step": 647500 }, { "epoch": 0.39, "learning_rate": 5.332149392749959e-05, "loss": 1.4708, "step": 648000 }, { "epoch": 0.39, "learning_rate": 5.331939816187015e-05, "loss": 1.4376, "step": 648500 }, { "epoch": 0.39, "learning_rate": 5.331729819630958e-05, "loss": 1.4279, "step": 649000 }, { "epoch": 0.39, "learning_rate": 5.3315198230749016e-05, "loss": 1.4284, "step": 649500 }, { "epoch": 0.39, "learning_rate": 5.331309826518845e-05, "loss": 1.4341, "step": 650000 }, { "epoch": 0.39, "learning_rate": 5.331099829962788e-05, "loss": 1.4804, "step": 650500 }, { "epoch": 0.39, "learning_rate": 5.3308898334067316e-05, "loss": 1.4413, "step": 651000 }, { "epoch": 0.39, "learning_rate": 5.3306798368506756e-05, "loss": 1.4023, "step": 651500 }, { "epoch": 0.39, "learning_rate": 5.330469840294619e-05, "loss": 1.43, "step": 652000 }, { "epoch": 0.39, "learning_rate": 5.330260263731674e-05, "loss": 1.4476, "step": 652500 }, { "epoch": 0.39, "learning_rate": 5.3300506871687304e-05, "loss": 1.4248, "step": 653000 }, { "epoch": 0.39, "learning_rate": 5.3298406906126744e-05, "loss": 1.4464, "step": 653500 }, { "epoch": 0.39, "learning_rate": 5.329630694056618e-05, "loss": 1.4334, "step": 654000 }, { "epoch": 0.39, "learning_rate": 5.3294206975005604e-05, "loss": 1.4424, "step": 654500 }, { "epoch": 0.39, "learning_rate": 5.3292107009445044e-05, "loss": 1.4482, "step": 655000 }, { "epoch": 0.39, "learning_rate": 5.329000704388448e-05, "loss": 1.4251, "step": 655500 }, { "epoch": 0.39, "learning_rate": 5.328791127825504e-05, "loss": 1.444, "step": 656000 }, { "epoch": 0.39, "learning_rate": 5.328581131269447e-05, "loss": 1.4468, "step": 656500 }, { "epoch": 0.39, "learning_rate": 5.3283711347133905e-05, "loss": 1.4211, "step": 657000 }, { "epoch": 0.39, "learning_rate": 5.328161138157334e-05, "loss": 1.4287, "step": 657500 }, { "epoch": 0.39, "learning_rate": 5.327951141601277e-05, "loss": 1.432, "step": 658000 }, { "epoch": 0.39, "learning_rate": 5.327741145045221e-05, "loss": 1.4243, "step": 658500 }, { "epoch": 0.4, "learning_rate": 5.3275311484891646e-05, "loss": 1.4279, "step": 659000 }, { "epoch": 0.4, "learning_rate": 5.327321151933108e-05, "loss": 1.471, "step": 659500 }, { "epoch": 0.4, "learning_rate": 5.327111155377052e-05, "loss": 1.4446, "step": 660000 }, { "epoch": 0.4, "learning_rate": 5.326901578814107e-05, "loss": 1.4615, "step": 660500 }, { "epoch": 0.4, "learning_rate": 5.3266915822580506e-05, "loss": 1.4471, "step": 661000 }, { "epoch": 0.4, "learning_rate": 5.3264815857019947e-05, "loss": 1.4619, "step": 661500 }, { "epoch": 0.4, "learning_rate": 5.326271589145938e-05, "loss": 1.4318, "step": 662000 }, { "epoch": 0.4, "learning_rate": 5.3260615925898813e-05, "loss": 1.4415, "step": 662500 }, { "epoch": 0.4, "learning_rate": 5.3258515960338254e-05, "loss": 1.4262, "step": 663000 }, { "epoch": 0.4, "learning_rate": 5.325641599477769e-05, "loss": 1.4446, "step": 663500 }, { "epoch": 0.4, "learning_rate": 5.325431602921712e-05, "loss": 1.4169, "step": 664000 }, { "epoch": 0.4, "learning_rate": 5.3252220263587674e-05, "loss": 1.4126, "step": 664500 }, { "epoch": 0.4, "learning_rate": 5.3250120298027114e-05, "loss": 1.4523, "step": 665000 }, { "epoch": 0.4, "learning_rate": 5.324802033246655e-05, "loss": 1.4505, "step": 665500 }, { "epoch": 0.4, "learning_rate": 5.32459245668371e-05, "loss": 1.4749, "step": 666000 }, { "epoch": 0.4, "learning_rate": 5.3243824601276535e-05, "loss": 1.4397, "step": 666500 }, { "epoch": 0.4, "learning_rate": 5.3241724635715975e-05, "loss": 1.4256, "step": 667000 }, { "epoch": 0.4, "learning_rate": 5.323962467015541e-05, "loss": 1.4611, "step": 667500 }, { "epoch": 0.4, "learning_rate": 5.323752890452596e-05, "loss": 1.4506, "step": 668000 }, { "epoch": 0.4, "learning_rate": 5.32354289389654e-05, "loss": 1.456, "step": 668500 }, { "epoch": 0.4, "learning_rate": 5.3233337373267076e-05, "loss": 1.4112, "step": 669000 }, { "epoch": 0.4, "learning_rate": 5.3231237407706516e-05, "loss": 1.4675, "step": 669500 }, { "epoch": 0.4, "learning_rate": 5.322913744214595e-05, "loss": 1.4507, "step": 670000 }, { "epoch": 0.4, "learning_rate": 5.322703747658538e-05, "loss": 1.4317, "step": 670500 }, { "epoch": 0.4, "learning_rate": 5.3224937511024823e-05, "loss": 1.4341, "step": 671000 }, { "epoch": 0.4, "learning_rate": 5.322283754546425e-05, "loss": 1.4129, "step": 671500 }, { "epoch": 0.4, "learning_rate": 5.3220737579903684e-05, "loss": 1.4284, "step": 672000 }, { "epoch": 0.4, "learning_rate": 5.3218637614343124e-05, "loss": 1.4382, "step": 672500 }, { "epoch": 0.4, "learning_rate": 5.321653764878256e-05, "loss": 1.4176, "step": 673000 }, { "epoch": 0.4, "learning_rate": 5.321443768322199e-05, "loss": 1.4286, "step": 673500 }, { "epoch": 0.4, "learning_rate": 5.321233771766143e-05, "loss": 1.4349, "step": 674000 }, { "epoch": 0.4, "learning_rate": 5.3210237752100864e-05, "loss": 1.4427, "step": 674500 }, { "epoch": 0.4, "learning_rate": 5.32081377865403e-05, "loss": 1.4347, "step": 675000 }, { "epoch": 0.4, "learning_rate": 5.320604202091086e-05, "loss": 1.4143, "step": 675500 }, { "epoch": 0.41, "learning_rate": 5.320394625528141e-05, "loss": 1.4515, "step": 676000 }, { "epoch": 0.41, "learning_rate": 5.3201846289720845e-05, "loss": 1.4268, "step": 676500 }, { "epoch": 0.41, "learning_rate": 5.319974632416028e-05, "loss": 1.4481, "step": 677000 }, { "epoch": 0.41, "learning_rate": 5.319764635859972e-05, "loss": 1.4725, "step": 677500 }, { "epoch": 0.41, "learning_rate": 5.319554639303915e-05, "loss": 1.4098, "step": 678000 }, { "epoch": 0.41, "learning_rate": 5.3193446427478586e-05, "loss": 1.4437, "step": 678500 }, { "epoch": 0.41, "learning_rate": 5.3191346461918026e-05, "loss": 1.3956, "step": 679000 }, { "epoch": 0.41, "learning_rate": 5.318925069628858e-05, "loss": 1.4491, "step": 679500 }, { "epoch": 0.41, "learning_rate": 5.318715493065914e-05, "loss": 1.4341, "step": 680000 }, { "epoch": 0.41, "learning_rate": 5.3185054965098574e-05, "loss": 1.4301, "step": 680500 }, { "epoch": 0.41, "learning_rate": 5.318295499953801e-05, "loss": 1.3966, "step": 681000 }, { "epoch": 0.41, "learning_rate": 5.318085503397744e-05, "loss": 1.4268, "step": 681500 }, { "epoch": 0.41, "learning_rate": 5.3178755068416874e-05, "loss": 1.4356, "step": 682000 }, { "epoch": 0.41, "learning_rate": 5.3176655102856314e-05, "loss": 1.4598, "step": 682500 }, { "epoch": 0.41, "learning_rate": 5.317455513729575e-05, "loss": 1.4122, "step": 683000 }, { "epoch": 0.41, "learning_rate": 5.317245517173518e-05, "loss": 1.4324, "step": 683500 }, { "epoch": 0.41, "learning_rate": 5.317035520617462e-05, "loss": 1.4231, "step": 684000 }, { "epoch": 0.41, "learning_rate": 5.3168259440545175e-05, "loss": 1.3964, "step": 684500 }, { "epoch": 0.41, "learning_rate": 5.316615947498461e-05, "loss": 1.4346, "step": 685000 }, { "epoch": 0.41, "learning_rate": 5.316405950942404e-05, "loss": 1.4072, "step": 685500 }, { "epoch": 0.41, "learning_rate": 5.316195954386348e-05, "loss": 1.4381, "step": 686000 }, { "epoch": 0.41, "learning_rate": 5.3159859578302915e-05, "loss": 1.4389, "step": 686500 }, { "epoch": 0.41, "learning_rate": 5.315775961274235e-05, "loss": 1.4436, "step": 687000 }, { "epoch": 0.41, "learning_rate": 5.315565964718179e-05, "loss": 1.4558, "step": 687500 }, { "epoch": 0.41, "learning_rate": 5.315355968162122e-05, "loss": 1.4319, "step": 688000 }, { "epoch": 0.41, "learning_rate": 5.3151459716060656e-05, "loss": 1.4506, "step": 688500 }, { "epoch": 0.41, "learning_rate": 5.3149363950431216e-05, "loss": 1.4629, "step": 689000 }, { "epoch": 0.41, "learning_rate": 5.314726398487065e-05, "loss": 1.4172, "step": 689500 }, { "epoch": 0.41, "learning_rate": 5.314516401931008e-05, "loss": 1.4572, "step": 690000 }, { "epoch": 0.41, "learning_rate": 5.3143064053749524e-05, "loss": 1.4266, "step": 690500 }, { "epoch": 0.41, "learning_rate": 5.314096408818895e-05, "loss": 1.4258, "step": 691000 }, { "epoch": 0.41, "learning_rate": 5.3138864122628384e-05, "loss": 1.4577, "step": 691500 }, { "epoch": 0.41, "learning_rate": 5.3136764157067824e-05, "loss": 1.4386, "step": 692000 }, { "epoch": 0.42, "learning_rate": 5.313466419150726e-05, "loss": 1.4349, "step": 692500 }, { "epoch": 0.42, "learning_rate": 5.313257262580894e-05, "loss": 1.4594, "step": 693000 }, { "epoch": 0.42, "learning_rate": 5.313047266024837e-05, "loss": 1.4586, "step": 693500 }, { "epoch": 0.42, "learning_rate": 5.3128372694687805e-05, "loss": 1.4089, "step": 694000 }, { "epoch": 0.42, "learning_rate": 5.3126272729127245e-05, "loss": 1.4087, "step": 694500 }, { "epoch": 0.42, "learning_rate": 5.312417276356668e-05, "loss": 1.4497, "step": 695000 }, { "epoch": 0.42, "learning_rate": 5.312207279800611e-05, "loss": 1.4195, "step": 695500 }, { "epoch": 0.42, "learning_rate": 5.3119972832445545e-05, "loss": 1.3999, "step": 696000 }, { "epoch": 0.42, "learning_rate": 5.311787286688498e-05, "loss": 1.429, "step": 696500 }, { "epoch": 0.42, "learning_rate": 5.311577710125554e-05, "loss": 1.4587, "step": 697000 }, { "epoch": 0.42, "learning_rate": 5.311368133562609e-05, "loss": 1.437, "step": 697500 }, { "epoch": 0.42, "learning_rate": 5.311158137006553e-05, "loss": 1.4342, "step": 698000 }, { "epoch": 0.42, "learning_rate": 5.3109481404504967e-05, "loss": 1.4075, "step": 698500 }, { "epoch": 0.42, "learning_rate": 5.31073814389444e-05, "loss": 1.4341, "step": 699000 }, { "epoch": 0.42, "learning_rate": 5.3105285673314954e-05, "loss": 1.4322, "step": 699500 }, { "epoch": 0.42, "learning_rate": 5.3103185707754394e-05, "loss": 1.4627, "step": 700000 }, { "epoch": 0.42, "eval_loss": 1.3590091466903687, "eval_runtime": 1101.3908, "eval_samples_per_second": 478.232, "eval_steps_per_second": 79.706, "step": 700000 }, { "epoch": 0.42, "learning_rate": 5.310108574219383e-05, "loss": 1.408, "step": 700500 }, { "epoch": 0.42, "learning_rate": 5.309898577663326e-05, "loss": 1.4505, "step": 701000 }, { "epoch": 0.42, "learning_rate": 5.30968858110727e-05, "loss": 1.4231, "step": 701500 }, { "epoch": 0.42, "learning_rate": 5.3094785845512134e-05, "loss": 1.4554, "step": 702000 }, { "epoch": 0.42, "learning_rate": 5.309268587995157e-05, "loss": 1.4322, "step": 702500 }, { "epoch": 0.42, "learning_rate": 5.3090585914391e-05, "loss": 1.4357, "step": 703000 }, { "epoch": 0.42, "learning_rate": 5.3088485948830435e-05, "loss": 1.4412, "step": 703500 }, { "epoch": 0.42, "learning_rate": 5.3086385983269875e-05, "loss": 1.4213, "step": 704000 }, { "epoch": 0.42, "learning_rate": 5.308428601770931e-05, "loss": 1.4415, "step": 704500 }, { "epoch": 0.42, "learning_rate": 5.308218605214874e-05, "loss": 1.4051, "step": 705000 }, { "epoch": 0.42, "learning_rate": 5.308008608658818e-05, "loss": 1.4593, "step": 705500 }, { "epoch": 0.42, "learning_rate": 5.3077994520889856e-05, "loss": 1.4116, "step": 706000 }, { "epoch": 0.42, "learning_rate": 5.3075894555329296e-05, "loss": 1.3986, "step": 706500 }, { "epoch": 0.42, "learning_rate": 5.307379458976873e-05, "loss": 1.4291, "step": 707000 }, { "epoch": 0.42, "learning_rate": 5.307169462420816e-05, "loss": 1.4258, "step": 707500 }, { "epoch": 0.42, "learning_rate": 5.3069594658647596e-05, "loss": 1.43, "step": 708000 }, { "epoch": 0.42, "learning_rate": 5.306749889301816e-05, "loss": 1.4185, "step": 708500 }, { "epoch": 0.43, "learning_rate": 5.306539892745759e-05, "loss": 1.4218, "step": 709000 }, { "epoch": 0.43, "learning_rate": 5.3063298961897024e-05, "loss": 1.4296, "step": 709500 }, { "epoch": 0.43, "learning_rate": 5.306119899633646e-05, "loss": 1.425, "step": 710000 }, { "epoch": 0.43, "learning_rate": 5.305909903077589e-05, "loss": 1.4172, "step": 710500 }, { "epoch": 0.43, "learning_rate": 5.305699906521533e-05, "loss": 1.4091, "step": 711000 }, { "epoch": 0.43, "learning_rate": 5.3054899099654764e-05, "loss": 1.4459, "step": 711500 }, { "epoch": 0.43, "learning_rate": 5.30527991340942e-05, "loss": 1.4144, "step": 712000 }, { "epoch": 0.43, "learning_rate": 5.305069916853364e-05, "loss": 1.4102, "step": 712500 }, { "epoch": 0.43, "learning_rate": 5.304860340290419e-05, "loss": 1.4436, "step": 713000 }, { "epoch": 0.43, "learning_rate": 5.3046503437343625e-05, "loss": 1.4086, "step": 713500 }, { "epoch": 0.43, "learning_rate": 5.3044407671714185e-05, "loss": 1.4445, "step": 714000 }, { "epoch": 0.43, "learning_rate": 5.304230770615362e-05, "loss": 1.4127, "step": 714500 }, { "epoch": 0.43, "learning_rate": 5.304020774059305e-05, "loss": 1.4149, "step": 715000 }, { "epoch": 0.43, "learning_rate": 5.3038107775032486e-05, "loss": 1.4366, "step": 715500 }, { "epoch": 0.43, "learning_rate": 5.303600780947192e-05, "loss": 1.4374, "step": 716000 }, { "epoch": 0.43, "learning_rate": 5.303390784391136e-05, "loss": 1.4664, "step": 716500 }, { "epoch": 0.43, "learning_rate": 5.303181207828192e-05, "loss": 1.46, "step": 717000 }, { "epoch": 0.43, "learning_rate": 5.3029712112721347e-05, "loss": 1.4286, "step": 717500 }, { "epoch": 0.43, "learning_rate": 5.302761214716079e-05, "loss": 1.4548, "step": 718000 }, { "epoch": 0.43, "learning_rate": 5.302551218160022e-05, "loss": 1.4518, "step": 718500 }, { "epoch": 0.43, "learning_rate": 5.3023412216039654e-05, "loss": 1.4134, "step": 719000 }, { "epoch": 0.43, "learning_rate": 5.3021312250479094e-05, "loss": 1.4766, "step": 719500 }, { "epoch": 0.43, "learning_rate": 5.301921648484965e-05, "loss": 1.4393, "step": 720000 }, { "epoch": 0.43, "learning_rate": 5.301711651928908e-05, "loss": 1.4194, "step": 720500 }, { "epoch": 0.43, "learning_rate": 5.3015016553728514e-05, "loss": 1.4593, "step": 721000 }, { "epoch": 0.43, "learning_rate": 5.3012916588167955e-05, "loss": 1.4598, "step": 721500 }, { "epoch": 0.43, "learning_rate": 5.301081662260739e-05, "loss": 1.4184, "step": 722000 }, { "epoch": 0.43, "learning_rate": 5.300872085697794e-05, "loss": 1.4309, "step": 722500 }, { "epoch": 0.43, "learning_rate": 5.3006620891417375e-05, "loss": 1.4349, "step": 723000 }, { "epoch": 0.43, "learning_rate": 5.3004520925856815e-05, "loss": 1.4393, "step": 723500 }, { "epoch": 0.43, "learning_rate": 5.300242096029625e-05, "loss": 1.4495, "step": 724000 }, { "epoch": 0.43, "learning_rate": 5.300032099473568e-05, "loss": 1.433, "step": 724500 }, { "epoch": 0.43, "learning_rate": 5.299822102917512e-05, "loss": 1.4518, "step": 725000 }, { "epoch": 0.43, "learning_rate": 5.2996121063614556e-05, "loss": 1.4506, "step": 725500 }, { "epoch": 0.44, "learning_rate": 5.299402109805399e-05, "loss": 1.4156, "step": 726000 }, { "epoch": 0.44, "learning_rate": 5.299192533242455e-05, "loss": 1.4393, "step": 726500 }, { "epoch": 0.44, "learning_rate": 5.298982536686398e-05, "loss": 1.4054, "step": 727000 }, { "epoch": 0.44, "learning_rate": 5.298772540130342e-05, "loss": 1.4535, "step": 727500 }, { "epoch": 0.44, "learning_rate": 5.298562543574286e-05, "loss": 1.4119, "step": 728000 }, { "epoch": 0.44, "learning_rate": 5.298352547018229e-05, "loss": 1.4354, "step": 728500 }, { "epoch": 0.44, "learning_rate": 5.298143390448397e-05, "loss": 1.4135, "step": 729000 }, { "epoch": 0.44, "learning_rate": 5.29793339389234e-05, "loss": 1.4155, "step": 729500 }, { "epoch": 0.44, "learning_rate": 5.297723397336283e-05, "loss": 1.4293, "step": 730000 }, { "epoch": 0.44, "learning_rate": 5.297513400780227e-05, "loss": 1.4225, "step": 730500 }, { "epoch": 0.44, "learning_rate": 5.297303824217283e-05, "loss": 1.4443, "step": 731000 }, { "epoch": 0.44, "learning_rate": 5.297093827661226e-05, "loss": 1.4244, "step": 731500 }, { "epoch": 0.44, "learning_rate": 5.29688383110517e-05, "loss": 1.4241, "step": 732000 }, { "epoch": 0.44, "learning_rate": 5.296673834549113e-05, "loss": 1.4309, "step": 732500 }, { "epoch": 0.44, "learning_rate": 5.2964638379930565e-05, "loss": 1.4142, "step": 733000 }, { "epoch": 0.44, "learning_rate": 5.2962542614301126e-05, "loss": 1.4607, "step": 733500 }, { "epoch": 0.44, "learning_rate": 5.296044264874056e-05, "loss": 1.4232, "step": 734000 }, { "epoch": 0.44, "learning_rate": 5.295834268317999e-05, "loss": 1.4346, "step": 734500 }, { "epoch": 0.44, "learning_rate": 5.2956242717619426e-05, "loss": 1.429, "step": 735000 }, { "epoch": 0.44, "learning_rate": 5.2954146951989987e-05, "loss": 1.4275, "step": 735500 }, { "epoch": 0.44, "learning_rate": 5.295204698642943e-05, "loss": 1.4596, "step": 736000 }, { "epoch": 0.44, "learning_rate": 5.2949947020868853e-05, "loss": 1.4197, "step": 736500 }, { "epoch": 0.44, "learning_rate": 5.294784705530829e-05, "loss": 1.4253, "step": 737000 }, { "epoch": 0.44, "learning_rate": 5.294574708974773e-05, "loss": 1.4374, "step": 737500 }, { "epoch": 0.44, "learning_rate": 5.294364712418716e-05, "loss": 1.4415, "step": 738000 }, { "epoch": 0.44, "learning_rate": 5.2941547158626594e-05, "loss": 1.4352, "step": 738500 }, { "epoch": 0.44, "learning_rate": 5.2939447193066034e-05, "loss": 1.4322, "step": 739000 }, { "epoch": 0.44, "learning_rate": 5.2937355627367715e-05, "loss": 1.4446, "step": 739500 }, { "epoch": 0.44, "learning_rate": 5.293525566180715e-05, "loss": 1.4068, "step": 740000 }, { "epoch": 0.44, "learning_rate": 5.293315569624658e-05, "loss": 1.4331, "step": 740500 }, { "epoch": 0.44, "learning_rate": 5.2931055730686015e-05, "loss": 1.4115, "step": 741000 }, { "epoch": 0.44, "learning_rate": 5.292895576512545e-05, "loss": 1.4116, "step": 741500 }, { "epoch": 0.44, "learning_rate": 5.292685579956488e-05, "loss": 1.426, "step": 742000 }, { "epoch": 0.45, "learning_rate": 5.292475583400432e-05, "loss": 1.4512, "step": 742500 }, { "epoch": 0.45, "learning_rate": 5.2922655868443756e-05, "loss": 1.4143, "step": 743000 }, { "epoch": 0.45, "learning_rate": 5.292056010281431e-05, "loss": 1.4206, "step": 743500 }, { "epoch": 0.45, "learning_rate": 5.291846013725374e-05, "loss": 1.4405, "step": 744000 }, { "epoch": 0.45, "learning_rate": 5.291636017169318e-05, "loss": 1.4426, "step": 744500 }, { "epoch": 0.45, "learning_rate": 5.291426440606374e-05, "loss": 1.4342, "step": 745000 }, { "epoch": 0.45, "learning_rate": 5.291216444050318e-05, "loss": 1.389, "step": 745500 }, { "epoch": 0.45, "learning_rate": 5.291006447494261e-05, "loss": 1.3899, "step": 746000 }, { "epoch": 0.45, "learning_rate": 5.2907964509382044e-05, "loss": 1.4248, "step": 746500 }, { "epoch": 0.45, "learning_rate": 5.2905868743752604e-05, "loss": 1.4283, "step": 747000 }, { "epoch": 0.45, "learning_rate": 5.290376877819204e-05, "loss": 1.4424, "step": 747500 }, { "epoch": 0.45, "learning_rate": 5.290167301256259e-05, "loss": 1.4209, "step": 748000 }, { "epoch": 0.45, "learning_rate": 5.289957304700203e-05, "loss": 1.4141, "step": 748500 }, { "epoch": 0.45, "learning_rate": 5.2897473081441465e-05, "loss": 1.4175, "step": 749000 }, { "epoch": 0.45, "learning_rate": 5.28953731158809e-05, "loss": 1.4353, "step": 749500 }, { "epoch": 0.45, "learning_rate": 5.289327315032034e-05, "loss": 1.4151, "step": 750000 }, { "epoch": 0.45, "learning_rate": 5.289117318475977e-05, "loss": 1.432, "step": 750500 }, { "epoch": 0.45, "learning_rate": 5.28890732191992e-05, "loss": 1.4435, "step": 751000 }, { "epoch": 0.45, "learning_rate": 5.288697325363864e-05, "loss": 1.4726, "step": 751500 }, { "epoch": 0.45, "learning_rate": 5.288487328807807e-05, "loss": 1.4564, "step": 752000 }, { "epoch": 0.45, "learning_rate": 5.2882773322517506e-05, "loss": 1.4647, "step": 752500 }, { "epoch": 0.45, "learning_rate": 5.2880673356956946e-05, "loss": 1.4249, "step": 753000 }, { "epoch": 0.45, "learning_rate": 5.287857339139638e-05, "loss": 1.4816, "step": 753500 }, { "epoch": 0.45, "learning_rate": 5.287647762576693e-05, "loss": 1.422, "step": 754000 }, { "epoch": 0.45, "learning_rate": 5.2874386060068614e-05, "loss": 1.4455, "step": 754500 }, { "epoch": 0.45, "learning_rate": 5.287228609450805e-05, "loss": 1.421, "step": 755000 }, { "epoch": 0.45, "learning_rate": 5.287018612894749e-05, "loss": 1.4052, "step": 755500 }, { "epoch": 0.45, "learning_rate": 5.286808616338692e-05, "loss": 1.3963, "step": 756000 }, { "epoch": 0.45, "learning_rate": 5.2865986197826354e-05, "loss": 1.4512, "step": 756500 }, { "epoch": 0.45, "learning_rate": 5.2863886232265794e-05, "loss": 1.4022, "step": 757000 }, { "epoch": 0.45, "learning_rate": 5.286178626670523e-05, "loss": 1.4024, "step": 757500 }, { "epoch": 0.45, "learning_rate": 5.285969050107578e-05, "loss": 1.4019, "step": 758000 }, { "epoch": 0.45, "learning_rate": 5.2857590535515215e-05, "loss": 1.4412, "step": 758500 }, { "epoch": 0.46, "learning_rate": 5.2855490569954655e-05, "loss": 1.4478, "step": 759000 }, { "epoch": 0.46, "learning_rate": 5.285339060439409e-05, "loss": 1.4091, "step": 759500 }, { "epoch": 0.46, "learning_rate": 5.285129063883352e-05, "loss": 1.4316, "step": 760000 }, { "epoch": 0.46, "learning_rate": 5.2849190673272955e-05, "loss": 1.436, "step": 760500 }, { "epoch": 0.46, "learning_rate": 5.284709070771239e-05, "loss": 1.4195, "step": 761000 }, { "epoch": 0.46, "learning_rate": 5.284499494208295e-05, "loss": 1.3842, "step": 761500 }, { "epoch": 0.46, "learning_rate": 5.284289497652239e-05, "loss": 1.4408, "step": 762000 }, { "epoch": 0.46, "learning_rate": 5.2840795010961816e-05, "loss": 1.4332, "step": 762500 }, { "epoch": 0.46, "learning_rate": 5.283869504540125e-05, "loss": 1.3958, "step": 763000 }, { "epoch": 0.46, "learning_rate": 5.283659507984069e-05, "loss": 1.4334, "step": 763500 }, { "epoch": 0.46, "learning_rate": 5.283449511428012e-05, "loss": 1.4235, "step": 764000 }, { "epoch": 0.46, "learning_rate": 5.283239514871956e-05, "loss": 1.4217, "step": 764500 }, { "epoch": 0.46, "learning_rate": 5.283029938309011e-05, "loss": 1.4225, "step": 765000 }, { "epoch": 0.46, "learning_rate": 5.282819941752955e-05, "loss": 1.4362, "step": 765500 }, { "epoch": 0.46, "learning_rate": 5.2826099451968984e-05, "loss": 1.396, "step": 766000 }, { "epoch": 0.46, "learning_rate": 5.282399948640842e-05, "loss": 1.4001, "step": 766500 }, { "epoch": 0.46, "learning_rate": 5.2821903720778985e-05, "loss": 1.4292, "step": 767000 }, { "epoch": 0.46, "learning_rate": 5.281980375521841e-05, "loss": 1.4172, "step": 767500 }, { "epoch": 0.46, "learning_rate": 5.2817703789657845e-05, "loss": 1.4426, "step": 768000 }, { "epoch": 0.46, "learning_rate": 5.2815603824097285e-05, "loss": 1.4257, "step": 768500 }, { "epoch": 0.46, "learning_rate": 5.281350385853672e-05, "loss": 1.4549, "step": 769000 }, { "epoch": 0.46, "learning_rate": 5.281140389297615e-05, "loss": 1.429, "step": 769500 }, { "epoch": 0.46, "learning_rate": 5.280930392741559e-05, "loss": 1.4024, "step": 770000 }, { "epoch": 0.46, "learning_rate": 5.2807203961855026e-05, "loss": 1.3868, "step": 770500 }, { "epoch": 0.46, "learning_rate": 5.280510399629446e-05, "loss": 1.4544, "step": 771000 }, { "epoch": 0.46, "learning_rate": 5.280300823066501e-05, "loss": 1.402, "step": 771500 }, { "epoch": 0.46, "learning_rate": 5.280090826510445e-05, "loss": 1.4316, "step": 772000 }, { "epoch": 0.46, "learning_rate": 5.2798808299543886e-05, "loss": 1.409, "step": 772500 }, { "epoch": 0.46, "learning_rate": 5.279670833398332e-05, "loss": 1.4215, "step": 773000 }, { "epoch": 0.46, "learning_rate": 5.279460836842276e-05, "loss": 1.4543, "step": 773500 }, { "epoch": 0.46, "learning_rate": 5.2792508402862194e-05, "loss": 1.4367, "step": 774000 }, { "epoch": 0.46, "learning_rate": 5.279040843730163e-05, "loss": 1.4511, "step": 774500 }, { "epoch": 0.46, "learning_rate": 5.278831267167219e-05, "loss": 1.4217, "step": 775000 }, { "epoch": 0.46, "learning_rate": 5.278621270611162e-05, "loss": 1.423, "step": 775500 }, { "epoch": 0.47, "learning_rate": 5.2784112740551054e-05, "loss": 1.4249, "step": 776000 }, { "epoch": 0.47, "learning_rate": 5.2782012774990495e-05, "loss": 1.4343, "step": 776500 }, { "epoch": 0.47, "learning_rate": 5.277991280942993e-05, "loss": 1.4311, "step": 777000 }, { "epoch": 0.47, "learning_rate": 5.277781704380048e-05, "loss": 1.3929, "step": 777500 }, { "epoch": 0.47, "learning_rate": 5.2775721278171035e-05, "loss": 1.384, "step": 778000 }, { "epoch": 0.47, "learning_rate": 5.277362131261047e-05, "loss": 1.4355, "step": 778500 }, { "epoch": 0.47, "learning_rate": 5.277152134704991e-05, "loss": 1.4185, "step": 779000 }, { "epoch": 0.47, "learning_rate": 5.276942138148934e-05, "loss": 1.4229, "step": 779500 }, { "epoch": 0.47, "learning_rate": 5.2767321415928776e-05, "loss": 1.4343, "step": 780000 }, { "epoch": 0.47, "learning_rate": 5.2765221450368216e-05, "loss": 1.4136, "step": 780500 }, { "epoch": 0.47, "learning_rate": 5.276312148480765e-05, "loss": 1.3813, "step": 781000 }, { "epoch": 0.47, "learning_rate": 5.276102151924708e-05, "loss": 1.4223, "step": 781500 }, { "epoch": 0.47, "learning_rate": 5.275892155368652e-05, "loss": 1.3987, "step": 782000 }, { "epoch": 0.47, "learning_rate": 5.275682158812595e-05, "loss": 1.4144, "step": 782500 }, { "epoch": 0.47, "learning_rate": 5.275472162256539e-05, "loss": 1.4169, "step": 783000 }, { "epoch": 0.47, "learning_rate": 5.2752621657004824e-05, "loss": 1.4241, "step": 783500 }, { "epoch": 0.47, "learning_rate": 5.275052169144426e-05, "loss": 1.423, "step": 784000 }, { "epoch": 0.47, "learning_rate": 5.274842592581482e-05, "loss": 1.4311, "step": 784500 }, { "epoch": 0.47, "learning_rate": 5.274633016018537e-05, "loss": 1.4093, "step": 785000 }, { "epoch": 0.47, "learning_rate": 5.2744234394555924e-05, "loss": 1.4348, "step": 785500 }, { "epoch": 0.47, "learning_rate": 5.2742134428995365e-05, "loss": 1.4067, "step": 786000 }, { "epoch": 0.47, "learning_rate": 5.27400344634348e-05, "loss": 1.4248, "step": 786500 }, { "epoch": 0.47, "learning_rate": 5.273793449787423e-05, "loss": 1.4434, "step": 787000 }, { "epoch": 0.47, "learning_rate": 5.273583453231367e-05, "loss": 1.4103, "step": 787500 }, { "epoch": 0.47, "learning_rate": 5.2733734566753105e-05, "loss": 1.4276, "step": 788000 }, { "epoch": 0.47, "learning_rate": 5.273163460119254e-05, "loss": 1.4342, "step": 788500 }, { "epoch": 0.47, "learning_rate": 5.272953463563198e-05, "loss": 1.4141, "step": 789000 }, { "epoch": 0.47, "learning_rate": 5.2727434670071406e-05, "loss": 1.392, "step": 789500 }, { "epoch": 0.47, "learning_rate": 5.2725338904441966e-05, "loss": 1.4136, "step": 790000 }, { "epoch": 0.47, "learning_rate": 5.2723238938881406e-05, "loss": 1.3944, "step": 790500 }, { "epoch": 0.47, "learning_rate": 5.272113897332084e-05, "loss": 1.4292, "step": 791000 }, { "epoch": 0.47, "learning_rate": 5.271903900776027e-05, "loss": 1.4284, "step": 791500 }, { "epoch": 0.47, "learning_rate": 5.271693904219971e-05, "loss": 1.4478, "step": 792000 }, { "epoch": 0.48, "learning_rate": 5.271483907663914e-05, "loss": 1.4383, "step": 792500 }, { "epoch": 0.48, "learning_rate": 5.2712739111078574e-05, "loss": 1.433, "step": 793000 }, { "epoch": 0.48, "learning_rate": 5.2710639145518014e-05, "loss": 1.4345, "step": 793500 }, { "epoch": 0.48, "learning_rate": 5.2708543379888574e-05, "loss": 1.4657, "step": 794000 }, { "epoch": 0.48, "learning_rate": 5.2706443414328e-05, "loss": 1.4015, "step": 794500 }, { "epoch": 0.48, "learning_rate": 5.2704343448767434e-05, "loss": 1.3839, "step": 795000 }, { "epoch": 0.48, "learning_rate": 5.2702243483206875e-05, "loss": 1.4436, "step": 795500 }, { "epoch": 0.48, "learning_rate": 5.2700147717577435e-05, "loss": 1.4201, "step": 796000 }, { "epoch": 0.48, "learning_rate": 5.269805195194799e-05, "loss": 1.4199, "step": 796500 }, { "epoch": 0.48, "learning_rate": 5.269595198638742e-05, "loss": 1.4173, "step": 797000 }, { "epoch": 0.48, "learning_rate": 5.269385202082686e-05, "loss": 1.4289, "step": 797500 }, { "epoch": 0.48, "learning_rate": 5.2691752055266296e-05, "loss": 1.4226, "step": 798000 }, { "epoch": 0.48, "learning_rate": 5.268965628963685e-05, "loss": 1.3627, "step": 798500 }, { "epoch": 0.48, "learning_rate": 5.268755632407628e-05, "loss": 1.4062, "step": 799000 }, { "epoch": 0.48, "learning_rate": 5.268545635851572e-05, "loss": 1.4044, "step": 799500 }, { "epoch": 0.48, "learning_rate": 5.2683356392955156e-05, "loss": 1.3971, "step": 800000 }, { "epoch": 0.48, "eval_loss": 1.3476606607437134, "eval_runtime": 1101.7337, "eval_samples_per_second": 478.083, "eval_steps_per_second": 79.681, "step": 800000 }, { "epoch": 0.48, "learning_rate": 5.268125642739459e-05, "loss": 1.4049, "step": 800500 }, { "epoch": 0.48, "learning_rate": 5.267915646183403e-05, "loss": 1.4219, "step": 801000 }, { "epoch": 0.48, "learning_rate": 5.267705649627346e-05, "loss": 1.4239, "step": 801500 }, { "epoch": 0.48, "learning_rate": 5.267495653071289e-05, "loss": 1.4172, "step": 802000 }, { "epoch": 0.48, "learning_rate": 5.267285656515233e-05, "loss": 1.4057, "step": 802500 }, { "epoch": 0.48, "learning_rate": 5.2670756599591764e-05, "loss": 1.4081, "step": 803000 }, { "epoch": 0.48, "learning_rate": 5.26686566340312e-05, "loss": 1.4203, "step": 803500 }, { "epoch": 0.48, "learning_rate": 5.266655666847064e-05, "loss": 1.4057, "step": 804000 }, { "epoch": 0.48, "learning_rate": 5.266446090284119e-05, "loss": 1.415, "step": 804500 }, { "epoch": 0.48, "learning_rate": 5.2662360937280625e-05, "loss": 1.4025, "step": 805000 }, { "epoch": 0.48, "learning_rate": 5.2660260971720065e-05, "loss": 1.4336, "step": 805500 }, { "epoch": 0.48, "learning_rate": 5.26581610061595e-05, "loss": 1.3758, "step": 806000 }, { "epoch": 0.48, "learning_rate": 5.265606944046118e-05, "loss": 1.4085, "step": 806500 }, { "epoch": 0.48, "learning_rate": 5.265396947490061e-05, "loss": 1.402, "step": 807000 }, { "epoch": 0.48, "learning_rate": 5.2651869509340046e-05, "loss": 1.4093, "step": 807500 }, { "epoch": 0.48, "learning_rate": 5.2649769543779486e-05, "loss": 1.4118, "step": 808000 }, { "epoch": 0.48, "learning_rate": 5.264766957821891e-05, "loss": 1.4126, "step": 808500 }, { "epoch": 0.49, "learning_rate": 5.2645569612658346e-05, "loss": 1.4374, "step": 809000 }, { "epoch": 0.49, "learning_rate": 5.2643469647097786e-05, "loss": 1.4125, "step": 809500 }, { "epoch": 0.49, "learning_rate": 5.264136968153722e-05, "loss": 1.4105, "step": 810000 }, { "epoch": 0.49, "learning_rate": 5.263926971597665e-05, "loss": 1.4576, "step": 810500 }, { "epoch": 0.49, "learning_rate": 5.2637178150278334e-05, "loss": 1.423, "step": 811000 }, { "epoch": 0.49, "learning_rate": 5.2635078184717774e-05, "loss": 1.4182, "step": 811500 }, { "epoch": 0.49, "learning_rate": 5.263297821915721e-05, "loss": 1.4275, "step": 812000 }, { "epoch": 0.49, "learning_rate": 5.263087825359664e-05, "loss": 1.4098, "step": 812500 }, { "epoch": 0.49, "learning_rate": 5.262877828803608e-05, "loss": 1.4135, "step": 813000 }, { "epoch": 0.49, "learning_rate": 5.262667832247551e-05, "loss": 1.3993, "step": 813500 }, { "epoch": 0.49, "learning_rate": 5.262457835691494e-05, "loss": 1.4268, "step": 814000 }, { "epoch": 0.49, "learning_rate": 5.262247839135438e-05, "loss": 1.4105, "step": 814500 }, { "epoch": 0.49, "learning_rate": 5.262038262572494e-05, "loss": 1.4069, "step": 815000 }, { "epoch": 0.49, "learning_rate": 5.2618282660164375e-05, "loss": 1.3967, "step": 815500 }, { "epoch": 0.49, "learning_rate": 5.261618689453493e-05, "loss": 1.3951, "step": 816000 }, { "epoch": 0.49, "learning_rate": 5.261408692897436e-05, "loss": 1.4378, "step": 816500 }, { "epoch": 0.49, "learning_rate": 5.26119869634138e-05, "loss": 1.3984, "step": 817000 }, { "epoch": 0.49, "learning_rate": 5.2609886997853236e-05, "loss": 1.3945, "step": 817500 }, { "epoch": 0.49, "learning_rate": 5.260779123222379e-05, "loss": 1.4236, "step": 818000 }, { "epoch": 0.49, "learning_rate": 5.260569126666323e-05, "loss": 1.3945, "step": 818500 }, { "epoch": 0.49, "learning_rate": 5.260359130110266e-05, "loss": 1.4138, "step": 819000 }, { "epoch": 0.49, "learning_rate": 5.26014913355421e-05, "loss": 1.3774, "step": 819500 }, { "epoch": 0.49, "learning_rate": 5.259939136998154e-05, "loss": 1.4063, "step": 820000 }, { "epoch": 0.49, "learning_rate": 5.2597291404420964e-05, "loss": 1.4424, "step": 820500 }, { "epoch": 0.49, "learning_rate": 5.25951914388604e-05, "loss": 1.4055, "step": 821000 }, { "epoch": 0.49, "learning_rate": 5.259309147329984e-05, "loss": 1.3729, "step": 821500 }, { "epoch": 0.49, "learning_rate": 5.259099150773927e-05, "loss": 1.4052, "step": 822000 }, { "epoch": 0.49, "learning_rate": 5.258889574210983e-05, "loss": 1.4193, "step": 822500 }, { "epoch": 0.49, "learning_rate": 5.258679577654926e-05, "loss": 1.4445, "step": 823000 }, { "epoch": 0.49, "learning_rate": 5.25846958109887e-05, "loss": 1.4077, "step": 823500 }, { "epoch": 0.49, "learning_rate": 5.258260004535926e-05, "loss": 1.4075, "step": 824000 }, { "epoch": 0.49, "learning_rate": 5.258050007979869e-05, "loss": 1.3979, "step": 824500 }, { "epoch": 0.49, "learning_rate": 5.2578400114238125e-05, "loss": 1.4419, "step": 825000 }, { "epoch": 0.49, "learning_rate": 5.257630014867756e-05, "loss": 1.4302, "step": 825500 }, { "epoch": 0.5, "learning_rate": 5.257420018311699e-05, "loss": 1.4607, "step": 826000 }, { "epoch": 0.5, "learning_rate": 5.257210441748755e-05, "loss": 1.4044, "step": 826500 }, { "epoch": 0.5, "learning_rate": 5.257000445192699e-05, "loss": 1.414, "step": 827000 }, { "epoch": 0.5, "learning_rate": 5.2567908686297546e-05, "loss": 1.412, "step": 827500 }, { "epoch": 0.5, "learning_rate": 5.256580872073698e-05, "loss": 1.4107, "step": 828000 }, { "epoch": 0.5, "learning_rate": 5.256370875517641e-05, "loss": 1.4276, "step": 828500 }, { "epoch": 0.5, "learning_rate": 5.2561608789615854e-05, "loss": 1.4317, "step": 829000 }, { "epoch": 0.5, "learning_rate": 5.255951302398641e-05, "loss": 1.3977, "step": 829500 }, { "epoch": 0.5, "learning_rate": 5.255741305842584e-05, "loss": 1.4031, "step": 830000 }, { "epoch": 0.5, "learning_rate": 5.2555317292796394e-05, "loss": 1.4017, "step": 830500 }, { "epoch": 0.5, "learning_rate": 5.2553217327235834e-05, "loss": 1.4126, "step": 831000 }, { "epoch": 0.5, "learning_rate": 5.255111736167527e-05, "loss": 1.408, "step": 831500 }, { "epoch": 0.5, "learning_rate": 5.25490173961147e-05, "loss": 1.3863, "step": 832000 }, { "epoch": 0.5, "learning_rate": 5.254691743055414e-05, "loss": 1.4112, "step": 832500 }, { "epoch": 0.5, "learning_rate": 5.2544817464993575e-05, "loss": 1.3932, "step": 833000 }, { "epoch": 0.5, "learning_rate": 5.254271749943301e-05, "loss": 1.429, "step": 833500 }, { "epoch": 0.5, "learning_rate": 5.254061753387245e-05, "loss": 1.4309, "step": 834000 }, { "epoch": 0.5, "learning_rate": 5.253851756831188e-05, "loss": 1.4212, "step": 834500 }, { "epoch": 0.5, "learning_rate": 5.253641760275131e-05, "loss": 1.4225, "step": 835000 }, { "epoch": 0.5, "learning_rate": 5.253431763719075e-05, "loss": 1.3975, "step": 835500 }, { "epoch": 0.5, "learning_rate": 5.253221767163018e-05, "loss": 1.4305, "step": 836000 }, { "epoch": 0.5, "learning_rate": 5.2530117706069616e-05, "loss": 1.4204, "step": 836500 }, { "epoch": 0.5, "learning_rate": 5.2528017740509056e-05, "loss": 1.4108, "step": 837000 }, { "epoch": 0.5, "learning_rate": 5.252592197487961e-05, "loss": 1.3828, "step": 837500 }, { "epoch": 0.5, "learning_rate": 5.252382200931904e-05, "loss": 1.4142, "step": 838000 }, { "epoch": 0.5, "learning_rate": 5.252172204375848e-05, "loss": 1.4294, "step": 838500 }, { "epoch": 0.5, "learning_rate": 5.251962207819792e-05, "loss": 1.4154, "step": 839000 }, { "epoch": 0.5, "learning_rate": 5.251752211263735e-05, "loss": 1.4359, "step": 839500 }, { "epoch": 0.5, "learning_rate": 5.251542214707679e-05, "loss": 1.4021, "step": 840000 }, { "epoch": 0.5, "learning_rate": 5.2513326381447344e-05, "loss": 1.4131, "step": 840500 }, { "epoch": 0.5, "learning_rate": 5.251122641588678e-05, "loss": 1.4293, "step": 841000 }, { "epoch": 0.5, "learning_rate": 5.250912645032621e-05, "loss": 1.4133, "step": 841500 }, { "epoch": 0.5, "learning_rate": 5.250702648476565e-05, "loss": 1.441, "step": 842000 }, { "epoch": 0.51, "learning_rate": 5.2504926519205085e-05, "loss": 1.4341, "step": 842500 }, { "epoch": 0.51, "learning_rate": 5.250282655364452e-05, "loss": 1.4481, "step": 843000 }, { "epoch": 0.51, "learning_rate": 5.250073078801507e-05, "loss": 1.3972, "step": 843500 }, { "epoch": 0.51, "learning_rate": 5.249863082245451e-05, "loss": 1.428, "step": 844000 }, { "epoch": 0.51, "learning_rate": 5.2496530856893946e-05, "loss": 1.4243, "step": 844500 }, { "epoch": 0.51, "learning_rate": 5.249443089133338e-05, "loss": 1.4263, "step": 845000 }, { "epoch": 0.51, "learning_rate": 5.249233512570393e-05, "loss": 1.4228, "step": 845500 }, { "epoch": 0.51, "learning_rate": 5.249023516014337e-05, "loss": 1.3805, "step": 846000 }, { "epoch": 0.51, "learning_rate": 5.2488135194582806e-05, "loss": 1.4233, "step": 846500 }, { "epoch": 0.51, "learning_rate": 5.2486035229022247e-05, "loss": 1.4401, "step": 847000 }, { "epoch": 0.51, "learning_rate": 5.248393526346168e-05, "loss": 1.4353, "step": 847500 }, { "epoch": 0.51, "learning_rate": 5.2481839497832234e-05, "loss": 1.4342, "step": 848000 }, { "epoch": 0.51, "learning_rate": 5.247973953227167e-05, "loss": 1.4169, "step": 848500 }, { "epoch": 0.51, "learning_rate": 5.247763956671111e-05, "loss": 1.4189, "step": 849000 }, { "epoch": 0.51, "learning_rate": 5.247553960115054e-05, "loss": 1.3913, "step": 849500 }, { "epoch": 0.51, "learning_rate": 5.2473439635589974e-05, "loss": 1.4473, "step": 850000 }, { "epoch": 0.51, "learning_rate": 5.2471339670029414e-05, "loss": 1.4138, "step": 850500 }, { "epoch": 0.51, "learning_rate": 5.246924390439997e-05, "loss": 1.4174, "step": 851000 }, { "epoch": 0.51, "learning_rate": 5.24671439388394e-05, "loss": 1.4409, "step": 851500 }, { "epoch": 0.51, "learning_rate": 5.2465043973278835e-05, "loss": 1.4366, "step": 852000 }, { "epoch": 0.51, "learning_rate": 5.2462944007718275e-05, "loss": 1.3988, "step": 852500 }, { "epoch": 0.51, "learning_rate": 5.246084404215771e-05, "loss": 1.4026, "step": 853000 }, { "epoch": 0.51, "learning_rate": 5.245874407659714e-05, "loss": 1.4102, "step": 853500 }, { "epoch": 0.51, "learning_rate": 5.24566483109677e-05, "loss": 1.4261, "step": 854000 }, { "epoch": 0.51, "learning_rate": 5.2454548345407136e-05, "loss": 1.4146, "step": 854500 }, { "epoch": 0.51, "learning_rate": 5.245244837984657e-05, "loss": 1.4159, "step": 855000 }, { "epoch": 0.51, "learning_rate": 5.245034841428601e-05, "loss": 1.3839, "step": 855500 }, { "epoch": 0.51, "learning_rate": 5.244824844872544e-05, "loss": 1.4077, "step": 856000 }, { "epoch": 0.51, "learning_rate": 5.2446148483164876e-05, "loss": 1.3936, "step": 856500 }, { "epoch": 0.51, "learning_rate": 5.244404851760431e-05, "loss": 1.3762, "step": 857000 }, { "epoch": 0.51, "learning_rate": 5.2441948552043743e-05, "loss": 1.4056, "step": 857500 }, { "epoch": 0.51, "learning_rate": 5.243984858648318e-05, "loss": 1.4031, "step": 858000 }, { "epoch": 0.51, "learning_rate": 5.243775702078486e-05, "loss": 1.4041, "step": 858500 }, { "epoch": 0.52, "learning_rate": 5.243565705522429e-05, "loss": 1.4188, "step": 859000 }, { "epoch": 0.52, "learning_rate": 5.243355708966373e-05, "loss": 1.4126, "step": 859500 }, { "epoch": 0.52, "learning_rate": 5.2431457124103164e-05, "loss": 1.4103, "step": 860000 }, { "epoch": 0.52, "learning_rate": 5.24293571585426e-05, "loss": 1.4154, "step": 860500 }, { "epoch": 0.52, "learning_rate": 5.242725719298204e-05, "loss": 1.4055, "step": 861000 }, { "epoch": 0.52, "learning_rate": 5.242516142735259e-05, "loss": 1.3874, "step": 861500 }, { "epoch": 0.52, "learning_rate": 5.2423061461792025e-05, "loss": 1.387, "step": 862000 }, { "epoch": 0.52, "learning_rate": 5.2420961496231465e-05, "loss": 1.4242, "step": 862500 }, { "epoch": 0.52, "learning_rate": 5.24188615306709e-05, "loss": 1.4214, "step": 863000 }, { "epoch": 0.52, "learning_rate": 5.241676156511033e-05, "loss": 1.4212, "step": 863500 }, { "epoch": 0.52, "learning_rate": 5.241466159954977e-05, "loss": 1.4259, "step": 864000 }, { "epoch": 0.52, "learning_rate": 5.24125616339892e-05, "loss": 1.4294, "step": 864500 }, { "epoch": 0.52, "learning_rate": 5.241046586835976e-05, "loss": 1.3904, "step": 865000 }, { "epoch": 0.52, "learning_rate": 5.240836590279919e-05, "loss": 1.3872, "step": 865500 }, { "epoch": 0.52, "learning_rate": 5.240626593723863e-05, "loss": 1.415, "step": 866000 }, { "epoch": 0.52, "learning_rate": 5.240416597167806e-05, "loss": 1.4467, "step": 866500 }, { "epoch": 0.52, "learning_rate": 5.2402066006117493e-05, "loss": 1.3677, "step": 867000 }, { "epoch": 0.52, "learning_rate": 5.2399966040556934e-05, "loss": 1.3957, "step": 867500 }, { "epoch": 0.52, "learning_rate": 5.2397870274927494e-05, "loss": 1.4113, "step": 868000 }, { "epoch": 0.52, "learning_rate": 5.239577450929805e-05, "loss": 1.4239, "step": 868500 }, { "epoch": 0.52, "learning_rate": 5.239367454373748e-05, "loss": 1.3956, "step": 869000 }, { "epoch": 0.52, "learning_rate": 5.239157457817692e-05, "loss": 1.3879, "step": 869500 }, { "epoch": 0.52, "learning_rate": 5.2389474612616355e-05, "loss": 1.4304, "step": 870000 }, { "epoch": 0.52, "learning_rate": 5.238737464705579e-05, "loss": 1.4105, "step": 870500 }, { "epoch": 0.52, "learning_rate": 5.238527468149523e-05, "loss": 1.4289, "step": 871000 }, { "epoch": 0.52, "learning_rate": 5.2383174715934655e-05, "loss": 1.3973, "step": 871500 }, { "epoch": 0.52, "learning_rate": 5.238107475037409e-05, "loss": 1.3981, "step": 872000 }, { "epoch": 0.52, "learning_rate": 5.237897478481353e-05, "loss": 1.3973, "step": 872500 }, { "epoch": 0.52, "learning_rate": 5.237687481925296e-05, "loss": 1.4019, "step": 873000 }, { "epoch": 0.52, "learning_rate": 5.2374774853692396e-05, "loss": 1.4261, "step": 873500 }, { "epoch": 0.52, "learning_rate": 5.2372674888131836e-05, "loss": 1.4133, "step": 874000 }, { "epoch": 0.52, "learning_rate": 5.237058332243351e-05, "loss": 1.4314, "step": 874500 }, { "epoch": 0.52, "learning_rate": 5.236848335687295e-05, "loss": 1.3962, "step": 875000 }, { "epoch": 0.52, "learning_rate": 5.2366383391312383e-05, "loss": 1.4012, "step": 875500 }, { "epoch": 0.53, "learning_rate": 5.236428762568294e-05, "loss": 1.4301, "step": 876000 }, { "epoch": 0.53, "learning_rate": 5.236218766012238e-05, "loss": 1.4135, "step": 876500 }, { "epoch": 0.53, "learning_rate": 5.236008769456181e-05, "loss": 1.3894, "step": 877000 }, { "epoch": 0.53, "learning_rate": 5.2357987729001244e-05, "loss": 1.4161, "step": 877500 }, { "epoch": 0.53, "learning_rate": 5.2355887763440684e-05, "loss": 1.4214, "step": 878000 }, { "epoch": 0.53, "learning_rate": 5.235378779788011e-05, "loss": 1.4302, "step": 878500 }, { "epoch": 0.53, "learning_rate": 5.2351687832319544e-05, "loss": 1.4385, "step": 879000 }, { "epoch": 0.53, "learning_rate": 5.2349587866758985e-05, "loss": 1.4295, "step": 879500 }, { "epoch": 0.53, "learning_rate": 5.2347492101129545e-05, "loss": 1.4233, "step": 880000 }, { "epoch": 0.53, "learning_rate": 5.234539213556898e-05, "loss": 1.3985, "step": 880500 }, { "epoch": 0.53, "learning_rate": 5.2343292170008405e-05, "loss": 1.4143, "step": 881000 }, { "epoch": 0.53, "learning_rate": 5.2341192204447845e-05, "loss": 1.4033, "step": 881500 }, { "epoch": 0.53, "learning_rate": 5.233909223888728e-05, "loss": 1.3976, "step": 882000 }, { "epoch": 0.53, "learning_rate": 5.233699227332671e-05, "loss": 1.3902, "step": 882500 }, { "epoch": 0.53, "learning_rate": 5.233489230776615e-05, "loss": 1.4114, "step": 883000 }, { "epoch": 0.53, "learning_rate": 5.2332792342205586e-05, "loss": 1.4045, "step": 883500 }, { "epoch": 0.53, "learning_rate": 5.233069657657614e-05, "loss": 1.3903, "step": 884000 }, { "epoch": 0.53, "learning_rate": 5.232859661101558e-05, "loss": 1.4144, "step": 884500 }, { "epoch": 0.53, "learning_rate": 5.232649664545501e-05, "loss": 1.4088, "step": 885000 }, { "epoch": 0.53, "learning_rate": 5.232439667989445e-05, "loss": 1.364, "step": 885500 }, { "epoch": 0.53, "learning_rate": 5.2322300914265e-05, "loss": 1.3881, "step": 886000 }, { "epoch": 0.53, "learning_rate": 5.232020514863556e-05, "loss": 1.418, "step": 886500 }, { "epoch": 0.53, "learning_rate": 5.2318105183075e-05, "loss": 1.4143, "step": 887000 }, { "epoch": 0.53, "learning_rate": 5.2316005217514434e-05, "loss": 1.4366, "step": 887500 }, { "epoch": 0.53, "learning_rate": 5.231390525195386e-05, "loss": 1.4213, "step": 888000 }, { "epoch": 0.53, "learning_rate": 5.23118052863933e-05, "loss": 1.3788, "step": 888500 }, { "epoch": 0.53, "learning_rate": 5.2309705320832735e-05, "loss": 1.3851, "step": 889000 }, { "epoch": 0.53, "learning_rate": 5.230760535527217e-05, "loss": 1.3879, "step": 889500 }, { "epoch": 0.53, "learning_rate": 5.230550958964273e-05, "loss": 1.386, "step": 890000 }, { "epoch": 0.53, "learning_rate": 5.230340962408216e-05, "loss": 1.4073, "step": 890500 }, { "epoch": 0.53, "learning_rate": 5.2301309658521596e-05, "loss": 1.4101, "step": 891000 }, { "epoch": 0.53, "learning_rate": 5.2299209692961036e-05, "loss": 1.4105, "step": 891500 }, { "epoch": 0.53, "learning_rate": 5.229710972740047e-05, "loss": 1.4069, "step": 892000 }, { "epoch": 0.54, "learning_rate": 5.22950097618399e-05, "loss": 1.4192, "step": 892500 }, { "epoch": 0.54, "learning_rate": 5.229290979627934e-05, "loss": 1.3792, "step": 893000 }, { "epoch": 0.54, "learning_rate": 5.2290809830718776e-05, "loss": 1.3804, "step": 893500 }, { "epoch": 0.54, "learning_rate": 5.228871406508933e-05, "loss": 1.4097, "step": 894000 }, { "epoch": 0.54, "learning_rate": 5.2286614099528763e-05, "loss": 1.4191, "step": 894500 }, { "epoch": 0.54, "learning_rate": 5.2284514133968204e-05, "loss": 1.4171, "step": 895000 }, { "epoch": 0.54, "learning_rate": 5.228241416840764e-05, "loss": 1.4221, "step": 895500 }, { "epoch": 0.54, "learning_rate": 5.228032260270932e-05, "loss": 1.3626, "step": 896000 }, { "epoch": 0.54, "learning_rate": 5.227822263714875e-05, "loss": 1.3973, "step": 896500 }, { "epoch": 0.54, "learning_rate": 5.2276122671588184e-05, "loss": 1.3964, "step": 897000 }, { "epoch": 0.54, "learning_rate": 5.2274026905958745e-05, "loss": 1.4159, "step": 897500 }, { "epoch": 0.54, "learning_rate": 5.227192694039818e-05, "loss": 1.4249, "step": 898000 }, { "epoch": 0.54, "learning_rate": 5.226982697483761e-05, "loss": 1.4169, "step": 898500 }, { "epoch": 0.54, "learning_rate": 5.226772700927705e-05, "loss": 1.4336, "step": 899000 }, { "epoch": 0.54, "learning_rate": 5.2265631243647606e-05, "loss": 1.4122, "step": 899500 }, { "epoch": 0.54, "learning_rate": 5.226353127808704e-05, "loss": 1.4346, "step": 900000 }, { "epoch": 0.54, "eval_loss": 1.338213324546814, "eval_runtime": 1101.9411, "eval_samples_per_second": 477.993, "eval_steps_per_second": 79.666, "step": 900000 }, { "epoch": 0.54, "learning_rate": 5.226143131252647e-05, "loss": 1.4128, "step": 900500 }, { "epoch": 0.54, "learning_rate": 5.225933134696591e-05, "loss": 1.4096, "step": 901000 }, { "epoch": 0.54, "learning_rate": 5.2257231381405346e-05, "loss": 1.3951, "step": 901500 }, { "epoch": 0.54, "learning_rate": 5.225513141584478e-05, "loss": 1.4103, "step": 902000 }, { "epoch": 0.54, "learning_rate": 5.225303145028421e-05, "loss": 1.3779, "step": 902500 }, { "epoch": 0.54, "learning_rate": 5.2250935684654773e-05, "loss": 1.3767, "step": 903000 }, { "epoch": 0.54, "learning_rate": 5.224883571909421e-05, "loss": 1.3837, "step": 903500 }, { "epoch": 0.54, "learning_rate": 5.224673575353364e-05, "loss": 1.4177, "step": 904000 }, { "epoch": 0.54, "learning_rate": 5.224463578797308e-05, "loss": 1.3836, "step": 904500 }, { "epoch": 0.54, "learning_rate": 5.224253582241251e-05, "loss": 1.3993, "step": 905000 }, { "epoch": 0.54, "learning_rate": 5.224043585685195e-05, "loss": 1.4055, "step": 905500 }, { "epoch": 0.54, "learning_rate": 5.223833589129138e-05, "loss": 1.4267, "step": 906000 }, { "epoch": 0.54, "learning_rate": 5.2236235925730814e-05, "loss": 1.4204, "step": 906500 }, { "epoch": 0.54, "learning_rate": 5.2234135960170255e-05, "loss": 1.4353, "step": 907000 }, { "epoch": 0.54, "learning_rate": 5.223203599460969e-05, "loss": 1.3914, "step": 907500 }, { "epoch": 0.54, "learning_rate": 5.222993602904912e-05, "loss": 1.3852, "step": 908000 }, { "epoch": 0.54, "learning_rate": 5.222783606348856e-05, "loss": 1.3969, "step": 908500 }, { "epoch": 0.54, "learning_rate": 5.2225740297859115e-05, "loss": 1.4055, "step": 909000 }, { "epoch": 0.55, "learning_rate": 5.222364033229855e-05, "loss": 1.3707, "step": 909500 }, { "epoch": 0.55, "learning_rate": 5.222154036673798e-05, "loss": 1.3909, "step": 910000 }, { "epoch": 0.55, "learning_rate": 5.221944040117742e-05, "loss": 1.3987, "step": 910500 }, { "epoch": 0.55, "learning_rate": 5.2217340435616856e-05, "loss": 1.4216, "step": 911000 }, { "epoch": 0.55, "learning_rate": 5.221524047005629e-05, "loss": 1.4172, "step": 911500 }, { "epoch": 0.55, "learning_rate": 5.221314470442685e-05, "loss": 1.3801, "step": 912000 }, { "epoch": 0.55, "learning_rate": 5.221104473886628e-05, "loss": 1.4128, "step": 912500 }, { "epoch": 0.55, "learning_rate": 5.220894477330572e-05, "loss": 1.4, "step": 913000 }, { "epoch": 0.55, "learning_rate": 5.220684480774516e-05, "loss": 1.411, "step": 913500 }, { "epoch": 0.55, "learning_rate": 5.220474484218459e-05, "loss": 1.4058, "step": 914000 }, { "epoch": 0.55, "learning_rate": 5.2202649076555144e-05, "loss": 1.4101, "step": 914500 }, { "epoch": 0.55, "learning_rate": 5.220054911099458e-05, "loss": 1.406, "step": 915000 }, { "epoch": 0.55, "learning_rate": 5.219844914543402e-05, "loss": 1.4022, "step": 915500 }, { "epoch": 0.55, "learning_rate": 5.219635337980457e-05, "loss": 1.3702, "step": 916000 }, { "epoch": 0.55, "learning_rate": 5.2194253414244005e-05, "loss": 1.4078, "step": 916500 }, { "epoch": 0.55, "learning_rate": 5.219215344868344e-05, "loss": 1.3671, "step": 917000 }, { "epoch": 0.55, "learning_rate": 5.219005348312288e-05, "loss": 1.4178, "step": 917500 }, { "epoch": 0.55, "learning_rate": 5.218795351756231e-05, "loss": 1.4283, "step": 918000 }, { "epoch": 0.55, "learning_rate": 5.2185853552001745e-05, "loss": 1.3755, "step": 918500 }, { "epoch": 0.55, "learning_rate": 5.2183753586441186e-05, "loss": 1.4042, "step": 919000 }, { "epoch": 0.55, "learning_rate": 5.218165362088062e-05, "loss": 1.3998, "step": 919500 }, { "epoch": 0.55, "learning_rate": 5.217955365532005e-05, "loss": 1.4169, "step": 920000 }, { "epoch": 0.55, "learning_rate": 5.2177453689759486e-05, "loss": 1.4142, "step": 920500 }, { "epoch": 0.55, "learning_rate": 5.217535372419892e-05, "loss": 1.4231, "step": 921000 }, { "epoch": 0.55, "learning_rate": 5.217325375863836e-05, "loss": 1.393, "step": 921500 }, { "epoch": 0.55, "learning_rate": 5.217115799300891e-05, "loss": 1.3594, "step": 922000 }, { "epoch": 0.55, "learning_rate": 5.216905802744835e-05, "loss": 1.4134, "step": 922500 }, { "epoch": 0.55, "learning_rate": 5.216695806188778e-05, "loss": 1.3847, "step": 923000 }, { "epoch": 0.55, "learning_rate": 5.216485809632722e-05, "loss": 1.4215, "step": 923500 }, { "epoch": 0.55, "learning_rate": 5.216276233069778e-05, "loss": 1.3818, "step": 924000 }, { "epoch": 0.55, "learning_rate": 5.216066236513721e-05, "loss": 1.3955, "step": 924500 }, { "epoch": 0.55, "learning_rate": 5.215856659950777e-05, "loss": 1.3995, "step": 925000 }, { "epoch": 0.55, "learning_rate": 5.21564666339472e-05, "loss": 1.4, "step": 925500 }, { "epoch": 0.56, "learning_rate": 5.215436666838664e-05, "loss": 1.4416, "step": 926000 }, { "epoch": 0.56, "learning_rate": 5.2152266702826075e-05, "loss": 1.3824, "step": 926500 }, { "epoch": 0.56, "learning_rate": 5.215017093719663e-05, "loss": 1.3972, "step": 927000 }, { "epoch": 0.56, "learning_rate": 5.214807097163607e-05, "loss": 1.395, "step": 927500 }, { "epoch": 0.56, "learning_rate": 5.21459710060755e-05, "loss": 1.4043, "step": 928000 }, { "epoch": 0.56, "learning_rate": 5.2143871040514936e-05, "loss": 1.4319, "step": 928500 }, { "epoch": 0.56, "learning_rate": 5.2141771074954376e-05, "loss": 1.4073, "step": 929000 }, { "epoch": 0.56, "learning_rate": 5.213967530932493e-05, "loss": 1.4013, "step": 929500 }, { "epoch": 0.56, "learning_rate": 5.213757534376436e-05, "loss": 1.4218, "step": 930000 }, { "epoch": 0.56, "learning_rate": 5.2135475378203796e-05, "loss": 1.4094, "step": 930500 }, { "epoch": 0.56, "learning_rate": 5.213337961257435e-05, "loss": 1.4099, "step": 931000 }, { "epoch": 0.56, "learning_rate": 5.213127964701379e-05, "loss": 1.3959, "step": 931500 }, { "epoch": 0.56, "learning_rate": 5.2129179681453224e-05, "loss": 1.4247, "step": 932000 }, { "epoch": 0.56, "learning_rate": 5.212707971589266e-05, "loss": 1.4103, "step": 932500 }, { "epoch": 0.56, "learning_rate": 5.21249797503321e-05, "loss": 1.4104, "step": 933000 }, { "epoch": 0.56, "learning_rate": 5.212287978477153e-05, "loss": 1.3538, "step": 933500 }, { "epoch": 0.56, "learning_rate": 5.2120779819210964e-05, "loss": 1.4161, "step": 934000 }, { "epoch": 0.56, "learning_rate": 5.21186798536504e-05, "loss": 1.4061, "step": 934500 }, { "epoch": 0.56, "learning_rate": 5.211657988808983e-05, "loss": 1.3926, "step": 935000 }, { "epoch": 0.56, "learning_rate": 5.211448412246039e-05, "loss": 1.4, "step": 935500 }, { "epoch": 0.56, "learning_rate": 5.211238415689983e-05, "loss": 1.3897, "step": 936000 }, { "epoch": 0.56, "learning_rate": 5.211028419133926e-05, "loss": 1.3829, "step": 936500 }, { "epoch": 0.56, "learning_rate": 5.210818422577869e-05, "loss": 1.4139, "step": 937000 }, { "epoch": 0.56, "learning_rate": 5.210608426021813e-05, "loss": 1.4418, "step": 937500 }, { "epoch": 0.56, "learning_rate": 5.2103984294657566e-05, "loss": 1.4129, "step": 938000 }, { "epoch": 0.56, "learning_rate": 5.2101884329097e-05, "loss": 1.4144, "step": 938500 }, { "epoch": 0.56, "learning_rate": 5.209978436353644e-05, "loss": 1.3888, "step": 939000 }, { "epoch": 0.56, "learning_rate": 5.209768859790699e-05, "loss": 1.3965, "step": 939500 }, { "epoch": 0.56, "learning_rate": 5.2095588632346426e-05, "loss": 1.4176, "step": 940000 }, { "epoch": 0.56, "learning_rate": 5.209348866678586e-05, "loss": 1.3752, "step": 940500 }, { "epoch": 0.56, "learning_rate": 5.20913887012253e-05, "loss": 1.4028, "step": 941000 }, { "epoch": 0.56, "learning_rate": 5.2089288735664733e-05, "loss": 1.4089, "step": 941500 }, { "epoch": 0.56, "learning_rate": 5.208718877010417e-05, "loss": 1.4531, "step": 942000 }, { "epoch": 0.57, "learning_rate": 5.208508880454361e-05, "loss": 1.4071, "step": 942500 }, { "epoch": 0.57, "learning_rate": 5.208298883898304e-05, "loss": 1.3919, "step": 943000 }, { "epoch": 0.57, "learning_rate": 5.2080893073353594e-05, "loss": 1.3715, "step": 943500 }, { "epoch": 0.57, "learning_rate": 5.2078793107793034e-05, "loss": 1.4305, "step": 944000 }, { "epoch": 0.57, "learning_rate": 5.207669314223247e-05, "loss": 1.4003, "step": 944500 }, { "epoch": 0.57, "learning_rate": 5.20745931766719e-05, "loss": 1.3806, "step": 945000 }, { "epoch": 0.57, "learning_rate": 5.207249321111134e-05, "loss": 1.4218, "step": 945500 }, { "epoch": 0.57, "learning_rate": 5.2070397445481895e-05, "loss": 1.4129, "step": 946000 }, { "epoch": 0.57, "learning_rate": 5.206829747992133e-05, "loss": 1.3931, "step": 946500 }, { "epoch": 0.57, "learning_rate": 5.206619751436076e-05, "loss": 1.4204, "step": 947000 }, { "epoch": 0.57, "learning_rate": 5.20640975488002e-05, "loss": 1.431, "step": 947500 }, { "epoch": 0.57, "learning_rate": 5.2062001783170756e-05, "loss": 1.4018, "step": 948000 }, { "epoch": 0.57, "learning_rate": 5.205990601754131e-05, "loss": 1.3723, "step": 948500 }, { "epoch": 0.57, "learning_rate": 5.205781445184299e-05, "loss": 1.3877, "step": 949000 }, { "epoch": 0.57, "learning_rate": 5.2055714486282423e-05, "loss": 1.4077, "step": 949500 }, { "epoch": 0.57, "learning_rate": 5.205361452072186e-05, "loss": 1.4215, "step": 950000 }, { "epoch": 0.57, "learning_rate": 5.20515145551613e-05, "loss": 1.3674, "step": 950500 }, { "epoch": 0.57, "learning_rate": 5.204941458960073e-05, "loss": 1.4133, "step": 951000 }, { "epoch": 0.57, "learning_rate": 5.2047314624040164e-05, "loss": 1.4096, "step": 951500 }, { "epoch": 0.57, "learning_rate": 5.2045214658479604e-05, "loss": 1.3995, "step": 952000 }, { "epoch": 0.57, "learning_rate": 5.204311469291904e-05, "loss": 1.4209, "step": 952500 }, { "epoch": 0.57, "learning_rate": 5.2041014727358464e-05, "loss": 1.4311, "step": 953000 }, { "epoch": 0.57, "learning_rate": 5.2038914761797905e-05, "loss": 1.4108, "step": 953500 }, { "epoch": 0.57, "learning_rate": 5.203681479623734e-05, "loss": 1.3646, "step": 954000 }, { "epoch": 0.57, "learning_rate": 5.203471483067677e-05, "loss": 1.3945, "step": 954500 }, { "epoch": 0.57, "learning_rate": 5.203261486511621e-05, "loss": 1.3849, "step": 955000 }, { "epoch": 0.57, "learning_rate": 5.2030519099486765e-05, "loss": 1.3742, "step": 955500 }, { "epoch": 0.57, "learning_rate": 5.20284191339262e-05, "loss": 1.3872, "step": 956000 }, { "epoch": 0.57, "learning_rate": 5.202631916836564e-05, "loss": 1.4085, "step": 956500 }, { "epoch": 0.57, "learning_rate": 5.202421920280507e-05, "loss": 1.4047, "step": 957000 }, { "epoch": 0.57, "learning_rate": 5.2022119237244506e-05, "loss": 1.3947, "step": 957500 }, { "epoch": 0.57, "learning_rate": 5.2020019271683946e-05, "loss": 1.3956, "step": 958000 }, { "epoch": 0.57, "learning_rate": 5.201791930612338e-05, "loss": 1.421, "step": 958500 }, { "epoch": 0.57, "learning_rate": 5.201582354049393e-05, "loss": 1.3871, "step": 959000 }, { "epoch": 0.58, "learning_rate": 5.201372357493337e-05, "loss": 1.4278, "step": 959500 }, { "epoch": 0.58, "learning_rate": 5.201162360937281e-05, "loss": 1.3927, "step": 960000 }, { "epoch": 0.58, "learning_rate": 5.200952364381224e-05, "loss": 1.384, "step": 960500 }, { "epoch": 0.58, "learning_rate": 5.2007423678251674e-05, "loss": 1.4166, "step": 961000 }, { "epoch": 0.58, "learning_rate": 5.200532791262223e-05, "loss": 1.408, "step": 961500 }, { "epoch": 0.58, "learning_rate": 5.200322794706167e-05, "loss": 1.4496, "step": 962000 }, { "epoch": 0.58, "learning_rate": 5.20011279815011e-05, "loss": 1.4173, "step": 962500 }, { "epoch": 0.58, "learning_rate": 5.1999028015940535e-05, "loss": 1.3771, "step": 963000 }, { "epoch": 0.58, "learning_rate": 5.1996928050379975e-05, "loss": 1.4008, "step": 963500 }, { "epoch": 0.58, "learning_rate": 5.199482808481941e-05, "loss": 1.4063, "step": 964000 }, { "epoch": 0.58, "learning_rate": 5.199272811925885e-05, "loss": 1.4071, "step": 964500 }, { "epoch": 0.58, "learning_rate": 5.199062815369828e-05, "loss": 1.3668, "step": 965000 }, { "epoch": 0.58, "learning_rate": 5.1988532388068836e-05, "loss": 1.3878, "step": 965500 }, { "epoch": 0.58, "learning_rate": 5.198643242250827e-05, "loss": 1.4048, "step": 966000 }, { "epoch": 0.58, "learning_rate": 5.198433665687882e-05, "loss": 1.3872, "step": 966500 }, { "epoch": 0.58, "learning_rate": 5.198224089124938e-05, "loss": 1.4358, "step": 967000 }, { "epoch": 0.58, "learning_rate": 5.1980140925688816e-05, "loss": 1.3811, "step": 967500 }, { "epoch": 0.58, "learning_rate": 5.197804096012825e-05, "loss": 1.3875, "step": 968000 }, { "epoch": 0.58, "learning_rate": 5.197594099456768e-05, "loss": 1.3947, "step": 968500 }, { "epoch": 0.58, "learning_rate": 5.1973841029007124e-05, "loss": 1.4098, "step": 969000 }, { "epoch": 0.58, "learning_rate": 5.197174106344656e-05, "loss": 1.4383, "step": 969500 }, { "epoch": 0.58, "learning_rate": 5.196964109788599e-05, "loss": 1.4002, "step": 970000 }, { "epoch": 0.58, "learning_rate": 5.196754113232543e-05, "loss": 1.4456, "step": 970500 }, { "epoch": 0.58, "learning_rate": 5.1965441166764864e-05, "loss": 1.4, "step": 971000 }, { "epoch": 0.58, "learning_rate": 5.1963341201204304e-05, "loss": 1.377, "step": 971500 }, { "epoch": 0.58, "learning_rate": 5.196124123564374e-05, "loss": 1.3814, "step": 972000 }, { "epoch": 0.58, "learning_rate": 5.195914127008317e-05, "loss": 1.4064, "step": 972500 }, { "epoch": 0.58, "learning_rate": 5.1957041304522605e-05, "loss": 1.378, "step": 973000 }, { "epoch": 0.58, "learning_rate": 5.195494133896204e-05, "loss": 1.382, "step": 973500 }, { "epoch": 0.58, "learning_rate": 5.195284977326372e-05, "loss": 1.3616, "step": 974000 }, { "epoch": 0.58, "learning_rate": 5.195074980770315e-05, "loss": 1.4069, "step": 974500 }, { "epoch": 0.58, "learning_rate": 5.1948649842142586e-05, "loss": 1.3978, "step": 975000 }, { "epoch": 0.58, "learning_rate": 5.1946549876582026e-05, "loss": 1.3999, "step": 975500 }, { "epoch": 0.59, "learning_rate": 5.194444991102146e-05, "loss": 1.3601, "step": 976000 }, { "epoch": 0.59, "learning_rate": 5.194234994546089e-05, "loss": 1.3785, "step": 976500 }, { "epoch": 0.59, "learning_rate": 5.194025417983145e-05, "loss": 1.4267, "step": 977000 }, { "epoch": 0.59, "learning_rate": 5.1938154214270887e-05, "loss": 1.386, "step": 977500 }, { "epoch": 0.59, "learning_rate": 5.193605424871032e-05, "loss": 1.4166, "step": 978000 }, { "epoch": 0.59, "learning_rate": 5.193395428314976e-05, "loss": 1.3764, "step": 978500 }, { "epoch": 0.59, "learning_rate": 5.1931854317589194e-05, "loss": 1.3911, "step": 979000 }, { "epoch": 0.59, "learning_rate": 5.192975435202863e-05, "loss": 1.4092, "step": 979500 }, { "epoch": 0.59, "learning_rate": 5.192765438646806e-05, "loss": 1.3693, "step": 980000 }, { "epoch": 0.59, "learning_rate": 5.1925554420907494e-05, "loss": 1.4016, "step": 980500 }, { "epoch": 0.59, "learning_rate": 5.1923458655278054e-05, "loss": 1.4067, "step": 981000 }, { "epoch": 0.59, "learning_rate": 5.192135868971749e-05, "loss": 1.4018, "step": 981500 }, { "epoch": 0.59, "learning_rate": 5.191925872415693e-05, "loss": 1.3851, "step": 982000 }, { "epoch": 0.59, "learning_rate": 5.1917158758596355e-05, "loss": 1.4042, "step": 982500 }, { "epoch": 0.59, "learning_rate": 5.191505879303579e-05, "loss": 1.412, "step": 983000 }, { "epoch": 0.59, "learning_rate": 5.191295882747523e-05, "loss": 1.3879, "step": 983500 }, { "epoch": 0.59, "learning_rate": 5.191086306184579e-05, "loss": 1.4056, "step": 984000 }, { "epoch": 0.59, "learning_rate": 5.190876309628522e-05, "loss": 1.3936, "step": 984500 }, { "epoch": 0.59, "learning_rate": 5.1906663130724656e-05, "loss": 1.3976, "step": 985000 }, { "epoch": 0.59, "learning_rate": 5.190456316516409e-05, "loss": 1.3847, "step": 985500 }, { "epoch": 0.59, "learning_rate": 5.190246319960352e-05, "loss": 1.4073, "step": 986000 }, { "epoch": 0.59, "learning_rate": 5.190036323404296e-05, "loss": 1.4018, "step": 986500 }, { "epoch": 0.59, "learning_rate": 5.1898263268482396e-05, "loss": 1.4115, "step": 987000 }, { "epoch": 0.59, "learning_rate": 5.189616330292183e-05, "loss": 1.3592, "step": 987500 }, { "epoch": 0.59, "learning_rate": 5.1894067537292383e-05, "loss": 1.389, "step": 988000 }, { "epoch": 0.59, "learning_rate": 5.1891967571731824e-05, "loss": 1.3925, "step": 988500 }, { "epoch": 0.59, "learning_rate": 5.188986760617126e-05, "loss": 1.3924, "step": 989000 }, { "epoch": 0.59, "learning_rate": 5.188777184054181e-05, "loss": 1.3927, "step": 989500 }, { "epoch": 0.59, "learning_rate": 5.1885671874981244e-05, "loss": 1.3955, "step": 990000 }, { "epoch": 0.59, "learning_rate": 5.1883571909420684e-05, "loss": 1.3992, "step": 990500 }, { "epoch": 0.59, "learning_rate": 5.188147194386012e-05, "loss": 1.4168, "step": 991000 }, { "epoch": 0.59, "learning_rate": 5.187937197829955e-05, "loss": 1.3828, "step": 991500 }, { "epoch": 0.59, "learning_rate": 5.187727621267011e-05, "loss": 1.4009, "step": 992000 }, { "epoch": 0.6, "learning_rate": 5.1875176247109545e-05, "loss": 1.4023, "step": 992500 }, { "epoch": 0.6, "learning_rate": 5.187307628154898e-05, "loss": 1.3965, "step": 993000 }, { "epoch": 0.6, "learning_rate": 5.187097631598842e-05, "loss": 1.3953, "step": 993500 }, { "epoch": 0.6, "learning_rate": 5.186888055035898e-05, "loss": 1.3709, "step": 994000 }, { "epoch": 0.6, "learning_rate": 5.1866780584798406e-05, "loss": 1.3969, "step": 994500 }, { "epoch": 0.6, "learning_rate": 5.186468061923784e-05, "loss": 1.3666, "step": 995000 }, { "epoch": 0.6, "learning_rate": 5.186258065367728e-05, "loss": 1.3968, "step": 995500 }, { "epoch": 0.6, "learning_rate": 5.186048068811671e-05, "loss": 1.4132, "step": 996000 }, { "epoch": 0.6, "learning_rate": 5.1858380722556146e-05, "loss": 1.3949, "step": 996500 }, { "epoch": 0.6, "learning_rate": 5.185628075699559e-05, "loss": 1.3795, "step": 997000 }, { "epoch": 0.6, "learning_rate": 5.185418499136614e-05, "loss": 1.3726, "step": 997500 }, { "epoch": 0.6, "learning_rate": 5.18520892257367e-05, "loss": 1.3921, "step": 998000 }, { "epoch": 0.6, "learning_rate": 5.1849989260176134e-05, "loss": 1.3885, "step": 998500 }, { "epoch": 0.6, "learning_rate": 5.184788929461557e-05, "loss": 1.3876, "step": 999000 }, { "epoch": 0.6, "learning_rate": 5.1845789329055e-05, "loss": 1.3769, "step": 999500 }, { "epoch": 0.6, "learning_rate": 5.1843689363494434e-05, "loss": 1.4139, "step": 1000000 }, { "epoch": 0.6, "eval_loss": 1.3278173208236694, "eval_runtime": 1111.6546, "eval_samples_per_second": 473.816, "eval_steps_per_second": 78.97, "step": 1000000 }, { "epoch": 0.6, "learning_rate": 5.1841589397933875e-05, "loss": 1.3798, "step": 1000500 }, { "epoch": 0.6, "learning_rate": 5.183948943237331e-05, "loss": 1.4033, "step": 1001000 }, { "epoch": 0.6, "learning_rate": 5.183738946681274e-05, "loss": 1.4, "step": 1001500 }, { "epoch": 0.6, "learning_rate": 5.183528950125218e-05, "loss": 1.4045, "step": 1002000 }, { "epoch": 0.6, "learning_rate": 5.1833193735622735e-05, "loss": 1.4086, "step": 1002500 }, { "epoch": 0.6, "learning_rate": 5.183109377006217e-05, "loss": 1.3815, "step": 1003000 }, { "epoch": 0.6, "learning_rate": 5.182899800443273e-05, "loss": 1.3827, "step": 1003500 }, { "epoch": 0.6, "learning_rate": 5.1826898038872156e-05, "loss": 1.3701, "step": 1004000 }, { "epoch": 0.6, "learning_rate": 5.1824798073311596e-05, "loss": 1.3901, "step": 1004500 }, { "epoch": 0.6, "learning_rate": 5.182269810775103e-05, "loss": 1.3954, "step": 1005000 }, { "epoch": 0.6, "learning_rate": 5.182059814219046e-05, "loss": 1.3861, "step": 1005500 }, { "epoch": 0.6, "learning_rate": 5.18184981766299e-05, "loss": 1.385, "step": 1006000 }, { "epoch": 0.6, "learning_rate": 5.181639821106934e-05, "loss": 1.3888, "step": 1006500 }, { "epoch": 0.6, "learning_rate": 5.181429824550877e-05, "loss": 1.3929, "step": 1007000 }, { "epoch": 0.6, "learning_rate": 5.181220247987933e-05, "loss": 1.3979, "step": 1007500 }, { "epoch": 0.6, "learning_rate": 5.181010671424989e-05, "loss": 1.4017, "step": 1008000 }, { "epoch": 0.6, "learning_rate": 5.1808010948620445e-05, "loss": 1.3863, "step": 1008500 }, { "epoch": 0.6, "learning_rate": 5.180591098305988e-05, "loss": 1.4291, "step": 1009000 }, { "epoch": 0.61, "learning_rate": 5.180381101749931e-05, "loss": 1.4162, "step": 1009500 }, { "epoch": 0.61, "learning_rate": 5.180171105193875e-05, "loss": 1.3728, "step": 1010000 }, { "epoch": 0.61, "learning_rate": 5.1799611086378185e-05, "loss": 1.3718, "step": 1010500 }, { "epoch": 0.61, "learning_rate": 5.179751112081761e-05, "loss": 1.412, "step": 1011000 }, { "epoch": 0.61, "learning_rate": 5.179541535518817e-05, "loss": 1.3689, "step": 1011500 }, { "epoch": 0.61, "learning_rate": 5.179331538962761e-05, "loss": 1.3711, "step": 1012000 }, { "epoch": 0.61, "learning_rate": 5.1791215424067046e-05, "loss": 1.3999, "step": 1012500 }, { "epoch": 0.61, "learning_rate": 5.178911545850648e-05, "loss": 1.3754, "step": 1013000 }, { "epoch": 0.61, "learning_rate": 5.178701549294591e-05, "loss": 1.4067, "step": 1013500 }, { "epoch": 0.61, "learning_rate": 5.1784915527385346e-05, "loss": 1.4159, "step": 1014000 }, { "epoch": 0.61, "learning_rate": 5.1782815561824786e-05, "loss": 1.3835, "step": 1014500 }, { "epoch": 0.61, "learning_rate": 5.178071559626422e-05, "loss": 1.3775, "step": 1015000 }, { "epoch": 0.61, "learning_rate": 5.177861563070365e-05, "loss": 1.3898, "step": 1015500 }, { "epoch": 0.61, "learning_rate": 5.177651986507421e-05, "loss": 1.3919, "step": 1016000 }, { "epoch": 0.61, "learning_rate": 5.177441989951365e-05, "loss": 1.3715, "step": 1016500 }, { "epoch": 0.61, "learning_rate": 5.177231993395308e-05, "loss": 1.3596, "step": 1017000 }, { "epoch": 0.61, "learning_rate": 5.1770219968392514e-05, "loss": 1.3755, "step": 1017500 }, { "epoch": 0.61, "learning_rate": 5.1768120002831954e-05, "loss": 1.3783, "step": 1018000 }, { "epoch": 0.61, "learning_rate": 5.176602003727139e-05, "loss": 1.404, "step": 1018500 }, { "epoch": 0.61, "learning_rate": 5.176392007171082e-05, "loss": 1.4128, "step": 1019000 }, { "epoch": 0.61, "learning_rate": 5.176182010615026e-05, "loss": 1.4049, "step": 1019500 }, { "epoch": 0.61, "learning_rate": 5.1759724340520815e-05, "loss": 1.4072, "step": 1020000 }, { "epoch": 0.61, "learning_rate": 5.175762857489137e-05, "loss": 1.3885, "step": 1020500 }, { "epoch": 0.61, "learning_rate": 5.17555286093308e-05, "loss": 1.3825, "step": 1021000 }, { "epoch": 0.61, "learning_rate": 5.175342864377024e-05, "loss": 1.4055, "step": 1021500 }, { "epoch": 0.61, "learning_rate": 5.1751328678209676e-05, "loss": 1.3783, "step": 1022000 }, { "epoch": 0.61, "learning_rate": 5.174922871264911e-05, "loss": 1.3786, "step": 1022500 }, { "epoch": 0.61, "learning_rate": 5.174712874708855e-05, "loss": 1.4117, "step": 1023000 }, { "epoch": 0.61, "learning_rate": 5.174502878152798e-05, "loss": 1.3867, "step": 1023500 }, { "epoch": 0.61, "learning_rate": 5.1742928815967416e-05, "loss": 1.4044, "step": 1024000 }, { "epoch": 0.61, "learning_rate": 5.174082885040686e-05, "loss": 1.3679, "step": 1024500 }, { "epoch": 0.61, "learning_rate": 5.173872888484629e-05, "loss": 1.4149, "step": 1025000 }, { "epoch": 0.61, "learning_rate": 5.1736633119216844e-05, "loss": 1.3592, "step": 1025500 }, { "epoch": 0.62, "learning_rate": 5.173453315365628e-05, "loss": 1.408, "step": 1026000 }, { "epoch": 0.62, "learning_rate": 5.173243318809572e-05, "loss": 1.4198, "step": 1026500 }, { "epoch": 0.62, "learning_rate": 5.173033322253515e-05, "loss": 1.3621, "step": 1027000 }, { "epoch": 0.62, "learning_rate": 5.1728233256974584e-05, "loss": 1.3688, "step": 1027500 }, { "epoch": 0.62, "learning_rate": 5.1726133291414025e-05, "loss": 1.4138, "step": 1028000 }, { "epoch": 0.62, "learning_rate": 5.172403332585345e-05, "loss": 1.37, "step": 1028500 }, { "epoch": 0.62, "learning_rate": 5.1721933360292885e-05, "loss": 1.402, "step": 1029000 }, { "epoch": 0.62, "learning_rate": 5.171983759466345e-05, "loss": 1.4203, "step": 1029500 }, { "epoch": 0.62, "learning_rate": 5.1717737629102885e-05, "loss": 1.3896, "step": 1030000 }, { "epoch": 0.62, "learning_rate": 5.171563766354232e-05, "loss": 1.3662, "step": 1030500 }, { "epoch": 0.62, "learning_rate": 5.171354189791287e-05, "loss": 1.44, "step": 1031000 }, { "epoch": 0.62, "learning_rate": 5.171144193235231e-05, "loss": 1.384, "step": 1031500 }, { "epoch": 0.62, "learning_rate": 5.1709341966791746e-05, "loss": 1.3914, "step": 1032000 }, { "epoch": 0.62, "learning_rate": 5.170724200123118e-05, "loss": 1.3873, "step": 1032500 }, { "epoch": 0.62, "learning_rate": 5.170514203567062e-05, "loss": 1.4175, "step": 1033000 }, { "epoch": 0.62, "learning_rate": 5.1703042070110046e-05, "loss": 1.3952, "step": 1033500 }, { "epoch": 0.62, "learning_rate": 5.170094210454948e-05, "loss": 1.4053, "step": 1034000 }, { "epoch": 0.62, "learning_rate": 5.169884213898892e-05, "loss": 1.3834, "step": 1034500 }, { "epoch": 0.62, "learning_rate": 5.169674637335948e-05, "loss": 1.4075, "step": 1035000 }, { "epoch": 0.62, "learning_rate": 5.1694646407798914e-05, "loss": 1.3729, "step": 1035500 }, { "epoch": 0.62, "learning_rate": 5.169254644223835e-05, "loss": 1.3867, "step": 1036000 }, { "epoch": 0.62, "learning_rate": 5.169044647667778e-05, "loss": 1.4124, "step": 1036500 }, { "epoch": 0.62, "learning_rate": 5.1688346511117214e-05, "loss": 1.4109, "step": 1037000 }, { "epoch": 0.62, "learning_rate": 5.1686246545556654e-05, "loss": 1.4018, "step": 1037500 }, { "epoch": 0.62, "learning_rate": 5.168414657999609e-05, "loss": 1.402, "step": 1038000 }, { "epoch": 0.62, "learning_rate": 5.168204661443552e-05, "loss": 1.4015, "step": 1038500 }, { "epoch": 0.62, "learning_rate": 5.1679950848806075e-05, "loss": 1.366, "step": 1039000 }, { "epoch": 0.62, "learning_rate": 5.1677850883245515e-05, "loss": 1.4079, "step": 1039500 }, { "epoch": 0.62, "learning_rate": 5.167575091768495e-05, "loss": 1.368, "step": 1040000 }, { "epoch": 0.62, "learning_rate": 5.167365095212438e-05, "loss": 1.4189, "step": 1040500 }, { "epoch": 0.62, "learning_rate": 5.167155098656382e-05, "loss": 1.3794, "step": 1041000 }, { "epoch": 0.62, "learning_rate": 5.1669455220934376e-05, "loss": 1.3755, "step": 1041500 }, { "epoch": 0.62, "learning_rate": 5.166735525537381e-05, "loss": 1.3377, "step": 1042000 }, { "epoch": 0.63, "learning_rate": 5.166525528981324e-05, "loss": 1.3978, "step": 1042500 }, { "epoch": 0.63, "learning_rate": 5.166315532425268e-05, "loss": 1.4078, "step": 1043000 }, { "epoch": 0.63, "learning_rate": 5.1661055358692117e-05, "loss": 1.4194, "step": 1043500 }, { "epoch": 0.63, "learning_rate": 5.165895539313155e-05, "loss": 1.368, "step": 1044000 }, { "epoch": 0.63, "learning_rate": 5.165685542757099e-05, "loss": 1.3645, "step": 1044500 }, { "epoch": 0.63, "learning_rate": 5.165476386187267e-05, "loss": 1.3715, "step": 1045000 }, { "epoch": 0.63, "learning_rate": 5.16526638963121e-05, "loss": 1.3751, "step": 1045500 }, { "epoch": 0.63, "learning_rate": 5.165056393075153e-05, "loss": 1.4012, "step": 1046000 }, { "epoch": 0.63, "learning_rate": 5.164846396519097e-05, "loss": 1.3949, "step": 1046500 }, { "epoch": 0.63, "learning_rate": 5.1646363999630405e-05, "loss": 1.3993, "step": 1047000 }, { "epoch": 0.63, "learning_rate": 5.164426403406984e-05, "loss": 1.3863, "step": 1047500 }, { "epoch": 0.63, "learning_rate": 5.164216406850928e-05, "loss": 1.4035, "step": 1048000 }, { "epoch": 0.63, "learning_rate": 5.164006410294871e-05, "loss": 1.3584, "step": 1048500 }, { "epoch": 0.63, "learning_rate": 5.1637968337319265e-05, "loss": 1.3973, "step": 1049000 }, { "epoch": 0.63, "learning_rate": 5.16358683717587e-05, "loss": 1.3706, "step": 1049500 }, { "epoch": 0.63, "learning_rate": 5.163376840619814e-05, "loss": 1.4108, "step": 1050000 }, { "epoch": 0.63, "learning_rate": 5.163166844063757e-05, "loss": 1.3854, "step": 1050500 }, { "epoch": 0.63, "learning_rate": 5.1629568475077006e-05, "loss": 1.3789, "step": 1051000 }, { "epoch": 0.63, "learning_rate": 5.1627468509516446e-05, "loss": 1.3949, "step": 1051500 }, { "epoch": 0.63, "learning_rate": 5.162536854395588e-05, "loss": 1.4109, "step": 1052000 }, { "epoch": 0.63, "learning_rate": 5.162326857839531e-05, "loss": 1.3556, "step": 1052500 }, { "epoch": 0.63, "learning_rate": 5.1621168612834746e-05, "loss": 1.4084, "step": 1053000 }, { "epoch": 0.63, "learning_rate": 5.161907284720531e-05, "loss": 1.3938, "step": 1053500 }, { "epoch": 0.63, "learning_rate": 5.161697288164474e-05, "loss": 1.3817, "step": 1054000 }, { "epoch": 0.63, "learning_rate": 5.1614877116015294e-05, "loss": 1.3785, "step": 1054500 }, { "epoch": 0.63, "learning_rate": 5.1612777150454734e-05, "loss": 1.3916, "step": 1055000 }, { "epoch": 0.63, "learning_rate": 5.161067718489417e-05, "loss": 1.4128, "step": 1055500 }, { "epoch": 0.63, "learning_rate": 5.16085772193336e-05, "loss": 1.3998, "step": 1056000 }, { "epoch": 0.63, "learning_rate": 5.160647725377304e-05, "loss": 1.4013, "step": 1056500 }, { "epoch": 0.63, "learning_rate": 5.1604381488143595e-05, "loss": 1.3908, "step": 1057000 }, { "epoch": 0.63, "learning_rate": 5.160228152258303e-05, "loss": 1.3517, "step": 1057500 }, { "epoch": 0.63, "learning_rate": 5.160018155702246e-05, "loss": 1.3747, "step": 1058000 }, { "epoch": 0.63, "learning_rate": 5.15980815914619e-05, "loss": 1.4061, "step": 1058500 }, { "epoch": 0.63, "learning_rate": 5.1595981625901335e-05, "loss": 1.3756, "step": 1059000 }, { "epoch": 0.64, "learning_rate": 5.159388166034077e-05, "loss": 1.3746, "step": 1059500 }, { "epoch": 0.64, "learning_rate": 5.159178169478021e-05, "loss": 1.3892, "step": 1060000 }, { "epoch": 0.64, "learning_rate": 5.158968592915076e-05, "loss": 1.4025, "step": 1060500 }, { "epoch": 0.64, "learning_rate": 5.1587585963590196e-05, "loss": 1.3828, "step": 1061000 }, { "epoch": 0.64, "learning_rate": 5.1585485998029636e-05, "loss": 1.3817, "step": 1061500 }, { "epoch": 0.64, "learning_rate": 5.158338603246907e-05, "loss": 1.3874, "step": 1062000 }, { "epoch": 0.64, "learning_rate": 5.1581286066908497e-05, "loss": 1.4019, "step": 1062500 }, { "epoch": 0.64, "learning_rate": 5.157918610134794e-05, "loss": 1.3728, "step": 1063000 }, { "epoch": 0.64, "learning_rate": 5.157708613578737e-05, "loss": 1.4161, "step": 1063500 }, { "epoch": 0.64, "learning_rate": 5.1574986170226804e-05, "loss": 1.3771, "step": 1064000 }, { "epoch": 0.64, "learning_rate": 5.1572890404597364e-05, "loss": 1.413, "step": 1064500 }, { "epoch": 0.64, "learning_rate": 5.157079463896792e-05, "loss": 1.3853, "step": 1065000 }, { "epoch": 0.64, "learning_rate": 5.156869467340736e-05, "loss": 1.3814, "step": 1065500 }, { "epoch": 0.64, "learning_rate": 5.156659470784679e-05, "loss": 1.3648, "step": 1066000 }, { "epoch": 0.64, "learning_rate": 5.1564494742286225e-05, "loss": 1.3922, "step": 1066500 }, { "epoch": 0.64, "learning_rate": 5.1562394776725665e-05, "loss": 1.3924, "step": 1067000 }, { "epoch": 0.64, "learning_rate": 5.156029481116509e-05, "loss": 1.3848, "step": 1067500 }, { "epoch": 0.64, "learning_rate": 5.155819904553565e-05, "loss": 1.399, "step": 1068000 }, { "epoch": 0.64, "learning_rate": 5.155609907997509e-05, "loss": 1.3675, "step": 1068500 }, { "epoch": 0.64, "learning_rate": 5.1553999114414526e-05, "loss": 1.4029, "step": 1069000 }, { "epoch": 0.64, "learning_rate": 5.155189914885396e-05, "loss": 1.3658, "step": 1069500 }, { "epoch": 0.64, "learning_rate": 5.154979918329339e-05, "loss": 1.3809, "step": 1070000 }, { "epoch": 0.64, "learning_rate": 5.1547699217732826e-05, "loss": 1.3865, "step": 1070500 }, { "epoch": 0.64, "learning_rate": 5.154559925217226e-05, "loss": 1.3994, "step": 1071000 }, { "epoch": 0.64, "learning_rate": 5.154350348654282e-05, "loss": 1.4062, "step": 1071500 }, { "epoch": 0.64, "learning_rate": 5.154140352098226e-05, "loss": 1.3787, "step": 1072000 }, { "epoch": 0.64, "learning_rate": 5.153930355542169e-05, "loss": 1.3769, "step": 1072500 }, { "epoch": 0.64, "learning_rate": 5.153720358986112e-05, "loss": 1.3793, "step": 1073000 }, { "epoch": 0.64, "learning_rate": 5.153510362430056e-05, "loss": 1.3987, "step": 1073500 }, { "epoch": 0.64, "learning_rate": 5.1533003658739994e-05, "loss": 1.3735, "step": 1074000 }, { "epoch": 0.64, "learning_rate": 5.153090369317943e-05, "loss": 1.3841, "step": 1074500 }, { "epoch": 0.64, "learning_rate": 5.152880372761887e-05, "loss": 1.3733, "step": 1075000 }, { "epoch": 0.64, "learning_rate": 5.15267037620583e-05, "loss": 1.379, "step": 1075500 }, { "epoch": 0.65, "learning_rate": 5.1524607996428855e-05, "loss": 1.3643, "step": 1076000 }, { "epoch": 0.65, "learning_rate": 5.1522508030868295e-05, "loss": 1.4039, "step": 1076500 }, { "epoch": 0.65, "learning_rate": 5.152041226523885e-05, "loss": 1.422, "step": 1077000 }, { "epoch": 0.65, "learning_rate": 5.151831229967828e-05, "loss": 1.3997, "step": 1077500 }, { "epoch": 0.65, "learning_rate": 5.1516212334117715e-05, "loss": 1.3428, "step": 1078000 }, { "epoch": 0.65, "learning_rate": 5.1514112368557156e-05, "loss": 1.4137, "step": 1078500 }, { "epoch": 0.65, "learning_rate": 5.1512016602927716e-05, "loss": 1.3951, "step": 1079000 }, { "epoch": 0.65, "learning_rate": 5.150991663736714e-05, "loss": 1.4016, "step": 1079500 }, { "epoch": 0.65, "learning_rate": 5.1507816671806576e-05, "loss": 1.4085, "step": 1080000 }, { "epoch": 0.65, "learning_rate": 5.1505716706246016e-05, "loss": 1.3827, "step": 1080500 }, { "epoch": 0.65, "learning_rate": 5.150361674068545e-05, "loss": 1.349, "step": 1081000 }, { "epoch": 0.65, "learning_rate": 5.150151677512488e-05, "loss": 1.3886, "step": 1081500 }, { "epoch": 0.65, "learning_rate": 5.1499416809564324e-05, "loss": 1.414, "step": 1082000 }, { "epoch": 0.65, "learning_rate": 5.149732104393488e-05, "loss": 1.4363, "step": 1082500 }, { "epoch": 0.65, "learning_rate": 5.149522107837431e-05, "loss": 1.369, "step": 1083000 }, { "epoch": 0.65, "learning_rate": 5.149312111281375e-05, "loss": 1.3952, "step": 1083500 }, { "epoch": 0.65, "learning_rate": 5.1491021147253184e-05, "loss": 1.3957, "step": 1084000 }, { "epoch": 0.65, "learning_rate": 5.148892118169262e-05, "loss": 1.4046, "step": 1084500 }, { "epoch": 0.65, "learning_rate": 5.148682121613206e-05, "loss": 1.3808, "step": 1085000 }, { "epoch": 0.65, "learning_rate": 5.148472545050261e-05, "loss": 1.3724, "step": 1085500 }, { "epoch": 0.65, "learning_rate": 5.1482625484942045e-05, "loss": 1.402, "step": 1086000 }, { "epoch": 0.65, "learning_rate": 5.148052551938148e-05, "loss": 1.4139, "step": 1086500 }, { "epoch": 0.65, "learning_rate": 5.147842555382092e-05, "loss": 1.3749, "step": 1087000 }, { "epoch": 0.65, "learning_rate": 5.147632558826035e-05, "loss": 1.4002, "step": 1087500 }, { "epoch": 0.65, "learning_rate": 5.1474225622699786e-05, "loss": 1.3736, "step": 1088000 }, { "epoch": 0.65, "learning_rate": 5.1472125657139226e-05, "loss": 1.3747, "step": 1088500 }, { "epoch": 0.65, "learning_rate": 5.147002569157866e-05, "loss": 1.3658, "step": 1089000 }, { "epoch": 0.65, "learning_rate": 5.146792992594921e-05, "loss": 1.3934, "step": 1089500 }, { "epoch": 0.65, "learning_rate": 5.146582996038865e-05, "loss": 1.3778, "step": 1090000 }, { "epoch": 0.65, "learning_rate": 5.146373419475921e-05, "loss": 1.3887, "step": 1090500 }, { "epoch": 0.65, "learning_rate": 5.146163422919864e-05, "loss": 1.3824, "step": 1091000 }, { "epoch": 0.65, "learning_rate": 5.1459534263638074e-05, "loss": 1.41, "step": 1091500 }, { "epoch": 0.65, "learning_rate": 5.1457434298077514e-05, "loss": 1.4074, "step": 1092000 }, { "epoch": 0.65, "learning_rate": 5.145533853244807e-05, "loss": 1.4086, "step": 1092500 }, { "epoch": 0.66, "learning_rate": 5.14532385668875e-05, "loss": 1.3811, "step": 1093000 }, { "epoch": 0.66, "learning_rate": 5.1451138601326934e-05, "loss": 1.3733, "step": 1093500 }, { "epoch": 0.66, "learning_rate": 5.1449038635766375e-05, "loss": 1.4263, "step": 1094000 }, { "epoch": 0.66, "learning_rate": 5.144693867020581e-05, "loss": 1.3807, "step": 1094500 }, { "epoch": 0.66, "learning_rate": 5.144483870464524e-05, "loss": 1.3812, "step": 1095000 }, { "epoch": 0.66, "learning_rate": 5.144273873908468e-05, "loss": 1.3496, "step": 1095500 }, { "epoch": 0.66, "learning_rate": 5.1440638773524115e-05, "loss": 1.3953, "step": 1096000 }, { "epoch": 0.66, "learning_rate": 5.143853880796355e-05, "loss": 1.3786, "step": 1096500 }, { "epoch": 0.66, "learning_rate": 5.143644304233411e-05, "loss": 1.3497, "step": 1097000 }, { "epoch": 0.66, "learning_rate": 5.143434307677354e-05, "loss": 1.3816, "step": 1097500 }, { "epoch": 0.66, "learning_rate": 5.1432243111212976e-05, "loss": 1.3858, "step": 1098000 }, { "epoch": 0.66, "learning_rate": 5.1430143145652416e-05, "loss": 1.3926, "step": 1098500 }, { "epoch": 0.66, "learning_rate": 5.142804318009184e-05, "loss": 1.4155, "step": 1099000 }, { "epoch": 0.66, "learning_rate": 5.14259474144624e-05, "loss": 1.378, "step": 1099500 }, { "epoch": 0.66, "learning_rate": 5.142384744890184e-05, "loss": 1.3969, "step": 1100000 }, { "epoch": 0.66, "eval_loss": 1.3153480291366577, "eval_runtime": 1111.5226, "eval_samples_per_second": 473.873, "eval_steps_per_second": 78.979, "step": 1100000 }, { "epoch": 0.66, "learning_rate": 5.142174748334128e-05, "loss": 1.3804, "step": 1100500 }, { "epoch": 0.66, "learning_rate": 5.141964751778071e-05, "loss": 1.3754, "step": 1101000 }, { "epoch": 0.66, "learning_rate": 5.141754755222014e-05, "loss": 1.3715, "step": 1101500 }, { "epoch": 0.66, "learning_rate": 5.141544758665958e-05, "loss": 1.3656, "step": 1102000 }, { "epoch": 0.66, "learning_rate": 5.141335182103014e-05, "loss": 1.3822, "step": 1102500 }, { "epoch": 0.66, "learning_rate": 5.141125185546957e-05, "loss": 1.3963, "step": 1103000 }, { "epoch": 0.66, "learning_rate": 5.1409151889909005e-05, "loss": 1.3675, "step": 1103500 }, { "epoch": 0.66, "learning_rate": 5.140705192434844e-05, "loss": 1.415, "step": 1104000 }, { "epoch": 0.66, "learning_rate": 5.140495195878787e-05, "loss": 1.3495, "step": 1104500 }, { "epoch": 0.66, "learning_rate": 5.140285199322731e-05, "loss": 1.3917, "step": 1105000 }, { "epoch": 0.66, "learning_rate": 5.1400752027666745e-05, "loss": 1.3534, "step": 1105500 }, { "epoch": 0.66, "learning_rate": 5.1398656262037306e-05, "loss": 1.4196, "step": 1106000 }, { "epoch": 0.66, "learning_rate": 5.139655629647673e-05, "loss": 1.3883, "step": 1106500 }, { "epoch": 0.66, "learning_rate": 5.139445633091617e-05, "loss": 1.3661, "step": 1107000 }, { "epoch": 0.66, "learning_rate": 5.1392356365355606e-05, "loss": 1.3944, "step": 1107500 }, { "epoch": 0.66, "learning_rate": 5.139025639979504e-05, "loss": 1.3743, "step": 1108000 }, { "epoch": 0.66, "learning_rate": 5.138815643423448e-05, "loss": 1.3976, "step": 1108500 }, { "epoch": 0.66, "learning_rate": 5.138605646867391e-05, "loss": 1.4238, "step": 1109000 }, { "epoch": 0.67, "learning_rate": 5.1383956503113347e-05, "loss": 1.3618, "step": 1109500 }, { "epoch": 0.67, "learning_rate": 5.138186493741503e-05, "loss": 1.3853, "step": 1110000 }, { "epoch": 0.67, "learning_rate": 5.137976497185446e-05, "loss": 1.36, "step": 1110500 }, { "epoch": 0.67, "learning_rate": 5.1377665006293894e-05, "loss": 1.4155, "step": 1111000 }, { "epoch": 0.67, "learning_rate": 5.1375569240664454e-05, "loss": 1.3881, "step": 1111500 }, { "epoch": 0.67, "learning_rate": 5.137346927510389e-05, "loss": 1.3841, "step": 1112000 }, { "epoch": 0.67, "learning_rate": 5.137136930954333e-05, "loss": 1.3756, "step": 1112500 }, { "epoch": 0.67, "learning_rate": 5.136926934398276e-05, "loss": 1.4265, "step": 1113000 }, { "epoch": 0.67, "learning_rate": 5.136716937842219e-05, "loss": 1.3672, "step": 1113500 }, { "epoch": 0.67, "learning_rate": 5.136506941286163e-05, "loss": 1.3761, "step": 1114000 }, { "epoch": 0.67, "learning_rate": 5.136296944730106e-05, "loss": 1.3674, "step": 1114500 }, { "epoch": 0.67, "learning_rate": 5.1360869481740495e-05, "loss": 1.3773, "step": 1115000 }, { "epoch": 0.67, "learning_rate": 5.1358769516179935e-05, "loss": 1.3895, "step": 1115500 }, { "epoch": 0.67, "learning_rate": 5.135666955061937e-05, "loss": 1.3748, "step": 1116000 }, { "epoch": 0.67, "learning_rate": 5.13545695850588e-05, "loss": 1.4154, "step": 1116500 }, { "epoch": 0.67, "learning_rate": 5.135246961949824e-05, "loss": 1.401, "step": 1117000 }, { "epoch": 0.67, "learning_rate": 5.1350373853868796e-05, "loss": 1.4003, "step": 1117500 }, { "epoch": 0.67, "learning_rate": 5.134827388830823e-05, "loss": 1.3916, "step": 1118000 }, { "epoch": 0.67, "learning_rate": 5.134617392274766e-05, "loss": 1.3667, "step": 1118500 }, { "epoch": 0.67, "learning_rate": 5.1344078157118223e-05, "loss": 1.3732, "step": 1119000 }, { "epoch": 0.67, "learning_rate": 5.1341982391488784e-05, "loss": 1.3752, "step": 1119500 }, { "epoch": 0.67, "learning_rate": 5.133988242592822e-05, "loss": 1.3812, "step": 1120000 }, { "epoch": 0.67, "learning_rate": 5.1337782460367644e-05, "loss": 1.4159, "step": 1120500 }, { "epoch": 0.67, "learning_rate": 5.1335682494807084e-05, "loss": 1.3821, "step": 1121000 }, { "epoch": 0.67, "learning_rate": 5.133358252924652e-05, "loss": 1.3776, "step": 1121500 }, { "epoch": 0.67, "learning_rate": 5.133148256368595e-05, "loss": 1.4106, "step": 1122000 }, { "epoch": 0.67, "learning_rate": 5.132938259812539e-05, "loss": 1.3728, "step": 1122500 }, { "epoch": 0.67, "learning_rate": 5.1327282632564825e-05, "loss": 1.4052, "step": 1123000 }, { "epoch": 0.67, "learning_rate": 5.132518266700426e-05, "loss": 1.3816, "step": 1123500 }, { "epoch": 0.67, "learning_rate": 5.13230827014437e-05, "loss": 1.405, "step": 1124000 }, { "epoch": 0.67, "learning_rate": 5.132098693581425e-05, "loss": 1.3953, "step": 1124500 }, { "epoch": 0.67, "learning_rate": 5.1318886970253686e-05, "loss": 1.3854, "step": 1125000 }, { "epoch": 0.67, "learning_rate": 5.131678700469312e-05, "loss": 1.3799, "step": 1125500 }, { "epoch": 0.68, "learning_rate": 5.131468703913256e-05, "loss": 1.3544, "step": 1126000 }, { "epoch": 0.68, "learning_rate": 5.131258707357199e-05, "loss": 1.3627, "step": 1126500 }, { "epoch": 0.68, "learning_rate": 5.1310487108011426e-05, "loss": 1.3785, "step": 1127000 }, { "epoch": 0.68, "learning_rate": 5.1308387142450866e-05, "loss": 1.4006, "step": 1127500 }, { "epoch": 0.68, "learning_rate": 5.130629137682142e-05, "loss": 1.3694, "step": 1128000 }, { "epoch": 0.68, "learning_rate": 5.1304191411260853e-05, "loss": 1.3698, "step": 1128500 }, { "epoch": 0.68, "learning_rate": 5.1302091445700294e-05, "loss": 1.4122, "step": 1129000 }, { "epoch": 0.68, "learning_rate": 5.129999148013973e-05, "loss": 1.3834, "step": 1129500 }, { "epoch": 0.68, "learning_rate": 5.129789151457916e-05, "loss": 1.3871, "step": 1130000 }, { "epoch": 0.68, "learning_rate": 5.1295795748949714e-05, "loss": 1.3673, "step": 1130500 }, { "epoch": 0.68, "learning_rate": 5.1293695783389154e-05, "loss": 1.4203, "step": 1131000 }, { "epoch": 0.68, "learning_rate": 5.129159581782859e-05, "loss": 1.3797, "step": 1131500 }, { "epoch": 0.68, "learning_rate": 5.128949585226802e-05, "loss": 1.3973, "step": 1132000 }, { "epoch": 0.68, "learning_rate": 5.1287400086638575e-05, "loss": 1.3759, "step": 1132500 }, { "epoch": 0.68, "learning_rate": 5.1285300121078015e-05, "loss": 1.4024, "step": 1133000 }, { "epoch": 0.68, "learning_rate": 5.128320435544857e-05, "loss": 1.3668, "step": 1133500 }, { "epoch": 0.68, "learning_rate": 5.1281104389888e-05, "loss": 1.3785, "step": 1134000 }, { "epoch": 0.68, "learning_rate": 5.127900442432744e-05, "loss": 1.3942, "step": 1134500 }, { "epoch": 0.68, "learning_rate": 5.1276904458766876e-05, "loss": 1.3998, "step": 1135000 }, { "epoch": 0.68, "learning_rate": 5.127480449320631e-05, "loss": 1.3801, "step": 1135500 }, { "epoch": 0.68, "learning_rate": 5.127270452764575e-05, "loss": 1.375, "step": 1136000 }, { "epoch": 0.68, "learning_rate": 5.127060456208518e-05, "loss": 1.3782, "step": 1136500 }, { "epoch": 0.68, "learning_rate": 5.1268508796455737e-05, "loss": 1.3587, "step": 1137000 }, { "epoch": 0.68, "learning_rate": 5.126640883089517e-05, "loss": 1.3792, "step": 1137500 }, { "epoch": 0.68, "learning_rate": 5.126430886533461e-05, "loss": 1.3931, "step": 1138000 }, { "epoch": 0.68, "learning_rate": 5.1262208899774044e-05, "loss": 1.3647, "step": 1138500 }, { "epoch": 0.68, "learning_rate": 5.126010893421348e-05, "loss": 1.3771, "step": 1139000 }, { "epoch": 0.68, "learning_rate": 5.125800896865292e-05, "loss": 1.4079, "step": 1139500 }, { "epoch": 0.68, "learning_rate": 5.125590900309235e-05, "loss": 1.3752, "step": 1140000 }, { "epoch": 0.68, "learning_rate": 5.125380903753178e-05, "loss": 1.3997, "step": 1140500 }, { "epoch": 0.68, "learning_rate": 5.125170907197122e-05, "loss": 1.4085, "step": 1141000 }, { "epoch": 0.68, "learning_rate": 5.124960910641065e-05, "loss": 1.3524, "step": 1141500 }, { "epoch": 0.68, "learning_rate": 5.124750914085009e-05, "loss": 1.3721, "step": 1142000 }, { "epoch": 0.68, "learning_rate": 5.1245409175289525e-05, "loss": 1.3779, "step": 1142500 }, { "epoch": 0.69, "learning_rate": 5.124331340966008e-05, "loss": 1.3825, "step": 1143000 }, { "epoch": 0.69, "learning_rate": 5.124121344409951e-05, "loss": 1.4193, "step": 1143500 }, { "epoch": 0.69, "learning_rate": 5.123911347853895e-05, "loss": 1.3746, "step": 1144000 }, { "epoch": 0.69, "learning_rate": 5.1237013512978386e-05, "loss": 1.4017, "step": 1144500 }, { "epoch": 0.69, "learning_rate": 5.123491354741782e-05, "loss": 1.3472, "step": 1145000 }, { "epoch": 0.69, "learning_rate": 5.123281358185726e-05, "loss": 1.3713, "step": 1145500 }, { "epoch": 0.69, "learning_rate": 5.123072201615893e-05, "loss": 1.3892, "step": 1146000 }, { "epoch": 0.69, "learning_rate": 5.122862205059837e-05, "loss": 1.3863, "step": 1146500 }, { "epoch": 0.69, "learning_rate": 5.122652208503781e-05, "loss": 1.3757, "step": 1147000 }, { "epoch": 0.69, "learning_rate": 5.1224422119477233e-05, "loss": 1.3846, "step": 1147500 }, { "epoch": 0.69, "learning_rate": 5.1222322153916674e-05, "loss": 1.3781, "step": 1148000 }, { "epoch": 0.69, "learning_rate": 5.122022218835611e-05, "loss": 1.3856, "step": 1148500 }, { "epoch": 0.69, "learning_rate": 5.121812222279555e-05, "loss": 1.3714, "step": 1149000 }, { "epoch": 0.69, "learning_rate": 5.121602225723498e-05, "loss": 1.3471, "step": 1149500 }, { "epoch": 0.69, "learning_rate": 5.1213922291674414e-05, "loss": 1.3798, "step": 1150000 }, { "epoch": 0.69, "learning_rate": 5.1211822326113855e-05, "loss": 1.4012, "step": 1150500 }, { "epoch": 0.69, "learning_rate": 5.120972236055329e-05, "loss": 1.3677, "step": 1151000 }, { "epoch": 0.69, "learning_rate": 5.120762239499272e-05, "loss": 1.3716, "step": 1151500 }, { "epoch": 0.69, "learning_rate": 5.1205526629363275e-05, "loss": 1.3841, "step": 1152000 }, { "epoch": 0.69, "learning_rate": 5.120343086373383e-05, "loss": 1.3865, "step": 1152500 }, { "epoch": 0.69, "learning_rate": 5.120133089817327e-05, "loss": 1.3806, "step": 1153000 }, { "epoch": 0.69, "learning_rate": 5.11992309326127e-05, "loss": 1.3734, "step": 1153500 }, { "epoch": 0.69, "learning_rate": 5.1197130967052136e-05, "loss": 1.3676, "step": 1154000 }, { "epoch": 0.69, "learning_rate": 5.1195031001491576e-05, "loss": 1.3616, "step": 1154500 }, { "epoch": 0.69, "learning_rate": 5.119293103593101e-05, "loss": 1.3571, "step": 1155000 }, { "epoch": 0.69, "learning_rate": 5.119083107037044e-05, "loss": 1.3581, "step": 1155500 }, { "epoch": 0.69, "learning_rate": 5.1188735304741e-05, "loss": 1.4098, "step": 1156000 }, { "epoch": 0.69, "learning_rate": 5.118663533918044e-05, "loss": 1.3855, "step": 1156500 }, { "epoch": 0.69, "learning_rate": 5.118453537361987e-05, "loss": 1.3983, "step": 1157000 }, { "epoch": 0.69, "learning_rate": 5.1182439607990424e-05, "loss": 1.3756, "step": 1157500 }, { "epoch": 0.69, "learning_rate": 5.1180339642429864e-05, "loss": 1.3781, "step": 1158000 }, { "epoch": 0.69, "learning_rate": 5.11782396768693e-05, "loss": 1.3963, "step": 1158500 }, { "epoch": 0.69, "learning_rate": 5.117613971130873e-05, "loss": 1.3835, "step": 1159000 }, { "epoch": 0.7, "learning_rate": 5.117403974574817e-05, "loss": 1.3928, "step": 1159500 }, { "epoch": 0.7, "learning_rate": 5.1171939780187605e-05, "loss": 1.3423, "step": 1160000 }, { "epoch": 0.7, "learning_rate": 5.116983981462704e-05, "loss": 1.3655, "step": 1160500 }, { "epoch": 0.7, "learning_rate": 5.116773984906648e-05, "loss": 1.3685, "step": 1161000 }, { "epoch": 0.7, "learning_rate": 5.116563988350591e-05, "loss": 1.3539, "step": 1161500 }, { "epoch": 0.7, "learning_rate": 5.1163539917945345e-05, "loss": 1.3701, "step": 1162000 }, { "epoch": 0.7, "learning_rate": 5.11614441523159e-05, "loss": 1.3552, "step": 1162500 }, { "epoch": 0.7, "learning_rate": 5.115934418675534e-05, "loss": 1.4087, "step": 1163000 }, { "epoch": 0.7, "learning_rate": 5.115724422119477e-05, "loss": 1.3697, "step": 1163500 }, { "epoch": 0.7, "learning_rate": 5.1155148455565326e-05, "loss": 1.3504, "step": 1164000 }, { "epoch": 0.7, "learning_rate": 5.1153048490004766e-05, "loss": 1.3818, "step": 1164500 }, { "epoch": 0.7, "learning_rate": 5.11509485244442e-05, "loss": 1.3863, "step": 1165000 }, { "epoch": 0.7, "learning_rate": 5.114884855888363e-05, "loss": 1.3985, "step": 1165500 }, { "epoch": 0.7, "learning_rate": 5.1146748593323073e-05, "loss": 1.3701, "step": 1166000 }, { "epoch": 0.7, "learning_rate": 5.114464862776251e-05, "loss": 1.403, "step": 1166500 }, { "epoch": 0.7, "learning_rate": 5.114254866220194e-05, "loss": 1.3908, "step": 1167000 }, { "epoch": 0.7, "learning_rate": 5.1140448696641374e-05, "loss": 1.365, "step": 1167500 }, { "epoch": 0.7, "learning_rate": 5.113834873108081e-05, "loss": 1.3751, "step": 1168000 }, { "epoch": 0.7, "learning_rate": 5.113625296545137e-05, "loss": 1.4, "step": 1168500 }, { "epoch": 0.7, "learning_rate": 5.11341529998908e-05, "loss": 1.3502, "step": 1169000 }, { "epoch": 0.7, "learning_rate": 5.113205303433024e-05, "loss": 1.3935, "step": 1169500 }, { "epoch": 0.7, "learning_rate": 5.112995306876967e-05, "loss": 1.3827, "step": 1170000 }, { "epoch": 0.7, "learning_rate": 5.112785730314023e-05, "loss": 1.3787, "step": 1170500 }, { "epoch": 0.7, "learning_rate": 5.112575733757966e-05, "loss": 1.3628, "step": 1171000 }, { "epoch": 0.7, "learning_rate": 5.11236573720191e-05, "loss": 1.3505, "step": 1171500 }, { "epoch": 0.7, "learning_rate": 5.1121557406458535e-05, "loss": 1.3781, "step": 1172000 }, { "epoch": 0.7, "learning_rate": 5.111945744089797e-05, "loss": 1.3364, "step": 1172500 }, { "epoch": 0.7, "learning_rate": 5.11173574753374e-05, "loss": 1.3908, "step": 1173000 }, { "epoch": 0.7, "learning_rate": 5.1115257509776836e-05, "loss": 1.3812, "step": 1173500 }, { "epoch": 0.7, "learning_rate": 5.1113161744147396e-05, "loss": 1.4128, "step": 1174000 }, { "epoch": 0.7, "learning_rate": 5.111106177858683e-05, "loss": 1.3733, "step": 1174500 }, { "epoch": 0.7, "learning_rate": 5.110896181302626e-05, "loss": 1.3805, "step": 1175000 }, { "epoch": 0.7, "learning_rate": 5.1106861847465697e-05, "loss": 1.3571, "step": 1175500 }, { "epoch": 0.71, "learning_rate": 5.110476188190514e-05, "loss": 1.3847, "step": 1176000 }, { "epoch": 0.71, "learning_rate": 5.110266191634457e-05, "loss": 1.4097, "step": 1176500 }, { "epoch": 0.71, "learning_rate": 5.1100566150715124e-05, "loss": 1.4068, "step": 1177000 }, { "epoch": 0.71, "learning_rate": 5.109846618515456e-05, "loss": 1.3897, "step": 1177500 }, { "epoch": 0.71, "learning_rate": 5.1096366219594e-05, "loss": 1.3688, "step": 1178000 }, { "epoch": 0.71, "learning_rate": 5.109426625403343e-05, "loss": 1.3812, "step": 1178500 }, { "epoch": 0.71, "learning_rate": 5.1092166288472864e-05, "loss": 1.3976, "step": 1179000 }, { "epoch": 0.71, "learning_rate": 5.1090066322912305e-05, "loss": 1.3727, "step": 1179500 }, { "epoch": 0.71, "learning_rate": 5.108796635735174e-05, "loss": 1.3653, "step": 1180000 }, { "epoch": 0.71, "learning_rate": 5.108587059172229e-05, "loss": 1.3896, "step": 1180500 }, { "epoch": 0.71, "learning_rate": 5.108377062616173e-05, "loss": 1.3743, "step": 1181000 }, { "epoch": 0.71, "learning_rate": 5.1081670660601165e-05, "loss": 1.3712, "step": 1181500 }, { "epoch": 0.71, "learning_rate": 5.10795706950406e-05, "loss": 1.3768, "step": 1182000 }, { "epoch": 0.71, "learning_rate": 5.107747072948004e-05, "loss": 1.3707, "step": 1182500 }, { "epoch": 0.71, "learning_rate": 5.107537076391947e-05, "loss": 1.386, "step": 1183000 }, { "epoch": 0.71, "learning_rate": 5.1073274998290026e-05, "loss": 1.3595, "step": 1183500 }, { "epoch": 0.71, "learning_rate": 5.107117503272946e-05, "loss": 1.3561, "step": 1184000 }, { "epoch": 0.71, "learning_rate": 5.10690750671689e-05, "loss": 1.3615, "step": 1184500 }, { "epoch": 0.71, "learning_rate": 5.106697510160833e-05, "loss": 1.3894, "step": 1185000 }, { "epoch": 0.71, "learning_rate": 5.106487513604777e-05, "loss": 1.3899, "step": 1185500 }, { "epoch": 0.71, "learning_rate": 5.106277517048721e-05, "loss": 1.3547, "step": 1186000 }, { "epoch": 0.71, "learning_rate": 5.106067940485776e-05, "loss": 1.3683, "step": 1186500 }, { "epoch": 0.71, "learning_rate": 5.1058579439297194e-05, "loss": 1.3676, "step": 1187000 }, { "epoch": 0.71, "learning_rate": 5.105647947373663e-05, "loss": 1.3993, "step": 1187500 }, { "epoch": 0.71, "learning_rate": 5.105437950817607e-05, "loss": 1.3662, "step": 1188000 }, { "epoch": 0.71, "learning_rate": 5.10522795426155e-05, "loss": 1.3727, "step": 1188500 }, { "epoch": 0.71, "learning_rate": 5.1050183776986055e-05, "loss": 1.4103, "step": 1189000 }, { "epoch": 0.71, "learning_rate": 5.1048083811425495e-05, "loss": 1.3652, "step": 1189500 }, { "epoch": 0.71, "learning_rate": 5.104598384586493e-05, "loss": 1.3977, "step": 1190000 }, { "epoch": 0.71, "learning_rate": 5.104388388030436e-05, "loss": 1.3614, "step": 1190500 }, { "epoch": 0.71, "learning_rate": 5.10417839147438e-05, "loss": 1.414, "step": 1191000 }, { "epoch": 0.71, "learning_rate": 5.1039683949183236e-05, "loss": 1.3397, "step": 1191500 }, { "epoch": 0.71, "learning_rate": 5.103758818355379e-05, "loss": 1.3673, "step": 1192000 }, { "epoch": 0.71, "learning_rate": 5.103548821799322e-05, "loss": 1.3586, "step": 1192500 }, { "epoch": 0.72, "learning_rate": 5.103338825243266e-05, "loss": 1.3636, "step": 1193000 }, { "epoch": 0.72, "learning_rate": 5.1031288286872096e-05, "loss": 1.3844, "step": 1193500 }, { "epoch": 0.72, "learning_rate": 5.102918832131153e-05, "loss": 1.3879, "step": 1194000 }, { "epoch": 0.72, "learning_rate": 5.102709255568209e-05, "loss": 1.3613, "step": 1194500 }, { "epoch": 0.72, "learning_rate": 5.1024992590121524e-05, "loss": 1.3877, "step": 1195000 }, { "epoch": 0.72, "learning_rate": 5.102289262456096e-05, "loss": 1.3694, "step": 1195500 }, { "epoch": 0.72, "learning_rate": 5.102079685893151e-05, "loss": 1.3863, "step": 1196000 }, { "epoch": 0.72, "learning_rate": 5.101869689337095e-05, "loss": 1.3787, "step": 1196500 }, { "epoch": 0.72, "learning_rate": 5.1016596927810384e-05, "loss": 1.3816, "step": 1197000 }, { "epoch": 0.72, "learning_rate": 5.101449696224982e-05, "loss": 1.3487, "step": 1197500 }, { "epoch": 0.72, "learning_rate": 5.101240119662037e-05, "loss": 1.3569, "step": 1198000 }, { "epoch": 0.72, "learning_rate": 5.101030123105981e-05, "loss": 1.3908, "step": 1198500 }, { "epoch": 0.72, "learning_rate": 5.1008201265499245e-05, "loss": 1.3867, "step": 1199000 }, { "epoch": 0.72, "learning_rate": 5.100610129993868e-05, "loss": 1.3663, "step": 1199500 }, { "epoch": 0.72, "learning_rate": 5.100400133437812e-05, "loss": 1.3723, "step": 1200000 }, { "epoch": 0.72, "eval_loss": 1.3151705265045166, "eval_runtime": 1104.7712, "eval_samples_per_second": 476.768, "eval_steps_per_second": 79.462, "step": 1200000 }, { "epoch": 0.72, "learning_rate": 5.100190136881755e-05, "loss": 1.3804, "step": 1200500 }, { "epoch": 0.72, "learning_rate": 5.0999801403256986e-05, "loss": 1.3675, "step": 1201000 }, { "epoch": 0.72, "learning_rate": 5.099770143769642e-05, "loss": 1.3808, "step": 1201500 }, { "epoch": 0.72, "learning_rate": 5.099560147213585e-05, "loss": 1.3655, "step": 1202000 }, { "epoch": 0.72, "learning_rate": 5.099350150657529e-05, "loss": 1.3646, "step": 1202500 }, { "epoch": 0.72, "learning_rate": 5.0991401541014726e-05, "loss": 1.377, "step": 1203000 }, { "epoch": 0.72, "learning_rate": 5.098930157545416e-05, "loss": 1.3981, "step": 1203500 }, { "epoch": 0.72, "learning_rate": 5.09872016098936e-05, "loss": 1.3675, "step": 1204000 }, { "epoch": 0.72, "learning_rate": 5.0985105844264154e-05, "loss": 1.3753, "step": 1204500 }, { "epoch": 0.72, "learning_rate": 5.098300587870359e-05, "loss": 1.3472, "step": 1205000 }, { "epoch": 0.72, "learning_rate": 5.098090591314302e-05, "loss": 1.357, "step": 1205500 }, { "epoch": 0.72, "learning_rate": 5.097880594758246e-05, "loss": 1.3797, "step": 1206000 }, { "epoch": 0.72, "learning_rate": 5.0976705982021894e-05, "loss": 1.4025, "step": 1206500 }, { "epoch": 0.72, "learning_rate": 5.097461021639245e-05, "loss": 1.375, "step": 1207000 }, { "epoch": 0.72, "learning_rate": 5.097251445076301e-05, "loss": 1.3654, "step": 1207500 }, { "epoch": 0.72, "learning_rate": 5.097041448520244e-05, "loss": 1.3685, "step": 1208000 }, { "epoch": 0.72, "learning_rate": 5.0968318719573e-05, "loss": 1.3489, "step": 1208500 }, { "epoch": 0.72, "learning_rate": 5.0966218754012435e-05, "loss": 1.3542, "step": 1209000 }, { "epoch": 0.73, "learning_rate": 5.096411878845187e-05, "loss": 1.3754, "step": 1209500 }, { "epoch": 0.73, "learning_rate": 5.096201882289131e-05, "loss": 1.391, "step": 1210000 }, { "epoch": 0.73, "learning_rate": 5.095991885733074e-05, "loss": 1.379, "step": 1210500 }, { "epoch": 0.73, "learning_rate": 5.095781889177017e-05, "loss": 1.3446, "step": 1211000 }, { "epoch": 0.73, "learning_rate": 5.095571892620961e-05, "loss": 1.3724, "step": 1211500 }, { "epoch": 0.73, "learning_rate": 5.095361896064904e-05, "loss": 1.3668, "step": 1212000 }, { "epoch": 0.73, "learning_rate": 5.0951518995088476e-05, "loss": 1.3661, "step": 1212500 }, { "epoch": 0.73, "learning_rate": 5.0949419029527917e-05, "loss": 1.3654, "step": 1213000 }, { "epoch": 0.73, "learning_rate": 5.094731906396735e-05, "loss": 1.3541, "step": 1213500 }, { "epoch": 0.73, "learning_rate": 5.0945219098406784e-05, "loss": 1.3598, "step": 1214000 }, { "epoch": 0.73, "learning_rate": 5.0943119132846224e-05, "loss": 1.3503, "step": 1214500 }, { "epoch": 0.73, "learning_rate": 5.094101916728566e-05, "loss": 1.3971, "step": 1215000 }, { "epoch": 0.73, "learning_rate": 5.093892340165621e-05, "loss": 1.3821, "step": 1215500 }, { "epoch": 0.73, "learning_rate": 5.0936823436095644e-05, "loss": 1.377, "step": 1216000 }, { "epoch": 0.73, "learning_rate": 5.0934723470535084e-05, "loss": 1.356, "step": 1216500 }, { "epoch": 0.73, "learning_rate": 5.093262350497452e-05, "loss": 1.3789, "step": 1217000 }, { "epoch": 0.73, "learning_rate": 5.093052353941395e-05, "loss": 1.3561, "step": 1217500 }, { "epoch": 0.73, "learning_rate": 5.092842357385339e-05, "loss": 1.3694, "step": 1218000 }, { "epoch": 0.73, "learning_rate": 5.0926327808223945e-05, "loss": 1.3678, "step": 1218500 }, { "epoch": 0.73, "learning_rate": 5.092422784266338e-05, "loss": 1.3942, "step": 1219000 }, { "epoch": 0.73, "learning_rate": 5.092212787710282e-05, "loss": 1.3781, "step": 1219500 }, { "epoch": 0.73, "learning_rate": 5.092002791154225e-05, "loss": 1.4007, "step": 1220000 }, { "epoch": 0.73, "learning_rate": 5.0917927945981686e-05, "loss": 1.3577, "step": 1220500 }, { "epoch": 0.73, "learning_rate": 5.091583218035224e-05, "loss": 1.372, "step": 1221000 }, { "epoch": 0.73, "learning_rate": 5.091373641472279e-05, "loss": 1.3697, "step": 1221500 }, { "epoch": 0.73, "learning_rate": 5.091164064909335e-05, "loss": 1.3779, "step": 1222000 }, { "epoch": 0.73, "learning_rate": 5.0909540683532794e-05, "loss": 1.3555, "step": 1222500 }, { "epoch": 0.73, "learning_rate": 5.090744071797222e-05, "loss": 1.3911, "step": 1223000 }, { "epoch": 0.73, "learning_rate": 5.090534075241166e-05, "loss": 1.3517, "step": 1223500 }, { "epoch": 0.73, "learning_rate": 5.0903240786851094e-05, "loss": 1.3597, "step": 1224000 }, { "epoch": 0.73, "learning_rate": 5.0901145021221654e-05, "loss": 1.3848, "step": 1224500 }, { "epoch": 0.73, "learning_rate": 5.089904505566109e-05, "loss": 1.3372, "step": 1225000 }, { "epoch": 0.73, "learning_rate": 5.089694509010052e-05, "loss": 1.4007, "step": 1225500 }, { "epoch": 0.74, "learning_rate": 5.0894845124539955e-05, "loss": 1.3585, "step": 1226000 }, { "epoch": 0.74, "learning_rate": 5.089274515897939e-05, "loss": 1.3632, "step": 1226500 }, { "epoch": 0.74, "learning_rate": 5.089064519341883e-05, "loss": 1.3868, "step": 1227000 }, { "epoch": 0.74, "learning_rate": 5.088854522785826e-05, "loss": 1.3611, "step": 1227500 }, { "epoch": 0.74, "learning_rate": 5.0886445262297695e-05, "loss": 1.3871, "step": 1228000 }, { "epoch": 0.74, "learning_rate": 5.088434949666825e-05, "loss": 1.3915, "step": 1228500 }, { "epoch": 0.74, "learning_rate": 5.088224953110769e-05, "loss": 1.3762, "step": 1229000 }, { "epoch": 0.74, "learning_rate": 5.088014956554712e-05, "loss": 1.3556, "step": 1229500 }, { "epoch": 0.74, "learning_rate": 5.0878049599986556e-05, "loss": 1.3397, "step": 1230000 }, { "epoch": 0.74, "learning_rate": 5.0875949634425996e-05, "loss": 1.38, "step": 1230500 }, { "epoch": 0.74, "learning_rate": 5.087384966886543e-05, "loss": 1.3747, "step": 1231000 }, { "epoch": 0.74, "learning_rate": 5.087174970330486e-05, "loss": 1.359, "step": 1231500 }, { "epoch": 0.74, "learning_rate": 5.08696497377443e-05, "loss": 1.3434, "step": 1232000 }, { "epoch": 0.74, "learning_rate": 5.086754977218374e-05, "loss": 1.3631, "step": 1232500 }, { "epoch": 0.74, "learning_rate": 5.086544980662317e-05, "loss": 1.3705, "step": 1233000 }, { "epoch": 0.74, "learning_rate": 5.0863349841062604e-05, "loss": 1.3553, "step": 1233500 }, { "epoch": 0.74, "learning_rate": 5.086124987550204e-05, "loss": 1.3719, "step": 1234000 }, { "epoch": 0.74, "learning_rate": 5.08591541098726e-05, "loss": 1.3385, "step": 1234500 }, { "epoch": 0.74, "learning_rate": 5.085705414431204e-05, "loss": 1.3611, "step": 1235000 }, { "epoch": 0.74, "learning_rate": 5.0854954178751464e-05, "loss": 1.3961, "step": 1235500 }, { "epoch": 0.74, "learning_rate": 5.08528542131909e-05, "loss": 1.3432, "step": 1236000 }, { "epoch": 0.74, "learning_rate": 5.085075424763034e-05, "loss": 1.3536, "step": 1236500 }, { "epoch": 0.74, "learning_rate": 5.084866268193201e-05, "loss": 1.3644, "step": 1237000 }, { "epoch": 0.74, "learning_rate": 5.084656271637145e-05, "loss": 1.3721, "step": 1237500 }, { "epoch": 0.74, "learning_rate": 5.0844466950742006e-05, "loss": 1.4002, "step": 1238000 }, { "epoch": 0.74, "learning_rate": 5.084236698518144e-05, "loss": 1.3527, "step": 1238500 }, { "epoch": 0.74, "learning_rate": 5.084026701962088e-05, "loss": 1.3735, "step": 1239000 }, { "epoch": 0.74, "learning_rate": 5.083816705406031e-05, "loss": 1.3966, "step": 1239500 }, { "epoch": 0.74, "learning_rate": 5.0836067088499746e-05, "loss": 1.3662, "step": 1240000 }, { "epoch": 0.74, "learning_rate": 5.0833967122939187e-05, "loss": 1.3885, "step": 1240500 }, { "epoch": 0.74, "learning_rate": 5.083186715737862e-05, "loss": 1.3659, "step": 1241000 }, { "epoch": 0.74, "learning_rate": 5.0829767191818053e-05, "loss": 1.3429, "step": 1241500 }, { "epoch": 0.74, "learning_rate": 5.0827667226257494e-05, "loss": 1.3731, "step": 1242000 }, { "epoch": 0.74, "learning_rate": 5.082556726069693e-05, "loss": 1.3712, "step": 1242500 }, { "epoch": 0.75, "learning_rate": 5.0823467295136354e-05, "loss": 1.3826, "step": 1243000 }, { "epoch": 0.75, "learning_rate": 5.0821371529506914e-05, "loss": 1.3889, "step": 1243500 }, { "epoch": 0.75, "learning_rate": 5.0819271563946354e-05, "loss": 1.3738, "step": 1244000 }, { "epoch": 0.75, "learning_rate": 5.081717159838579e-05, "loss": 1.3506, "step": 1244500 }, { "epoch": 0.75, "learning_rate": 5.081507163282522e-05, "loss": 1.3725, "step": 1245000 }, { "epoch": 0.75, "learning_rate": 5.0812971667264655e-05, "loss": 1.3725, "step": 1245500 }, { "epoch": 0.75, "learning_rate": 5.081087170170409e-05, "loss": 1.3702, "step": 1246000 }, { "epoch": 0.75, "learning_rate": 5.080877593607465e-05, "loss": 1.3648, "step": 1246500 }, { "epoch": 0.75, "learning_rate": 5.080667597051409e-05, "loss": 1.3978, "step": 1247000 }, { "epoch": 0.75, "learning_rate": 5.0804576004953516e-05, "loss": 1.3837, "step": 1247500 }, { "epoch": 0.75, "learning_rate": 5.080247603939295e-05, "loss": 1.3811, "step": 1248000 }, { "epoch": 0.75, "learning_rate": 5.080037607383239e-05, "loss": 1.3716, "step": 1248500 }, { "epoch": 0.75, "learning_rate": 5.079827610827182e-05, "loss": 1.3669, "step": 1249000 }, { "epoch": 0.75, "learning_rate": 5.0796176142711256e-05, "loss": 1.3718, "step": 1249500 }, { "epoch": 0.75, "learning_rate": 5.0794076177150696e-05, "loss": 1.3555, "step": 1250000 }, { "epoch": 0.75, "learning_rate": 5.079198041152125e-05, "loss": 1.3691, "step": 1250500 }, { "epoch": 0.75, "learning_rate": 5.0789880445960683e-05, "loss": 1.3493, "step": 1251000 }, { "epoch": 0.75, "learning_rate": 5.078778048040012e-05, "loss": 1.4041, "step": 1251500 }, { "epoch": 0.75, "learning_rate": 5.078568471477068e-05, "loss": 1.4076, "step": 1252000 }, { "epoch": 0.75, "learning_rate": 5.078358474921011e-05, "loss": 1.3565, "step": 1252500 }, { "epoch": 0.75, "learning_rate": 5.0781484783649544e-05, "loss": 1.3829, "step": 1253000 }, { "epoch": 0.75, "learning_rate": 5.0779384818088984e-05, "loss": 1.3445, "step": 1253500 }, { "epoch": 0.75, "learning_rate": 5.077728485252842e-05, "loss": 1.3654, "step": 1254000 }, { "epoch": 0.75, "learning_rate": 5.077518488696785e-05, "loss": 1.3696, "step": 1254500 }, { "epoch": 0.75, "learning_rate": 5.0773089121338405e-05, "loss": 1.3649, "step": 1255000 }, { "epoch": 0.75, "learning_rate": 5.0770989155777845e-05, "loss": 1.3795, "step": 1255500 }, { "epoch": 0.75, "learning_rate": 5.076888919021728e-05, "loss": 1.3856, "step": 1256000 }, { "epoch": 0.75, "learning_rate": 5.076678922465671e-05, "loss": 1.3688, "step": 1256500 }, { "epoch": 0.75, "learning_rate": 5.076468925909615e-05, "loss": 1.346, "step": 1257000 }, { "epoch": 0.75, "learning_rate": 5.0762593493466706e-05, "loss": 1.3966, "step": 1257500 }, { "epoch": 0.75, "learning_rate": 5.076049352790614e-05, "loss": 1.36, "step": 1258000 }, { "epoch": 0.75, "learning_rate": 5.075839356234557e-05, "loss": 1.3862, "step": 1258500 }, { "epoch": 0.75, "learning_rate": 5.075629359678501e-05, "loss": 1.3487, "step": 1259000 }, { "epoch": 0.76, "learning_rate": 5.0754197831155567e-05, "loss": 1.3504, "step": 1259500 }, { "epoch": 0.76, "learning_rate": 5.0752097865595e-05, "loss": 1.4048, "step": 1260000 }, { "epoch": 0.76, "learning_rate": 5.074999790003444e-05, "loss": 1.3684, "step": 1260500 }, { "epoch": 0.76, "learning_rate": 5.0747902134405e-05, "loss": 1.3706, "step": 1261000 }, { "epoch": 0.76, "learning_rate": 5.0745802168844434e-05, "loss": 1.327, "step": 1261500 }, { "epoch": 0.76, "learning_rate": 5.074370220328386e-05, "loss": 1.3419, "step": 1262000 }, { "epoch": 0.76, "learning_rate": 5.07416022377233e-05, "loss": 1.3675, "step": 1262500 }, { "epoch": 0.76, "learning_rate": 5.0739502272162734e-05, "loss": 1.3986, "step": 1263000 }, { "epoch": 0.76, "learning_rate": 5.073740230660217e-05, "loss": 1.388, "step": 1263500 }, { "epoch": 0.76, "learning_rate": 5.073530234104161e-05, "loss": 1.3511, "step": 1264000 }, { "epoch": 0.76, "learning_rate": 5.073320237548104e-05, "loss": 1.3755, "step": 1264500 }, { "epoch": 0.76, "learning_rate": 5.0731102409920475e-05, "loss": 1.3551, "step": 1265000 }, { "epoch": 0.76, "learning_rate": 5.072900664429103e-05, "loss": 1.358, "step": 1265500 }, { "epoch": 0.76, "learning_rate": 5.072690667873047e-05, "loss": 1.3489, "step": 1266000 }, { "epoch": 0.76, "learning_rate": 5.07248067131699e-05, "loss": 1.3711, "step": 1266500 }, { "epoch": 0.76, "learning_rate": 5.0722706747609336e-05, "loss": 1.3771, "step": 1267000 }, { "epoch": 0.76, "learning_rate": 5.0720606782048776e-05, "loss": 1.351, "step": 1267500 }, { "epoch": 0.76, "learning_rate": 5.071851101641933e-05, "loss": 1.3494, "step": 1268000 }, { "epoch": 0.76, "learning_rate": 5.071641525078989e-05, "loss": 1.3627, "step": 1268500 }, { "epoch": 0.76, "learning_rate": 5.0714315285229317e-05, "loss": 1.3813, "step": 1269000 }, { "epoch": 0.76, "learning_rate": 5.071221531966876e-05, "loss": 1.3699, "step": 1269500 }, { "epoch": 0.76, "learning_rate": 5.071011535410819e-05, "loss": 1.3926, "step": 1270000 }, { "epoch": 0.76, "learning_rate": 5.0708015388547624e-05, "loss": 1.3404, "step": 1270500 }, { "epoch": 0.76, "learning_rate": 5.0705915422987064e-05, "loss": 1.3685, "step": 1271000 }, { "epoch": 0.76, "learning_rate": 5.07038154574265e-05, "loss": 1.359, "step": 1271500 }, { "epoch": 0.76, "learning_rate": 5.070171549186593e-05, "loss": 1.3457, "step": 1272000 }, { "epoch": 0.76, "learning_rate": 5.069961552630537e-05, "loss": 1.3798, "step": 1272500 }, { "epoch": 0.76, "learning_rate": 5.0697519760675925e-05, "loss": 1.3773, "step": 1273000 }, { "epoch": 0.76, "learning_rate": 5.069541979511536e-05, "loss": 1.3593, "step": 1273500 }, { "epoch": 0.76, "learning_rate": 5.069331982955479e-05, "loss": 1.3903, "step": 1274000 }, { "epoch": 0.76, "learning_rate": 5.069121986399423e-05, "loss": 1.3781, "step": 1274500 }, { "epoch": 0.76, "learning_rate": 5.0689119898433665e-05, "loss": 1.3771, "step": 1275000 }, { "epoch": 0.76, "learning_rate": 5.06870199328731e-05, "loss": 1.3793, "step": 1275500 }, { "epoch": 0.77, "learning_rate": 5.068492416724366e-05, "loss": 1.3676, "step": 1276000 }, { "epoch": 0.77, "learning_rate": 5.068282420168309e-05, "loss": 1.3566, "step": 1276500 }, { "epoch": 0.77, "learning_rate": 5.0680724236122526e-05, "loss": 1.3834, "step": 1277000 }, { "epoch": 0.77, "learning_rate": 5.0678632670424207e-05, "loss": 1.3827, "step": 1277500 }, { "epoch": 0.77, "learning_rate": 5.067653270486364e-05, "loss": 1.3837, "step": 1278000 }, { "epoch": 0.77, "learning_rate": 5.0674432739303073e-05, "loss": 1.3729, "step": 1278500 }, { "epoch": 0.77, "learning_rate": 5.067233277374251e-05, "loss": 1.3562, "step": 1279000 }, { "epoch": 0.77, "learning_rate": 5.067023280818194e-05, "loss": 1.3532, "step": 1279500 }, { "epoch": 0.77, "learning_rate": 5.066813284262138e-05, "loss": 1.3822, "step": 1280000 }, { "epoch": 0.77, "learning_rate": 5.0666032877060814e-05, "loss": 1.3682, "step": 1280500 }, { "epoch": 0.77, "learning_rate": 5.066393291150025e-05, "loss": 1.3506, "step": 1281000 }, { "epoch": 0.77, "learning_rate": 5.066183294593969e-05, "loss": 1.3694, "step": 1281500 }, { "epoch": 0.77, "learning_rate": 5.065973298037912e-05, "loss": 1.3946, "step": 1282000 }, { "epoch": 0.77, "learning_rate": 5.0657633014818555e-05, "loss": 1.3872, "step": 1282500 }, { "epoch": 0.77, "learning_rate": 5.0655533049257995e-05, "loss": 1.3448, "step": 1283000 }, { "epoch": 0.77, "learning_rate": 5.065343728362855e-05, "loss": 1.3682, "step": 1283500 }, { "epoch": 0.77, "learning_rate": 5.065133731806798e-05, "loss": 1.4006, "step": 1284000 }, { "epoch": 0.77, "learning_rate": 5.064923735250742e-05, "loss": 1.3558, "step": 1284500 }, { "epoch": 0.77, "learning_rate": 5.0647137386946856e-05, "loss": 1.3758, "step": 1285000 }, { "epoch": 0.77, "learning_rate": 5.064503742138629e-05, "loss": 1.3791, "step": 1285500 }, { "epoch": 0.77, "learning_rate": 5.064293745582573e-05, "loss": 1.3498, "step": 1286000 }, { "epoch": 0.77, "learning_rate": 5.0640837490265156e-05, "loss": 1.3765, "step": 1286500 }, { "epoch": 0.77, "learning_rate": 5.063873752470459e-05, "loss": 1.3596, "step": 1287000 }, { "epoch": 0.77, "learning_rate": 5.063664175907515e-05, "loss": 1.3406, "step": 1287500 }, { "epoch": 0.77, "learning_rate": 5.063454179351459e-05, "loss": 1.3448, "step": 1288000 }, { "epoch": 0.77, "learning_rate": 5.0632441827954024e-05, "loss": 1.3607, "step": 1288500 }, { "epoch": 0.77, "learning_rate": 5.063034186239345e-05, "loss": 1.3638, "step": 1289000 }, { "epoch": 0.77, "learning_rate": 5.062824609676401e-05, "loss": 1.3756, "step": 1289500 }, { "epoch": 0.77, "learning_rate": 5.062614613120345e-05, "loss": 1.3478, "step": 1290000 }, { "epoch": 0.77, "learning_rate": 5.0624050365574004e-05, "loss": 1.3888, "step": 1290500 }, { "epoch": 0.77, "learning_rate": 5.062195040001344e-05, "loss": 1.3328, "step": 1291000 }, { "epoch": 0.77, "learning_rate": 5.061985043445288e-05, "loss": 1.3895, "step": 1291500 }, { "epoch": 0.77, "learning_rate": 5.061775046889231e-05, "loss": 1.3858, "step": 1292000 }, { "epoch": 0.77, "learning_rate": 5.0615650503331745e-05, "loss": 1.3637, "step": 1292500 }, { "epoch": 0.78, "learning_rate": 5.0613550537771185e-05, "loss": 1.3533, "step": 1293000 }, { "epoch": 0.78, "learning_rate": 5.061145057221061e-05, "loss": 1.3716, "step": 1293500 }, { "epoch": 0.78, "learning_rate": 5.0609350606650045e-05, "loss": 1.3715, "step": 1294000 }, { "epoch": 0.78, "learning_rate": 5.0607254841020606e-05, "loss": 1.3417, "step": 1294500 }, { "epoch": 0.78, "learning_rate": 5.0605154875460046e-05, "loss": 1.3617, "step": 1295000 }, { "epoch": 0.78, "learning_rate": 5.06030591098306e-05, "loss": 1.358, "step": 1295500 }, { "epoch": 0.78, "learning_rate": 5.060095914427003e-05, "loss": 1.353, "step": 1296000 }, { "epoch": 0.78, "learning_rate": 5.0598859178709466e-05, "loss": 1.3714, "step": 1296500 }, { "epoch": 0.78, "learning_rate": 5.059675921314891e-05, "loss": 1.3416, "step": 1297000 }, { "epoch": 0.78, "learning_rate": 5.059465924758834e-05, "loss": 1.3545, "step": 1297500 }, { "epoch": 0.78, "learning_rate": 5.0592563481958894e-05, "loss": 1.3627, "step": 1298000 }, { "epoch": 0.78, "learning_rate": 5.0590463516398334e-05, "loss": 1.3899, "step": 1298500 }, { "epoch": 0.78, "learning_rate": 5.058836775076889e-05, "loss": 1.3737, "step": 1299000 }, { "epoch": 0.78, "learning_rate": 5.058626778520832e-05, "loss": 1.3628, "step": 1299500 }, { "epoch": 0.78, "learning_rate": 5.0584167819647754e-05, "loss": 1.3439, "step": 1300000 }, { "epoch": 0.78, "eval_loss": 1.3037538528442383, "eval_runtime": 1107.3433, "eval_samples_per_second": 475.661, "eval_steps_per_second": 79.277, "step": 1300000 }, { "epoch": 0.78, "learning_rate": 5.0582067854087195e-05, "loss": 1.344, "step": 1300500 }, { "epoch": 0.78, "learning_rate": 5.057996788852663e-05, "loss": 1.3935, "step": 1301000 }, { "epoch": 0.78, "learning_rate": 5.057786792296606e-05, "loss": 1.3962, "step": 1301500 }, { "epoch": 0.78, "learning_rate": 5.0575772157336615e-05, "loss": 1.3614, "step": 1302000 }, { "epoch": 0.78, "learning_rate": 5.0573672191776055e-05, "loss": 1.3494, "step": 1302500 }, { "epoch": 0.78, "learning_rate": 5.057157222621549e-05, "loss": 1.3693, "step": 1303000 }, { "epoch": 0.78, "learning_rate": 5.056947226065492e-05, "loss": 1.3903, "step": 1303500 }, { "epoch": 0.78, "learning_rate": 5.056737229509436e-05, "loss": 1.3436, "step": 1304000 }, { "epoch": 0.78, "learning_rate": 5.0565272329533796e-05, "loss": 1.3662, "step": 1304500 }, { "epoch": 0.78, "learning_rate": 5.056317236397323e-05, "loss": 1.3452, "step": 1305000 }, { "epoch": 0.78, "learning_rate": 5.056107659834379e-05, "loss": 1.3836, "step": 1305500 }, { "epoch": 0.78, "learning_rate": 5.055897663278322e-05, "loss": 1.3745, "step": 1306000 }, { "epoch": 0.78, "learning_rate": 5.055687666722266e-05, "loss": 1.3352, "step": 1306500 }, { "epoch": 0.78, "learning_rate": 5.05547767016621e-05, "loss": 1.3591, "step": 1307000 }, { "epoch": 0.78, "learning_rate": 5.055267673610153e-05, "loss": 1.3743, "step": 1307500 }, { "epoch": 0.78, "learning_rate": 5.055057677054096e-05, "loss": 1.3339, "step": 1308000 }, { "epoch": 0.78, "learning_rate": 5.054848520484264e-05, "loss": 1.363, "step": 1308500 }, { "epoch": 0.78, "learning_rate": 5.054638523928207e-05, "loss": 1.3596, "step": 1309000 }, { "epoch": 0.79, "learning_rate": 5.054428527372151e-05, "loss": 1.356, "step": 1309500 }, { "epoch": 0.79, "learning_rate": 5.0542185308160945e-05, "loss": 1.3246, "step": 1310000 }, { "epoch": 0.79, "learning_rate": 5.054008534260038e-05, "loss": 1.3558, "step": 1310500 }, { "epoch": 0.79, "learning_rate": 5.053798537703982e-05, "loss": 1.3723, "step": 1311000 }, { "epoch": 0.79, "learning_rate": 5.053588541147925e-05, "loss": 1.379, "step": 1311500 }, { "epoch": 0.79, "learning_rate": 5.0533789645849805e-05, "loss": 1.3505, "step": 1312000 }, { "epoch": 0.79, "learning_rate": 5.0531689680289246e-05, "loss": 1.3125, "step": 1312500 }, { "epoch": 0.79, "learning_rate": 5.052958971472868e-05, "loss": 1.3397, "step": 1313000 }, { "epoch": 0.79, "learning_rate": 5.052748974916811e-05, "loss": 1.3812, "step": 1313500 }, { "epoch": 0.79, "learning_rate": 5.052538978360755e-05, "loss": 1.3588, "step": 1314000 }, { "epoch": 0.79, "learning_rate": 5.0523289818046986e-05, "loss": 1.3531, "step": 1314500 }, { "epoch": 0.79, "learning_rate": 5.052118985248641e-05, "loss": 1.3294, "step": 1315000 }, { "epoch": 0.79, "learning_rate": 5.051908988692585e-05, "loss": 1.3517, "step": 1315500 }, { "epoch": 0.79, "learning_rate": 5.051698992136529e-05, "loss": 1.3449, "step": 1316000 }, { "epoch": 0.79, "learning_rate": 5.051488995580472e-05, "loss": 1.3732, "step": 1316500 }, { "epoch": 0.79, "learning_rate": 5.051278999024416e-05, "loss": 1.3561, "step": 1317000 }, { "epoch": 0.79, "learning_rate": 5.0510690024683594e-05, "loss": 1.3669, "step": 1317500 }, { "epoch": 0.79, "learning_rate": 5.050859005912303e-05, "loss": 1.3783, "step": 1318000 }, { "epoch": 0.79, "learning_rate": 5.050649009356247e-05, "loss": 1.3262, "step": 1318500 }, { "epoch": 0.79, "learning_rate": 5.05043901280019e-05, "loss": 1.3714, "step": 1319000 }, { "epoch": 0.79, "learning_rate": 5.0502294362372455e-05, "loss": 1.3824, "step": 1319500 }, { "epoch": 0.79, "learning_rate": 5.0500194396811895e-05, "loss": 1.3397, "step": 1320000 }, { "epoch": 0.79, "learning_rate": 5.049809443125133e-05, "loss": 1.373, "step": 1320500 }, { "epoch": 0.79, "learning_rate": 5.049599446569076e-05, "loss": 1.3412, "step": 1321000 }, { "epoch": 0.79, "learning_rate": 5.04938945001302e-05, "loss": 1.3345, "step": 1321500 }, { "epoch": 0.79, "learning_rate": 5.0491794534569635e-05, "loss": 1.3698, "step": 1322000 }, { "epoch": 0.79, "learning_rate": 5.048969456900907e-05, "loss": 1.3573, "step": 1322500 }, { "epoch": 0.79, "learning_rate": 5.04875946034485e-05, "loss": 1.3603, "step": 1323000 }, { "epoch": 0.79, "learning_rate": 5.048549883781906e-05, "loss": 1.3738, "step": 1323500 }, { "epoch": 0.79, "learning_rate": 5.0483403072189616e-05, "loss": 1.3813, "step": 1324000 }, { "epoch": 0.79, "learning_rate": 5.048130310662905e-05, "loss": 1.3621, "step": 1324500 }, { "epoch": 0.79, "learning_rate": 5.047920314106848e-05, "loss": 1.3689, "step": 1325000 }, { "epoch": 0.79, "learning_rate": 5.0477103175507923e-05, "loss": 1.3878, "step": 1325500 }, { "epoch": 0.79, "learning_rate": 5.047500740987848e-05, "loss": 1.3583, "step": 1326000 }, { "epoch": 0.8, "learning_rate": 5.047290744431791e-05, "loss": 1.3273, "step": 1326500 }, { "epoch": 0.8, "learning_rate": 5.047080747875735e-05, "loss": 1.3863, "step": 1327000 }, { "epoch": 0.8, "learning_rate": 5.0468707513196784e-05, "loss": 1.3773, "step": 1327500 }, { "epoch": 0.8, "learning_rate": 5.046660754763622e-05, "loss": 1.3776, "step": 1328000 }, { "epoch": 0.8, "learning_rate": 5.046450758207566e-05, "loss": 1.3504, "step": 1328500 }, { "epoch": 0.8, "learning_rate": 5.046240761651509e-05, "loss": 1.3939, "step": 1329000 }, { "epoch": 0.8, "learning_rate": 5.0460307650954525e-05, "loss": 1.3695, "step": 1329500 }, { "epoch": 0.8, "learning_rate": 5.045820768539396e-05, "loss": 1.3504, "step": 1330000 }, { "epoch": 0.8, "learning_rate": 5.045610771983339e-05, "loss": 1.3887, "step": 1330500 }, { "epoch": 0.8, "learning_rate": 5.0454007754272825e-05, "loss": 1.3703, "step": 1331000 }, { "epoch": 0.8, "learning_rate": 5.0451907788712265e-05, "loss": 1.3466, "step": 1331500 }, { "epoch": 0.8, "learning_rate": 5.0449812023082826e-05, "loss": 1.3766, "step": 1332000 }, { "epoch": 0.8, "learning_rate": 5.044771205752225e-05, "loss": 1.3679, "step": 1332500 }, { "epoch": 0.8, "learning_rate": 5.0445612091961686e-05, "loss": 1.3516, "step": 1333000 }, { "epoch": 0.8, "learning_rate": 5.0443516326332246e-05, "loss": 1.3815, "step": 1333500 }, { "epoch": 0.8, "learning_rate": 5.0441416360771686e-05, "loss": 1.4009, "step": 1334000 }, { "epoch": 0.8, "learning_rate": 5.043931639521112e-05, "loss": 1.3667, "step": 1334500 }, { "epoch": 0.8, "learning_rate": 5.043721642965055e-05, "loss": 1.3997, "step": 1335000 }, { "epoch": 0.8, "learning_rate": 5.043511646408999e-05, "loss": 1.3618, "step": 1335500 }, { "epoch": 0.8, "learning_rate": 5.043301649852942e-05, "loss": 1.3986, "step": 1336000 }, { "epoch": 0.8, "learning_rate": 5.043092073289998e-05, "loss": 1.3771, "step": 1336500 }, { "epoch": 0.8, "learning_rate": 5.042882076733942e-05, "loss": 1.3695, "step": 1337000 }, { "epoch": 0.8, "learning_rate": 5.0426725001709974e-05, "loss": 1.3269, "step": 1337500 }, { "epoch": 0.8, "learning_rate": 5.042462503614941e-05, "loss": 1.3586, "step": 1338000 }, { "epoch": 0.8, "learning_rate": 5.042252507058884e-05, "loss": 1.357, "step": 1338500 }, { "epoch": 0.8, "learning_rate": 5.042042510502828e-05, "loss": 1.3575, "step": 1339000 }, { "epoch": 0.8, "learning_rate": 5.041832513946771e-05, "loss": 1.3306, "step": 1339500 }, { "epoch": 0.8, "learning_rate": 5.041622517390714e-05, "loss": 1.3897, "step": 1340000 }, { "epoch": 0.8, "learning_rate": 5.041412520834658e-05, "loss": 1.3571, "step": 1340500 }, { "epoch": 0.8, "learning_rate": 5.0412025242786015e-05, "loss": 1.3571, "step": 1341000 }, { "epoch": 0.8, "learning_rate": 5.040992527722545e-05, "loss": 1.3547, "step": 1341500 }, { "epoch": 0.8, "learning_rate": 5.040782531166489e-05, "loss": 1.382, "step": 1342000 }, { "epoch": 0.8, "learning_rate": 5.040572534610432e-05, "loss": 1.3588, "step": 1342500 }, { "epoch": 0.81, "learning_rate": 5.0403629580474876e-05, "loss": 1.3556, "step": 1343000 }, { "epoch": 0.81, "learning_rate": 5.0401529614914316e-05, "loss": 1.3782, "step": 1343500 }, { "epoch": 0.81, "learning_rate": 5.039942964935375e-05, "loss": 1.3421, "step": 1344000 }, { "epoch": 0.81, "learning_rate": 5.039732968379318e-05, "loss": 1.368, "step": 1344500 }, { "epoch": 0.81, "learning_rate": 5.0395229718232624e-05, "loss": 1.3513, "step": 1345000 }, { "epoch": 0.81, "learning_rate": 5.039312975267206e-05, "loss": 1.3517, "step": 1345500 }, { "epoch": 0.81, "learning_rate": 5.039102978711149e-05, "loss": 1.3703, "step": 1346000 }, { "epoch": 0.81, "learning_rate": 5.0388934021482044e-05, "loss": 1.3803, "step": 1346500 }, { "epoch": 0.81, "learning_rate": 5.0386834055921484e-05, "loss": 1.3719, "step": 1347000 }, { "epoch": 0.81, "learning_rate": 5.038473409036092e-05, "loss": 1.3595, "step": 1347500 }, { "epoch": 0.81, "learning_rate": 5.038263832473147e-05, "loss": 1.3555, "step": 1348000 }, { "epoch": 0.81, "learning_rate": 5.0380538359170905e-05, "loss": 1.368, "step": 1348500 }, { "epoch": 0.81, "learning_rate": 5.0378438393610345e-05, "loss": 1.3555, "step": 1349000 }, { "epoch": 0.81, "learning_rate": 5.037633842804978e-05, "loss": 1.3669, "step": 1349500 }, { "epoch": 0.81, "learning_rate": 5.037423846248921e-05, "loss": 1.3711, "step": 1350000 }, { "epoch": 0.81, "learning_rate": 5.037213849692865e-05, "loss": 1.3683, "step": 1350500 }, { "epoch": 0.81, "learning_rate": 5.0370038531368086e-05, "loss": 1.3621, "step": 1351000 }, { "epoch": 0.81, "learning_rate": 5.036793856580752e-05, "loss": 1.3727, "step": 1351500 }, { "epoch": 0.81, "learning_rate": 5.036584280017808e-05, "loss": 1.3427, "step": 1352000 }, { "epoch": 0.81, "learning_rate": 5.036374283461751e-05, "loss": 1.3463, "step": 1352500 }, { "epoch": 0.81, "learning_rate": 5.0361642869056946e-05, "loss": 1.3728, "step": 1353000 }, { "epoch": 0.81, "learning_rate": 5.0359542903496387e-05, "loss": 1.3597, "step": 1353500 }, { "epoch": 0.81, "learning_rate": 5.035744713786694e-05, "loss": 1.3237, "step": 1354000 }, { "epoch": 0.81, "learning_rate": 5.0355347172306374e-05, "loss": 1.3856, "step": 1354500 }, { "epoch": 0.81, "learning_rate": 5.035324720674581e-05, "loss": 1.3451, "step": 1355000 }, { "epoch": 0.81, "learning_rate": 5.035114724118525e-05, "loss": 1.3661, "step": 1355500 }, { "epoch": 0.81, "learning_rate": 5.034904727562468e-05, "loss": 1.3572, "step": 1356000 }, { "epoch": 0.81, "learning_rate": 5.0346947310064114e-05, "loss": 1.3456, "step": 1356500 }, { "epoch": 0.81, "learning_rate": 5.034484734450355e-05, "loss": 1.3603, "step": 1357000 }, { "epoch": 0.81, "learning_rate": 5.034274737894298e-05, "loss": 1.3491, "step": 1357500 }, { "epoch": 0.81, "learning_rate": 5.0340647413382415e-05, "loss": 1.3883, "step": 1358000 }, { "epoch": 0.81, "learning_rate": 5.0338551647752975e-05, "loss": 1.3518, "step": 1358500 }, { "epoch": 0.81, "learning_rate": 5.0336451682192415e-05, "loss": 1.3455, "step": 1359000 }, { "epoch": 0.82, "learning_rate": 5.033435171663184e-05, "loss": 1.3467, "step": 1359500 }, { "epoch": 0.82, "learning_rate": 5.033225175107128e-05, "loss": 1.3773, "step": 1360000 }, { "epoch": 0.82, "learning_rate": 5.0330151785510716e-05, "loss": 1.3622, "step": 1360500 }, { "epoch": 0.82, "learning_rate": 5.032805181995015e-05, "loss": 1.3367, "step": 1361000 }, { "epoch": 0.82, "learning_rate": 5.032595185438959e-05, "loss": 1.364, "step": 1361500 }, { "epoch": 0.82, "learning_rate": 5.032385608876014e-05, "loss": 1.3671, "step": 1362000 }, { "epoch": 0.82, "learning_rate": 5.0321756123199576e-05, "loss": 1.3471, "step": 1362500 }, { "epoch": 0.82, "learning_rate": 5.031966035757014e-05, "loss": 1.3458, "step": 1363000 }, { "epoch": 0.82, "learning_rate": 5.031756039200957e-05, "loss": 1.3724, "step": 1363500 }, { "epoch": 0.82, "learning_rate": 5.031546042644901e-05, "loss": 1.3923, "step": 1364000 }, { "epoch": 0.82, "learning_rate": 5.031336046088844e-05, "loss": 1.3735, "step": 1364500 }, { "epoch": 0.82, "learning_rate": 5.031126049532787e-05, "loss": 1.366, "step": 1365000 }, { "epoch": 0.82, "learning_rate": 5.030916052976731e-05, "loss": 1.3841, "step": 1365500 }, { "epoch": 0.82, "learning_rate": 5.0307060564206744e-05, "loss": 1.3393, "step": 1366000 }, { "epoch": 0.82, "learning_rate": 5.0304960598646184e-05, "loss": 1.3327, "step": 1366500 }, { "epoch": 0.82, "learning_rate": 5.030286063308562e-05, "loss": 1.3566, "step": 1367000 }, { "epoch": 0.82, "learning_rate": 5.030076066752505e-05, "loss": 1.3514, "step": 1367500 }, { "epoch": 0.82, "learning_rate": 5.029866070196449e-05, "loss": 1.3473, "step": 1368000 }, { "epoch": 0.82, "learning_rate": 5.0296560736403925e-05, "loss": 1.3618, "step": 1368500 }, { "epoch": 0.82, "learning_rate": 5.029446497077448e-05, "loss": 1.3708, "step": 1369000 }, { "epoch": 0.82, "learning_rate": 5.029236920514503e-05, "loss": 1.37, "step": 1369500 }, { "epoch": 0.82, "learning_rate": 5.029027343951559e-05, "loss": 1.3517, "step": 1370000 }, { "epoch": 0.82, "learning_rate": 5.0288173473955026e-05, "loss": 1.3619, "step": 1370500 }, { "epoch": 0.82, "learning_rate": 5.0286073508394466e-05, "loss": 1.3541, "step": 1371000 }, { "epoch": 0.82, "learning_rate": 5.028397354283389e-05, "loss": 1.339, "step": 1371500 }, { "epoch": 0.82, "learning_rate": 5.028187357727333e-05, "loss": 1.3613, "step": 1372000 }, { "epoch": 0.82, "learning_rate": 5.0279773611712767e-05, "loss": 1.3839, "step": 1372500 }, { "epoch": 0.82, "learning_rate": 5.02776736461522e-05, "loss": 1.3925, "step": 1373000 }, { "epoch": 0.82, "learning_rate": 5.027557368059164e-05, "loss": 1.372, "step": 1373500 }, { "epoch": 0.82, "learning_rate": 5.0273473715031074e-05, "loss": 1.3491, "step": 1374000 }, { "epoch": 0.82, "learning_rate": 5.027137374947051e-05, "loss": 1.3841, "step": 1374500 }, { "epoch": 0.82, "learning_rate": 5.026927798384106e-05, "loss": 1.3851, "step": 1375000 }, { "epoch": 0.82, "learning_rate": 5.02671780182805e-05, "loss": 1.3629, "step": 1375500 }, { "epoch": 0.82, "learning_rate": 5.0265078052719934e-05, "loss": 1.3333, "step": 1376000 }, { "epoch": 0.83, "learning_rate": 5.026297808715937e-05, "loss": 1.3728, "step": 1376500 }, { "epoch": 0.83, "learning_rate": 5.026087812159881e-05, "loss": 1.359, "step": 1377000 }, { "epoch": 0.83, "learning_rate": 5.025877815603824e-05, "loss": 1.3648, "step": 1377500 }, { "epoch": 0.83, "learning_rate": 5.0256678190477675e-05, "loss": 1.3634, "step": 1378000 }, { "epoch": 0.83, "learning_rate": 5.0254578224917115e-05, "loss": 1.3242, "step": 1378500 }, { "epoch": 0.83, "learning_rate": 5.025248245928767e-05, "loss": 1.3445, "step": 1379000 }, { "epoch": 0.83, "learning_rate": 5.02503824937271e-05, "loss": 1.3264, "step": 1379500 }, { "epoch": 0.83, "learning_rate": 5.0248282528166536e-05, "loss": 1.3571, "step": 1380000 }, { "epoch": 0.83, "learning_rate": 5.0246182562605976e-05, "loss": 1.3501, "step": 1380500 }, { "epoch": 0.83, "learning_rate": 5.024408259704541e-05, "loss": 1.3979, "step": 1381000 }, { "epoch": 0.83, "learning_rate": 5.024198683141596e-05, "loss": 1.3461, "step": 1381500 }, { "epoch": 0.83, "learning_rate": 5.02398868658554e-05, "loss": 1.3789, "step": 1382000 }, { "epoch": 0.83, "learning_rate": 5.023778690029484e-05, "loss": 1.365, "step": 1382500 }, { "epoch": 0.83, "learning_rate": 5.023568693473427e-05, "loss": 1.3791, "step": 1383000 }, { "epoch": 0.83, "learning_rate": 5.023358696917371e-05, "loss": 1.3781, "step": 1383500 }, { "epoch": 0.83, "learning_rate": 5.023148700361314e-05, "loss": 1.3572, "step": 1384000 }, { "epoch": 0.83, "learning_rate": 5.022938703805257e-05, "loss": 1.3567, "step": 1384500 }, { "epoch": 0.83, "learning_rate": 5.022728707249201e-05, "loss": 1.3838, "step": 1385000 }, { "epoch": 0.83, "learning_rate": 5.022519130686257e-05, "loss": 1.3742, "step": 1385500 }, { "epoch": 0.83, "learning_rate": 5.0223095541233125e-05, "loss": 1.3452, "step": 1386000 }, { "epoch": 0.83, "learning_rate": 5.022099557567256e-05, "loss": 1.3815, "step": 1386500 }, { "epoch": 0.83, "learning_rate": 5.021889981004311e-05, "loss": 1.3345, "step": 1387000 }, { "epoch": 0.83, "learning_rate": 5.021679984448255e-05, "loss": 1.3558, "step": 1387500 }, { "epoch": 0.83, "learning_rate": 5.0214699878921985e-05, "loss": 1.3787, "step": 1388000 }, { "epoch": 0.83, "learning_rate": 5.021259991336142e-05, "loss": 1.3451, "step": 1388500 }, { "epoch": 0.83, "learning_rate": 5.021049994780086e-05, "loss": 1.3576, "step": 1389000 }, { "epoch": 0.83, "learning_rate": 5.020840418217141e-05, "loss": 1.3378, "step": 1389500 }, { "epoch": 0.83, "learning_rate": 5.0206304216610846e-05, "loss": 1.3333, "step": 1390000 }, { "epoch": 0.83, "learning_rate": 5.020420425105028e-05, "loss": 1.3448, "step": 1390500 }, { "epoch": 0.83, "learning_rate": 5.020210428548972e-05, "loss": 1.3811, "step": 1391000 }, { "epoch": 0.83, "learning_rate": 5.020000431992915e-05, "loss": 1.3399, "step": 1391500 }, { "epoch": 0.83, "learning_rate": 5.019790855429971e-05, "loss": 1.3733, "step": 1392000 }, { "epoch": 0.83, "learning_rate": 5.019580858873914e-05, "loss": 1.3372, "step": 1392500 }, { "epoch": 0.84, "learning_rate": 5.019370862317858e-05, "loss": 1.3775, "step": 1393000 }, { "epoch": 0.84, "learning_rate": 5.0191608657618014e-05, "loss": 1.3937, "step": 1393500 }, { "epoch": 0.84, "learning_rate": 5.018951289198857e-05, "loss": 1.3529, "step": 1394000 }, { "epoch": 0.84, "learning_rate": 5.018741292642801e-05, "loss": 1.338, "step": 1394500 }, { "epoch": 0.84, "learning_rate": 5.018531296086744e-05, "loss": 1.3768, "step": 1395000 }, { "epoch": 0.84, "learning_rate": 5.0183212995306875e-05, "loss": 1.3556, "step": 1395500 }, { "epoch": 0.84, "learning_rate": 5.0181113029746315e-05, "loss": 1.345, "step": 1396000 }, { "epoch": 0.84, "learning_rate": 5.017901306418575e-05, "loss": 1.3234, "step": 1396500 }, { "epoch": 0.84, "learning_rate": 5.017691309862518e-05, "loss": 1.3617, "step": 1397000 }, { "epoch": 0.84, "learning_rate": 5.017481313306462e-05, "loss": 1.3563, "step": 1397500 }, { "epoch": 0.84, "learning_rate": 5.0172713167504056e-05, "loss": 1.3239, "step": 1398000 }, { "epoch": 0.84, "learning_rate": 5.017061320194348e-05, "loss": 1.3378, "step": 1398500 }, { "epoch": 0.84, "learning_rate": 5.016851743631404e-05, "loss": 1.354, "step": 1399000 }, { "epoch": 0.84, "learning_rate": 5.016641747075348e-05, "loss": 1.3327, "step": 1399500 }, { "epoch": 0.84, "learning_rate": 5.0164317505192916e-05, "loss": 1.3511, "step": 1400000 }, { "epoch": 0.84, "eval_loss": 1.2957533597946167, "eval_runtime": 1117.7468, "eval_samples_per_second": 471.234, "eval_steps_per_second": 78.539, "step": 1400000 }, { "epoch": 0.84, "learning_rate": 5.016221753963235e-05, "loss": 1.3559, "step": 1400500 }, { "epoch": 0.84, "learning_rate": 5.016011757407178e-05, "loss": 1.3687, "step": 1401000 }, { "epoch": 0.84, "learning_rate": 5.015801760851122e-05, "loss": 1.3554, "step": 1401500 }, { "epoch": 0.84, "learning_rate": 5.015592184288178e-05, "loss": 1.36, "step": 1402000 }, { "epoch": 0.84, "learning_rate": 5.015382187732121e-05, "loss": 1.3392, "step": 1402500 }, { "epoch": 0.84, "learning_rate": 5.0151721911760644e-05, "loss": 1.3502, "step": 1403000 }, { "epoch": 0.84, "learning_rate": 5.014962194620008e-05, "loss": 1.3557, "step": 1403500 }, { "epoch": 0.84, "learning_rate": 5.014752198063952e-05, "loss": 1.3468, "step": 1404000 }, { "epoch": 0.84, "learning_rate": 5.014542201507895e-05, "loss": 1.3427, "step": 1404500 }, { "epoch": 0.84, "learning_rate": 5.0143322049518385e-05, "loss": 1.3872, "step": 1405000 }, { "epoch": 0.84, "learning_rate": 5.0141222083957825e-05, "loss": 1.3528, "step": 1405500 }, { "epoch": 0.84, "learning_rate": 5.013912631832838e-05, "loss": 1.3865, "step": 1406000 }, { "epoch": 0.84, "learning_rate": 5.013702635276781e-05, "loss": 1.3521, "step": 1406500 }, { "epoch": 0.84, "learning_rate": 5.0134926387207245e-05, "loss": 1.369, "step": 1407000 }, { "epoch": 0.84, "learning_rate": 5.0132826421646686e-05, "loss": 1.3716, "step": 1407500 }, { "epoch": 0.84, "learning_rate": 5.013072645608612e-05, "loss": 1.3633, "step": 1408000 }, { "epoch": 0.84, "learning_rate": 5.012862649052555e-05, "loss": 1.3631, "step": 1408500 }, { "epoch": 0.84, "learning_rate": 5.012652652496499e-05, "loss": 1.367, "step": 1409000 }, { "epoch": 0.85, "learning_rate": 5.0124426559404426e-05, "loss": 1.3253, "step": 1409500 }, { "epoch": 0.85, "learning_rate": 5.012232659384386e-05, "loss": 1.3425, "step": 1410000 }, { "epoch": 0.85, "learning_rate": 5.01202266282833e-05, "loss": 1.3829, "step": 1410500 }, { "epoch": 0.85, "learning_rate": 5.011812666272273e-05, "loss": 1.3671, "step": 1411000 }, { "epoch": 0.85, "learning_rate": 5.011602669716216e-05, "loss": 1.3428, "step": 1411500 }, { "epoch": 0.85, "learning_rate": 5.011393093153273e-05, "loss": 1.356, "step": 1412000 }, { "epoch": 0.85, "learning_rate": 5.011183096597216e-05, "loss": 1.3526, "step": 1412500 }, { "epoch": 0.85, "learning_rate": 5.0109731000411594e-05, "loss": 1.3665, "step": 1413000 }, { "epoch": 0.85, "learning_rate": 5.010763103485103e-05, "loss": 1.3537, "step": 1413500 }, { "epoch": 0.85, "learning_rate": 5.010553106929046e-05, "loss": 1.3535, "step": 1414000 }, { "epoch": 0.85, "learning_rate": 5.0103431103729894e-05, "loss": 1.3784, "step": 1414500 }, { "epoch": 0.85, "learning_rate": 5.0101335338100455e-05, "loss": 1.3379, "step": 1415000 }, { "epoch": 0.85, "learning_rate": 5.0099235372539895e-05, "loss": 1.3678, "step": 1415500 }, { "epoch": 0.85, "learning_rate": 5.009713540697932e-05, "loss": 1.3478, "step": 1416000 }, { "epoch": 0.85, "learning_rate": 5.0095035441418755e-05, "loss": 1.3404, "step": 1416500 }, { "epoch": 0.85, "learning_rate": 5.0092935475858195e-05, "loss": 1.3638, "step": 1417000 }, { "epoch": 0.85, "learning_rate": 5.009083551029763e-05, "loss": 1.3543, "step": 1417500 }, { "epoch": 0.85, "learning_rate": 5.008873974466818e-05, "loss": 1.3728, "step": 1418000 }, { "epoch": 0.85, "learning_rate": 5.008663977910762e-05, "loss": 1.3341, "step": 1418500 }, { "epoch": 0.85, "learning_rate": 5.0084539813547056e-05, "loss": 1.3762, "step": 1419000 }, { "epoch": 0.85, "learning_rate": 5.008243984798649e-05, "loss": 1.3458, "step": 1419500 }, { "epoch": 0.85, "learning_rate": 5.008034408235705e-05, "loss": 1.3447, "step": 1420000 }, { "epoch": 0.85, "learning_rate": 5.0078244116796483e-05, "loss": 1.3587, "step": 1420500 }, { "epoch": 0.85, "learning_rate": 5.007614415123592e-05, "loss": 1.3616, "step": 1421000 }, { "epoch": 0.85, "learning_rate": 5.007404418567535e-05, "loss": 1.3597, "step": 1421500 }, { "epoch": 0.85, "learning_rate": 5.007194842004591e-05, "loss": 1.3673, "step": 1422000 }, { "epoch": 0.85, "learning_rate": 5.0069852654416464e-05, "loss": 1.3474, "step": 1422500 }, { "epoch": 0.85, "learning_rate": 5.0067752688855905e-05, "loss": 1.3718, "step": 1423000 }, { "epoch": 0.85, "learning_rate": 5.006565272329534e-05, "loss": 1.3493, "step": 1423500 }, { "epoch": 0.85, "learning_rate": 5.006355275773477e-05, "loss": 1.3535, "step": 1424000 }, { "epoch": 0.85, "learning_rate": 5.006145279217421e-05, "loss": 1.353, "step": 1424500 }, { "epoch": 0.85, "learning_rate": 5.0059352826613645e-05, "loss": 1.3666, "step": 1425000 }, { "epoch": 0.85, "learning_rate": 5.005725286105308e-05, "loss": 1.3343, "step": 1425500 }, { "epoch": 0.85, "learning_rate": 5.005515709542364e-05, "loss": 1.368, "step": 1426000 }, { "epoch": 0.86, "learning_rate": 5.005305712986307e-05, "loss": 1.3494, "step": 1426500 }, { "epoch": 0.86, "learning_rate": 5.0050957164302506e-05, "loss": 1.3604, "step": 1427000 }, { "epoch": 0.86, "learning_rate": 5.0048857198741946e-05, "loss": 1.3449, "step": 1427500 }, { "epoch": 0.86, "learning_rate": 5.004675723318137e-05, "loss": 1.3752, "step": 1428000 }, { "epoch": 0.86, "learning_rate": 5.0044657267620806e-05, "loss": 1.3549, "step": 1428500 }, { "epoch": 0.86, "learning_rate": 5.0042557302060246e-05, "loss": 1.3495, "step": 1429000 }, { "epoch": 0.86, "learning_rate": 5.004045733649968e-05, "loss": 1.3743, "step": 1429500 }, { "epoch": 0.86, "learning_rate": 5.0038357370939113e-05, "loss": 1.3473, "step": 1430000 }, { "epoch": 0.86, "learning_rate": 5.003626160530967e-05, "loss": 1.3312, "step": 1430500 }, { "epoch": 0.86, "learning_rate": 5.003416163974911e-05, "loss": 1.3626, "step": 1431000 }, { "epoch": 0.86, "learning_rate": 5.003206167418854e-05, "loss": 1.3733, "step": 1431500 }, { "epoch": 0.86, "learning_rate": 5.0029961708627974e-05, "loss": 1.3673, "step": 1432000 }, { "epoch": 0.86, "learning_rate": 5.0027865942998534e-05, "loss": 1.3973, "step": 1432500 }, { "epoch": 0.86, "learning_rate": 5.002576597743797e-05, "loss": 1.3579, "step": 1433000 }, { "epoch": 0.86, "learning_rate": 5.00236660118774e-05, "loss": 1.3774, "step": 1433500 }, { "epoch": 0.86, "learning_rate": 5.002157024624796e-05, "loss": 1.3346, "step": 1434000 }, { "epoch": 0.86, "learning_rate": 5.00194702806874e-05, "loss": 1.3529, "step": 1434500 }, { "epoch": 0.86, "learning_rate": 5.001737031512683e-05, "loss": 1.3542, "step": 1435000 }, { "epoch": 0.86, "learning_rate": 5.001527034956626e-05, "loss": 1.3794, "step": 1435500 }, { "epoch": 0.86, "learning_rate": 5.00131703840057e-05, "loss": 1.3467, "step": 1436000 }, { "epoch": 0.86, "learning_rate": 5.001107461837626e-05, "loss": 1.3688, "step": 1436500 }, { "epoch": 0.86, "learning_rate": 5.0008974652815696e-05, "loss": 1.3703, "step": 1437000 }, { "epoch": 0.86, "learning_rate": 5.000687468725512e-05, "loss": 1.3655, "step": 1437500 }, { "epoch": 0.86, "learning_rate": 5.000477472169456e-05, "loss": 1.3403, "step": 1438000 }, { "epoch": 0.86, "learning_rate": 5.0002674756133997e-05, "loss": 1.3419, "step": 1438500 }, { "epoch": 0.86, "learning_rate": 5.000057479057343e-05, "loss": 1.3477, "step": 1439000 }, { "epoch": 0.86, "learning_rate": 4.999847902494399e-05, "loss": 1.3568, "step": 1439500 }, { "epoch": 0.86, "learning_rate": 4.9996379059383424e-05, "loss": 1.3574, "step": 1440000 }, { "epoch": 0.86, "learning_rate": 4.999427909382286e-05, "loss": 1.3569, "step": 1440500 }, { "epoch": 0.86, "learning_rate": 4.99921791282623e-05, "loss": 1.3307, "step": 1441000 }, { "epoch": 0.86, "learning_rate": 4.999007916270173e-05, "loss": 1.3464, "step": 1441500 }, { "epoch": 0.86, "learning_rate": 4.9987983397072285e-05, "loss": 1.3494, "step": 1442000 }, { "epoch": 0.86, "learning_rate": 4.998588343151172e-05, "loss": 1.3825, "step": 1442500 }, { "epoch": 0.87, "learning_rate": 4.998378346595116e-05, "loss": 1.3466, "step": 1443000 }, { "epoch": 0.87, "learning_rate": 4.998168350039059e-05, "loss": 1.3645, "step": 1443500 }, { "epoch": 0.87, "learning_rate": 4.9979583534830025e-05, "loss": 1.3432, "step": 1444000 }, { "epoch": 0.87, "learning_rate": 4.9977483569269465e-05, "loss": 1.3897, "step": 1444500 }, { "epoch": 0.87, "learning_rate": 4.99753836037089e-05, "loss": 1.3479, "step": 1445000 }, { "epoch": 0.87, "learning_rate": 4.997328363814833e-05, "loss": 1.3598, "step": 1445500 }, { "epoch": 0.87, "learning_rate": 4.997118367258777e-05, "loss": 1.3676, "step": 1446000 }, { "epoch": 0.87, "learning_rate": 4.9969087906958326e-05, "loss": 1.3521, "step": 1446500 }, { "epoch": 0.87, "learning_rate": 4.996698794139776e-05, "loss": 1.3406, "step": 1447000 }, { "epoch": 0.87, "learning_rate": 4.996488797583719e-05, "loss": 1.3737, "step": 1447500 }, { "epoch": 0.87, "learning_rate": 4.996278801027663e-05, "loss": 1.3665, "step": 1448000 }, { "epoch": 0.87, "learning_rate": 4.996069224464719e-05, "loss": 1.3931, "step": 1448500 }, { "epoch": 0.87, "learning_rate": 4.995859227908662e-05, "loss": 1.34, "step": 1449000 }, { "epoch": 0.87, "learning_rate": 4.995649231352606e-05, "loss": 1.3524, "step": 1449500 }, { "epoch": 0.87, "learning_rate": 4.9954392347965494e-05, "loss": 1.3403, "step": 1450000 }, { "epoch": 0.87, "learning_rate": 4.995229238240493e-05, "loss": 1.3441, "step": 1450500 }, { "epoch": 0.87, "learning_rate": 4.995019241684437e-05, "loss": 1.3675, "step": 1451000 }, { "epoch": 0.87, "learning_rate": 4.99480924512838e-05, "loss": 1.3558, "step": 1451500 }, { "epoch": 0.87, "learning_rate": 4.9945992485723235e-05, "loss": 1.3145, "step": 1452000 }, { "epoch": 0.87, "learning_rate": 4.994389252016267e-05, "loss": 1.3376, "step": 1452500 }, { "epoch": 0.87, "learning_rate": 4.99417925546021e-05, "loss": 1.3487, "step": 1453000 }, { "epoch": 0.87, "learning_rate": 4.9939692589041535e-05, "loss": 1.3964, "step": 1453500 }, { "epoch": 0.87, "learning_rate": 4.9937596823412095e-05, "loss": 1.384, "step": 1454000 }, { "epoch": 0.87, "learning_rate": 4.993550105778265e-05, "loss": 1.3288, "step": 1454500 }, { "epoch": 0.87, "learning_rate": 4.993340109222209e-05, "loss": 1.3658, "step": 1455000 }, { "epoch": 0.87, "learning_rate": 4.993130112666152e-05, "loss": 1.3944, "step": 1455500 }, { "epoch": 0.87, "learning_rate": 4.9929201161100956e-05, "loss": 1.3843, "step": 1456000 }, { "epoch": 0.87, "learning_rate": 4.9927101195540396e-05, "loss": 1.3401, "step": 1456500 }, { "epoch": 0.87, "learning_rate": 4.992500122997982e-05, "loss": 1.3714, "step": 1457000 }, { "epoch": 0.87, "learning_rate": 4.992290126441926e-05, "loss": 1.3549, "step": 1457500 }, { "epoch": 0.87, "learning_rate": 4.99208012988587e-05, "loss": 1.3391, "step": 1458000 }, { "epoch": 0.87, "learning_rate": 4.991870133329813e-05, "loss": 1.3782, "step": 1458500 }, { "epoch": 0.87, "learning_rate": 4.991660136773757e-05, "loss": 1.3308, "step": 1459000 }, { "epoch": 0.88, "learning_rate": 4.9914501402177004e-05, "loss": 1.3384, "step": 1459500 }, { "epoch": 0.88, "learning_rate": 4.991240143661644e-05, "loss": 1.3612, "step": 1460000 }, { "epoch": 0.88, "learning_rate": 4.991030567098699e-05, "loss": 1.3616, "step": 1460500 }, { "epoch": 0.88, "learning_rate": 4.990820570542643e-05, "loss": 1.3731, "step": 1461000 }, { "epoch": 0.88, "learning_rate": 4.9906105739865865e-05, "loss": 1.3501, "step": 1461500 }, { "epoch": 0.88, "learning_rate": 4.99040057743053e-05, "loss": 1.3622, "step": 1462000 }, { "epoch": 0.88, "learning_rate": 4.990191000867585e-05, "loss": 1.3907, "step": 1462500 }, { "epoch": 0.88, "learning_rate": 4.989981424304641e-05, "loss": 1.3486, "step": 1463000 }, { "epoch": 0.88, "learning_rate": 4.989771427748585e-05, "loss": 1.3768, "step": 1463500 }, { "epoch": 0.88, "learning_rate": 4.9895614311925286e-05, "loss": 1.3447, "step": 1464000 }, { "epoch": 0.88, "learning_rate": 4.989351434636472e-05, "loss": 1.3832, "step": 1464500 }, { "epoch": 0.88, "learning_rate": 4.989141438080415e-05, "loss": 1.3687, "step": 1465000 }, { "epoch": 0.88, "learning_rate": 4.9889314415243586e-05, "loss": 1.3254, "step": 1465500 }, { "epoch": 0.88, "learning_rate": 4.9887218649614146e-05, "loss": 1.3592, "step": 1466000 }, { "epoch": 0.88, "learning_rate": 4.988511868405358e-05, "loss": 1.3441, "step": 1466500 }, { "epoch": 0.88, "learning_rate": 4.988301871849301e-05, "loss": 1.3329, "step": 1467000 }, { "epoch": 0.88, "learning_rate": 4.988091875293245e-05, "loss": 1.327, "step": 1467500 }, { "epoch": 0.88, "learning_rate": 4.987882298730301e-05, "loss": 1.3498, "step": 1468000 }, { "epoch": 0.88, "learning_rate": 4.987672302174245e-05, "loss": 1.3766, "step": 1468500 }, { "epoch": 0.88, "learning_rate": 4.9874623056181874e-05, "loss": 1.3593, "step": 1469000 }, { "epoch": 0.88, "learning_rate": 4.987252309062131e-05, "loss": 1.3689, "step": 1469500 }, { "epoch": 0.88, "learning_rate": 4.987042312506075e-05, "loss": 1.3375, "step": 1470000 }, { "epoch": 0.88, "learning_rate": 4.986832315950018e-05, "loss": 1.3634, "step": 1470500 }, { "epoch": 0.88, "learning_rate": 4.986622319393962e-05, "loss": 1.3612, "step": 1471000 }, { "epoch": 0.88, "learning_rate": 4.9864123228379055e-05, "loss": 1.3619, "step": 1471500 }, { "epoch": 0.88, "learning_rate": 4.986202746274961e-05, "loss": 1.386, "step": 1472000 }, { "epoch": 0.88, "learning_rate": 4.985992749718904e-05, "loss": 1.343, "step": 1472500 }, { "epoch": 0.88, "learning_rate": 4.985782753162848e-05, "loss": 1.3386, "step": 1473000 }, { "epoch": 0.88, "learning_rate": 4.9855727566067916e-05, "loss": 1.3211, "step": 1473500 }, { "epoch": 0.88, "learning_rate": 4.985363180043847e-05, "loss": 1.365, "step": 1474000 }, { "epoch": 0.88, "learning_rate": 4.98515318348779e-05, "loss": 1.3443, "step": 1474500 }, { "epoch": 0.88, "learning_rate": 4.984943186931734e-05, "loss": 1.3525, "step": 1475000 }, { "epoch": 0.88, "learning_rate": 4.9847331903756776e-05, "loss": 1.3659, "step": 1475500 }, { "epoch": 0.88, "learning_rate": 4.984523613812733e-05, "loss": 1.3623, "step": 1476000 }, { "epoch": 0.89, "learning_rate": 4.984314037249789e-05, "loss": 1.3571, "step": 1476500 }, { "epoch": 0.89, "learning_rate": 4.9841040406937324e-05, "loss": 1.3557, "step": 1477000 }, { "epoch": 0.89, "learning_rate": 4.9838940441376764e-05, "loss": 1.3724, "step": 1477500 }, { "epoch": 0.89, "learning_rate": 4.98368404758162e-05, "loss": 1.3603, "step": 1478000 }, { "epoch": 0.89, "learning_rate": 4.983474051025563e-05, "loss": 1.3528, "step": 1478500 }, { "epoch": 0.89, "learning_rate": 4.983264474462619e-05, "loss": 1.35, "step": 1479000 }, { "epoch": 0.89, "learning_rate": 4.9830544779065625e-05, "loss": 1.3552, "step": 1479500 }, { "epoch": 0.89, "learning_rate": 4.982844481350506e-05, "loss": 1.3568, "step": 1480000 }, { "epoch": 0.89, "learning_rate": 4.98263448479445e-05, "loss": 1.3534, "step": 1480500 }, { "epoch": 0.89, "learning_rate": 4.982424908231505e-05, "loss": 1.327, "step": 1481000 }, { "epoch": 0.89, "learning_rate": 4.9822149116754485e-05, "loss": 1.357, "step": 1481500 }, { "epoch": 0.89, "learning_rate": 4.982005335112504e-05, "loss": 1.3553, "step": 1482000 }, { "epoch": 0.89, "learning_rate": 4.981795338556447e-05, "loss": 1.3514, "step": 1482500 }, { "epoch": 0.89, "learning_rate": 4.981585342000391e-05, "loss": 1.3264, "step": 1483000 }, { "epoch": 0.89, "learning_rate": 4.9813753454443346e-05, "loss": 1.3313, "step": 1483500 }, { "epoch": 0.89, "learning_rate": 4.9811653488882786e-05, "loss": 1.3577, "step": 1484000 }, { "epoch": 0.89, "learning_rate": 4.980955352332222e-05, "loss": 1.3812, "step": 1484500 }, { "epoch": 0.89, "learning_rate": 4.980745355776165e-05, "loss": 1.3652, "step": 1485000 }, { "epoch": 0.89, "learning_rate": 4.980535359220109e-05, "loss": 1.3653, "step": 1485500 }, { "epoch": 0.89, "learning_rate": 4.980325362664052e-05, "loss": 1.335, "step": 1486000 }, { "epoch": 0.89, "learning_rate": 4.980115786101108e-05, "loss": 1.3733, "step": 1486500 }, { "epoch": 0.89, "learning_rate": 4.9799062095381634e-05, "loss": 1.3455, "step": 1487000 }, { "epoch": 0.89, "learning_rate": 4.979696212982107e-05, "loss": 1.3338, "step": 1487500 }, { "epoch": 0.89, "learning_rate": 4.979486216426051e-05, "loss": 1.3578, "step": 1488000 }, { "epoch": 0.89, "learning_rate": 4.979276219869994e-05, "loss": 1.3312, "step": 1488500 }, { "epoch": 0.89, "learning_rate": 4.9790662233139375e-05, "loss": 1.3559, "step": 1489000 }, { "epoch": 0.89, "learning_rate": 4.9788562267578815e-05, "loss": 1.3392, "step": 1489500 }, { "epoch": 0.89, "learning_rate": 4.978646230201825e-05, "loss": 1.3373, "step": 1490000 }, { "epoch": 0.89, "learning_rate": 4.978436233645768e-05, "loss": 1.3669, "step": 1490500 }, { "epoch": 0.89, "learning_rate": 4.9782262370897115e-05, "loss": 1.3521, "step": 1491000 }, { "epoch": 0.89, "learning_rate": 4.978016240533655e-05, "loss": 1.3375, "step": 1491500 }, { "epoch": 0.89, "learning_rate": 4.977806243977599e-05, "loss": 1.324, "step": 1492000 }, { "epoch": 0.89, "learning_rate": 4.977596667414655e-05, "loss": 1.3571, "step": 1492500 }, { "epoch": 0.9, "learning_rate": 4.9773866708585976e-05, "loss": 1.3361, "step": 1493000 }, { "epoch": 0.9, "learning_rate": 4.977176674302541e-05, "loss": 1.3423, "step": 1493500 }, { "epoch": 0.9, "learning_rate": 4.976966677746485e-05, "loss": 1.3543, "step": 1494000 }, { "epoch": 0.9, "learning_rate": 4.976756681190428e-05, "loss": 1.359, "step": 1494500 }, { "epoch": 0.9, "learning_rate": 4.9765471046274844e-05, "loss": 1.3461, "step": 1495000 }, { "epoch": 0.9, "learning_rate": 4.976337108071427e-05, "loss": 1.3232, "step": 1495500 }, { "epoch": 0.9, "learning_rate": 4.976127111515371e-05, "loss": 1.3619, "step": 1496000 }, { "epoch": 0.9, "learning_rate": 4.9759171149593144e-05, "loss": 1.3703, "step": 1496500 }, { "epoch": 0.9, "learning_rate": 4.9757075383963704e-05, "loss": 1.3747, "step": 1497000 }, { "epoch": 0.9, "learning_rate": 4.975497541840314e-05, "loss": 1.36, "step": 1497500 }, { "epoch": 0.9, "learning_rate": 4.975287545284257e-05, "loss": 1.3668, "step": 1498000 }, { "epoch": 0.9, "learning_rate": 4.9750775487282005e-05, "loss": 1.3609, "step": 1498500 }, { "epoch": 0.9, "learning_rate": 4.9748675521721445e-05, "loss": 1.3692, "step": 1499000 }, { "epoch": 0.9, "learning_rate": 4.974657555616088e-05, "loss": 1.3724, "step": 1499500 }, { "epoch": 0.9, "learning_rate": 4.974447559060031e-05, "loss": 1.3617, "step": 1500000 }, { "epoch": 0.9, "eval_loss": 1.2882256507873535, "eval_runtime": 1113.7701, "eval_samples_per_second": 472.916, "eval_steps_per_second": 78.82, "step": 1500000 }, { "epoch": 0.9, "learning_rate": 4.974237562503975e-05, "loss": 1.348, "step": 1500500 }, { "epoch": 0.9, "learning_rate": 4.9740275659479186e-05, "loss": 1.3631, "step": 1501000 }, { "epoch": 0.9, "learning_rate": 4.973817989384974e-05, "loss": 1.328, "step": 1501500 }, { "epoch": 0.9, "learning_rate": 4.973607992828917e-05, "loss": 1.3218, "step": 1502000 }, { "epoch": 0.9, "learning_rate": 4.973397996272861e-05, "loss": 1.3427, "step": 1502500 }, { "epoch": 0.9, "learning_rate": 4.9731879997168046e-05, "loss": 1.3348, "step": 1503000 }, { "epoch": 0.9, "learning_rate": 4.972978003160748e-05, "loss": 1.3697, "step": 1503500 }, { "epoch": 0.9, "learning_rate": 4.972768006604692e-05, "loss": 1.3246, "step": 1504000 }, { "epoch": 0.9, "learning_rate": 4.9725580100486353e-05, "loss": 1.3394, "step": 1504500 }, { "epoch": 0.9, "learning_rate": 4.972348013492579e-05, "loss": 1.3589, "step": 1505000 }, { "epoch": 0.9, "learning_rate": 4.972138436929634e-05, "loss": 1.3467, "step": 1505500 }, { "epoch": 0.9, "learning_rate": 4.971928440373578e-05, "loss": 1.3528, "step": 1506000 }, { "epoch": 0.9, "learning_rate": 4.9717188638106334e-05, "loss": 1.3551, "step": 1506500 }, { "epoch": 0.9, "learning_rate": 4.971508867254577e-05, "loss": 1.3595, "step": 1507000 }, { "epoch": 0.9, "learning_rate": 4.971298870698521e-05, "loss": 1.3974, "step": 1507500 }, { "epoch": 0.9, "learning_rate": 4.971088874142464e-05, "loss": 1.3466, "step": 1508000 }, { "epoch": 0.9, "learning_rate": 4.9708792975795195e-05, "loss": 1.3667, "step": 1508500 }, { "epoch": 0.9, "learning_rate": 4.9706697210165755e-05, "loss": 1.3634, "step": 1509000 }, { "epoch": 0.91, "learning_rate": 4.970459724460518e-05, "loss": 1.3597, "step": 1509500 }, { "epoch": 0.91, "learning_rate": 4.970249727904462e-05, "loss": 1.354, "step": 1510000 }, { "epoch": 0.91, "learning_rate": 4.9700397313484056e-05, "loss": 1.3445, "step": 1510500 }, { "epoch": 0.91, "learning_rate": 4.969829734792349e-05, "loss": 1.3389, "step": 1511000 }, { "epoch": 0.91, "learning_rate": 4.969619738236293e-05, "loss": 1.3428, "step": 1511500 }, { "epoch": 0.91, "learning_rate": 4.969409741680236e-05, "loss": 1.3533, "step": 1512000 }, { "epoch": 0.91, "learning_rate": 4.9691997451241796e-05, "loss": 1.3077, "step": 1512500 }, { "epoch": 0.91, "learning_rate": 4.968990168561236e-05, "loss": 1.3885, "step": 1513000 }, { "epoch": 0.91, "learning_rate": 4.968780172005179e-05, "loss": 1.3356, "step": 1513500 }, { "epoch": 0.91, "learning_rate": 4.9685701754491224e-05, "loss": 1.3411, "step": 1514000 }, { "epoch": 0.91, "learning_rate": 4.9683601788930664e-05, "loss": 1.3309, "step": 1514500 }, { "epoch": 0.91, "learning_rate": 4.96815018233701e-05, "loss": 1.3334, "step": 1515000 }, { "epoch": 0.91, "learning_rate": 4.967940185780953e-05, "loss": 1.3377, "step": 1515500 }, { "epoch": 0.91, "learning_rate": 4.9677306092180084e-05, "loss": 1.3242, "step": 1516000 }, { "epoch": 0.91, "learning_rate": 4.9675206126619525e-05, "loss": 1.3217, "step": 1516500 }, { "epoch": 0.91, "learning_rate": 4.967310616105896e-05, "loss": 1.3435, "step": 1517000 }, { "epoch": 0.91, "learning_rate": 4.967100619549839e-05, "loss": 1.3899, "step": 1517500 }, { "epoch": 0.91, "learning_rate": 4.9668910429868945e-05, "loss": 1.3423, "step": 1518000 }, { "epoch": 0.91, "learning_rate": 4.9666810464308385e-05, "loss": 1.3375, "step": 1518500 }, { "epoch": 0.91, "learning_rate": 4.966471469867894e-05, "loss": 1.3677, "step": 1519000 }, { "epoch": 0.91, "learning_rate": 4.966261473311837e-05, "loss": 1.3255, "step": 1519500 }, { "epoch": 0.91, "learning_rate": 4.966051476755781e-05, "loss": 1.343, "step": 1520000 }, { "epoch": 0.91, "learning_rate": 4.9658414801997246e-05, "loss": 1.3466, "step": 1520500 }, { "epoch": 0.91, "learning_rate": 4.9656319036367806e-05, "loss": 1.3477, "step": 1521000 }, { "epoch": 0.91, "learning_rate": 4.965421907080723e-05, "loss": 1.3337, "step": 1521500 }, { "epoch": 0.91, "learning_rate": 4.965211910524667e-05, "loss": 1.3597, "step": 1522000 }, { "epoch": 0.91, "learning_rate": 4.965001913968611e-05, "loss": 1.3646, "step": 1522500 }, { "epoch": 0.91, "learning_rate": 4.964791917412554e-05, "loss": 1.3481, "step": 1523000 }, { "epoch": 0.91, "learning_rate": 4.964581920856498e-05, "loss": 1.3579, "step": 1523500 }, { "epoch": 0.91, "learning_rate": 4.9643719243004414e-05, "loss": 1.363, "step": 1524000 }, { "epoch": 0.91, "learning_rate": 4.964161927744385e-05, "loss": 1.336, "step": 1524500 }, { "epoch": 0.91, "learning_rate": 4.963951931188329e-05, "loss": 1.3627, "step": 1525000 }, { "epoch": 0.91, "learning_rate": 4.963741934632272e-05, "loss": 1.3363, "step": 1525500 }, { "epoch": 0.91, "learning_rate": 4.9635319380762155e-05, "loss": 1.3587, "step": 1526000 }, { "epoch": 0.92, "learning_rate": 4.9633219415201595e-05, "loss": 1.3497, "step": 1526500 }, { "epoch": 0.92, "learning_rate": 4.963112364957215e-05, "loss": 1.335, "step": 1527000 }, { "epoch": 0.92, "learning_rate": 4.962902368401158e-05, "loss": 1.3441, "step": 1527500 }, { "epoch": 0.92, "learning_rate": 4.9626923718451015e-05, "loss": 1.3459, "step": 1528000 }, { "epoch": 0.92, "learning_rate": 4.9624823752890455e-05, "loss": 1.3846, "step": 1528500 }, { "epoch": 0.92, "learning_rate": 4.962273218719213e-05, "loss": 1.351, "step": 1529000 }, { "epoch": 0.92, "learning_rate": 4.962063222163156e-05, "loss": 1.3635, "step": 1529500 }, { "epoch": 0.92, "learning_rate": 4.961853645600212e-05, "loss": 1.359, "step": 1530000 }, { "epoch": 0.92, "learning_rate": 4.9616436490441556e-05, "loss": 1.3364, "step": 1530500 }, { "epoch": 0.92, "learning_rate": 4.961433652488099e-05, "loss": 1.3353, "step": 1531000 }, { "epoch": 0.92, "learning_rate": 4.961223655932042e-05, "loss": 1.3298, "step": 1531500 }, { "epoch": 0.92, "learning_rate": 4.961013659375986e-05, "loss": 1.3628, "step": 1532000 }, { "epoch": 0.92, "learning_rate": 4.960804082813042e-05, "loss": 1.3335, "step": 1532500 }, { "epoch": 0.92, "learning_rate": 4.960594086256986e-05, "loss": 1.3212, "step": 1533000 }, { "epoch": 0.92, "learning_rate": 4.9603840897009284e-05, "loss": 1.3389, "step": 1533500 }, { "epoch": 0.92, "learning_rate": 4.9601740931448724e-05, "loss": 1.3497, "step": 1534000 }, { "epoch": 0.92, "learning_rate": 4.959964096588816e-05, "loss": 1.3317, "step": 1534500 }, { "epoch": 0.92, "learning_rate": 4.959754100032759e-05, "loss": 1.3454, "step": 1535000 }, { "epoch": 0.92, "learning_rate": 4.959544103476703e-05, "loss": 1.3644, "step": 1535500 }, { "epoch": 0.92, "learning_rate": 4.9593341069206465e-05, "loss": 1.3565, "step": 1536000 }, { "epoch": 0.92, "learning_rate": 4.959124530357702e-05, "loss": 1.3489, "step": 1536500 }, { "epoch": 0.92, "learning_rate": 4.958914953794758e-05, "loss": 1.3489, "step": 1537000 }, { "epoch": 0.92, "learning_rate": 4.958704957238701e-05, "loss": 1.37, "step": 1537500 }, { "epoch": 0.92, "learning_rate": 4.9584949606826446e-05, "loss": 1.3483, "step": 1538000 }, { "epoch": 0.92, "learning_rate": 4.958284964126588e-05, "loss": 1.3569, "step": 1538500 }, { "epoch": 0.92, "learning_rate": 4.958074967570531e-05, "loss": 1.3278, "step": 1539000 }, { "epoch": 0.92, "learning_rate": 4.957864971014475e-05, "loss": 1.3169, "step": 1539500 }, { "epoch": 0.92, "learning_rate": 4.9576549744584186e-05, "loss": 1.3644, "step": 1540000 }, { "epoch": 0.92, "learning_rate": 4.957444977902362e-05, "loss": 1.3393, "step": 1540500 }, { "epoch": 0.92, "learning_rate": 4.957234981346306e-05, "loss": 1.3195, "step": 1541000 }, { "epoch": 0.92, "learning_rate": 4.9570254047833614e-05, "loss": 1.3636, "step": 1541500 }, { "epoch": 0.92, "learning_rate": 4.9568158282204174e-05, "loss": 1.3549, "step": 1542000 }, { "epoch": 0.92, "learning_rate": 4.956605831664361e-05, "loss": 1.3589, "step": 1542500 }, { "epoch": 0.93, "learning_rate": 4.956395835108304e-05, "loss": 1.3544, "step": 1543000 }, { "epoch": 0.93, "learning_rate": 4.9561858385522474e-05, "loss": 1.36, "step": 1543500 }, { "epoch": 0.93, "learning_rate": 4.955975841996191e-05, "loss": 1.328, "step": 1544000 }, { "epoch": 0.93, "learning_rate": 4.955765845440135e-05, "loss": 1.3684, "step": 1544500 }, { "epoch": 0.93, "learning_rate": 4.955555848884078e-05, "loss": 1.3337, "step": 1545000 }, { "epoch": 0.93, "learning_rate": 4.9553458523280215e-05, "loss": 1.3604, "step": 1545500 }, { "epoch": 0.93, "learning_rate": 4.9551358557719655e-05, "loss": 1.3611, "step": 1546000 }, { "epoch": 0.93, "learning_rate": 4.954926279209021e-05, "loss": 1.3561, "step": 1546500 }, { "epoch": 0.93, "learning_rate": 4.954716282652964e-05, "loss": 1.3712, "step": 1547000 }, { "epoch": 0.93, "learning_rate": 4.9545062860969076e-05, "loss": 1.3572, "step": 1547500 }, { "epoch": 0.93, "learning_rate": 4.9542962895408516e-05, "loss": 1.3385, "step": 1548000 }, { "epoch": 0.93, "learning_rate": 4.954086292984795e-05, "loss": 1.33, "step": 1548500 }, { "epoch": 0.93, "learning_rate": 4.953876296428739e-05, "loss": 1.3408, "step": 1549000 }, { "epoch": 0.93, "learning_rate": 4.953666719865794e-05, "loss": 1.3574, "step": 1549500 }, { "epoch": 0.93, "learning_rate": 4.953456723309738e-05, "loss": 1.3448, "step": 1550000 }, { "epoch": 0.93, "learning_rate": 4.953246726753681e-05, "loss": 1.324, "step": 1550500 }, { "epoch": 0.93, "learning_rate": 4.953036730197625e-05, "loss": 1.3288, "step": 1551000 }, { "epoch": 0.93, "learning_rate": 4.9528267336415684e-05, "loss": 1.3402, "step": 1551500 }, { "epoch": 0.93, "learning_rate": 4.952617157078624e-05, "loss": 1.3622, "step": 1552000 }, { "epoch": 0.93, "learning_rate": 4.952407160522567e-05, "loss": 1.3224, "step": 1552500 }, { "epoch": 0.93, "learning_rate": 4.952197163966511e-05, "loss": 1.3556, "step": 1553000 }, { "epoch": 0.93, "learning_rate": 4.9519871674104545e-05, "loss": 1.34, "step": 1553500 }, { "epoch": 0.93, "learning_rate": 4.95177759084751e-05, "loss": 1.3238, "step": 1554000 }, { "epoch": 0.93, "learning_rate": 4.951567594291453e-05, "loss": 1.3197, "step": 1554500 }, { "epoch": 0.93, "learning_rate": 4.951357597735397e-05, "loss": 1.3351, "step": 1555000 }, { "epoch": 0.93, "learning_rate": 4.9511476011793405e-05, "loss": 1.3388, "step": 1555500 }, { "epoch": 0.93, "learning_rate": 4.9509376046232846e-05, "loss": 1.3504, "step": 1556000 }, { "epoch": 0.93, "learning_rate": 4.950727608067228e-05, "loss": 1.354, "step": 1556500 }, { "epoch": 0.93, "learning_rate": 4.950517611511171e-05, "loss": 1.3417, "step": 1557000 }, { "epoch": 0.93, "learning_rate": 4.950307614955115e-05, "loss": 1.3471, "step": 1557500 }, { "epoch": 0.93, "learning_rate": 4.950097618399058e-05, "loss": 1.3371, "step": 1558000 }, { "epoch": 0.93, "learning_rate": 4.949888041836114e-05, "loss": 1.4033, "step": 1558500 }, { "epoch": 0.93, "learning_rate": 4.949678045280057e-05, "loss": 1.3568, "step": 1559000 }, { "epoch": 0.93, "learning_rate": 4.9494680487240013e-05, "loss": 1.3091, "step": 1559500 }, { "epoch": 0.94, "learning_rate": 4.949258052167945e-05, "loss": 1.3257, "step": 1560000 }, { "epoch": 0.94, "learning_rate": 4.949048475605e-05, "loss": 1.3642, "step": 1560500 }, { "epoch": 0.94, "learning_rate": 4.9488384790489434e-05, "loss": 1.3463, "step": 1561000 }, { "epoch": 0.94, "learning_rate": 4.9486284824928874e-05, "loss": 1.352, "step": 1561500 }, { "epoch": 0.94, "learning_rate": 4.948418485936831e-05, "loss": 1.3293, "step": 1562000 }, { "epoch": 0.94, "learning_rate": 4.948208489380774e-05, "loss": 1.3767, "step": 1562500 }, { "epoch": 0.94, "learning_rate": 4.9479984928247175e-05, "loss": 1.3366, "step": 1563000 }, { "epoch": 0.94, "learning_rate": 4.947788496268661e-05, "loss": 1.3837, "step": 1563500 }, { "epoch": 0.94, "learning_rate": 4.947578919705717e-05, "loss": 1.3572, "step": 1564000 }, { "epoch": 0.94, "learning_rate": 4.947368923149661e-05, "loss": 1.3573, "step": 1564500 }, { "epoch": 0.94, "learning_rate": 4.9471589265936035e-05, "loss": 1.3428, "step": 1565000 }, { "epoch": 0.94, "learning_rate": 4.946948930037547e-05, "loss": 1.3316, "step": 1565500 }, { "epoch": 0.94, "learning_rate": 4.946739353474603e-05, "loss": 1.3365, "step": 1566000 }, { "epoch": 0.94, "learning_rate": 4.946529356918547e-05, "loss": 1.322, "step": 1566500 }, { "epoch": 0.94, "learning_rate": 4.94631936036249e-05, "loss": 1.3555, "step": 1567000 }, { "epoch": 0.94, "learning_rate": 4.946109363806433e-05, "loss": 1.3733, "step": 1567500 }, { "epoch": 0.94, "learning_rate": 4.945899367250377e-05, "loss": 1.3221, "step": 1568000 }, { "epoch": 0.94, "learning_rate": 4.945689790687433e-05, "loss": 1.3194, "step": 1568500 }, { "epoch": 0.94, "learning_rate": 4.9454802141244884e-05, "loss": 1.3709, "step": 1569000 }, { "epoch": 0.94, "learning_rate": 4.945270217568432e-05, "loss": 1.3719, "step": 1569500 }, { "epoch": 0.94, "learning_rate": 4.945060221012376e-05, "loss": 1.3441, "step": 1570000 }, { "epoch": 0.94, "learning_rate": 4.944850224456319e-05, "loss": 1.3369, "step": 1570500 }, { "epoch": 0.94, "learning_rate": 4.9446402279002624e-05, "loss": 1.3299, "step": 1571000 }, { "epoch": 0.94, "learning_rate": 4.9444302313442064e-05, "loss": 1.3348, "step": 1571500 }, { "epoch": 0.94, "learning_rate": 4.94422023478815e-05, "loss": 1.3646, "step": 1572000 }, { "epoch": 0.94, "learning_rate": 4.9440102382320925e-05, "loss": 1.3545, "step": 1572500 }, { "epoch": 0.94, "learning_rate": 4.9438006616691485e-05, "loss": 1.3508, "step": 1573000 }, { "epoch": 0.94, "learning_rate": 4.9435906651130925e-05, "loss": 1.3335, "step": 1573500 }, { "epoch": 0.94, "learning_rate": 4.943380668557036e-05, "loss": 1.3367, "step": 1574000 }, { "epoch": 0.94, "learning_rate": 4.9431706720009785e-05, "loss": 1.3435, "step": 1574500 }, { "epoch": 0.94, "learning_rate": 4.9429606754449226e-05, "loss": 1.3548, "step": 1575000 }, { "epoch": 0.94, "learning_rate": 4.9427510988819786e-05, "loss": 1.3785, "step": 1575500 }, { "epoch": 0.94, "learning_rate": 4.942541102325922e-05, "loss": 1.3359, "step": 1576000 }, { "epoch": 0.95, "learning_rate": 4.942331525762977e-05, "loss": 1.3524, "step": 1576500 }, { "epoch": 0.95, "learning_rate": 4.942121529206921e-05, "loss": 1.3399, "step": 1577000 }, { "epoch": 0.95, "learning_rate": 4.941911532650865e-05, "loss": 1.3354, "step": 1577500 }, { "epoch": 0.95, "learning_rate": 4.941701536094808e-05, "loss": 1.3445, "step": 1578000 }, { "epoch": 0.95, "learning_rate": 4.941491539538752e-05, "loss": 1.366, "step": 1578500 }, { "epoch": 0.95, "learning_rate": 4.9412815429826954e-05, "loss": 1.3466, "step": 1579000 }, { "epoch": 0.95, "learning_rate": 4.941071546426638e-05, "loss": 1.347, "step": 1579500 }, { "epoch": 0.95, "learning_rate": 4.940861969863694e-05, "loss": 1.3362, "step": 1580000 }, { "epoch": 0.95, "learning_rate": 4.940651973307638e-05, "loss": 1.3347, "step": 1580500 }, { "epoch": 0.95, "learning_rate": 4.9404419767515815e-05, "loss": 1.3625, "step": 1581000 }, { "epoch": 0.95, "learning_rate": 4.940231980195525e-05, "loss": 1.3505, "step": 1581500 }, { "epoch": 0.95, "learning_rate": 4.940021983639468e-05, "loss": 1.3196, "step": 1582000 }, { "epoch": 0.95, "learning_rate": 4.9398119870834115e-05, "loss": 1.3699, "step": 1582500 }, { "epoch": 0.95, "learning_rate": 4.939601990527355e-05, "loss": 1.3295, "step": 1583000 }, { "epoch": 0.95, "learning_rate": 4.939391993971299e-05, "loss": 1.3681, "step": 1583500 }, { "epoch": 0.95, "learning_rate": 4.939181997415242e-05, "loss": 1.3205, "step": 1584000 }, { "epoch": 0.95, "learning_rate": 4.9389724208522976e-05, "loss": 1.3392, "step": 1584500 }, { "epoch": 0.95, "learning_rate": 4.9387624242962416e-05, "loss": 1.3744, "step": 1585000 }, { "epoch": 0.95, "learning_rate": 4.938552427740185e-05, "loss": 1.3297, "step": 1585500 }, { "epoch": 0.95, "learning_rate": 4.938342431184128e-05, "loss": 1.3485, "step": 1586000 }, { "epoch": 0.95, "learning_rate": 4.9381328546211836e-05, "loss": 1.3344, "step": 1586500 }, { "epoch": 0.95, "learning_rate": 4.9379228580651277e-05, "loss": 1.3471, "step": 1587000 }, { "epoch": 0.95, "learning_rate": 4.937712861509071e-05, "loss": 1.3561, "step": 1587500 }, { "epoch": 0.95, "learning_rate": 4.9375028649530143e-05, "loss": 1.3412, "step": 1588000 }, { "epoch": 0.95, "learning_rate": 4.9372928683969584e-05, "loss": 1.3467, "step": 1588500 }, { "epoch": 0.95, "learning_rate": 4.937083291834014e-05, "loss": 1.3465, "step": 1589000 }, { "epoch": 0.95, "learning_rate": 4.936873295277957e-05, "loss": 1.3542, "step": 1589500 }, { "epoch": 0.95, "learning_rate": 4.9366632987219004e-05, "loss": 1.3381, "step": 1590000 }, { "epoch": 0.95, "learning_rate": 4.9364533021658444e-05, "loss": 1.3452, "step": 1590500 }, { "epoch": 0.95, "learning_rate": 4.9362437256029005e-05, "loss": 1.3143, "step": 1591000 }, { "epoch": 0.95, "learning_rate": 4.936033729046843e-05, "loss": 1.3282, "step": 1591500 }, { "epoch": 0.95, "learning_rate": 4.935823732490787e-05, "loss": 1.3443, "step": 1592000 }, { "epoch": 0.95, "learning_rate": 4.9356137359347305e-05, "loss": 1.3675, "step": 1592500 }, { "epoch": 0.96, "learning_rate": 4.935403739378674e-05, "loss": 1.3367, "step": 1593000 }, { "epoch": 0.96, "learning_rate": 4.935194162815729e-05, "loss": 1.3559, "step": 1593500 }, { "epoch": 0.96, "learning_rate": 4.934984166259673e-05, "loss": 1.3653, "step": 1594000 }, { "epoch": 0.96, "learning_rate": 4.934774589696729e-05, "loss": 1.3417, "step": 1594500 }, { "epoch": 0.96, "learning_rate": 4.9345645931406726e-05, "loss": 1.3747, "step": 1595000 }, { "epoch": 0.96, "learning_rate": 4.934354596584616e-05, "loss": 1.3429, "step": 1595500 }, { "epoch": 0.96, "learning_rate": 4.934144600028559e-05, "loss": 1.3333, "step": 1596000 }, { "epoch": 0.96, "learning_rate": 4.933934603472503e-05, "loss": 1.3325, "step": 1596500 }, { "epoch": 0.96, "learning_rate": 4.933724606916446e-05, "loss": 1.3429, "step": 1597000 }, { "epoch": 0.96, "learning_rate": 4.93351461036039e-05, "loss": 1.3447, "step": 1597500 }, { "epoch": 0.96, "learning_rate": 4.9333046138043334e-05, "loss": 1.3466, "step": 1598000 }, { "epoch": 0.96, "learning_rate": 4.933094617248277e-05, "loss": 1.3154, "step": 1598500 }, { "epoch": 0.96, "learning_rate": 4.932885040685333e-05, "loss": 1.3398, "step": 1599000 }, { "epoch": 0.96, "learning_rate": 4.932675044129276e-05, "loss": 1.3711, "step": 1599500 }, { "epoch": 0.96, "learning_rate": 4.9324650475732195e-05, "loss": 1.3525, "step": 1600000 }, { "epoch": 0.96, "eval_loss": 1.2816615104675293, "eval_runtime": 1114.3312, "eval_samples_per_second": 472.678, "eval_steps_per_second": 78.78, "step": 1600000 }, { "epoch": 0.96, "learning_rate": 4.9322554710102755e-05, "loss": 1.3228, "step": 1600500 }, { "epoch": 0.96, "learning_rate": 4.932045474454219e-05, "loss": 1.3275, "step": 1601000 }, { "epoch": 0.96, "learning_rate": 4.931835477898162e-05, "loss": 1.3305, "step": 1601500 }, { "epoch": 0.96, "learning_rate": 4.9316254813421055e-05, "loss": 1.3366, "step": 1602000 }, { "epoch": 0.96, "learning_rate": 4.9314154847860495e-05, "loss": 1.3326, "step": 1602500 }, { "epoch": 0.96, "learning_rate": 4.931205488229993e-05, "loss": 1.351, "step": 1603000 }, { "epoch": 0.96, "learning_rate": 4.930995491673936e-05, "loss": 1.3251, "step": 1603500 }, { "epoch": 0.96, "learning_rate": 4.93078549511788e-05, "loss": 1.3241, "step": 1604000 }, { "epoch": 0.96, "learning_rate": 4.9305754985618236e-05, "loss": 1.3535, "step": 1604500 }, { "epoch": 0.96, "learning_rate": 4.930365502005767e-05, "loss": 1.3476, "step": 1605000 }, { "epoch": 0.96, "learning_rate": 4.930155505449711e-05, "loss": 1.3506, "step": 1605500 }, { "epoch": 0.96, "learning_rate": 4.929945508893654e-05, "loss": 1.3257, "step": 1606000 }, { "epoch": 0.96, "learning_rate": 4.929736352323822e-05, "loss": 1.3537, "step": 1606500 }, { "epoch": 0.96, "learning_rate": 4.929526355767765e-05, "loss": 1.3489, "step": 1607000 }, { "epoch": 0.96, "learning_rate": 4.929316359211709e-05, "loss": 1.3398, "step": 1607500 }, { "epoch": 0.96, "learning_rate": 4.9291063626556524e-05, "loss": 1.3953, "step": 1608000 }, { "epoch": 0.96, "learning_rate": 4.928896786092708e-05, "loss": 1.38, "step": 1608500 }, { "epoch": 0.96, "learning_rate": 4.928686789536651e-05, "loss": 1.3583, "step": 1609000 }, { "epoch": 0.96, "learning_rate": 4.928476792980595e-05, "loss": 1.3318, "step": 1609500 }, { "epoch": 0.97, "learning_rate": 4.9282667964245385e-05, "loss": 1.3325, "step": 1610000 }, { "epoch": 0.97, "learning_rate": 4.928056799868482e-05, "loss": 1.3764, "step": 1610500 }, { "epoch": 0.97, "learning_rate": 4.927847223305537e-05, "loss": 1.3424, "step": 1611000 }, { "epoch": 0.97, "learning_rate": 4.927637226749481e-05, "loss": 1.3613, "step": 1611500 }, { "epoch": 0.97, "learning_rate": 4.9274272301934246e-05, "loss": 1.3303, "step": 1612000 }, { "epoch": 0.97, "learning_rate": 4.927217233637368e-05, "loss": 1.342, "step": 1612500 }, { "epoch": 0.97, "learning_rate": 4.927007237081312e-05, "loss": 1.3667, "step": 1613000 }, { "epoch": 0.97, "learning_rate": 4.926797240525255e-05, "loss": 1.3277, "step": 1613500 }, { "epoch": 0.97, "learning_rate": 4.9265876639623106e-05, "loss": 1.3334, "step": 1614000 }, { "epoch": 0.97, "learning_rate": 4.9263776674062547e-05, "loss": 1.3126, "step": 1614500 }, { "epoch": 0.97, "learning_rate": 4.926167670850198e-05, "loss": 1.3503, "step": 1615000 }, { "epoch": 0.97, "learning_rate": 4.9259580942872534e-05, "loss": 1.3597, "step": 1615500 }, { "epoch": 0.97, "learning_rate": 4.925748097731197e-05, "loss": 1.3893, "step": 1616000 }, { "epoch": 0.97, "learning_rate": 4.925538101175141e-05, "loss": 1.3699, "step": 1616500 }, { "epoch": 0.97, "learning_rate": 4.925328104619084e-05, "loss": 1.3627, "step": 1617000 }, { "epoch": 0.97, "learning_rate": 4.9251181080630274e-05, "loss": 1.3352, "step": 1617500 }, { "epoch": 0.97, "learning_rate": 4.9249081115069714e-05, "loss": 1.3576, "step": 1618000 }, { "epoch": 0.97, "learning_rate": 4.924698534944027e-05, "loss": 1.3476, "step": 1618500 }, { "epoch": 0.97, "learning_rate": 4.92448853838797e-05, "loss": 1.3203, "step": 1619000 }, { "epoch": 0.97, "learning_rate": 4.9242785418319135e-05, "loss": 1.3482, "step": 1619500 }, { "epoch": 0.97, "learning_rate": 4.9240685452758575e-05, "loss": 1.3479, "step": 1620000 }, { "epoch": 0.97, "learning_rate": 4.923858968712913e-05, "loss": 1.3514, "step": 1620500 }, { "epoch": 0.97, "learning_rate": 4.923648972156856e-05, "loss": 1.3628, "step": 1621000 }, { "epoch": 0.97, "learning_rate": 4.9234389756008e-05, "loss": 1.3282, "step": 1621500 }, { "epoch": 0.97, "learning_rate": 4.9232289790447436e-05, "loss": 1.3338, "step": 1622000 }, { "epoch": 0.97, "learning_rate": 4.923018982488687e-05, "loss": 1.3206, "step": 1622500 }, { "epoch": 0.97, "learning_rate": 4.922808985932631e-05, "loss": 1.3188, "step": 1623000 }, { "epoch": 0.97, "learning_rate": 4.922598989376574e-05, "loss": 1.3437, "step": 1623500 }, { "epoch": 0.97, "learning_rate": 4.9223889928205176e-05, "loss": 1.3719, "step": 1624000 }, { "epoch": 0.97, "learning_rate": 4.922178996264462e-05, "loss": 1.3251, "step": 1624500 }, { "epoch": 0.97, "learning_rate": 4.921968999708405e-05, "loss": 1.3605, "step": 1625000 }, { "epoch": 0.97, "learning_rate": 4.9217594231454604e-05, "loss": 1.3332, "step": 1625500 }, { "epoch": 0.97, "learning_rate": 4.921549426589404e-05, "loss": 1.3278, "step": 1626000 }, { "epoch": 0.98, "learning_rate": 4.921339430033348e-05, "loss": 1.3342, "step": 1626500 }, { "epoch": 0.98, "learning_rate": 4.921129433477291e-05, "loss": 1.3519, "step": 1627000 }, { "epoch": 0.98, "learning_rate": 4.9209194369212344e-05, "loss": 1.3322, "step": 1627500 }, { "epoch": 0.98, "learning_rate": 4.920709440365178e-05, "loss": 1.3127, "step": 1628000 }, { "epoch": 0.98, "learning_rate": 4.920499443809121e-05, "loss": 1.3316, "step": 1628500 }, { "epoch": 0.98, "learning_rate": 4.920289447253065e-05, "loss": 1.3169, "step": 1629000 }, { "epoch": 0.98, "learning_rate": 4.920079870690121e-05, "loss": 1.3298, "step": 1629500 }, { "epoch": 0.98, "learning_rate": 4.919869874134064e-05, "loss": 1.327, "step": 1630000 }, { "epoch": 0.98, "learning_rate": 4.919659877578007e-05, "loss": 1.3527, "step": 1630500 }, { "epoch": 0.98, "learning_rate": 4.919449881021951e-05, "loss": 1.3395, "step": 1631000 }, { "epoch": 0.98, "learning_rate": 4.9192398844658946e-05, "loss": 1.3324, "step": 1631500 }, { "epoch": 0.98, "learning_rate": 4.919029887909838e-05, "loss": 1.3712, "step": 1632000 }, { "epoch": 0.98, "learning_rate": 4.918819891353782e-05, "loss": 1.3527, "step": 1632500 }, { "epoch": 0.98, "learning_rate": 4.918609894797725e-05, "loss": 1.3549, "step": 1633000 }, { "epoch": 0.98, "learning_rate": 4.9184003182347806e-05, "loss": 1.3562, "step": 1633500 }, { "epoch": 0.98, "learning_rate": 4.918190321678724e-05, "loss": 1.3464, "step": 1634000 }, { "epoch": 0.98, "learning_rate": 4.91798074511578e-05, "loss": 1.3637, "step": 1634500 }, { "epoch": 0.98, "learning_rate": 4.9177707485597234e-05, "loss": 1.3466, "step": 1635000 }, { "epoch": 0.98, "learning_rate": 4.9175611719967794e-05, "loss": 1.3584, "step": 1635500 }, { "epoch": 0.98, "learning_rate": 4.917351175440723e-05, "loss": 1.3563, "step": 1636000 }, { "epoch": 0.98, "learning_rate": 4.917141178884667e-05, "loss": 1.3437, "step": 1636500 }, { "epoch": 0.98, "learning_rate": 4.91693118232861e-05, "loss": 1.3621, "step": 1637000 }, { "epoch": 0.98, "learning_rate": 4.916721185772553e-05, "loss": 1.3542, "step": 1637500 }, { "epoch": 0.98, "learning_rate": 4.916511189216497e-05, "loss": 1.3303, "step": 1638000 }, { "epoch": 0.98, "learning_rate": 4.916301612653553e-05, "loss": 1.3375, "step": 1638500 }, { "epoch": 0.98, "learning_rate": 4.916091616097496e-05, "loss": 1.3842, "step": 1639000 }, { "epoch": 0.98, "learning_rate": 4.9158816195414395e-05, "loss": 1.3391, "step": 1639500 }, { "epoch": 0.98, "learning_rate": 4.915671622985383e-05, "loss": 1.3402, "step": 1640000 }, { "epoch": 0.98, "learning_rate": 4.915461626429326e-05, "loss": 1.3568, "step": 1640500 }, { "epoch": 0.98, "learning_rate": 4.9152516298732696e-05, "loss": 1.3437, "step": 1641000 }, { "epoch": 0.98, "learning_rate": 4.9150416333172136e-05, "loss": 1.3553, "step": 1641500 }, { "epoch": 0.98, "learning_rate": 4.914832056754269e-05, "loss": 1.3354, "step": 1642000 }, { "epoch": 0.98, "learning_rate": 4.914622060198212e-05, "loss": 1.3628, "step": 1642500 }, { "epoch": 0.99, "learning_rate": 4.914412063642156e-05, "loss": 1.3646, "step": 1643000 }, { "epoch": 0.99, "learning_rate": 4.9142020670861e-05, "loss": 1.3636, "step": 1643500 }, { "epoch": 0.99, "learning_rate": 4.913992070530043e-05, "loss": 1.3389, "step": 1644000 }, { "epoch": 0.99, "learning_rate": 4.913782073973987e-05, "loss": 1.3594, "step": 1644500 }, { "epoch": 0.99, "learning_rate": 4.9135720774179304e-05, "loss": 1.3287, "step": 1645000 }, { "epoch": 0.99, "learning_rate": 4.913362500854986e-05, "loss": 1.3302, "step": 1645500 }, { "epoch": 0.99, "learning_rate": 4.913152504298929e-05, "loss": 1.3227, "step": 1646000 }, { "epoch": 0.99, "learning_rate": 4.912942507742873e-05, "loss": 1.3276, "step": 1646500 }, { "epoch": 0.99, "learning_rate": 4.9127325111868165e-05, "loss": 1.3495, "step": 1647000 }, { "epoch": 0.99, "learning_rate": 4.91252251463076e-05, "loss": 1.3695, "step": 1647500 }, { "epoch": 0.99, "learning_rate": 4.912312518074704e-05, "loss": 1.3119, "step": 1648000 }, { "epoch": 0.99, "learning_rate": 4.912102521518647e-05, "loss": 1.3442, "step": 1648500 }, { "epoch": 0.99, "learning_rate": 4.911893364948815e-05, "loss": 1.3458, "step": 1649000 }, { "epoch": 0.99, "learning_rate": 4.911683368392758e-05, "loss": 1.3335, "step": 1649500 }, { "epoch": 0.99, "learning_rate": 4.911473371836702e-05, "loss": 1.3144, "step": 1650000 }, { "epoch": 0.99, "learning_rate": 4.911263375280645e-05, "loss": 1.3231, "step": 1650500 }, { "epoch": 0.99, "learning_rate": 4.9110533787245886e-05, "loss": 1.3279, "step": 1651000 }, { "epoch": 0.99, "learning_rate": 4.9108433821685326e-05, "loss": 1.3703, "step": 1651500 }, { "epoch": 0.99, "learning_rate": 4.910633385612476e-05, "loss": 1.3416, "step": 1652000 }, { "epoch": 0.99, "learning_rate": 4.910423389056419e-05, "loss": 1.314, "step": 1652500 }, { "epoch": 0.99, "learning_rate": 4.9102133925003633e-05, "loss": 1.3466, "step": 1653000 }, { "epoch": 0.99, "learning_rate": 4.910003395944307e-05, "loss": 1.374, "step": 1653500 }, { "epoch": 0.99, "learning_rate": 4.90979339938825e-05, "loss": 1.3301, "step": 1654000 }, { "epoch": 0.99, "learning_rate": 4.909583402832194e-05, "loss": 1.3464, "step": 1654500 }, { "epoch": 0.99, "learning_rate": 4.9093738262692494e-05, "loss": 1.3181, "step": 1655000 }, { "epoch": 0.99, "learning_rate": 4.909163829713193e-05, "loss": 1.3446, "step": 1655500 }, { "epoch": 0.99, "learning_rate": 4.908953833157136e-05, "loss": 1.3494, "step": 1656000 }, { "epoch": 0.99, "learning_rate": 4.90874383660108e-05, "loss": 1.3476, "step": 1656500 }, { "epoch": 0.99, "learning_rate": 4.908533840045023e-05, "loss": 1.3353, "step": 1657000 }, { "epoch": 0.99, "learning_rate": 4.908323843488966e-05, "loss": 1.3224, "step": 1657500 }, { "epoch": 0.99, "learning_rate": 4.90811384693291e-05, "loss": 1.3577, "step": 1658000 }, { "epoch": 0.99, "learning_rate": 4.9079038503768535e-05, "loss": 1.3249, "step": 1658500 }, { "epoch": 0.99, "learning_rate": 4.9076942738139096e-05, "loss": 1.3217, "step": 1659000 }, { "epoch": 0.99, "learning_rate": 4.907484277257853e-05, "loss": 1.3337, "step": 1659500 }, { "epoch": 1.0, "learning_rate": 4.907274280701796e-05, "loss": 1.3134, "step": 1660000 }, { "epoch": 1.0, "learning_rate": 4.9070642841457396e-05, "loss": 1.329, "step": 1660500 }, { "epoch": 1.0, "learning_rate": 4.9068542875896836e-05, "loss": 1.3373, "step": 1661000 }, { "epoch": 1.0, "learning_rate": 4.906644291033627e-05, "loss": 1.3225, "step": 1661500 }, { "epoch": 1.0, "learning_rate": 4.906434714470682e-05, "loss": 1.3423, "step": 1662000 }, { "epoch": 1.0, "learning_rate": 4.9062247179146257e-05, "loss": 1.3482, "step": 1662500 }, { "epoch": 1.0, "learning_rate": 4.90601472135857e-05, "loss": 1.3441, "step": 1663000 }, { "epoch": 1.0, "learning_rate": 4.905804724802513e-05, "loss": 1.3595, "step": 1663500 }, { "epoch": 1.0, "learning_rate": 4.9055947282464564e-05, "loss": 1.3429, "step": 1664000 }, { "epoch": 1.0, "learning_rate": 4.9053847316904004e-05, "loss": 1.3229, "step": 1664500 }, { "epoch": 1.0, "learning_rate": 4.905174735134344e-05, "loss": 1.3678, "step": 1665000 }, { "epoch": 1.0, "learning_rate": 4.904965158571399e-05, "loss": 1.3735, "step": 1665500 }, { "epoch": 1.0, "learning_rate": 4.9047551620153424e-05, "loss": 1.3369, "step": 1666000 }, { "epoch": 1.0, "learning_rate": 4.9045451654592865e-05, "loss": 1.3198, "step": 1666500 }, { "epoch": 1.0, "learning_rate": 4.90433516890323e-05, "loss": 1.3055, "step": 1667000 }, { "epoch": 1.0, "learning_rate": 4.904125172347174e-05, "loss": 1.3209, "step": 1667500 }, { "epoch": 1.0, "learning_rate": 4.903915175791117e-05, "loss": 1.3425, "step": 1668000 }, { "epoch": 1.0, "learning_rate": 4.9037055992281725e-05, "loss": 1.2968, "step": 1668500 }, { "epoch": 1.0, "learning_rate": 4.903495602672116e-05, "loss": 1.3499, "step": 1669000 }, { "epoch": 1.0, "learning_rate": 4.90328560611606e-05, "loss": 1.3247, "step": 1669500 }, { "epoch": 1.0, "learning_rate": 4.903075609560003e-05, "loss": 1.3059, "step": 1670000 }, { "epoch": 1.0, "learning_rate": 4.9028656130039466e-05, "loss": 1.3355, "step": 1670500 }, { "epoch": 1.0, "learning_rate": 4.9026556164478906e-05, "loss": 1.3115, "step": 1671000 }, { "epoch": 1.0, "learning_rate": 4.902445619891834e-05, "loss": 1.2839, "step": 1671500 }, { "epoch": 1.0, "learning_rate": 4.902236043328889e-05, "loss": 1.2893, "step": 1672000 }, { "epoch": 1.0, "learning_rate": 4.902026046772833e-05, "loss": 1.3132, "step": 1672500 }, { "epoch": 1.0, "learning_rate": 4.901816050216777e-05, "loss": 1.3137, "step": 1673000 }, { "epoch": 1.0, "learning_rate": 4.90160605366072e-05, "loss": 1.3299, "step": 1673500 }, { "epoch": 1.0, "learning_rate": 4.9013960571046634e-05, "loss": 1.3091, "step": 1674000 }, { "epoch": 1.0, "learning_rate": 4.9011864805417194e-05, "loss": 1.3247, "step": 1674500 }, { "epoch": 1.0, "learning_rate": 4.900976483985663e-05, "loss": 1.3192, "step": 1675000 }, { "epoch": 1.0, "learning_rate": 4.900766487429606e-05, "loss": 1.3255, "step": 1675500 }, { "epoch": 1.0, "learning_rate": 4.90055649087355e-05, "loss": 1.2757, "step": 1676000 }, { "epoch": 1.01, "learning_rate": 4.9003464943174935e-05, "loss": 1.3088, "step": 1676500 }, { "epoch": 1.01, "learning_rate": 4.900136497761436e-05, "loss": 1.3, "step": 1677000 }, { "epoch": 1.01, "learning_rate": 4.899926921198492e-05, "loss": 1.333, "step": 1677500 }, { "epoch": 1.01, "learning_rate": 4.899716924642436e-05, "loss": 1.3413, "step": 1678000 }, { "epoch": 1.01, "learning_rate": 4.8995069280863796e-05, "loss": 1.3359, "step": 1678500 }, { "epoch": 1.01, "learning_rate": 4.899296931530323e-05, "loss": 1.3033, "step": 1679000 }, { "epoch": 1.01, "learning_rate": 4.899086934974266e-05, "loss": 1.3078, "step": 1679500 }, { "epoch": 1.01, "learning_rate": 4.898877358411322e-05, "loss": 1.3102, "step": 1680000 }, { "epoch": 1.01, "learning_rate": 4.8986673618552656e-05, "loss": 1.3324, "step": 1680500 }, { "epoch": 1.01, "learning_rate": 4.898457785292321e-05, "loss": 1.3398, "step": 1681000 }, { "epoch": 1.01, "learning_rate": 4.898247788736265e-05, "loss": 1.2893, "step": 1681500 }, { "epoch": 1.01, "learning_rate": 4.8980377921802084e-05, "loss": 1.2809, "step": 1682000 }, { "epoch": 1.01, "learning_rate": 4.897827795624152e-05, "loss": 1.319, "step": 1682500 }, { "epoch": 1.01, "learning_rate": 4.897617799068096e-05, "loss": 1.3058, "step": 1683000 }, { "epoch": 1.01, "learning_rate": 4.897407802512039e-05, "loss": 1.313, "step": 1683500 }, { "epoch": 1.01, "learning_rate": 4.897197805955982e-05, "loss": 1.2913, "step": 1684000 }, { "epoch": 1.01, "learning_rate": 4.896987809399926e-05, "loss": 1.2862, "step": 1684500 }, { "epoch": 1.01, "learning_rate": 4.896777812843869e-05, "loss": 1.3271, "step": 1685000 }, { "epoch": 1.01, "learning_rate": 4.896568236280925e-05, "loss": 1.3049, "step": 1685500 }, { "epoch": 1.01, "learning_rate": 4.8963582397248685e-05, "loss": 1.3139, "step": 1686000 }, { "epoch": 1.01, "learning_rate": 4.896148663161924e-05, "loss": 1.3118, "step": 1686500 }, { "epoch": 1.01, "learning_rate": 4.895938666605868e-05, "loss": 1.3058, "step": 1687000 }, { "epoch": 1.01, "learning_rate": 4.895728670049811e-05, "loss": 1.3056, "step": 1687500 }, { "epoch": 1.01, "learning_rate": 4.8955186734937546e-05, "loss": 1.2781, "step": 1688000 }, { "epoch": 1.01, "learning_rate": 4.8953086769376986e-05, "loss": 1.3212, "step": 1688500 }, { "epoch": 1.01, "learning_rate": 4.895098680381641e-05, "loss": 1.3027, "step": 1689000 }, { "epoch": 1.01, "learning_rate": 4.894888683825585e-05, "loss": 1.3256, "step": 1689500 }, { "epoch": 1.01, "learning_rate": 4.8946786872695286e-05, "loss": 1.2971, "step": 1690000 }, { "epoch": 1.01, "learning_rate": 4.894468690713472e-05, "loss": 1.3134, "step": 1690500 }, { "epoch": 1.01, "learning_rate": 4.894258694157416e-05, "loss": 1.3294, "step": 1691000 }, { "epoch": 1.01, "learning_rate": 4.8940486976013593e-05, "loss": 1.2934, "step": 1691500 }, { "epoch": 1.01, "learning_rate": 4.893839121038415e-05, "loss": 1.2831, "step": 1692000 }, { "epoch": 1.01, "learning_rate": 4.893629124482358e-05, "loss": 1.344, "step": 1692500 }, { "epoch": 1.02, "learning_rate": 4.893419127926302e-05, "loss": 1.3321, "step": 1693000 }, { "epoch": 1.02, "learning_rate": 4.8932091313702454e-05, "loss": 1.3235, "step": 1693500 }, { "epoch": 1.02, "learning_rate": 4.892999134814189e-05, "loss": 1.3072, "step": 1694000 }, { "epoch": 1.02, "learning_rate": 4.892789558251244e-05, "loss": 1.2983, "step": 1694500 }, { "epoch": 1.02, "learning_rate": 4.8925799816883e-05, "loss": 1.3203, "step": 1695000 }, { "epoch": 1.02, "learning_rate": 4.892369985132244e-05, "loss": 1.3262, "step": 1695500 }, { "epoch": 1.02, "learning_rate": 4.892159988576187e-05, "loss": 1.3245, "step": 1696000 }, { "epoch": 1.02, "learning_rate": 4.891949992020131e-05, "loss": 1.3063, "step": 1696500 }, { "epoch": 1.02, "learning_rate": 4.891739995464074e-05, "loss": 1.3251, "step": 1697000 }, { "epoch": 1.02, "learning_rate": 4.8915299989080176e-05, "loss": 1.2963, "step": 1697500 }, { "epoch": 1.02, "learning_rate": 4.8913200023519616e-05, "loss": 1.315, "step": 1698000 }, { "epoch": 1.02, "learning_rate": 4.891110005795905e-05, "loss": 1.2855, "step": 1698500 }, { "epoch": 1.02, "learning_rate": 4.890900009239848e-05, "loss": 1.3112, "step": 1699000 }, { "epoch": 1.02, "learning_rate": 4.8906904326769036e-05, "loss": 1.3363, "step": 1699500 }, { "epoch": 1.02, "learning_rate": 4.8904804361208477e-05, "loss": 1.3065, "step": 1700000 }, { "epoch": 1.02, "eval_loss": 1.2773289680480957, "eval_runtime": 1103.5769, "eval_samples_per_second": 477.284, "eval_steps_per_second": 79.548, "step": 1700000 }, { "epoch": 1.02, "learning_rate": 4.890270439564791e-05, "loss": 1.3451, "step": 1700500 }, { "epoch": 1.02, "learning_rate": 4.8900604430087344e-05, "loss": 1.3006, "step": 1701000 }, { "epoch": 1.02, "learning_rate": 4.8898504464526784e-05, "loss": 1.2997, "step": 1701500 }, { "epoch": 1.02, "learning_rate": 4.889640449896622e-05, "loss": 1.3124, "step": 1702000 }, { "epoch": 1.02, "learning_rate": 4.889430453340565e-05, "loss": 1.2988, "step": 1702500 }, { "epoch": 1.02, "learning_rate": 4.8892208767776204e-05, "loss": 1.3237, "step": 1703000 }, { "epoch": 1.02, "learning_rate": 4.8890108802215644e-05, "loss": 1.3079, "step": 1703500 }, { "epoch": 1.02, "learning_rate": 4.888800883665508e-05, "loss": 1.3231, "step": 1704000 }, { "epoch": 1.02, "learning_rate": 4.888590887109451e-05, "loss": 1.3401, "step": 1704500 }, { "epoch": 1.02, "learning_rate": 4.888380890553395e-05, "loss": 1.2761, "step": 1705000 }, { "epoch": 1.02, "learning_rate": 4.8881708939973385e-05, "loss": 1.2937, "step": 1705500 }, { "epoch": 1.02, "learning_rate": 4.887960897441282e-05, "loss": 1.32, "step": 1706000 }, { "epoch": 1.02, "learning_rate": 4.887751320878338e-05, "loss": 1.3185, "step": 1706500 }, { "epoch": 1.02, "learning_rate": 4.887541324322281e-05, "loss": 1.3029, "step": 1707000 }, { "epoch": 1.02, "learning_rate": 4.8873313277662246e-05, "loss": 1.3474, "step": 1707500 }, { "epoch": 1.02, "learning_rate": 4.8871213312101686e-05, "loss": 1.302, "step": 1708000 }, { "epoch": 1.02, "learning_rate": 4.886911754647224e-05, "loss": 1.2993, "step": 1708500 }, { "epoch": 1.02, "learning_rate": 4.886701758091167e-05, "loss": 1.2953, "step": 1709000 }, { "epoch": 1.02, "learning_rate": 4.886492181528223e-05, "loss": 1.3249, "step": 1709500 }, { "epoch": 1.03, "learning_rate": 4.886282184972166e-05, "loss": 1.3487, "step": 1710000 }, { "epoch": 1.03, "learning_rate": 4.88607218841611e-05, "loss": 1.3395, "step": 1710500 }, { "epoch": 1.03, "learning_rate": 4.8858621918600534e-05, "loss": 1.3049, "step": 1711000 }, { "epoch": 1.03, "learning_rate": 4.885652195303997e-05, "loss": 1.3145, "step": 1711500 }, { "epoch": 1.03, "learning_rate": 4.885442198747941e-05, "loss": 1.3196, "step": 1712000 }, { "epoch": 1.03, "learning_rate": 4.885232202191884e-05, "loss": 1.3258, "step": 1712500 }, { "epoch": 1.03, "learning_rate": 4.8850222056358274e-05, "loss": 1.2732, "step": 1713000 }, { "epoch": 1.03, "learning_rate": 4.884812209079771e-05, "loss": 1.3186, "step": 1713500 }, { "epoch": 1.03, "learning_rate": 4.884602212523714e-05, "loss": 1.3046, "step": 1714000 }, { "epoch": 1.03, "learning_rate": 4.884392215967658e-05, "loss": 1.3048, "step": 1714500 }, { "epoch": 1.03, "learning_rate": 4.8841822194116015e-05, "loss": 1.3228, "step": 1715000 }, { "epoch": 1.03, "learning_rate": 4.883972222855545e-05, "loss": 1.3315, "step": 1715500 }, { "epoch": 1.03, "learning_rate": 4.883762226299489e-05, "loss": 1.3386, "step": 1716000 }, { "epoch": 1.03, "learning_rate": 4.883552229743432e-05, "loss": 1.3365, "step": 1716500 }, { "epoch": 1.03, "learning_rate": 4.8833422331873756e-05, "loss": 1.2803, "step": 1717000 }, { "epoch": 1.03, "learning_rate": 4.8831322366313196e-05, "loss": 1.2987, "step": 1717500 }, { "epoch": 1.03, "learning_rate": 4.882923080061487e-05, "loss": 1.3279, "step": 1718000 }, { "epoch": 1.03, "learning_rate": 4.88271308350543e-05, "loss": 1.3276, "step": 1718500 }, { "epoch": 1.03, "learning_rate": 4.8825030869493737e-05, "loss": 1.2691, "step": 1719000 }, { "epoch": 1.03, "learning_rate": 4.882293090393317e-05, "loss": 1.3368, "step": 1719500 }, { "epoch": 1.03, "learning_rate": 4.882083093837261e-05, "loss": 1.3238, "step": 1720000 }, { "epoch": 1.03, "learning_rate": 4.8818735172743164e-05, "loss": 1.335, "step": 1720500 }, { "epoch": 1.03, "learning_rate": 4.88166352071826e-05, "loss": 1.2921, "step": 1721000 }, { "epoch": 1.03, "learning_rate": 4.881453524162204e-05, "loss": 1.3196, "step": 1721500 }, { "epoch": 1.03, "learning_rate": 4.881243527606147e-05, "loss": 1.2938, "step": 1722000 }, { "epoch": 1.03, "learning_rate": 4.8810335310500904e-05, "loss": 1.3096, "step": 1722500 }, { "epoch": 1.03, "learning_rate": 4.8808235344940345e-05, "loss": 1.2825, "step": 1723000 }, { "epoch": 1.03, "learning_rate": 4.88061395793109e-05, "loss": 1.3051, "step": 1723500 }, { "epoch": 1.03, "learning_rate": 4.880403961375033e-05, "loss": 1.3161, "step": 1724000 }, { "epoch": 1.03, "learning_rate": 4.8801939648189765e-05, "loss": 1.2923, "step": 1724500 }, { "epoch": 1.03, "learning_rate": 4.8799843882560325e-05, "loss": 1.3401, "step": 1725000 }, { "epoch": 1.03, "learning_rate": 4.879774391699976e-05, "loss": 1.3084, "step": 1725500 }, { "epoch": 1.03, "learning_rate": 4.879564395143919e-05, "loss": 1.316, "step": 1726000 }, { "epoch": 1.04, "learning_rate": 4.8793543985878626e-05, "loss": 1.3236, "step": 1726500 }, { "epoch": 1.04, "learning_rate": 4.8791444020318066e-05, "loss": 1.3192, "step": 1727000 }, { "epoch": 1.04, "learning_rate": 4.87893440547575e-05, "loss": 1.298, "step": 1727500 }, { "epoch": 1.04, "learning_rate": 4.878724828912805e-05, "loss": 1.2932, "step": 1728000 }, { "epoch": 1.04, "learning_rate": 4.878514832356749e-05, "loss": 1.3344, "step": 1728500 }, { "epoch": 1.04, "learning_rate": 4.878304835800693e-05, "loss": 1.3266, "step": 1729000 }, { "epoch": 1.04, "learning_rate": 4.878094839244636e-05, "loss": 1.3025, "step": 1729500 }, { "epoch": 1.04, "learning_rate": 4.87788484268858e-05, "loss": 1.3232, "step": 1730000 }, { "epoch": 1.04, "learning_rate": 4.8776752661256354e-05, "loss": 1.3291, "step": 1730500 }, { "epoch": 1.04, "learning_rate": 4.877465269569579e-05, "loss": 1.3149, "step": 1731000 }, { "epoch": 1.04, "learning_rate": 4.877255273013522e-05, "loss": 1.3003, "step": 1731500 }, { "epoch": 1.04, "learning_rate": 4.877045276457466e-05, "loss": 1.3323, "step": 1732000 }, { "epoch": 1.04, "learning_rate": 4.8768352799014095e-05, "loss": 1.3121, "step": 1732500 }, { "epoch": 1.04, "learning_rate": 4.876625283345353e-05, "loss": 1.2915, "step": 1733000 }, { "epoch": 1.04, "learning_rate": 4.876415286789297e-05, "loss": 1.2812, "step": 1733500 }, { "epoch": 1.04, "learning_rate": 4.87620529023324e-05, "loss": 1.2925, "step": 1734000 }, { "epoch": 1.04, "learning_rate": 4.8759952936771835e-05, "loss": 1.2923, "step": 1734500 }, { "epoch": 1.04, "learning_rate": 4.8757852971211276e-05, "loss": 1.3062, "step": 1735000 }, { "epoch": 1.04, "learning_rate": 4.875575720558183e-05, "loss": 1.308, "step": 1735500 }, { "epoch": 1.04, "learning_rate": 4.875365724002126e-05, "loss": 1.2999, "step": 1736000 }, { "epoch": 1.04, "learning_rate": 4.87515572744607e-05, "loss": 1.3146, "step": 1736500 }, { "epoch": 1.04, "learning_rate": 4.8749457308900136e-05, "loss": 1.3186, "step": 1737000 }, { "epoch": 1.04, "learning_rate": 4.874735734333957e-05, "loss": 1.3461, "step": 1737500 }, { "epoch": 1.04, "learning_rate": 4.874526157771012e-05, "loss": 1.2961, "step": 1738000 }, { "epoch": 1.04, "learning_rate": 4.8743161612149564e-05, "loss": 1.2979, "step": 1738500 }, { "epoch": 1.04, "learning_rate": 4.874106584652012e-05, "loss": 1.3106, "step": 1739000 }, { "epoch": 1.04, "learning_rate": 4.873896588095955e-05, "loss": 1.3428, "step": 1739500 }, { "epoch": 1.04, "learning_rate": 4.8736865915398984e-05, "loss": 1.3389, "step": 1740000 }, { "epoch": 1.04, "learning_rate": 4.8734765949838424e-05, "loss": 1.3272, "step": 1740500 }, { "epoch": 1.04, "learning_rate": 4.873266598427786e-05, "loss": 1.3012, "step": 1741000 }, { "epoch": 1.04, "learning_rate": 4.873056601871729e-05, "loss": 1.3113, "step": 1741500 }, { "epoch": 1.04, "learning_rate": 4.872846605315673e-05, "loss": 1.3202, "step": 1742000 }, { "epoch": 1.04, "learning_rate": 4.8726366087596165e-05, "loss": 1.32, "step": 1742500 }, { "epoch": 1.04, "learning_rate": 4.87242661220356e-05, "loss": 1.303, "step": 1743000 }, { "epoch": 1.05, "learning_rate": 4.872217035640616e-05, "loss": 1.3142, "step": 1743500 }, { "epoch": 1.05, "learning_rate": 4.872007039084559e-05, "loss": 1.297, "step": 1744000 }, { "epoch": 1.05, "learning_rate": 4.8717970425285026e-05, "loss": 1.3163, "step": 1744500 }, { "epoch": 1.05, "learning_rate": 4.8715870459724466e-05, "loss": 1.3082, "step": 1745000 }, { "epoch": 1.05, "learning_rate": 4.871377049416389e-05, "loss": 1.2977, "step": 1745500 }, { "epoch": 1.05, "learning_rate": 4.8711670528603326e-05, "loss": 1.3109, "step": 1746000 }, { "epoch": 1.05, "learning_rate": 4.8709574762973886e-05, "loss": 1.309, "step": 1746500 }, { "epoch": 1.05, "learning_rate": 4.8707474797413327e-05, "loss": 1.2974, "step": 1747000 }, { "epoch": 1.05, "learning_rate": 4.870537483185275e-05, "loss": 1.3414, "step": 1747500 }, { "epoch": 1.05, "learning_rate": 4.870327486629219e-05, "loss": 1.341, "step": 1748000 }, { "epoch": 1.05, "learning_rate": 4.870117490073163e-05, "loss": 1.3161, "step": 1748500 }, { "epoch": 1.05, "learning_rate": 4.869907493517106e-05, "loss": 1.324, "step": 1749000 }, { "epoch": 1.05, "learning_rate": 4.869697916954162e-05, "loss": 1.3014, "step": 1749500 }, { "epoch": 1.05, "learning_rate": 4.8694879203981054e-05, "loss": 1.3037, "step": 1750000 }, { "epoch": 1.05, "learning_rate": 4.869277923842049e-05, "loss": 1.3504, "step": 1750500 }, { "epoch": 1.05, "learning_rate": 4.869067927285992e-05, "loss": 1.3312, "step": 1751000 }, { "epoch": 1.05, "learning_rate": 4.868857930729936e-05, "loss": 1.3136, "step": 1751500 }, { "epoch": 1.05, "learning_rate": 4.868648354166992e-05, "loss": 1.3492, "step": 1752000 }, { "epoch": 1.05, "learning_rate": 4.868438357610935e-05, "loss": 1.2909, "step": 1752500 }, { "epoch": 1.05, "learning_rate": 4.868228361054878e-05, "loss": 1.3058, "step": 1753000 }, { "epoch": 1.05, "learning_rate": 4.868018364498822e-05, "loss": 1.3117, "step": 1753500 }, { "epoch": 1.05, "learning_rate": 4.867808787935878e-05, "loss": 1.2739, "step": 1754000 }, { "epoch": 1.05, "learning_rate": 4.8675987913798216e-05, "loss": 1.3095, "step": 1754500 }, { "epoch": 1.05, "learning_rate": 4.867388794823764e-05, "loss": 1.308, "step": 1755000 }, { "epoch": 1.05, "learning_rate": 4.867178798267708e-05, "loss": 1.3201, "step": 1755500 }, { "epoch": 1.05, "learning_rate": 4.8669688017116516e-05, "loss": 1.2708, "step": 1756000 }, { "epoch": 1.05, "learning_rate": 4.866759225148708e-05, "loss": 1.3014, "step": 1756500 }, { "epoch": 1.05, "learning_rate": 4.866549228592651e-05, "loss": 1.3001, "step": 1757000 }, { "epoch": 1.05, "learning_rate": 4.8663392320365944e-05, "loss": 1.3107, "step": 1757500 }, { "epoch": 1.05, "learning_rate": 4.866129235480538e-05, "loss": 1.3307, "step": 1758000 }, { "epoch": 1.05, "learning_rate": 4.865919238924482e-05, "loss": 1.2898, "step": 1758500 }, { "epoch": 1.05, "learning_rate": 4.865709242368425e-05, "loss": 1.3414, "step": 1759000 }, { "epoch": 1.05, "learning_rate": 4.8654996658054804e-05, "loss": 1.3133, "step": 1759500 }, { "epoch": 1.06, "learning_rate": 4.865289669249424e-05, "loss": 1.3219, "step": 1760000 }, { "epoch": 1.06, "learning_rate": 4.86508009268648e-05, "loss": 1.3287, "step": 1760500 }, { "epoch": 1.06, "learning_rate": 4.864870096130424e-05, "loss": 1.3176, "step": 1761000 }, { "epoch": 1.06, "learning_rate": 4.864660099574367e-05, "loss": 1.3227, "step": 1761500 }, { "epoch": 1.06, "learning_rate": 4.86445010301831e-05, "loss": 1.3296, "step": 1762000 }, { "epoch": 1.06, "learning_rate": 4.864240106462254e-05, "loss": 1.3105, "step": 1762500 }, { "epoch": 1.06, "learning_rate": 4.864030109906197e-05, "loss": 1.3107, "step": 1763000 }, { "epoch": 1.06, "learning_rate": 4.863820533343253e-05, "loss": 1.3732, "step": 1763500 }, { "epoch": 1.06, "learning_rate": 4.8636105367871966e-05, "loss": 1.3197, "step": 1764000 }, { "epoch": 1.06, "learning_rate": 4.86340054023114e-05, "loss": 1.3488, "step": 1764500 }, { "epoch": 1.06, "learning_rate": 4.863190543675083e-05, "loss": 1.3445, "step": 1765000 }, { "epoch": 1.06, "learning_rate": 4.862980547119027e-05, "loss": 1.3561, "step": 1765500 }, { "epoch": 1.06, "learning_rate": 4.862771390549195e-05, "loss": 1.3153, "step": 1766000 }, { "epoch": 1.06, "learning_rate": 4.862561393993139e-05, "loss": 1.3101, "step": 1766500 }, { "epoch": 1.06, "learning_rate": 4.862351397437082e-05, "loss": 1.3025, "step": 1767000 }, { "epoch": 1.06, "learning_rate": 4.8621414008810254e-05, "loss": 1.3141, "step": 1767500 }, { "epoch": 1.06, "learning_rate": 4.8619314043249694e-05, "loss": 1.3163, "step": 1768000 }, { "epoch": 1.06, "learning_rate": 4.861721407768913e-05, "loss": 1.3264, "step": 1768500 }, { "epoch": 1.06, "learning_rate": 4.8615114112128554e-05, "loss": 1.2973, "step": 1769000 }, { "epoch": 1.06, "learning_rate": 4.8613014146567995e-05, "loss": 1.3279, "step": 1769500 }, { "epoch": 1.06, "learning_rate": 4.8610918380938555e-05, "loss": 1.3032, "step": 1770000 }, { "epoch": 1.06, "learning_rate": 4.860881841537799e-05, "loss": 1.3418, "step": 1770500 }, { "epoch": 1.06, "learning_rate": 4.860671844981742e-05, "loss": 1.3011, "step": 1771000 }, { "epoch": 1.06, "learning_rate": 4.860462268418798e-05, "loss": 1.3262, "step": 1771500 }, { "epoch": 1.06, "learning_rate": 4.8602522718627416e-05, "loss": 1.3351, "step": 1772000 }, { "epoch": 1.06, "learning_rate": 4.860042275306685e-05, "loss": 1.3056, "step": 1772500 }, { "epoch": 1.06, "learning_rate": 4.859832278750629e-05, "loss": 1.3281, "step": 1773000 }, { "epoch": 1.06, "learning_rate": 4.859622282194572e-05, "loss": 1.2955, "step": 1773500 }, { "epoch": 1.06, "learning_rate": 4.859412285638515e-05, "loss": 1.3369, "step": 1774000 }, { "epoch": 1.06, "learning_rate": 4.859202289082459e-05, "loss": 1.3167, "step": 1774500 }, { "epoch": 1.06, "learning_rate": 4.858992292526402e-05, "loss": 1.3314, "step": 1775000 }, { "epoch": 1.06, "learning_rate": 4.858782295970346e-05, "loss": 1.3354, "step": 1775500 }, { "epoch": 1.06, "learning_rate": 4.85857229941429e-05, "loss": 1.3033, "step": 1776000 }, { "epoch": 1.07, "learning_rate": 4.858362302858233e-05, "loss": 1.3206, "step": 1776500 }, { "epoch": 1.07, "learning_rate": 4.8581523063021764e-05, "loss": 1.348, "step": 1777000 }, { "epoch": 1.07, "learning_rate": 4.8579423097461204e-05, "loss": 1.3127, "step": 1777500 }, { "epoch": 1.07, "learning_rate": 4.857732733183176e-05, "loss": 1.3249, "step": 1778000 }, { "epoch": 1.07, "learning_rate": 4.857523156620231e-05, "loss": 1.2975, "step": 1778500 }, { "epoch": 1.07, "learning_rate": 4.8573131600641745e-05, "loss": 1.3334, "step": 1779000 }, { "epoch": 1.07, "learning_rate": 4.8571031635081185e-05, "loss": 1.3137, "step": 1779500 }, { "epoch": 1.07, "learning_rate": 4.856893166952062e-05, "loss": 1.3106, "step": 1780000 }, { "epoch": 1.07, "learning_rate": 4.856683170396005e-05, "loss": 1.3027, "step": 1780500 }, { "epoch": 1.07, "learning_rate": 4.8564735938330605e-05, "loss": 1.2983, "step": 1781000 }, { "epoch": 1.07, "learning_rate": 4.8562635972770046e-05, "loss": 1.2961, "step": 1781500 }, { "epoch": 1.07, "learning_rate": 4.856053600720948e-05, "loss": 1.3247, "step": 1782000 }, { "epoch": 1.07, "learning_rate": 4.855843604164891e-05, "loss": 1.3572, "step": 1782500 }, { "epoch": 1.07, "learning_rate": 4.855633607608835e-05, "loss": 1.3089, "step": 1783000 }, { "epoch": 1.07, "learning_rate": 4.8554236110527786e-05, "loss": 1.3045, "step": 1783500 }, { "epoch": 1.07, "learning_rate": 4.855213614496722e-05, "loss": 1.3004, "step": 1784000 }, { "epoch": 1.07, "learning_rate": 4.855003617940666e-05, "loss": 1.3203, "step": 1784500 }, { "epoch": 1.07, "learning_rate": 4.854793621384609e-05, "loss": 1.3277, "step": 1785000 }, { "epoch": 1.07, "learning_rate": 4.854583624828553e-05, "loss": 1.343, "step": 1785500 }, { "epoch": 1.07, "learning_rate": 4.854373628272497e-05, "loss": 1.3062, "step": 1786000 }, { "epoch": 1.07, "learning_rate": 4.8541636317164394e-05, "loss": 1.312, "step": 1786500 }, { "epoch": 1.07, "learning_rate": 4.8539540551534954e-05, "loss": 1.3076, "step": 1787000 }, { "epoch": 1.07, "learning_rate": 4.853744478590551e-05, "loss": 1.3162, "step": 1787500 }, { "epoch": 1.07, "learning_rate": 4.853534482034495e-05, "loss": 1.3062, "step": 1788000 }, { "epoch": 1.07, "learning_rate": 4.853324485478438e-05, "loss": 1.3056, "step": 1788500 }, { "epoch": 1.07, "learning_rate": 4.8531144889223815e-05, "loss": 1.3177, "step": 1789000 }, { "epoch": 1.07, "learning_rate": 4.8529044923663255e-05, "loss": 1.3211, "step": 1789500 }, { "epoch": 1.07, "learning_rate": 4.852694495810269e-05, "loss": 1.3162, "step": 1790000 }, { "epoch": 1.07, "learning_rate": 4.852484499254212e-05, "loss": 1.3199, "step": 1790500 }, { "epoch": 1.07, "learning_rate": 4.8522749226912676e-05, "loss": 1.3287, "step": 1791000 }, { "epoch": 1.07, "learning_rate": 4.8520649261352116e-05, "loss": 1.3033, "step": 1791500 }, { "epoch": 1.07, "learning_rate": 4.851854929579155e-05, "loss": 1.3347, "step": 1792000 }, { "epoch": 1.07, "learning_rate": 4.851644933023098e-05, "loss": 1.3048, "step": 1792500 }, { "epoch": 1.07, "learning_rate": 4.851434936467042e-05, "loss": 1.3333, "step": 1793000 }, { "epoch": 1.08, "learning_rate": 4.851224939910985e-05, "loss": 1.2907, "step": 1793500 }, { "epoch": 1.08, "learning_rate": 4.851014943354929e-05, "loss": 1.3106, "step": 1794000 }, { "epoch": 1.08, "learning_rate": 4.850804946798872e-05, "loss": 1.3237, "step": 1794500 }, { "epoch": 1.08, "learning_rate": 4.8505953702359284e-05, "loss": 1.3053, "step": 1795000 }, { "epoch": 1.08, "learning_rate": 4.850385373679872e-05, "loss": 1.3079, "step": 1795500 }, { "epoch": 1.08, "learning_rate": 4.850175797116927e-05, "loss": 1.3212, "step": 1796000 }, { "epoch": 1.08, "learning_rate": 4.849965800560871e-05, "loss": 1.3209, "step": 1796500 }, { "epoch": 1.08, "learning_rate": 4.8497558040048144e-05, "loss": 1.3323, "step": 1797000 }, { "epoch": 1.08, "learning_rate": 4.849545807448758e-05, "loss": 1.3424, "step": 1797500 }, { "epoch": 1.08, "learning_rate": 4.849335810892702e-05, "loss": 1.3093, "step": 1798000 }, { "epoch": 1.08, "learning_rate": 4.8491258143366445e-05, "loss": 1.2874, "step": 1798500 }, { "epoch": 1.08, "learning_rate": 4.848915817780588e-05, "loss": 1.338, "step": 1799000 }, { "epoch": 1.08, "learning_rate": 4.848705821224532e-05, "loss": 1.3179, "step": 1799500 }, { "epoch": 1.08, "learning_rate": 4.848496244661588e-05, "loss": 1.3104, "step": 1800000 }, { "epoch": 1.08, "eval_loss": 1.270274043083191, "eval_runtime": 1102.2881, "eval_samples_per_second": 477.842, "eval_steps_per_second": 79.641, "step": 1800000 }, { "epoch": 1.08, "learning_rate": 4.848286668098643e-05, "loss": 1.2991, "step": 1800500 }, { "epoch": 1.08, "learning_rate": 4.8480766715425866e-05, "loss": 1.3134, "step": 1801000 }, { "epoch": 1.08, "learning_rate": 4.8478666749865306e-05, "loss": 1.311, "step": 1801500 }, { "epoch": 1.08, "learning_rate": 4.847656678430474e-05, "loss": 1.3283, "step": 1802000 }, { "epoch": 1.08, "learning_rate": 4.847447101867529e-05, "loss": 1.3142, "step": 1802500 }, { "epoch": 1.08, "learning_rate": 4.8472371053114727e-05, "loss": 1.3167, "step": 1803000 }, { "epoch": 1.08, "learning_rate": 4.847027108755417e-05, "loss": 1.287, "step": 1803500 }, { "epoch": 1.08, "learning_rate": 4.84681711219936e-05, "loss": 1.3142, "step": 1804000 }, { "epoch": 1.08, "learning_rate": 4.8466071156433034e-05, "loss": 1.3273, "step": 1804500 }, { "epoch": 1.08, "learning_rate": 4.8463971190872474e-05, "loss": 1.2969, "step": 1805000 }, { "epoch": 1.08, "learning_rate": 4.846187542524303e-05, "loss": 1.2837, "step": 1805500 }, { "epoch": 1.08, "learning_rate": 4.845977545968246e-05, "loss": 1.3223, "step": 1806000 }, { "epoch": 1.08, "learning_rate": 4.8457675494121894e-05, "loss": 1.3271, "step": 1806500 }, { "epoch": 1.08, "learning_rate": 4.8455575528561335e-05, "loss": 1.3276, "step": 1807000 }, { "epoch": 1.08, "learning_rate": 4.845347556300077e-05, "loss": 1.3072, "step": 1807500 }, { "epoch": 1.08, "learning_rate": 4.84513755974402e-05, "loss": 1.2955, "step": 1808000 }, { "epoch": 1.08, "learning_rate": 4.8449275631879635e-05, "loss": 1.3455, "step": 1808500 }, { "epoch": 1.08, "learning_rate": 4.8447179866250195e-05, "loss": 1.3101, "step": 1809000 }, { "epoch": 1.08, "learning_rate": 4.844507990068963e-05, "loss": 1.3247, "step": 1809500 }, { "epoch": 1.09, "learning_rate": 4.844297993512907e-05, "loss": 1.3299, "step": 1810000 }, { "epoch": 1.09, "learning_rate": 4.8440879969568496e-05, "loss": 1.2931, "step": 1810500 }, { "epoch": 1.09, "learning_rate": 4.843878000400793e-05, "loss": 1.3211, "step": 1811000 }, { "epoch": 1.09, "learning_rate": 4.843668423837849e-05, "loss": 1.3119, "step": 1811500 }, { "epoch": 1.09, "learning_rate": 4.843458427281793e-05, "loss": 1.3255, "step": 1812000 }, { "epoch": 1.09, "learning_rate": 4.843248430725736e-05, "loss": 1.2941, "step": 1812500 }, { "epoch": 1.09, "learning_rate": 4.843038434169679e-05, "loss": 1.2795, "step": 1813000 }, { "epoch": 1.09, "learning_rate": 4.842828437613623e-05, "loss": 1.3158, "step": 1813500 }, { "epoch": 1.09, "learning_rate": 4.842618861050679e-05, "loss": 1.3327, "step": 1814000 }, { "epoch": 1.09, "learning_rate": 4.8424088644946224e-05, "loss": 1.2865, "step": 1814500 }, { "epoch": 1.09, "learning_rate": 4.842198867938566e-05, "loss": 1.343, "step": 1815000 }, { "epoch": 1.09, "learning_rate": 4.841988871382509e-05, "loss": 1.3282, "step": 1815500 }, { "epoch": 1.09, "learning_rate": 4.8417788748264524e-05, "loss": 1.3172, "step": 1816000 }, { "epoch": 1.09, "learning_rate": 4.8415688782703965e-05, "loss": 1.3135, "step": 1816500 }, { "epoch": 1.09, "learning_rate": 4.84135888171434e-05, "loss": 1.3162, "step": 1817000 }, { "epoch": 1.09, "learning_rate": 4.841149305151395e-05, "loss": 1.2869, "step": 1817500 }, { "epoch": 1.09, "learning_rate": 4.8409393085953385e-05, "loss": 1.3241, "step": 1818000 }, { "epoch": 1.09, "learning_rate": 4.8407293120392825e-05, "loss": 1.3076, "step": 1818500 }, { "epoch": 1.09, "learning_rate": 4.8405197354763386e-05, "loss": 1.3266, "step": 1819000 }, { "epoch": 1.09, "learning_rate": 4.840309738920282e-05, "loss": 1.3018, "step": 1819500 }, { "epoch": 1.09, "learning_rate": 4.8400997423642246e-05, "loss": 1.3167, "step": 1820000 }, { "epoch": 1.09, "learning_rate": 4.8398897458081686e-05, "loss": 1.328, "step": 1820500 }, { "epoch": 1.09, "learning_rate": 4.839679749252112e-05, "loss": 1.3197, "step": 1821000 }, { "epoch": 1.09, "learning_rate": 4.839469752696055e-05, "loss": 1.3096, "step": 1821500 }, { "epoch": 1.09, "learning_rate": 4.839259756139999e-05, "loss": 1.3213, "step": 1822000 }, { "epoch": 1.09, "learning_rate": 4.839049759583943e-05, "loss": 1.3035, "step": 1822500 }, { "epoch": 1.09, "learning_rate": 4.838839763027886e-05, "loss": 1.3237, "step": 1823000 }, { "epoch": 1.09, "learning_rate": 4.83862976647183e-05, "loss": 1.3145, "step": 1823500 }, { "epoch": 1.09, "learning_rate": 4.8384201899088854e-05, "loss": 1.342, "step": 1824000 }, { "epoch": 1.09, "learning_rate": 4.838210193352829e-05, "loss": 1.3149, "step": 1824500 }, { "epoch": 1.09, "learning_rate": 4.838000196796773e-05, "loss": 1.3287, "step": 1825000 }, { "epoch": 1.09, "learning_rate": 4.837790200240716e-05, "loss": 1.3375, "step": 1825500 }, { "epoch": 1.09, "learning_rate": 4.8375802036846595e-05, "loss": 1.3295, "step": 1826000 }, { "epoch": 1.1, "learning_rate": 4.837370627121715e-05, "loss": 1.3244, "step": 1826500 }, { "epoch": 1.1, "learning_rate": 4.837160630565659e-05, "loss": 1.3153, "step": 1827000 }, { "epoch": 1.1, "learning_rate": 4.836950634009602e-05, "loss": 1.3397, "step": 1827500 }, { "epoch": 1.1, "learning_rate": 4.8367406374535455e-05, "loss": 1.2851, "step": 1828000 }, { "epoch": 1.1, "learning_rate": 4.8365306408974896e-05, "loss": 1.3149, "step": 1828500 }, { "epoch": 1.1, "learning_rate": 4.836321064334545e-05, "loss": 1.3114, "step": 1829000 }, { "epoch": 1.1, "learning_rate": 4.836111067778488e-05, "loss": 1.3317, "step": 1829500 }, { "epoch": 1.1, "learning_rate": 4.8359010712224316e-05, "loss": 1.3038, "step": 1830000 }, { "epoch": 1.1, "learning_rate": 4.8356910746663756e-05, "loss": 1.3123, "step": 1830500 }, { "epoch": 1.1, "learning_rate": 4.835481078110319e-05, "loss": 1.3107, "step": 1831000 }, { "epoch": 1.1, "learning_rate": 4.835271081554262e-05, "loss": 1.2752, "step": 1831500 }, { "epoch": 1.1, "learning_rate": 4.8350610849982063e-05, "loss": 1.2903, "step": 1832000 }, { "epoch": 1.1, "learning_rate": 4.834851088442149e-05, "loss": 1.2883, "step": 1832500 }, { "epoch": 1.1, "learning_rate": 4.834641511879205e-05, "loss": 1.3159, "step": 1833000 }, { "epoch": 1.1, "learning_rate": 4.834431515323149e-05, "loss": 1.3215, "step": 1833500 }, { "epoch": 1.1, "learning_rate": 4.8342215187670924e-05, "loss": 1.3161, "step": 1834000 }, { "epoch": 1.1, "learning_rate": 4.834011522211036e-05, "loss": 1.3116, "step": 1834500 }, { "epoch": 1.1, "learning_rate": 4.833801525654979e-05, "loss": 1.3314, "step": 1835000 }, { "epoch": 1.1, "learning_rate": 4.833591949092035e-05, "loss": 1.2894, "step": 1835500 }, { "epoch": 1.1, "learning_rate": 4.8333819525359785e-05, "loss": 1.3186, "step": 1836000 }, { "epoch": 1.1, "learning_rate": 4.833171955979922e-05, "loss": 1.3133, "step": 1836500 }, { "epoch": 1.1, "learning_rate": 4.832961959423866e-05, "loss": 1.2812, "step": 1837000 }, { "epoch": 1.1, "learning_rate": 4.832752802854033e-05, "loss": 1.3165, "step": 1837500 }, { "epoch": 1.1, "learning_rate": 4.8325428062979766e-05, "loss": 1.3302, "step": 1838000 }, { "epoch": 1.1, "learning_rate": 4.83233280974192e-05, "loss": 1.2881, "step": 1838500 }, { "epoch": 1.1, "learning_rate": 4.832122813185864e-05, "loss": 1.2915, "step": 1839000 }, { "epoch": 1.1, "learning_rate": 4.831912816629807e-05, "loss": 1.3224, "step": 1839500 }, { "epoch": 1.1, "learning_rate": 4.8317028200737506e-05, "loss": 1.3222, "step": 1840000 }, { "epoch": 1.1, "learning_rate": 4.8314928235176947e-05, "loss": 1.3011, "step": 1840500 }, { "epoch": 1.1, "learning_rate": 4.831282826961638e-05, "loss": 1.2879, "step": 1841000 }, { "epoch": 1.1, "learning_rate": 4.8310732503986934e-05, "loss": 1.2982, "step": 1841500 }, { "epoch": 1.1, "learning_rate": 4.830863253842637e-05, "loss": 1.3242, "step": 1842000 }, { "epoch": 1.1, "learning_rate": 4.830653677279692e-05, "loss": 1.3228, "step": 1842500 }, { "epoch": 1.1, "learning_rate": 4.830443680723636e-05, "loss": 1.3108, "step": 1843000 }, { "epoch": 1.11, "learning_rate": 4.8302336841675794e-05, "loss": 1.3055, "step": 1843500 }, { "epoch": 1.11, "learning_rate": 4.830024107604635e-05, "loss": 1.2989, "step": 1844000 }, { "epoch": 1.11, "learning_rate": 4.829814111048579e-05, "loss": 1.3046, "step": 1844500 }, { "epoch": 1.11, "learning_rate": 4.829604114492522e-05, "loss": 1.3159, "step": 1845000 }, { "epoch": 1.11, "learning_rate": 4.8293941179364655e-05, "loss": 1.3203, "step": 1845500 }, { "epoch": 1.11, "learning_rate": 4.8291841213804095e-05, "loss": 1.3156, "step": 1846000 }, { "epoch": 1.11, "learning_rate": 4.828974124824353e-05, "loss": 1.3104, "step": 1846500 }, { "epoch": 1.11, "learning_rate": 4.828764128268296e-05, "loss": 1.3332, "step": 1847000 }, { "epoch": 1.11, "learning_rate": 4.82855413171224e-05, "loss": 1.3068, "step": 1847500 }, { "epoch": 1.11, "learning_rate": 4.8283441351561836e-05, "loss": 1.3318, "step": 1848000 }, { "epoch": 1.11, "learning_rate": 4.828134138600127e-05, "loss": 1.327, "step": 1848500 }, { "epoch": 1.11, "learning_rate": 4.827924142044071e-05, "loss": 1.3263, "step": 1849000 }, { "epoch": 1.11, "learning_rate": 4.8277141454880136e-05, "loss": 1.2962, "step": 1849500 }, { "epoch": 1.11, "learning_rate": 4.82750456892507e-05, "loss": 1.2958, "step": 1850000 }, { "epoch": 1.11, "learning_rate": 4.827294992362125e-05, "loss": 1.307, "step": 1850500 }, { "epoch": 1.11, "learning_rate": 4.8270854157991804e-05, "loss": 1.3069, "step": 1851000 }, { "epoch": 1.11, "learning_rate": 4.8268754192431244e-05, "loss": 1.3428, "step": 1851500 }, { "epoch": 1.11, "learning_rate": 4.826665422687068e-05, "loss": 1.3033, "step": 1852000 }, { "epoch": 1.11, "learning_rate": 4.826455426131011e-05, "loss": 1.3086, "step": 1852500 }, { "epoch": 1.11, "learning_rate": 4.826245429574955e-05, "loss": 1.2995, "step": 1853000 }, { "epoch": 1.11, "learning_rate": 4.8260354330188985e-05, "loss": 1.3417, "step": 1853500 }, { "epoch": 1.11, "learning_rate": 4.825825436462842e-05, "loss": 1.3133, "step": 1854000 }, { "epoch": 1.11, "learning_rate": 4.825615439906786e-05, "loss": 1.2966, "step": 1854500 }, { "epoch": 1.11, "learning_rate": 4.825405863343841e-05, "loss": 1.2957, "step": 1855000 }, { "epoch": 1.11, "learning_rate": 4.8251958667877845e-05, "loss": 1.3265, "step": 1855500 }, { "epoch": 1.11, "learning_rate": 4.824985870231728e-05, "loss": 1.325, "step": 1856000 }, { "epoch": 1.11, "learning_rate": 4.824775873675672e-05, "loss": 1.3063, "step": 1856500 }, { "epoch": 1.11, "learning_rate": 4.824565877119615e-05, "loss": 1.3177, "step": 1857000 }, { "epoch": 1.11, "learning_rate": 4.8243558805635586e-05, "loss": 1.3145, "step": 1857500 }, { "epoch": 1.11, "learning_rate": 4.8241458840075026e-05, "loss": 1.3118, "step": 1858000 }, { "epoch": 1.11, "learning_rate": 4.823936307444558e-05, "loss": 1.3287, "step": 1858500 }, { "epoch": 1.11, "learning_rate": 4.823726310888501e-05, "loss": 1.3177, "step": 1859000 }, { "epoch": 1.11, "learning_rate": 4.8235163143324453e-05, "loss": 1.338, "step": 1859500 }, { "epoch": 1.12, "learning_rate": 4.823306317776389e-05, "loss": 1.297, "step": 1860000 }, { "epoch": 1.12, "learning_rate": 4.823096321220332e-05, "loss": 1.3539, "step": 1860500 }, { "epoch": 1.12, "learning_rate": 4.8228863246642754e-05, "loss": 1.3059, "step": 1861000 }, { "epoch": 1.12, "learning_rate": 4.822676328108219e-05, "loss": 1.2969, "step": 1861500 }, { "epoch": 1.12, "learning_rate": 4.822466331552162e-05, "loss": 1.2838, "step": 1862000 }, { "epoch": 1.12, "learning_rate": 4.822256334996106e-05, "loss": 1.282, "step": 1862500 }, { "epoch": 1.12, "learning_rate": 4.822046758433162e-05, "loss": 1.3411, "step": 1863000 }, { "epoch": 1.12, "learning_rate": 4.821836761877105e-05, "loss": 1.3203, "step": 1863500 }, { "epoch": 1.12, "learning_rate": 4.821626765321048e-05, "loss": 1.3352, "step": 1864000 }, { "epoch": 1.12, "learning_rate": 4.821416768764992e-05, "loss": 1.3241, "step": 1864500 }, { "epoch": 1.12, "learning_rate": 4.8212067722089355e-05, "loss": 1.3355, "step": 1865000 }, { "epoch": 1.12, "learning_rate": 4.820996775652879e-05, "loss": 1.3013, "step": 1865500 }, { "epoch": 1.12, "learning_rate": 4.820786779096823e-05, "loss": 1.3189, "step": 1866000 }, { "epoch": 1.12, "learning_rate": 4.820577202533878e-05, "loss": 1.2824, "step": 1866500 }, { "epoch": 1.12, "learning_rate": 4.820367625970934e-05, "loss": 1.3141, "step": 1867000 }, { "epoch": 1.12, "learning_rate": 4.8201576294148776e-05, "loss": 1.3096, "step": 1867500 }, { "epoch": 1.12, "learning_rate": 4.8199476328588217e-05, "loss": 1.2998, "step": 1868000 }, { "epoch": 1.12, "learning_rate": 4.819737636302764e-05, "loss": 1.3126, "step": 1868500 }, { "epoch": 1.12, "learning_rate": 4.819527639746708e-05, "loss": 1.3126, "step": 1869000 }, { "epoch": 1.12, "learning_rate": 4.819317643190652e-05, "loss": 1.3258, "step": 1869500 }, { "epoch": 1.12, "learning_rate": 4.819107646634595e-05, "loss": 1.2946, "step": 1870000 }, { "epoch": 1.12, "learning_rate": 4.8188976500785384e-05, "loss": 1.3413, "step": 1870500 }, { "epoch": 1.12, "learning_rate": 4.8186876535224824e-05, "loss": 1.3415, "step": 1871000 }, { "epoch": 1.12, "learning_rate": 4.818477656966426e-05, "loss": 1.3305, "step": 1871500 }, { "epoch": 1.12, "learning_rate": 4.818267660410369e-05, "loss": 1.3167, "step": 1872000 }, { "epoch": 1.12, "learning_rate": 4.8180580838474245e-05, "loss": 1.2958, "step": 1872500 }, { "epoch": 1.12, "learning_rate": 4.8178480872913685e-05, "loss": 1.3007, "step": 1873000 }, { "epoch": 1.12, "learning_rate": 4.817638090735312e-05, "loss": 1.2843, "step": 1873500 }, { "epoch": 1.12, "learning_rate": 4.817428094179255e-05, "loss": 1.3086, "step": 1874000 }, { "epoch": 1.12, "learning_rate": 4.817218097623199e-05, "loss": 1.2905, "step": 1874500 }, { "epoch": 1.12, "learning_rate": 4.8170081010671425e-05, "loss": 1.3166, "step": 1875000 }, { "epoch": 1.12, "learning_rate": 4.816798104511086e-05, "loss": 1.2994, "step": 1875500 }, { "epoch": 1.12, "learning_rate": 4.816588107955029e-05, "loss": 1.3281, "step": 1876000 }, { "epoch": 1.13, "learning_rate": 4.816378531392085e-05, "loss": 1.2857, "step": 1876500 }, { "epoch": 1.13, "learning_rate": 4.8161689548291406e-05, "loss": 1.3176, "step": 1877000 }, { "epoch": 1.13, "learning_rate": 4.815958958273084e-05, "loss": 1.2986, "step": 1877500 }, { "epoch": 1.13, "learning_rate": 4.815748961717028e-05, "loss": 1.3129, "step": 1878000 }, { "epoch": 1.13, "learning_rate": 4.8155393851540834e-05, "loss": 1.3136, "step": 1878500 }, { "epoch": 1.13, "learning_rate": 4.8153298085911394e-05, "loss": 1.3019, "step": 1879000 }, { "epoch": 1.13, "learning_rate": 4.815119812035083e-05, "loss": 1.3076, "step": 1879500 }, { "epoch": 1.13, "learning_rate": 4.814909815479026e-05, "loss": 1.297, "step": 1880000 }, { "epoch": 1.13, "learning_rate": 4.8146998189229694e-05, "loss": 1.3049, "step": 1880500 }, { "epoch": 1.13, "learning_rate": 4.814489822366913e-05, "loss": 1.3001, "step": 1881000 }, { "epoch": 1.13, "learning_rate": 4.814279825810857e-05, "loss": 1.3266, "step": 1881500 }, { "epoch": 1.13, "learning_rate": 4.8140698292548e-05, "loss": 1.3026, "step": 1882000 }, { "epoch": 1.13, "learning_rate": 4.8138598326987435e-05, "loss": 1.3462, "step": 1882500 }, { "epoch": 1.13, "learning_rate": 4.8136498361426875e-05, "loss": 1.326, "step": 1883000 }, { "epoch": 1.13, "learning_rate": 4.813439839586631e-05, "loss": 1.3078, "step": 1883500 }, { "epoch": 1.13, "learning_rate": 4.813229843030574e-05, "loss": 1.3208, "step": 1884000 }, { "epoch": 1.13, "learning_rate": 4.813019846474518e-05, "loss": 1.3004, "step": 1884500 }, { "epoch": 1.13, "learning_rate": 4.8128098499184616e-05, "loss": 1.316, "step": 1885000 }, { "epoch": 1.13, "learning_rate": 4.812599853362405e-05, "loss": 1.2925, "step": 1885500 }, { "epoch": 1.13, "learning_rate": 4.812389856806348e-05, "loss": 1.3095, "step": 1886000 }, { "epoch": 1.13, "learning_rate": 4.8121798602502916e-05, "loss": 1.3264, "step": 1886500 }, { "epoch": 1.13, "learning_rate": 4.8119702836873476e-05, "loss": 1.2915, "step": 1887000 }, { "epoch": 1.13, "learning_rate": 4.811760707124403e-05, "loss": 1.3195, "step": 1887500 }, { "epoch": 1.13, "learning_rate": 4.8115511305614584e-05, "loss": 1.2971, "step": 1888000 }, { "epoch": 1.13, "learning_rate": 4.8113411340054024e-05, "loss": 1.2841, "step": 1888500 }, { "epoch": 1.13, "learning_rate": 4.811131137449346e-05, "loss": 1.329, "step": 1889000 }, { "epoch": 1.13, "learning_rate": 4.810921140893289e-05, "loss": 1.2787, "step": 1889500 }, { "epoch": 1.13, "learning_rate": 4.810711144337233e-05, "loss": 1.3185, "step": 1890000 }, { "epoch": 1.13, "learning_rate": 4.8105015677742885e-05, "loss": 1.3155, "step": 1890500 }, { "epoch": 1.13, "learning_rate": 4.810291571218232e-05, "loss": 1.3171, "step": 1891000 }, { "epoch": 1.13, "learning_rate": 4.810081574662175e-05, "loss": 1.2975, "step": 1891500 }, { "epoch": 1.13, "learning_rate": 4.809871578106119e-05, "loss": 1.3036, "step": 1892000 }, { "epoch": 1.13, "learning_rate": 4.8096615815500625e-05, "loss": 1.3092, "step": 1892500 }, { "epoch": 1.13, "learning_rate": 4.809452004987118e-05, "loss": 1.3449, "step": 1893000 }, { "epoch": 1.14, "learning_rate": 4.809242428424174e-05, "loss": 1.3265, "step": 1893500 }, { "epoch": 1.14, "learning_rate": 4.809032431868117e-05, "loss": 1.3099, "step": 1894000 }, { "epoch": 1.14, "learning_rate": 4.8088224353120606e-05, "loss": 1.3159, "step": 1894500 }, { "epoch": 1.14, "learning_rate": 4.808612438756004e-05, "loss": 1.3047, "step": 1895000 }, { "epoch": 1.14, "learning_rate": 4.808402442199948e-05, "loss": 1.3174, "step": 1895500 }, { "epoch": 1.14, "learning_rate": 4.808192445643891e-05, "loss": 1.3332, "step": 1896000 }, { "epoch": 1.14, "learning_rate": 4.8079824490878347e-05, "loss": 1.2987, "step": 1896500 }, { "epoch": 1.14, "learning_rate": 4.807772452531779e-05, "loss": 1.2993, "step": 1897000 }, { "epoch": 1.14, "learning_rate": 4.807562455975722e-05, "loss": 1.3207, "step": 1897500 }, { "epoch": 1.14, "learning_rate": 4.8073524594196654e-05, "loss": 1.275, "step": 1898000 }, { "epoch": 1.14, "learning_rate": 4.807142882856721e-05, "loss": 1.3002, "step": 1898500 }, { "epoch": 1.14, "learning_rate": 4.806932886300665e-05, "loss": 1.3351, "step": 1899000 }, { "epoch": 1.14, "learning_rate": 4.806722889744608e-05, "loss": 1.2986, "step": 1899500 }, { "epoch": 1.14, "learning_rate": 4.8065128931885514e-05, "loss": 1.3284, "step": 1900000 }, { "epoch": 1.14, "eval_loss": 1.2703109979629517, "eval_runtime": 1101.196, "eval_samples_per_second": 478.316, "eval_steps_per_second": 79.72, "step": 1900000 }, { "epoch": 1.14, "learning_rate": 4.8063028966324955e-05, "loss": 1.2887, "step": 1900500 }, { "epoch": 1.14, "learning_rate": 4.806092900076439e-05, "loss": 1.3282, "step": 1901000 }, { "epoch": 1.14, "learning_rate": 4.805882903520382e-05, "loss": 1.3289, "step": 1901500 }, { "epoch": 1.14, "learning_rate": 4.8056733269574375e-05, "loss": 1.3267, "step": 1902000 }, { "epoch": 1.14, "learning_rate": 4.8054633304013815e-05, "loss": 1.3294, "step": 1902500 }, { "epoch": 1.14, "learning_rate": 4.805253333845325e-05, "loss": 1.2941, "step": 1903000 }, { "epoch": 1.14, "learning_rate": 4.805043337289268e-05, "loss": 1.2935, "step": 1903500 }, { "epoch": 1.14, "learning_rate": 4.804833340733212e-05, "loss": 1.3163, "step": 1904000 }, { "epoch": 1.14, "learning_rate": 4.8046233441771556e-05, "loss": 1.3318, "step": 1904500 }, { "epoch": 1.14, "learning_rate": 4.804413767614211e-05, "loss": 1.3048, "step": 1905000 }, { "epoch": 1.14, "learning_rate": 4.804203771058155e-05, "loss": 1.334, "step": 1905500 }, { "epoch": 1.14, "learning_rate": 4.803993774502098e-05, "loss": 1.2887, "step": 1906000 }, { "epoch": 1.14, "learning_rate": 4.803783777946042e-05, "loss": 1.292, "step": 1906500 }, { "epoch": 1.14, "learning_rate": 4.803573781389985e-05, "loss": 1.3307, "step": 1907000 }, { "epoch": 1.14, "learning_rate": 4.8033637848339284e-05, "loss": 1.2953, "step": 1907500 }, { "epoch": 1.14, "learning_rate": 4.803153788277872e-05, "loss": 1.2881, "step": 1908000 }, { "epoch": 1.14, "learning_rate": 4.802944211714928e-05, "loss": 1.3179, "step": 1908500 }, { "epoch": 1.14, "learning_rate": 4.802734635151983e-05, "loss": 1.3172, "step": 1909000 }, { "epoch": 1.14, "learning_rate": 4.802524638595927e-05, "loss": 1.3109, "step": 1909500 }, { "epoch": 1.15, "learning_rate": 4.8023146420398705e-05, "loss": 1.289, "step": 1910000 }, { "epoch": 1.15, "learning_rate": 4.8021046454838145e-05, "loss": 1.2974, "step": 1910500 }, { "epoch": 1.15, "learning_rate": 4.801894648927758e-05, "loss": 1.3036, "step": 1911000 }, { "epoch": 1.15, "learning_rate": 4.801684652371701e-05, "loss": 1.2854, "step": 1911500 }, { "epoch": 1.15, "learning_rate": 4.8014746558156445e-05, "loss": 1.2929, "step": 1912000 }, { "epoch": 1.15, "learning_rate": 4.801264659259588e-05, "loss": 1.3062, "step": 1912500 }, { "epoch": 1.15, "learning_rate": 4.801054662703531e-05, "loss": 1.3065, "step": 1913000 }, { "epoch": 1.15, "learning_rate": 4.800844666147475e-05, "loss": 1.2574, "step": 1913500 }, { "epoch": 1.15, "learning_rate": 4.800635089584531e-05, "loss": 1.3001, "step": 1914000 }, { "epoch": 1.15, "learning_rate": 4.800425093028474e-05, "loss": 1.2838, "step": 1914500 }, { "epoch": 1.15, "learning_rate": 4.800215096472417e-05, "loss": 1.306, "step": 1915000 }, { "epoch": 1.15, "learning_rate": 4.800005099916361e-05, "loss": 1.3196, "step": 1915500 }, { "epoch": 1.15, "learning_rate": 4.799795103360305e-05, "loss": 1.3106, "step": 1916000 }, { "epoch": 1.15, "learning_rate": 4.799585106804248e-05, "loss": 1.3306, "step": 1916500 }, { "epoch": 1.15, "learning_rate": 4.799375110248192e-05, "loss": 1.3111, "step": 1917000 }, { "epoch": 1.15, "learning_rate": 4.7991651136921354e-05, "loss": 1.3301, "step": 1917500 }, { "epoch": 1.15, "learning_rate": 4.798955117136079e-05, "loss": 1.2936, "step": 1918000 }, { "epoch": 1.15, "learning_rate": 4.798745120580023e-05, "loss": 1.3296, "step": 1918500 }, { "epoch": 1.15, "learning_rate": 4.798535124023966e-05, "loss": 1.3108, "step": 1919000 }, { "epoch": 1.15, "learning_rate": 4.7983251274679095e-05, "loss": 1.3224, "step": 1919500 }, { "epoch": 1.15, "learning_rate": 4.7981155509049655e-05, "loss": 1.2901, "step": 1920000 }, { "epoch": 1.15, "learning_rate": 4.797905554348909e-05, "loss": 1.3028, "step": 1920500 }, { "epoch": 1.15, "learning_rate": 4.797695977785964e-05, "loss": 1.3334, "step": 1921000 }, { "epoch": 1.15, "learning_rate": 4.7974859812299075e-05, "loss": 1.3049, "step": 1921500 }, { "epoch": 1.15, "learning_rate": 4.7972759846738516e-05, "loss": 1.2857, "step": 1922000 }, { "epoch": 1.15, "learning_rate": 4.797065988117795e-05, "loss": 1.3086, "step": 1922500 }, { "epoch": 1.15, "learning_rate": 4.796855991561738e-05, "loss": 1.3116, "step": 1923000 }, { "epoch": 1.15, "learning_rate": 4.796645995005682e-05, "loss": 1.3488, "step": 1923500 }, { "epoch": 1.15, "learning_rate": 4.7964359984496256e-05, "loss": 1.3126, "step": 1924000 }, { "epoch": 1.15, "learning_rate": 4.796226421886681e-05, "loss": 1.3128, "step": 1924500 }, { "epoch": 1.15, "learning_rate": 4.796016425330624e-05, "loss": 1.3525, "step": 1925000 }, { "epoch": 1.15, "learning_rate": 4.7958064287745683e-05, "loss": 1.3269, "step": 1925500 }, { "epoch": 1.15, "learning_rate": 4.795596852211624e-05, "loss": 1.3106, "step": 1926000 }, { "epoch": 1.16, "learning_rate": 4.795386855655567e-05, "loss": 1.3043, "step": 1926500 }, { "epoch": 1.16, "learning_rate": 4.795176859099511e-05, "loss": 1.3259, "step": 1927000 }, { "epoch": 1.16, "learning_rate": 4.7949668625434544e-05, "loss": 1.3182, "step": 1927500 }, { "epoch": 1.16, "learning_rate": 4.794756865987398e-05, "loss": 1.3094, "step": 1928000 }, { "epoch": 1.16, "learning_rate": 4.794547289424453e-05, "loss": 1.3304, "step": 1928500 }, { "epoch": 1.16, "learning_rate": 4.794337292868397e-05, "loss": 1.3227, "step": 1929000 }, { "epoch": 1.16, "learning_rate": 4.7941272963123405e-05, "loss": 1.3197, "step": 1929500 }, { "epoch": 1.16, "learning_rate": 4.793917719749396e-05, "loss": 1.3179, "step": 1930000 }, { "epoch": 1.16, "learning_rate": 4.793707723193339e-05, "loss": 1.3055, "step": 1930500 }, { "epoch": 1.16, "learning_rate": 4.793497726637283e-05, "loss": 1.3084, "step": 1931000 }, { "epoch": 1.16, "learning_rate": 4.7932877300812266e-05, "loss": 1.2987, "step": 1931500 }, { "epoch": 1.16, "learning_rate": 4.79307773352517e-05, "loss": 1.3032, "step": 1932000 }, { "epoch": 1.16, "learning_rate": 4.792867736969114e-05, "loss": 1.3378, "step": 1932500 }, { "epoch": 1.16, "learning_rate": 4.792657740413057e-05, "loss": 1.3174, "step": 1933000 }, { "epoch": 1.16, "learning_rate": 4.7924481638501126e-05, "loss": 1.3101, "step": 1933500 }, { "epoch": 1.16, "learning_rate": 4.792238167294057e-05, "loss": 1.3142, "step": 1934000 }, { "epoch": 1.16, "learning_rate": 4.792028170738e-05, "loss": 1.3066, "step": 1934500 }, { "epoch": 1.16, "learning_rate": 4.7918181741819434e-05, "loss": 1.3203, "step": 1935000 }, { "epoch": 1.16, "learning_rate": 4.7916081776258874e-05, "loss": 1.3089, "step": 1935500 }, { "epoch": 1.16, "learning_rate": 4.791398181069831e-05, "loss": 1.3238, "step": 1936000 }, { "epoch": 1.16, "learning_rate": 4.7911881845137734e-05, "loss": 1.3104, "step": 1936500 }, { "epoch": 1.16, "learning_rate": 4.7909781879577174e-05, "loss": 1.3061, "step": 1937000 }, { "epoch": 1.16, "learning_rate": 4.790768191401661e-05, "loss": 1.3394, "step": 1937500 }, { "epoch": 1.16, "learning_rate": 4.790558194845604e-05, "loss": 1.3253, "step": 1938000 }, { "epoch": 1.16, "learning_rate": 4.790348198289548e-05, "loss": 1.3198, "step": 1938500 }, { "epoch": 1.16, "learning_rate": 4.7901386217266035e-05, "loss": 1.307, "step": 1939000 }, { "epoch": 1.16, "learning_rate": 4.789928625170547e-05, "loss": 1.2937, "step": 1939500 }, { "epoch": 1.16, "learning_rate": 4.78971862861449e-05, "loss": 1.2981, "step": 1940000 }, { "epoch": 1.16, "learning_rate": 4.789508632058434e-05, "loss": 1.3396, "step": 1940500 }, { "epoch": 1.16, "learning_rate": 4.7892986355023775e-05, "loss": 1.3233, "step": 1941000 }, { "epoch": 1.16, "learning_rate": 4.789089058939433e-05, "loss": 1.2959, "step": 1941500 }, { "epoch": 1.16, "learning_rate": 4.788879062383377e-05, "loss": 1.3036, "step": 1942000 }, { "epoch": 1.16, "learning_rate": 4.78866906582732e-05, "loss": 1.319, "step": 1942500 }, { "epoch": 1.16, "learning_rate": 4.7884590692712636e-05, "loss": 1.2911, "step": 1943000 }, { "epoch": 1.17, "learning_rate": 4.7882490727152076e-05, "loss": 1.3313, "step": 1943500 }, { "epoch": 1.17, "learning_rate": 4.788039076159151e-05, "loss": 1.308, "step": 1944000 }, { "epoch": 1.17, "learning_rate": 4.787829079603094e-05, "loss": 1.3128, "step": 1944500 }, { "epoch": 1.17, "learning_rate": 4.7876190830470384e-05, "loss": 1.2849, "step": 1945000 }, { "epoch": 1.17, "learning_rate": 4.787409506484094e-05, "loss": 1.3079, "step": 1945500 }, { "epoch": 1.17, "learning_rate": 4.787199509928037e-05, "loss": 1.2978, "step": 1946000 }, { "epoch": 1.17, "learning_rate": 4.7869895133719804e-05, "loss": 1.3392, "step": 1946500 }, { "epoch": 1.17, "learning_rate": 4.7867795168159244e-05, "loss": 1.303, "step": 1947000 }, { "epoch": 1.17, "learning_rate": 4.786569520259868e-05, "loss": 1.2798, "step": 1947500 }, { "epoch": 1.17, "learning_rate": 4.786359523703811e-05, "loss": 1.3084, "step": 1948000 }, { "epoch": 1.17, "learning_rate": 4.786149527147755e-05, "loss": 1.3264, "step": 1948500 }, { "epoch": 1.17, "learning_rate": 4.785939530591698e-05, "loss": 1.2991, "step": 1949000 }, { "epoch": 1.17, "learning_rate": 4.785729954028754e-05, "loss": 1.3265, "step": 1949500 }, { "epoch": 1.17, "learning_rate": 4.785519957472697e-05, "loss": 1.2822, "step": 1950000 }, { "epoch": 1.17, "learning_rate": 4.785309960916641e-05, "loss": 1.3209, "step": 1950500 }, { "epoch": 1.17, "learning_rate": 4.7851003843536966e-05, "loss": 1.3073, "step": 1951000 }, { "epoch": 1.17, "learning_rate": 4.78489038779764e-05, "loss": 1.3184, "step": 1951500 }, { "epoch": 1.17, "learning_rate": 4.784680391241584e-05, "loss": 1.2972, "step": 1952000 }, { "epoch": 1.17, "learning_rate": 4.784470394685527e-05, "loss": 1.3109, "step": 1952500 }, { "epoch": 1.17, "learning_rate": 4.7842603981294706e-05, "loss": 1.3136, "step": 1953000 }, { "epoch": 1.17, "learning_rate": 4.784050401573415e-05, "loss": 1.2922, "step": 1953500 }, { "epoch": 1.17, "learning_rate": 4.78384082501047e-05, "loss": 1.326, "step": 1954000 }, { "epoch": 1.17, "learning_rate": 4.7836308284544134e-05, "loss": 1.3251, "step": 1954500 }, { "epoch": 1.17, "learning_rate": 4.783420831898357e-05, "loss": 1.3091, "step": 1955000 }, { "epoch": 1.17, "learning_rate": 4.783210835342301e-05, "loss": 1.2829, "step": 1955500 }, { "epoch": 1.17, "learning_rate": 4.783001258779356e-05, "loss": 1.3083, "step": 1956000 }, { "epoch": 1.17, "learning_rate": 4.7827912622232994e-05, "loss": 1.2976, "step": 1956500 }, { "epoch": 1.17, "learning_rate": 4.7825812656672435e-05, "loss": 1.3159, "step": 1957000 }, { "epoch": 1.17, "learning_rate": 4.782371269111187e-05, "loss": 1.318, "step": 1957500 }, { "epoch": 1.17, "learning_rate": 4.78216127255513e-05, "loss": 1.3248, "step": 1958000 }, { "epoch": 1.17, "learning_rate": 4.781951275999074e-05, "loss": 1.3238, "step": 1958500 }, { "epoch": 1.17, "learning_rate": 4.781741279443017e-05, "loss": 1.31, "step": 1959000 }, { "epoch": 1.17, "learning_rate": 4.781531702880073e-05, "loss": 1.2805, "step": 1959500 }, { "epoch": 1.18, "learning_rate": 4.781321706324016e-05, "loss": 1.3078, "step": 1960000 }, { "epoch": 1.18, "learning_rate": 4.78111170976796e-05, "loss": 1.3041, "step": 1960500 }, { "epoch": 1.18, "learning_rate": 4.780901713211903e-05, "loss": 1.3363, "step": 1961000 }, { "epoch": 1.18, "learning_rate": 4.780691716655846e-05, "loss": 1.3228, "step": 1961500 }, { "epoch": 1.18, "learning_rate": 4.78048172009979e-05, "loss": 1.302, "step": 1962000 }, { "epoch": 1.18, "learning_rate": 4.7802717235437336e-05, "loss": 1.3133, "step": 1962500 }, { "epoch": 1.18, "learning_rate": 4.780061726987677e-05, "loss": 1.3215, "step": 1963000 }, { "epoch": 1.18, "learning_rate": 4.779851730431621e-05, "loss": 1.3024, "step": 1963500 }, { "epoch": 1.18, "learning_rate": 4.7796421538686764e-05, "loss": 1.3371, "step": 1964000 }, { "epoch": 1.18, "learning_rate": 4.77943215731262e-05, "loss": 1.3041, "step": 1964500 }, { "epoch": 1.18, "learning_rate": 4.779222160756564e-05, "loss": 1.3172, "step": 1965000 }, { "epoch": 1.18, "learning_rate": 4.779012164200507e-05, "loss": 1.3548, "step": 1965500 }, { "epoch": 1.18, "learning_rate": 4.7788021676444504e-05, "loss": 1.3079, "step": 1966000 }, { "epoch": 1.18, "learning_rate": 4.7785921710883944e-05, "loss": 1.3098, "step": 1966500 }, { "epoch": 1.18, "learning_rate": 4.77838259452545e-05, "loss": 1.2921, "step": 1967000 }, { "epoch": 1.18, "learning_rate": 4.778172597969393e-05, "loss": 1.2957, "step": 1967500 }, { "epoch": 1.18, "learning_rate": 4.7779626014133365e-05, "loss": 1.3378, "step": 1968000 }, { "epoch": 1.18, "learning_rate": 4.777753024850392e-05, "loss": 1.306, "step": 1968500 }, { "epoch": 1.18, "learning_rate": 4.777543028294336e-05, "loss": 1.3239, "step": 1969000 }, { "epoch": 1.18, "learning_rate": 4.777333451731392e-05, "loss": 1.3367, "step": 1969500 }, { "epoch": 1.18, "learning_rate": 4.777123455175335e-05, "loss": 1.3068, "step": 1970000 }, { "epoch": 1.18, "learning_rate": 4.7769134586192786e-05, "loss": 1.3171, "step": 1970500 }, { "epoch": 1.18, "learning_rate": 4.776703462063222e-05, "loss": 1.2998, "step": 1971000 }, { "epoch": 1.18, "learning_rate": 4.776493465507165e-05, "loss": 1.3171, "step": 1971500 }, { "epoch": 1.18, "learning_rate": 4.776283468951109e-05, "loss": 1.31, "step": 1972000 }, { "epoch": 1.18, "learning_rate": 4.776073472395053e-05, "loss": 1.2945, "step": 1972500 }, { "epoch": 1.18, "learning_rate": 4.775863475838996e-05, "loss": 1.336, "step": 1973000 }, { "epoch": 1.18, "learning_rate": 4.7756538992760514e-05, "loss": 1.2934, "step": 1973500 }, { "epoch": 1.18, "learning_rate": 4.7754439027199954e-05, "loss": 1.2931, "step": 1974000 }, { "epoch": 1.18, "learning_rate": 4.775233906163939e-05, "loss": 1.2957, "step": 1974500 }, { "epoch": 1.18, "learning_rate": 4.775023909607882e-05, "loss": 1.2815, "step": 1975000 }, { "epoch": 1.18, "learning_rate": 4.7748143330449374e-05, "loss": 1.3078, "step": 1975500 }, { "epoch": 1.18, "learning_rate": 4.7746043364888815e-05, "loss": 1.3086, "step": 1976000 }, { "epoch": 1.18, "learning_rate": 4.774394339932825e-05, "loss": 1.3303, "step": 1976500 }, { "epoch": 1.19, "learning_rate": 4.774184343376768e-05, "loss": 1.2859, "step": 1977000 }, { "epoch": 1.19, "learning_rate": 4.773974346820712e-05, "loss": 1.3045, "step": 1977500 }, { "epoch": 1.19, "learning_rate": 4.7737643502646555e-05, "loss": 1.2856, "step": 1978000 }, { "epoch": 1.19, "learning_rate": 4.773554353708599e-05, "loss": 1.3258, "step": 1978500 }, { "epoch": 1.19, "learning_rate": 4.773344357152543e-05, "loss": 1.3055, "step": 1979000 }, { "epoch": 1.19, "learning_rate": 4.773134360596486e-05, "loss": 1.3127, "step": 1979500 }, { "epoch": 1.19, "learning_rate": 4.7729243640404296e-05, "loss": 1.3333, "step": 1980000 }, { "epoch": 1.19, "learning_rate": 4.7727143674843736e-05, "loss": 1.3097, "step": 1980500 }, { "epoch": 1.19, "learning_rate": 4.772504370928316e-05, "loss": 1.3109, "step": 1981000 }, { "epoch": 1.19, "learning_rate": 4.772294794365372e-05, "loss": 1.3105, "step": 1981500 }, { "epoch": 1.19, "learning_rate": 4.772084797809316e-05, "loss": 1.3171, "step": 1982000 }, { "epoch": 1.19, "learning_rate": 4.77187480125326e-05, "loss": 1.3272, "step": 1982500 }, { "epoch": 1.19, "learning_rate": 4.771664804697203e-05, "loss": 1.3086, "step": 1983000 }, { "epoch": 1.19, "learning_rate": 4.7714548081411464e-05, "loss": 1.3165, "step": 1983500 }, { "epoch": 1.19, "learning_rate": 4.7712452315782024e-05, "loss": 1.298, "step": 1984000 }, { "epoch": 1.19, "learning_rate": 4.771035235022146e-05, "loss": 1.3231, "step": 1984500 }, { "epoch": 1.19, "learning_rate": 4.770825238466089e-05, "loss": 1.3212, "step": 1985000 }, { "epoch": 1.19, "learning_rate": 4.7706152419100324e-05, "loss": 1.2968, "step": 1985500 }, { "epoch": 1.19, "learning_rate": 4.7704056653470885e-05, "loss": 1.3224, "step": 1986000 }, { "epoch": 1.19, "learning_rate": 4.770195668791032e-05, "loss": 1.3082, "step": 1986500 }, { "epoch": 1.19, "learning_rate": 4.769985672234975e-05, "loss": 1.2747, "step": 1987000 }, { "epoch": 1.19, "learning_rate": 4.769776095672031e-05, "loss": 1.2951, "step": 1987500 }, { "epoch": 1.19, "learning_rate": 4.7695660991159746e-05, "loss": 1.2899, "step": 1988000 }, { "epoch": 1.19, "learning_rate": 4.769356102559918e-05, "loss": 1.3238, "step": 1988500 }, { "epoch": 1.19, "learning_rate": 4.769146106003862e-05, "loss": 1.3103, "step": 1989000 }, { "epoch": 1.19, "learning_rate": 4.768936109447805e-05, "loss": 1.3129, "step": 1989500 }, { "epoch": 1.19, "learning_rate": 4.7687261128917486e-05, "loss": 1.3034, "step": 1990000 }, { "epoch": 1.19, "learning_rate": 4.768516116335692e-05, "loss": 1.3063, "step": 1990500 }, { "epoch": 1.19, "learning_rate": 4.768306119779635e-05, "loss": 1.2932, "step": 1991000 }, { "epoch": 1.19, "learning_rate": 4.7680961232235787e-05, "loss": 1.3289, "step": 1991500 }, { "epoch": 1.19, "learning_rate": 4.767886126667523e-05, "loss": 1.3119, "step": 1992000 }, { "epoch": 1.19, "learning_rate": 4.767676130111466e-05, "loss": 1.2969, "step": 1992500 }, { "epoch": 1.19, "learning_rate": 4.7674665535485214e-05, "loss": 1.3076, "step": 1993000 }, { "epoch": 1.2, "learning_rate": 4.767256556992465e-05, "loss": 1.3159, "step": 1993500 }, { "epoch": 1.2, "learning_rate": 4.767046980429521e-05, "loss": 1.2829, "step": 1994000 }, { "epoch": 1.2, "learning_rate": 4.766836983873465e-05, "loss": 1.2993, "step": 1994500 }, { "epoch": 1.2, "learning_rate": 4.766626987317408e-05, "loss": 1.3246, "step": 1995000 }, { "epoch": 1.2, "learning_rate": 4.7664169907613515e-05, "loss": 1.2838, "step": 1995500 }, { "epoch": 1.2, "learning_rate": 4.766206994205295e-05, "loss": 1.3062, "step": 1996000 }, { "epoch": 1.2, "learning_rate": 4.765997417642351e-05, "loss": 1.3042, "step": 1996500 }, { "epoch": 1.2, "learning_rate": 4.765787421086294e-05, "loss": 1.3223, "step": 1997000 }, { "epoch": 1.2, "learning_rate": 4.7655774245302376e-05, "loss": 1.2662, "step": 1997500 }, { "epoch": 1.2, "learning_rate": 4.765367427974181e-05, "loss": 1.3111, "step": 1998000 }, { "epoch": 1.2, "learning_rate": 4.765157431418124e-05, "loss": 1.3282, "step": 1998500 }, { "epoch": 1.2, "learning_rate": 4.764947434862068e-05, "loss": 1.2805, "step": 1999000 }, { "epoch": 1.2, "learning_rate": 4.7647374383060116e-05, "loss": 1.2951, "step": 1999500 }, { "epoch": 1.2, "learning_rate": 4.764527441749955e-05, "loss": 1.3019, "step": 2000000 }, { "epoch": 1.2, "eval_loss": 1.2635042667388916, "eval_runtime": 1110.2941, "eval_samples_per_second": 474.397, "eval_steps_per_second": 79.066, "step": 2000000 }, { "epoch": 1.2, "learning_rate": 4.764317445193899e-05, "loss": 1.3184, "step": 2000500 }, { "epoch": 1.2, "learning_rate": 4.764107868630954e-05, "loss": 1.3177, "step": 2001000 }, { "epoch": 1.2, "learning_rate": 4.763897872074898e-05, "loss": 1.2882, "step": 2001500 }, { "epoch": 1.2, "learning_rate": 4.763687875518841e-05, "loss": 1.2976, "step": 2002000 }, { "epoch": 1.2, "learning_rate": 4.763477878962785e-05, "loss": 1.3246, "step": 2002500 }, { "epoch": 1.2, "learning_rate": 4.7632678824067284e-05, "loss": 1.3032, "step": 2003000 }, { "epoch": 1.2, "learning_rate": 4.7630578858506724e-05, "loss": 1.3143, "step": 2003500 }, { "epoch": 1.2, "learning_rate": 4.762847889294616e-05, "loss": 1.3001, "step": 2004000 }, { "epoch": 1.2, "learning_rate": 4.762637892738559e-05, "loss": 1.3211, "step": 2004500 }, { "epoch": 1.2, "learning_rate": 4.7624283161756145e-05, "loss": 1.3073, "step": 2005000 }, { "epoch": 1.2, "learning_rate": 4.7622183196195585e-05, "loss": 1.309, "step": 2005500 }, { "epoch": 1.2, "learning_rate": 4.762008743056614e-05, "loss": 1.3084, "step": 2006000 }, { "epoch": 1.2, "learning_rate": 4.761798746500557e-05, "loss": 1.3333, "step": 2006500 }, { "epoch": 1.2, "learning_rate": 4.7615887499445005e-05, "loss": 1.2914, "step": 2007000 }, { "epoch": 1.2, "learning_rate": 4.7613787533884446e-05, "loss": 1.2925, "step": 2007500 }, { "epoch": 1.2, "learning_rate": 4.761168756832388e-05, "loss": 1.309, "step": 2008000 }, { "epoch": 1.2, "learning_rate": 4.760958760276331e-05, "loss": 1.3079, "step": 2008500 }, { "epoch": 1.2, "learning_rate": 4.760748763720275e-05, "loss": 1.3071, "step": 2009000 }, { "epoch": 1.2, "learning_rate": 4.7605391871573306e-05, "loss": 1.3132, "step": 2009500 }, { "epoch": 1.21, "learning_rate": 4.760329610594386e-05, "loss": 1.2999, "step": 2010000 }, { "epoch": 1.21, "learning_rate": 4.7601196140383293e-05, "loss": 1.3272, "step": 2010500 }, { "epoch": 1.21, "learning_rate": 4.7599096174822734e-05, "loss": 1.3097, "step": 2011000 }, { "epoch": 1.21, "learning_rate": 4.759699620926217e-05, "loss": 1.2843, "step": 2011500 }, { "epoch": 1.21, "learning_rate": 4.75948962437016e-05, "loss": 1.3241, "step": 2012000 }, { "epoch": 1.21, "learning_rate": 4.759279627814104e-05, "loss": 1.3017, "step": 2012500 }, { "epoch": 1.21, "learning_rate": 4.7590696312580474e-05, "loss": 1.3028, "step": 2013000 }, { "epoch": 1.21, "learning_rate": 4.758859634701991e-05, "loss": 1.2988, "step": 2013500 }, { "epoch": 1.21, "learning_rate": 4.758649638145935e-05, "loss": 1.3152, "step": 2014000 }, { "epoch": 1.21, "learning_rate": 4.75844006158299e-05, "loss": 1.3362, "step": 2014500 }, { "epoch": 1.21, "learning_rate": 4.7582300650269335e-05, "loss": 1.3022, "step": 2015000 }, { "epoch": 1.21, "learning_rate": 4.758020068470877e-05, "loss": 1.2848, "step": 2015500 }, { "epoch": 1.21, "learning_rate": 4.757810071914821e-05, "loss": 1.3017, "step": 2016000 }, { "epoch": 1.21, "learning_rate": 4.757600075358764e-05, "loss": 1.3187, "step": 2016500 }, { "epoch": 1.21, "learning_rate": 4.7573900788027076e-05, "loss": 1.2904, "step": 2017000 }, { "epoch": 1.21, "learning_rate": 4.757180082246651e-05, "loss": 1.3214, "step": 2017500 }, { "epoch": 1.21, "learning_rate": 4.756970085690594e-05, "loss": 1.2859, "step": 2018000 }, { "epoch": 1.21, "learning_rate": 4.756760929120762e-05, "loss": 1.3173, "step": 2018500 }, { "epoch": 1.21, "learning_rate": 4.7565509325647056e-05, "loss": 1.3088, "step": 2019000 }, { "epoch": 1.21, "learning_rate": 4.75634093600865e-05, "loss": 1.3237, "step": 2019500 }, { "epoch": 1.21, "learning_rate": 4.756130939452593e-05, "loss": 1.3135, "step": 2020000 }, { "epoch": 1.21, "learning_rate": 4.7559209428965364e-05, "loss": 1.315, "step": 2020500 }, { "epoch": 1.21, "learning_rate": 4.7557109463404804e-05, "loss": 1.3039, "step": 2021000 }, { "epoch": 1.21, "learning_rate": 4.755500949784424e-05, "loss": 1.3214, "step": 2021500 }, { "epoch": 1.21, "learning_rate": 4.755291373221479e-05, "loss": 1.2855, "step": 2022000 }, { "epoch": 1.21, "learning_rate": 4.7550813766654224e-05, "loss": 1.3199, "step": 2022500 }, { "epoch": 1.21, "learning_rate": 4.7548713801093665e-05, "loss": 1.3314, "step": 2023000 }, { "epoch": 1.21, "learning_rate": 4.75466138355331e-05, "loss": 1.3271, "step": 2023500 }, { "epoch": 1.21, "learning_rate": 4.754451386997253e-05, "loss": 1.3423, "step": 2024000 }, { "epoch": 1.21, "learning_rate": 4.7542413904411965e-05, "loss": 1.323, "step": 2024500 }, { "epoch": 1.21, "learning_rate": 4.75403139388514e-05, "loss": 1.3235, "step": 2025000 }, { "epoch": 1.21, "learning_rate": 4.753821397329084e-05, "loss": 1.3155, "step": 2025500 }, { "epoch": 1.21, "learning_rate": 4.75361182076614e-05, "loss": 1.293, "step": 2026000 }, { "epoch": 1.21, "learning_rate": 4.753402244203195e-05, "loss": 1.3104, "step": 2026500 }, { "epoch": 1.22, "learning_rate": 4.7531922476471386e-05, "loss": 1.3364, "step": 2027000 }, { "epoch": 1.22, "learning_rate": 4.752982251091082e-05, "loss": 1.279, "step": 2027500 }, { "epoch": 1.22, "learning_rate": 4.752772254535026e-05, "loss": 1.3082, "step": 2028000 }, { "epoch": 1.22, "learning_rate": 4.752562257978969e-05, "loss": 1.3133, "step": 2028500 }, { "epoch": 1.22, "learning_rate": 4.752353101409137e-05, "loss": 1.2863, "step": 2029000 }, { "epoch": 1.22, "learning_rate": 4.75214310485308e-05, "loss": 1.2735, "step": 2029500 }, { "epoch": 1.22, "learning_rate": 4.751933108297024e-05, "loss": 1.3269, "step": 2030000 }, { "epoch": 1.22, "learning_rate": 4.7517231117409674e-05, "loss": 1.3441, "step": 2030500 }, { "epoch": 1.22, "learning_rate": 4.751513115184911e-05, "loss": 1.3173, "step": 2031000 }, { "epoch": 1.22, "learning_rate": 4.751303118628855e-05, "loss": 1.328, "step": 2031500 }, { "epoch": 1.22, "learning_rate": 4.751093122072798e-05, "loss": 1.2935, "step": 2032000 }, { "epoch": 1.22, "learning_rate": 4.7508831255167415e-05, "loss": 1.2998, "step": 2032500 }, { "epoch": 1.22, "learning_rate": 4.7506731289606855e-05, "loss": 1.2958, "step": 2033000 }, { "epoch": 1.22, "learning_rate": 4.750463132404629e-05, "loss": 1.293, "step": 2033500 }, { "epoch": 1.22, "learning_rate": 4.750253975834796e-05, "loss": 1.3184, "step": 2034000 }, { "epoch": 1.22, "learning_rate": 4.7500439792787396e-05, "loss": 1.3178, "step": 2034500 }, { "epoch": 1.22, "learning_rate": 4.749833982722683e-05, "loss": 1.2973, "step": 2035000 }, { "epoch": 1.22, "learning_rate": 4.749623986166627e-05, "loss": 1.3073, "step": 2035500 }, { "epoch": 1.22, "learning_rate": 4.74941398961057e-05, "loss": 1.3038, "step": 2036000 }, { "epoch": 1.22, "learning_rate": 4.7492039930545136e-05, "loss": 1.3156, "step": 2036500 }, { "epoch": 1.22, "learning_rate": 4.7489939964984576e-05, "loss": 1.3153, "step": 2037000 }, { "epoch": 1.22, "learning_rate": 4.748783999942401e-05, "loss": 1.2881, "step": 2037500 }, { "epoch": 1.22, "learning_rate": 4.748574003386344e-05, "loss": 1.3233, "step": 2038000 }, { "epoch": 1.22, "learning_rate": 4.7483640068302884e-05, "loss": 1.3069, "step": 2038500 }, { "epoch": 1.22, "learning_rate": 4.748154010274231e-05, "loss": 1.2989, "step": 2039000 }, { "epoch": 1.22, "learning_rate": 4.747944013718175e-05, "loss": 1.3174, "step": 2039500 }, { "epoch": 1.22, "learning_rate": 4.747734437155231e-05, "loss": 1.2975, "step": 2040000 }, { "epoch": 1.22, "learning_rate": 4.7475244405991744e-05, "loss": 1.3095, "step": 2040500 }, { "epoch": 1.22, "learning_rate": 4.747314444043118e-05, "loss": 1.296, "step": 2041000 }, { "epoch": 1.22, "learning_rate": 4.747104867480173e-05, "loss": 1.3202, "step": 2041500 }, { "epoch": 1.22, "learning_rate": 4.746894870924117e-05, "loss": 1.3006, "step": 2042000 }, { "epoch": 1.22, "learning_rate": 4.7466848743680605e-05, "loss": 1.3178, "step": 2042500 }, { "epoch": 1.22, "learning_rate": 4.746474877812004e-05, "loss": 1.3315, "step": 2043000 }, { "epoch": 1.23, "learning_rate": 4.746264881255947e-05, "loss": 1.326, "step": 2043500 }, { "epoch": 1.23, "learning_rate": 4.7460548846998905e-05, "loss": 1.3472, "step": 2044000 }, { "epoch": 1.23, "learning_rate": 4.7458453081369466e-05, "loss": 1.2894, "step": 2044500 }, { "epoch": 1.23, "learning_rate": 4.74563531158089e-05, "loss": 1.3077, "step": 2045000 }, { "epoch": 1.23, "learning_rate": 4.745425315024834e-05, "loss": 1.3049, "step": 2045500 }, { "epoch": 1.23, "learning_rate": 4.7452153184687766e-05, "loss": 1.3121, "step": 2046000 }, { "epoch": 1.23, "learning_rate": 4.7450053219127206e-05, "loss": 1.3135, "step": 2046500 }, { "epoch": 1.23, "learning_rate": 4.744795325356664e-05, "loss": 1.2927, "step": 2047000 }, { "epoch": 1.23, "learning_rate": 4.744585328800607e-05, "loss": 1.3167, "step": 2047500 }, { "epoch": 1.23, "learning_rate": 4.7443757522376634e-05, "loss": 1.3012, "step": 2048000 }, { "epoch": 1.23, "learning_rate": 4.744165755681607e-05, "loss": 1.2855, "step": 2048500 }, { "epoch": 1.23, "learning_rate": 4.74395575912555e-05, "loss": 1.3016, "step": 2049000 }, { "epoch": 1.23, "learning_rate": 4.7437457625694934e-05, "loss": 1.2813, "step": 2049500 }, { "epoch": 1.23, "learning_rate": 4.7435357660134374e-05, "loss": 1.304, "step": 2050000 }, { "epoch": 1.23, "learning_rate": 4.743325769457381e-05, "loss": 1.3497, "step": 2050500 }, { "epoch": 1.23, "learning_rate": 4.743116192894436e-05, "loss": 1.3306, "step": 2051000 }, { "epoch": 1.23, "learning_rate": 4.7429061963383795e-05, "loss": 1.3091, "step": 2051500 }, { "epoch": 1.23, "learning_rate": 4.7426961997823235e-05, "loss": 1.3144, "step": 2052000 }, { "epoch": 1.23, "learning_rate": 4.742486203226267e-05, "loss": 1.303, "step": 2052500 }, { "epoch": 1.23, "learning_rate": 4.74227620667021e-05, "loss": 1.3204, "step": 2053000 }, { "epoch": 1.23, "learning_rate": 4.742066210114154e-05, "loss": 1.2898, "step": 2053500 }, { "epoch": 1.23, "learning_rate": 4.7418562135580976e-05, "loss": 1.3483, "step": 2054000 }, { "epoch": 1.23, "learning_rate": 4.741646217002041e-05, "loss": 1.2907, "step": 2054500 }, { "epoch": 1.23, "learning_rate": 4.741436640439097e-05, "loss": 1.3236, "step": 2055000 }, { "epoch": 1.23, "learning_rate": 4.74122664388304e-05, "loss": 1.3411, "step": 2055500 }, { "epoch": 1.23, "learning_rate": 4.7410166473269836e-05, "loss": 1.3246, "step": 2056000 }, { "epoch": 1.23, "learning_rate": 4.740807070764039e-05, "loss": 1.2847, "step": 2056500 }, { "epoch": 1.23, "learning_rate": 4.740597074207983e-05, "loss": 1.3262, "step": 2057000 }, { "epoch": 1.23, "learning_rate": 4.7403870776519264e-05, "loss": 1.2978, "step": 2057500 }, { "epoch": 1.23, "learning_rate": 4.74017708109587e-05, "loss": 1.3048, "step": 2058000 }, { "epoch": 1.23, "learning_rate": 4.739967084539814e-05, "loss": 1.3204, "step": 2058500 }, { "epoch": 1.23, "learning_rate": 4.739757507976869e-05, "loss": 1.3146, "step": 2059000 }, { "epoch": 1.23, "learning_rate": 4.7395475114208124e-05, "loss": 1.2996, "step": 2059500 }, { "epoch": 1.24, "learning_rate": 4.739337514864756e-05, "loss": 1.3284, "step": 2060000 }, { "epoch": 1.24, "learning_rate": 4.7391275183087e-05, "loss": 1.3154, "step": 2060500 }, { "epoch": 1.24, "learning_rate": 4.738917941745755e-05, "loss": 1.2837, "step": 2061000 }, { "epoch": 1.24, "learning_rate": 4.7387079451896985e-05, "loss": 1.2931, "step": 2061500 }, { "epoch": 1.24, "learning_rate": 4.7384979486336425e-05, "loss": 1.285, "step": 2062000 }, { "epoch": 1.24, "learning_rate": 4.738287952077586e-05, "loss": 1.2819, "step": 2062500 }, { "epoch": 1.24, "learning_rate": 4.738077955521529e-05, "loss": 1.328, "step": 2063000 }, { "epoch": 1.24, "learning_rate": 4.737867958965473e-05, "loss": 1.3267, "step": 2063500 }, { "epoch": 1.24, "learning_rate": 4.7376579624094166e-05, "loss": 1.3084, "step": 2064000 }, { "epoch": 1.24, "learning_rate": 4.73744796585336e-05, "loss": 1.3122, "step": 2064500 }, { "epoch": 1.24, "learning_rate": 4.737237969297304e-05, "loss": 1.3371, "step": 2065000 }, { "epoch": 1.24, "learning_rate": 4.737028392734359e-05, "loss": 1.3154, "step": 2065500 }, { "epoch": 1.24, "learning_rate": 4.736818816171415e-05, "loss": 1.3268, "step": 2066000 }, { "epoch": 1.24, "learning_rate": 4.736608819615358e-05, "loss": 1.2935, "step": 2066500 }, { "epoch": 1.24, "learning_rate": 4.7363988230593014e-05, "loss": 1.3219, "step": 2067000 }, { "epoch": 1.24, "learning_rate": 4.7361888265032454e-05, "loss": 1.3166, "step": 2067500 }, { "epoch": 1.24, "learning_rate": 4.735979249940301e-05, "loss": 1.3136, "step": 2068000 }, { "epoch": 1.24, "learning_rate": 4.735769253384244e-05, "loss": 1.2952, "step": 2068500 }, { "epoch": 1.24, "learning_rate": 4.735559256828188e-05, "loss": 1.2956, "step": 2069000 }, { "epoch": 1.24, "learning_rate": 4.735349680265244e-05, "loss": 1.3205, "step": 2069500 }, { "epoch": 1.24, "learning_rate": 4.735139683709187e-05, "loss": 1.3191, "step": 2070000 }, { "epoch": 1.24, "learning_rate": 4.73492968715313e-05, "loss": 1.2867, "step": 2070500 }, { "epoch": 1.24, "learning_rate": 4.734719690597074e-05, "loss": 1.3288, "step": 2071000 }, { "epoch": 1.24, "learning_rate": 4.7345096940410175e-05, "loss": 1.2941, "step": 2071500 }, { "epoch": 1.24, "learning_rate": 4.734299697484961e-05, "loss": 1.2976, "step": 2072000 }, { "epoch": 1.24, "learning_rate": 4.734089700928905e-05, "loss": 1.3322, "step": 2072500 }, { "epoch": 1.24, "learning_rate": 4.733879704372848e-05, "loss": 1.3032, "step": 2073000 }, { "epoch": 1.24, "learning_rate": 4.7336697078167916e-05, "loss": 1.3135, "step": 2073500 }, { "epoch": 1.24, "learning_rate": 4.7334597112607356e-05, "loss": 1.2917, "step": 2074000 }, { "epoch": 1.24, "learning_rate": 4.733249714704679e-05, "loss": 1.3004, "step": 2074500 }, { "epoch": 1.24, "learning_rate": 4.733039718148622e-05, "loss": 1.3116, "step": 2075000 }, { "epoch": 1.24, "learning_rate": 4.7328301415856783e-05, "loss": 1.2761, "step": 2075500 }, { "epoch": 1.24, "learning_rate": 4.732620145029622e-05, "loss": 1.343, "step": 2076000 }, { "epoch": 1.24, "learning_rate": 4.732410148473565e-05, "loss": 1.3075, "step": 2076500 }, { "epoch": 1.25, "learning_rate": 4.732200151917509e-05, "loss": 1.2946, "step": 2077000 }, { "epoch": 1.25, "learning_rate": 4.7319901553614524e-05, "loss": 1.3313, "step": 2077500 }, { "epoch": 1.25, "learning_rate": 4.731780158805395e-05, "loss": 1.3118, "step": 2078000 }, { "epoch": 1.25, "learning_rate": 4.731570162249339e-05, "loss": 1.307, "step": 2078500 }, { "epoch": 1.25, "learning_rate": 4.7313601656932824e-05, "loss": 1.2916, "step": 2079000 }, { "epoch": 1.25, "learning_rate": 4.7311510091234505e-05, "loss": 1.3316, "step": 2079500 }, { "epoch": 1.25, "learning_rate": 4.730941012567394e-05, "loss": 1.2948, "step": 2080000 }, { "epoch": 1.25, "learning_rate": 4.730731016011337e-05, "loss": 1.2968, "step": 2080500 }, { "epoch": 1.25, "learning_rate": 4.730521019455281e-05, "loss": 1.2793, "step": 2081000 }, { "epoch": 1.25, "learning_rate": 4.7303110228992245e-05, "loss": 1.3118, "step": 2081500 }, { "epoch": 1.25, "learning_rate": 4.730101026343168e-05, "loss": 1.2868, "step": 2082000 }, { "epoch": 1.25, "learning_rate": 4.729891029787111e-05, "loss": 1.2819, "step": 2082500 }, { "epoch": 1.25, "learning_rate": 4.729681453224167e-05, "loss": 1.2951, "step": 2083000 }, { "epoch": 1.25, "learning_rate": 4.7294714566681106e-05, "loss": 1.2996, "step": 2083500 }, { "epoch": 1.25, "learning_rate": 4.7292614601120546e-05, "loss": 1.2857, "step": 2084000 }, { "epoch": 1.25, "learning_rate": 4.729051463555998e-05, "loss": 1.2822, "step": 2084500 }, { "epoch": 1.25, "learning_rate": 4.7288414669999407e-05, "loss": 1.3085, "step": 2085000 }, { "epoch": 1.25, "learning_rate": 4.728631470443885e-05, "loss": 1.2799, "step": 2085500 }, { "epoch": 1.25, "learning_rate": 4.728421473887828e-05, "loss": 1.3053, "step": 2086000 }, { "epoch": 1.25, "learning_rate": 4.7282114773317714e-05, "loss": 1.2769, "step": 2086500 }, { "epoch": 1.25, "learning_rate": 4.7280019007688274e-05, "loss": 1.3155, "step": 2087000 }, { "epoch": 1.25, "learning_rate": 4.727791904212771e-05, "loss": 1.2979, "step": 2087500 }, { "epoch": 1.25, "learning_rate": 4.727581907656714e-05, "loss": 1.3011, "step": 2088000 }, { "epoch": 1.25, "learning_rate": 4.7273719111006574e-05, "loss": 1.2925, "step": 2088500 }, { "epoch": 1.25, "learning_rate": 4.7271627545308255e-05, "loss": 1.3319, "step": 2089000 }, { "epoch": 1.25, "learning_rate": 4.7269527579747695e-05, "loss": 1.3107, "step": 2089500 }, { "epoch": 1.25, "learning_rate": 4.726742761418713e-05, "loss": 1.3185, "step": 2090000 }, { "epoch": 1.25, "learning_rate": 4.726532764862656e-05, "loss": 1.2748, "step": 2090500 }, { "epoch": 1.25, "learning_rate": 4.7263227683066e-05, "loss": 1.3045, "step": 2091000 }, { "epoch": 1.25, "learning_rate": 4.7261127717505436e-05, "loss": 1.2925, "step": 2091500 }, { "epoch": 1.25, "learning_rate": 4.725902775194486e-05, "loss": 1.3007, "step": 2092000 }, { "epoch": 1.25, "learning_rate": 4.725693198631542e-05, "loss": 1.3188, "step": 2092500 }, { "epoch": 1.25, "learning_rate": 4.725483202075486e-05, "loss": 1.314, "step": 2093000 }, { "epoch": 1.26, "learning_rate": 4.7252732055194296e-05, "loss": 1.3045, "step": 2093500 }, { "epoch": 1.26, "learning_rate": 4.725063208963373e-05, "loss": 1.2829, "step": 2094000 }, { "epoch": 1.26, "learning_rate": 4.7248536324004284e-05, "loss": 1.2943, "step": 2094500 }, { "epoch": 1.26, "learning_rate": 4.7246436358443724e-05, "loss": 1.2947, "step": 2095000 }, { "epoch": 1.26, "learning_rate": 4.724433639288316e-05, "loss": 1.306, "step": 2095500 }, { "epoch": 1.26, "learning_rate": 4.724223642732259e-05, "loss": 1.2993, "step": 2096000 }, { "epoch": 1.26, "learning_rate": 4.724013646176203e-05, "loss": 1.3149, "step": 2096500 }, { "epoch": 1.26, "learning_rate": 4.723803649620146e-05, "loss": 1.2976, "step": 2097000 }, { "epoch": 1.26, "learning_rate": 4.723594073057202e-05, "loss": 1.3189, "step": 2097500 }, { "epoch": 1.26, "learning_rate": 4.723384076501146e-05, "loss": 1.3227, "step": 2098000 }, { "epoch": 1.26, "learning_rate": 4.723174079945089e-05, "loss": 1.2949, "step": 2098500 }, { "epoch": 1.26, "learning_rate": 4.7229640833890325e-05, "loss": 1.2889, "step": 2099000 }, { "epoch": 1.26, "learning_rate": 4.722754086832976e-05, "loss": 1.3133, "step": 2099500 }, { "epoch": 1.26, "learning_rate": 4.722544090276919e-05, "loss": 1.2928, "step": 2100000 }, { "epoch": 1.26, "eval_loss": 1.2566254138946533, "eval_runtime": 1098.8033, "eval_samples_per_second": 479.358, "eval_steps_per_second": 79.893, "step": 2100000 }, { "epoch": 1.26, "learning_rate": 4.722334513713975e-05, "loss": 1.3077, "step": 2100500 }, { "epoch": 1.26, "learning_rate": 4.7221245171579186e-05, "loss": 1.2959, "step": 2101000 }, { "epoch": 1.26, "learning_rate": 4.721914520601862e-05, "loss": 1.3162, "step": 2101500 }, { "epoch": 1.26, "learning_rate": 4.721704524045805e-05, "loss": 1.2964, "step": 2102000 }, { "epoch": 1.26, "learning_rate": 4.7214945274897486e-05, "loss": 1.2677, "step": 2102500 }, { "epoch": 1.26, "learning_rate": 4.7212845309336926e-05, "loss": 1.3008, "step": 2103000 }, { "epoch": 1.26, "learning_rate": 4.721074954370749e-05, "loss": 1.2803, "step": 2103500 }, { "epoch": 1.26, "learning_rate": 4.7208649578146913e-05, "loss": 1.3284, "step": 2104000 }, { "epoch": 1.26, "learning_rate": 4.7206549612586354e-05, "loss": 1.3021, "step": 2104500 }, { "epoch": 1.26, "learning_rate": 4.720444964702579e-05, "loss": 1.319, "step": 2105000 }, { "epoch": 1.26, "learning_rate": 4.720234968146522e-05, "loss": 1.3188, "step": 2105500 }, { "epoch": 1.26, "learning_rate": 4.720025391583578e-05, "loss": 1.299, "step": 2106000 }, { "epoch": 1.26, "learning_rate": 4.7198153950275214e-05, "loss": 1.292, "step": 2106500 }, { "epoch": 1.26, "learning_rate": 4.719605398471465e-05, "loss": 1.2733, "step": 2107000 }, { "epoch": 1.26, "learning_rate": 4.719395401915408e-05, "loss": 1.2849, "step": 2107500 }, { "epoch": 1.26, "learning_rate": 4.719185405359352e-05, "loss": 1.3065, "step": 2108000 }, { "epoch": 1.26, "learning_rate": 4.7189754088032955e-05, "loss": 1.2831, "step": 2108500 }, { "epoch": 1.26, "learning_rate": 4.718765412247239e-05, "loss": 1.2901, "step": 2109000 }, { "epoch": 1.26, "learning_rate": 4.718555415691183e-05, "loss": 1.2861, "step": 2109500 }, { "epoch": 1.27, "learning_rate": 4.71834625912135e-05, "loss": 1.308, "step": 2110000 }, { "epoch": 1.27, "learning_rate": 4.718136262565294e-05, "loss": 1.308, "step": 2110500 }, { "epoch": 1.27, "learning_rate": 4.717926266009237e-05, "loss": 1.3069, "step": 2111000 }, { "epoch": 1.27, "learning_rate": 4.717716269453181e-05, "loss": 1.2779, "step": 2111500 }, { "epoch": 1.27, "learning_rate": 4.717506272897124e-05, "loss": 1.2905, "step": 2112000 }, { "epoch": 1.27, "learning_rate": 4.7172962763410677e-05, "loss": 1.3082, "step": 2112500 }, { "epoch": 1.27, "learning_rate": 4.717086279785012e-05, "loss": 1.3273, "step": 2113000 }, { "epoch": 1.27, "learning_rate": 4.716876283228955e-05, "loss": 1.3164, "step": 2113500 }, { "epoch": 1.27, "learning_rate": 4.7166662866728984e-05, "loss": 1.31, "step": 2114000 }, { "epoch": 1.27, "learning_rate": 4.7164562901168424e-05, "loss": 1.2888, "step": 2114500 }, { "epoch": 1.27, "learning_rate": 4.716246713553898e-05, "loss": 1.3108, "step": 2115000 }, { "epoch": 1.27, "learning_rate": 4.716036716997841e-05, "loss": 1.2832, "step": 2115500 }, { "epoch": 1.27, "learning_rate": 4.7158267204417844e-05, "loss": 1.2932, "step": 2116000 }, { "epoch": 1.27, "learning_rate": 4.7156167238857285e-05, "loss": 1.3313, "step": 2116500 }, { "epoch": 1.27, "learning_rate": 4.715406727329672e-05, "loss": 1.3278, "step": 2117000 }, { "epoch": 1.27, "learning_rate": 4.715196730773615e-05, "loss": 1.304, "step": 2117500 }, { "epoch": 1.27, "learning_rate": 4.714986734217559e-05, "loss": 1.313, "step": 2118000 }, { "epoch": 1.27, "learning_rate": 4.7147767376615025e-05, "loss": 1.2911, "step": 2118500 }, { "epoch": 1.27, "learning_rate": 4.714567161098558e-05, "loss": 1.3017, "step": 2119000 }, { "epoch": 1.27, "learning_rate": 4.714357164542501e-05, "loss": 1.3371, "step": 2119500 }, { "epoch": 1.27, "learning_rate": 4.714147167986445e-05, "loss": 1.289, "step": 2120000 }, { "epoch": 1.27, "learning_rate": 4.7139371714303886e-05, "loss": 1.3024, "step": 2120500 }, { "epoch": 1.27, "learning_rate": 4.713727174874332e-05, "loss": 1.3215, "step": 2121000 }, { "epoch": 1.27, "learning_rate": 4.713517178318275e-05, "loss": 1.2975, "step": 2121500 }, { "epoch": 1.27, "learning_rate": 4.7133071817622186e-05, "loss": 1.299, "step": 2122000 }, { "epoch": 1.27, "learning_rate": 4.7130971852061627e-05, "loss": 1.291, "step": 2122500 }, { "epoch": 1.27, "learning_rate": 4.71288802863633e-05, "loss": 1.3185, "step": 2123000 }, { "epoch": 1.27, "learning_rate": 4.712678032080274e-05, "loss": 1.3085, "step": 2123500 }, { "epoch": 1.27, "learning_rate": 4.7124680355242174e-05, "loss": 1.2923, "step": 2124000 }, { "epoch": 1.27, "learning_rate": 4.712258038968161e-05, "loss": 1.2792, "step": 2124500 }, { "epoch": 1.27, "learning_rate": 4.712048042412105e-05, "loss": 1.307, "step": 2125000 }, { "epoch": 1.27, "learning_rate": 4.711838045856048e-05, "loss": 1.2888, "step": 2125500 }, { "epoch": 1.27, "learning_rate": 4.711628049299991e-05, "loss": 1.3012, "step": 2126000 }, { "epoch": 1.27, "learning_rate": 4.711418052743935e-05, "loss": 1.303, "step": 2126500 }, { "epoch": 1.28, "learning_rate": 4.711208056187878e-05, "loss": 1.3162, "step": 2127000 }, { "epoch": 1.28, "learning_rate": 4.710998479624934e-05, "loss": 1.2963, "step": 2127500 }, { "epoch": 1.28, "learning_rate": 4.710788483068878e-05, "loss": 1.2893, "step": 2128000 }, { "epoch": 1.28, "learning_rate": 4.710578486512821e-05, "loss": 1.2947, "step": 2128500 }, { "epoch": 1.28, "learning_rate": 4.710368489956764e-05, "loss": 1.3196, "step": 2129000 }, { "epoch": 1.28, "learning_rate": 4.71015891339382e-05, "loss": 1.3049, "step": 2129500 }, { "epoch": 1.28, "learning_rate": 4.7099493368308756e-05, "loss": 1.3145, "step": 2130000 }, { "epoch": 1.28, "learning_rate": 4.7097393402748196e-05, "loss": 1.2462, "step": 2130500 }, { "epoch": 1.28, "learning_rate": 4.709529343718763e-05, "loss": 1.3073, "step": 2131000 }, { "epoch": 1.28, "learning_rate": 4.709319347162706e-05, "loss": 1.3123, "step": 2131500 }, { "epoch": 1.28, "learning_rate": 4.7091093506066504e-05, "loss": 1.3297, "step": 2132000 }, { "epoch": 1.28, "learning_rate": 4.708899354050594e-05, "loss": 1.2877, "step": 2132500 }, { "epoch": 1.28, "learning_rate": 4.708689357494537e-05, "loss": 1.3054, "step": 2133000 }, { "epoch": 1.28, "learning_rate": 4.7084793609384804e-05, "loss": 1.3155, "step": 2133500 }, { "epoch": 1.28, "learning_rate": 4.7082697843755364e-05, "loss": 1.2698, "step": 2134000 }, { "epoch": 1.28, "learning_rate": 4.70805978781948e-05, "loss": 1.2962, "step": 2134500 }, { "epoch": 1.28, "learning_rate": 4.707850631249647e-05, "loss": 1.3119, "step": 2135000 }, { "epoch": 1.28, "learning_rate": 4.7076406346935905e-05, "loss": 1.3021, "step": 2135500 }, { "epoch": 1.28, "learning_rate": 4.7074306381375345e-05, "loss": 1.2672, "step": 2136000 }, { "epoch": 1.28, "learning_rate": 4.707220641581478e-05, "loss": 1.3078, "step": 2136500 }, { "epoch": 1.28, "learning_rate": 4.707010645025421e-05, "loss": 1.2866, "step": 2137000 }, { "epoch": 1.28, "learning_rate": 4.706800648469365e-05, "loss": 1.3061, "step": 2137500 }, { "epoch": 1.28, "learning_rate": 4.7065906519133086e-05, "loss": 1.3172, "step": 2138000 }, { "epoch": 1.28, "learning_rate": 4.706380655357252e-05, "loss": 1.3106, "step": 2138500 }, { "epoch": 1.28, "learning_rate": 4.706170658801196e-05, "loss": 1.2807, "step": 2139000 }, { "epoch": 1.28, "learning_rate": 4.705960662245139e-05, "loss": 1.3079, "step": 2139500 }, { "epoch": 1.28, "learning_rate": 4.7057506656890826e-05, "loss": 1.2682, "step": 2140000 }, { "epoch": 1.28, "learning_rate": 4.705540669133026e-05, "loss": 1.2924, "step": 2140500 }, { "epoch": 1.28, "learning_rate": 4.705331092570082e-05, "loss": 1.3214, "step": 2141000 }, { "epoch": 1.28, "learning_rate": 4.7051210960140254e-05, "loss": 1.2939, "step": 2141500 }, { "epoch": 1.28, "learning_rate": 4.7049110994579694e-05, "loss": 1.2926, "step": 2142000 }, { "epoch": 1.28, "learning_rate": 4.704701942888136e-05, "loss": 1.2906, "step": 2142500 }, { "epoch": 1.28, "learning_rate": 4.70449194633208e-05, "loss": 1.3183, "step": 2143000 }, { "epoch": 1.29, "learning_rate": 4.7042819497760234e-05, "loss": 1.2804, "step": 2143500 }, { "epoch": 1.29, "learning_rate": 4.704071953219967e-05, "loss": 1.3177, "step": 2144000 }, { "epoch": 1.29, "learning_rate": 4.703861956663911e-05, "loss": 1.2715, "step": 2144500 }, { "epoch": 1.29, "learning_rate": 4.703651960107854e-05, "loss": 1.2879, "step": 2145000 }, { "epoch": 1.29, "learning_rate": 4.7034423835449095e-05, "loss": 1.3203, "step": 2145500 }, { "epoch": 1.29, "learning_rate": 4.703232386988853e-05, "loss": 1.2726, "step": 2146000 }, { "epoch": 1.29, "learning_rate": 4.703022390432797e-05, "loss": 1.3169, "step": 2146500 }, { "epoch": 1.29, "learning_rate": 4.70281239387674e-05, "loss": 1.3098, "step": 2147000 }, { "epoch": 1.29, "learning_rate": 4.702602397320684e-05, "loss": 1.2992, "step": 2147500 }, { "epoch": 1.29, "learning_rate": 4.7023924007646276e-05, "loss": 1.2911, "step": 2148000 }, { "epoch": 1.29, "learning_rate": 4.702182404208571e-05, "loss": 1.2933, "step": 2148500 }, { "epoch": 1.29, "learning_rate": 4.701972407652515e-05, "loss": 1.2892, "step": 2149000 }, { "epoch": 1.29, "learning_rate": 4.701762411096458e-05, "loss": 1.3169, "step": 2149500 }, { "epoch": 1.29, "learning_rate": 4.701552834533514e-05, "loss": 1.2979, "step": 2150000 }, { "epoch": 1.29, "learning_rate": 4.701342837977457e-05, "loss": 1.3174, "step": 2150500 }, { "epoch": 1.29, "learning_rate": 4.701132841421401e-05, "loss": 1.295, "step": 2151000 }, { "epoch": 1.29, "learning_rate": 4.7009228448653444e-05, "loss": 1.2988, "step": 2151500 }, { "epoch": 1.29, "learning_rate": 4.700712848309288e-05, "loss": 1.2717, "step": 2152000 }, { "epoch": 1.29, "learning_rate": 4.700502851753231e-05, "loss": 1.3021, "step": 2152500 }, { "epoch": 1.29, "learning_rate": 4.7002928551971744e-05, "loss": 1.2963, "step": 2153000 }, { "epoch": 1.29, "learning_rate": 4.7000832786342305e-05, "loss": 1.3104, "step": 2153500 }, { "epoch": 1.29, "learning_rate": 4.699873282078174e-05, "loss": 1.2788, "step": 2154000 }, { "epoch": 1.29, "learning_rate": 4.699663285522118e-05, "loss": 1.3219, "step": 2154500 }, { "epoch": 1.29, "learning_rate": 4.6994532889660605e-05, "loss": 1.3133, "step": 2155000 }, { "epoch": 1.29, "learning_rate": 4.6992432924100045e-05, "loss": 1.3099, "step": 2155500 }, { "epoch": 1.29, "learning_rate": 4.699033295853948e-05, "loss": 1.31, "step": 2156000 }, { "epoch": 1.29, "learning_rate": 4.698823299297891e-05, "loss": 1.3022, "step": 2156500 }, { "epoch": 1.29, "learning_rate": 4.6986137227349466e-05, "loss": 1.2902, "step": 2157000 }, { "epoch": 1.29, "learning_rate": 4.6984037261788906e-05, "loss": 1.3182, "step": 2157500 }, { "epoch": 1.29, "learning_rate": 4.698193729622834e-05, "loss": 1.2903, "step": 2158000 }, { "epoch": 1.29, "learning_rate": 4.697983733066777e-05, "loss": 1.3087, "step": 2158500 }, { "epoch": 1.29, "learning_rate": 4.697773736510721e-05, "loss": 1.3112, "step": 2159000 }, { "epoch": 1.29, "learning_rate": 4.6975637399546647e-05, "loss": 1.3376, "step": 2159500 }, { "epoch": 1.3, "learning_rate": 4.69735416339172e-05, "loss": 1.2639, "step": 2160000 }, { "epoch": 1.3, "learning_rate": 4.6971441668356634e-05, "loss": 1.3126, "step": 2160500 }, { "epoch": 1.3, "learning_rate": 4.6969341702796074e-05, "loss": 1.3024, "step": 2161000 }, { "epoch": 1.3, "learning_rate": 4.696724173723551e-05, "loss": 1.2967, "step": 2161500 }, { "epoch": 1.3, "learning_rate": 4.696514597160606e-05, "loss": 1.2999, "step": 2162000 }, { "epoch": 1.3, "learning_rate": 4.696305020597662e-05, "loss": 1.2965, "step": 2162500 }, { "epoch": 1.3, "learning_rate": 4.6960954440347175e-05, "loss": 1.2943, "step": 2163000 }, { "epoch": 1.3, "learning_rate": 4.6958854474786615e-05, "loss": 1.2958, "step": 2163500 }, { "epoch": 1.3, "learning_rate": 4.695675450922605e-05, "loss": 1.2983, "step": 2164000 }, { "epoch": 1.3, "learning_rate": 4.695465454366548e-05, "loss": 1.2821, "step": 2164500 }, { "epoch": 1.3, "learning_rate": 4.695255457810492e-05, "loss": 1.2944, "step": 2165000 }, { "epoch": 1.3, "learning_rate": 4.6950454612544356e-05, "loss": 1.2745, "step": 2165500 }, { "epoch": 1.3, "learning_rate": 4.694835464698379e-05, "loss": 1.324, "step": 2166000 }, { "epoch": 1.3, "learning_rate": 4.694625468142322e-05, "loss": 1.3051, "step": 2166500 }, { "epoch": 1.3, "learning_rate": 4.6944154715862656e-05, "loss": 1.3191, "step": 2167000 }, { "epoch": 1.3, "learning_rate": 4.694205475030209e-05, "loss": 1.3141, "step": 2167500 }, { "epoch": 1.3, "learning_rate": 4.693995478474153e-05, "loss": 1.3012, "step": 2168000 }, { "epoch": 1.3, "learning_rate": 4.693785481918096e-05, "loss": 1.3137, "step": 2168500 }, { "epoch": 1.3, "learning_rate": 4.69357548536204e-05, "loss": 1.2997, "step": 2169000 }, { "epoch": 1.3, "learning_rate": 4.693365488805984e-05, "loss": 1.2951, "step": 2169500 }, { "epoch": 1.3, "learning_rate": 4.693155912243039e-05, "loss": 1.2867, "step": 2170000 }, { "epoch": 1.3, "learning_rate": 4.6929459156869824e-05, "loss": 1.2777, "step": 2170500 }, { "epoch": 1.3, "learning_rate": 4.6927359191309264e-05, "loss": 1.3149, "step": 2171000 }, { "epoch": 1.3, "learning_rate": 4.69252592257487e-05, "loss": 1.2897, "step": 2171500 }, { "epoch": 1.3, "learning_rate": 4.692316346011925e-05, "loss": 1.3101, "step": 2172000 }, { "epoch": 1.3, "learning_rate": 4.6921063494558685e-05, "loss": 1.3044, "step": 2172500 }, { "epoch": 1.3, "learning_rate": 4.6918963528998125e-05, "loss": 1.3205, "step": 2173000 }, { "epoch": 1.3, "learning_rate": 4.691686356343756e-05, "loss": 1.3072, "step": 2173500 }, { "epoch": 1.3, "learning_rate": 4.691476359787699e-05, "loss": 1.3045, "step": 2174000 }, { "epoch": 1.3, "learning_rate": 4.6912667832247545e-05, "loss": 1.2946, "step": 2174500 }, { "epoch": 1.3, "learning_rate": 4.6910567866686986e-05, "loss": 1.3025, "step": 2175000 }, { "epoch": 1.3, "learning_rate": 4.6908472101057546e-05, "loss": 1.2733, "step": 2175500 }, { "epoch": 1.3, "learning_rate": 4.690637213549698e-05, "loss": 1.3078, "step": 2176000 }, { "epoch": 1.3, "learning_rate": 4.690427216993641e-05, "loss": 1.27, "step": 2176500 }, { "epoch": 1.31, "learning_rate": 4.6902172204375846e-05, "loss": 1.2954, "step": 2177000 }, { "epoch": 1.31, "learning_rate": 4.690007223881528e-05, "loss": 1.3015, "step": 2177500 }, { "epoch": 1.31, "learning_rate": 4.689797227325472e-05, "loss": 1.3106, "step": 2178000 }, { "epoch": 1.31, "learning_rate": 4.6895872307694153e-05, "loss": 1.3247, "step": 2178500 }, { "epoch": 1.31, "learning_rate": 4.689377234213359e-05, "loss": 1.3034, "step": 2179000 }, { "epoch": 1.31, "learning_rate": 4.689168077643527e-05, "loss": 1.2953, "step": 2179500 }, { "epoch": 1.31, "learning_rate": 4.68895808108747e-05, "loss": 1.3089, "step": 2180000 }, { "epoch": 1.31, "learning_rate": 4.688748084531414e-05, "loss": 1.2996, "step": 2180500 }, { "epoch": 1.31, "learning_rate": 4.688538087975357e-05, "loss": 1.3184, "step": 2181000 }, { "epoch": 1.31, "learning_rate": 4.6883280914193e-05, "loss": 1.3064, "step": 2181500 }, { "epoch": 1.31, "learning_rate": 4.688118094863244e-05, "loss": 1.2797, "step": 2182000 }, { "epoch": 1.31, "learning_rate": 4.6879080983071875e-05, "loss": 1.2958, "step": 2182500 }, { "epoch": 1.31, "learning_rate": 4.687698101751131e-05, "loss": 1.2903, "step": 2183000 }, { "epoch": 1.31, "learning_rate": 4.687488105195075e-05, "loss": 1.2999, "step": 2183500 }, { "epoch": 1.31, "learning_rate": 4.687278108639018e-05, "loss": 1.2913, "step": 2184000 }, { "epoch": 1.31, "learning_rate": 4.6870681120829616e-05, "loss": 1.2997, "step": 2184500 }, { "epoch": 1.31, "learning_rate": 4.6868581155269056e-05, "loss": 1.2939, "step": 2185000 }, { "epoch": 1.31, "learning_rate": 4.686648118970849e-05, "loss": 1.3071, "step": 2185500 }, { "epoch": 1.31, "learning_rate": 4.686438962401016e-05, "loss": 1.3262, "step": 2186000 }, { "epoch": 1.31, "learning_rate": 4.6862289658449596e-05, "loss": 1.3023, "step": 2186500 }, { "epoch": 1.31, "learning_rate": 4.686018969288904e-05, "loss": 1.2758, "step": 2187000 }, { "epoch": 1.31, "learning_rate": 4.685808972732847e-05, "loss": 1.3133, "step": 2187500 }, { "epoch": 1.31, "learning_rate": 4.6855989761767904e-05, "loss": 1.2848, "step": 2188000 }, { "epoch": 1.31, "learning_rate": 4.6853889796207344e-05, "loss": 1.3079, "step": 2188500 }, { "epoch": 1.31, "learning_rate": 4.685178983064678e-05, "loss": 1.2885, "step": 2189000 }, { "epoch": 1.31, "learning_rate": 4.684968986508621e-05, "loss": 1.3003, "step": 2189500 }, { "epoch": 1.31, "learning_rate": 4.684758989952565e-05, "loss": 1.2812, "step": 2190000 }, { "epoch": 1.31, "learning_rate": 4.6845489933965084e-05, "loss": 1.3098, "step": 2190500 }, { "epoch": 1.31, "learning_rate": 4.684338996840452e-05, "loss": 1.2912, "step": 2191000 }, { "epoch": 1.31, "learning_rate": 4.684129000284395e-05, "loss": 1.3051, "step": 2191500 }, { "epoch": 1.31, "learning_rate": 4.683919423721451e-05, "loss": 1.2818, "step": 2192000 }, { "epoch": 1.31, "learning_rate": 4.6837094271653945e-05, "loss": 1.2831, "step": 2192500 }, { "epoch": 1.31, "learning_rate": 4.683499430609338e-05, "loss": 1.3283, "step": 2193000 }, { "epoch": 1.32, "learning_rate": 4.683289434053281e-05, "loss": 1.2793, "step": 2193500 }, { "epoch": 1.32, "learning_rate": 4.6830794374972245e-05, "loss": 1.2845, "step": 2194000 }, { "epoch": 1.32, "learning_rate": 4.6828694409411686e-05, "loss": 1.2864, "step": 2194500 }, { "epoch": 1.32, "learning_rate": 4.682659444385112e-05, "loss": 1.3281, "step": 2195000 }, { "epoch": 1.32, "learning_rate": 4.682449447829055e-05, "loss": 1.2927, "step": 2195500 }, { "epoch": 1.32, "learning_rate": 4.6822398712661106e-05, "loss": 1.2917, "step": 2196000 }, { "epoch": 1.32, "learning_rate": 4.6820298747100546e-05, "loss": 1.2737, "step": 2196500 }, { "epoch": 1.32, "learning_rate": 4.681820298147111e-05, "loss": 1.2818, "step": 2197000 }, { "epoch": 1.32, "learning_rate": 4.681610301591054e-05, "loss": 1.2887, "step": 2197500 }, { "epoch": 1.32, "learning_rate": 4.6814003050349974e-05, "loss": 1.3135, "step": 2198000 }, { "epoch": 1.32, "learning_rate": 4.681190308478941e-05, "loss": 1.3057, "step": 2198500 }, { "epoch": 1.32, "learning_rate": 4.680980311922884e-05, "loss": 1.3412, "step": 2199000 }, { "epoch": 1.32, "learning_rate": 4.6807703153668274e-05, "loss": 1.31, "step": 2199500 }, { "epoch": 1.32, "learning_rate": 4.680560738803884e-05, "loss": 1.2881, "step": 2200000 }, { "epoch": 1.32, "eval_loss": 1.2527239322662354, "eval_runtime": 1100.921, "eval_samples_per_second": 478.436, "eval_steps_per_second": 79.74, "step": 2200000 }, { "epoch": 1.32, "learning_rate": 4.6803507422478275e-05, "loss": 1.2734, "step": 2200500 }, { "epoch": 1.32, "learning_rate": 4.68014074569177e-05, "loss": 1.3167, "step": 2201000 }, { "epoch": 1.32, "learning_rate": 4.679930749135714e-05, "loss": 1.295, "step": 2201500 }, { "epoch": 1.32, "learning_rate": 4.6797207525796575e-05, "loss": 1.2774, "step": 2202000 }, { "epoch": 1.32, "learning_rate": 4.6795111760167135e-05, "loss": 1.3323, "step": 2202500 }, { "epoch": 1.32, "learning_rate": 4.679301179460656e-05, "loss": 1.2898, "step": 2203000 }, { "epoch": 1.32, "learning_rate": 4.6790911829046e-05, "loss": 1.301, "step": 2203500 }, { "epoch": 1.32, "learning_rate": 4.6788811863485436e-05, "loss": 1.3103, "step": 2204000 }, { "epoch": 1.32, "learning_rate": 4.678671189792487e-05, "loss": 1.2631, "step": 2204500 }, { "epoch": 1.32, "learning_rate": 4.678461193236431e-05, "loss": 1.2943, "step": 2205000 }, { "epoch": 1.32, "learning_rate": 4.678251616673486e-05, "loss": 1.2944, "step": 2205500 }, { "epoch": 1.32, "learning_rate": 4.6780420401105423e-05, "loss": 1.2924, "step": 2206000 }, { "epoch": 1.32, "learning_rate": 4.677832043554486e-05, "loss": 1.2832, "step": 2206500 }, { "epoch": 1.32, "learning_rate": 4.67762204699843e-05, "loss": 1.3144, "step": 2207000 }, { "epoch": 1.32, "learning_rate": 4.677412050442373e-05, "loss": 1.3082, "step": 2207500 }, { "epoch": 1.32, "learning_rate": 4.677202053886316e-05, "loss": 1.3081, "step": 2208000 }, { "epoch": 1.32, "learning_rate": 4.67699205733026e-05, "loss": 1.302, "step": 2208500 }, { "epoch": 1.32, "learning_rate": 4.676782060774203e-05, "loss": 1.314, "step": 2209000 }, { "epoch": 1.32, "learning_rate": 4.6765720642181464e-05, "loss": 1.2998, "step": 2209500 }, { "epoch": 1.32, "learning_rate": 4.6763624876552025e-05, "loss": 1.294, "step": 2210000 }, { "epoch": 1.33, "learning_rate": 4.676152911092258e-05, "loss": 1.3207, "step": 2210500 }, { "epoch": 1.33, "learning_rate": 4.675942914536202e-05, "loss": 1.2986, "step": 2211000 }, { "epoch": 1.33, "learning_rate": 4.675732917980145e-05, "loss": 1.306, "step": 2211500 }, { "epoch": 1.33, "learning_rate": 4.6755229214240885e-05, "loss": 1.2873, "step": 2212000 }, { "epoch": 1.33, "learning_rate": 4.6753129248680326e-05, "loss": 1.276, "step": 2212500 }, { "epoch": 1.33, "learning_rate": 4.675102928311975e-05, "loss": 1.3286, "step": 2213000 }, { "epoch": 1.33, "learning_rate": 4.674892931755919e-05, "loss": 1.3012, "step": 2213500 }, { "epoch": 1.33, "learning_rate": 4.674683355192975e-05, "loss": 1.2965, "step": 2214000 }, { "epoch": 1.33, "learning_rate": 4.6744733586369186e-05, "loss": 1.2722, "step": 2214500 }, { "epoch": 1.33, "learning_rate": 4.674263362080861e-05, "loss": 1.2788, "step": 2215000 }, { "epoch": 1.33, "learning_rate": 4.674053365524805e-05, "loss": 1.2766, "step": 2215500 }, { "epoch": 1.33, "learning_rate": 4.673843368968749e-05, "loss": 1.3094, "step": 2216000 }, { "epoch": 1.33, "learning_rate": 4.673633372412692e-05, "loss": 1.292, "step": 2216500 }, { "epoch": 1.33, "learning_rate": 4.673423375856636e-05, "loss": 1.2821, "step": 2217000 }, { "epoch": 1.33, "learning_rate": 4.6732133793005794e-05, "loss": 1.3001, "step": 2217500 }, { "epoch": 1.33, "learning_rate": 4.673003382744523e-05, "loss": 1.307, "step": 2218000 }, { "epoch": 1.33, "learning_rate": 4.672793806181578e-05, "loss": 1.3112, "step": 2218500 }, { "epoch": 1.33, "learning_rate": 4.672583809625522e-05, "loss": 1.3469, "step": 2219000 }, { "epoch": 1.33, "learning_rate": 4.6723738130694655e-05, "loss": 1.3156, "step": 2219500 }, { "epoch": 1.33, "learning_rate": 4.672163816513409e-05, "loss": 1.2869, "step": 2220000 }, { "epoch": 1.33, "learning_rate": 4.671954239950465e-05, "loss": 1.301, "step": 2220500 }, { "epoch": 1.33, "learning_rate": 4.671744243394408e-05, "loss": 1.2725, "step": 2221000 }, { "epoch": 1.33, "learning_rate": 4.6715342468383515e-05, "loss": 1.2962, "step": 2221500 }, { "epoch": 1.33, "learning_rate": 4.6713246702754076e-05, "loss": 1.2724, "step": 2222000 }, { "epoch": 1.33, "learning_rate": 4.671114673719351e-05, "loss": 1.3134, "step": 2222500 }, { "epoch": 1.33, "learning_rate": 4.670904677163294e-05, "loss": 1.3162, "step": 2223000 }, { "epoch": 1.33, "learning_rate": 4.6706946806072376e-05, "loss": 1.2715, "step": 2223500 }, { "epoch": 1.33, "learning_rate": 4.6704846840511816e-05, "loss": 1.2806, "step": 2224000 }, { "epoch": 1.33, "learning_rate": 4.670274687495125e-05, "loss": 1.2666, "step": 2224500 }, { "epoch": 1.33, "learning_rate": 4.670064690939068e-05, "loss": 1.3119, "step": 2225000 }, { "epoch": 1.33, "learning_rate": 4.6698546943830124e-05, "loss": 1.2865, "step": 2225500 }, { "epoch": 1.33, "learning_rate": 4.669645117820068e-05, "loss": 1.3324, "step": 2226000 }, { "epoch": 1.33, "learning_rate": 4.669435121264011e-05, "loss": 1.2923, "step": 2226500 }, { "epoch": 1.34, "learning_rate": 4.6692251247079544e-05, "loss": 1.3143, "step": 2227000 }, { "epoch": 1.34, "learning_rate": 4.6690151281518984e-05, "loss": 1.2843, "step": 2227500 }, { "epoch": 1.34, "learning_rate": 4.668805131595842e-05, "loss": 1.3038, "step": 2228000 }, { "epoch": 1.34, "learning_rate": 4.668595135039785e-05, "loss": 1.322, "step": 2228500 }, { "epoch": 1.34, "learning_rate": 4.668385138483729e-05, "loss": 1.286, "step": 2229000 }, { "epoch": 1.34, "learning_rate": 4.6681751419276725e-05, "loss": 1.3026, "step": 2229500 }, { "epoch": 1.34, "learning_rate": 4.667965565364728e-05, "loss": 1.3323, "step": 2230000 }, { "epoch": 1.34, "learning_rate": 4.667755568808672e-05, "loss": 1.3133, "step": 2230500 }, { "epoch": 1.34, "learning_rate": 4.667545572252615e-05, "loss": 1.3191, "step": 2231000 }, { "epoch": 1.34, "learning_rate": 4.667336415682783e-05, "loss": 1.2958, "step": 2231500 }, { "epoch": 1.34, "learning_rate": 4.667126419126726e-05, "loss": 1.3135, "step": 2232000 }, { "epoch": 1.34, "learning_rate": 4.666916422570669e-05, "loss": 1.2853, "step": 2232500 }, { "epoch": 1.34, "learning_rate": 4.666706426014613e-05, "loss": 1.2745, "step": 2233000 }, { "epoch": 1.34, "learning_rate": 4.6664964294585566e-05, "loss": 1.2897, "step": 2233500 }, { "epoch": 1.34, "learning_rate": 4.6662864329025e-05, "loss": 1.296, "step": 2234000 }, { "epoch": 1.34, "learning_rate": 4.666076436346444e-05, "loss": 1.3027, "step": 2234500 }, { "epoch": 1.34, "learning_rate": 4.6658664397903874e-05, "loss": 1.319, "step": 2235000 }, { "epoch": 1.34, "learning_rate": 4.665656443234331e-05, "loss": 1.2971, "step": 2235500 }, { "epoch": 1.34, "learning_rate": 4.665446446678275e-05, "loss": 1.2938, "step": 2236000 }, { "epoch": 1.34, "learning_rate": 4.665236450122218e-05, "loss": 1.3074, "step": 2236500 }, { "epoch": 1.34, "learning_rate": 4.6650268735592734e-05, "loss": 1.2965, "step": 2237000 }, { "epoch": 1.34, "learning_rate": 4.6648168770032175e-05, "loss": 1.2943, "step": 2237500 }, { "epoch": 1.34, "learning_rate": 4.664606880447161e-05, "loss": 1.2951, "step": 2238000 }, { "epoch": 1.34, "learning_rate": 4.664396883891104e-05, "loss": 1.2736, "step": 2238500 }, { "epoch": 1.34, "learning_rate": 4.6641873073281595e-05, "loss": 1.3118, "step": 2239000 }, { "epoch": 1.34, "learning_rate": 4.6639773107721035e-05, "loss": 1.299, "step": 2239500 }, { "epoch": 1.34, "learning_rate": 4.663767734209159e-05, "loss": 1.3115, "step": 2240000 }, { "epoch": 1.34, "learning_rate": 4.663557737653102e-05, "loss": 1.2997, "step": 2240500 }, { "epoch": 1.34, "learning_rate": 4.6633477410970456e-05, "loss": 1.3099, "step": 2241000 }, { "epoch": 1.34, "learning_rate": 4.6631377445409896e-05, "loss": 1.2724, "step": 2241500 }, { "epoch": 1.34, "learning_rate": 4.662927747984933e-05, "loss": 1.3063, "step": 2242000 }, { "epoch": 1.34, "learning_rate": 4.662717751428876e-05, "loss": 1.2978, "step": 2242500 }, { "epoch": 1.34, "learning_rate": 4.66250775487282e-05, "loss": 1.2564, "step": 2243000 }, { "epoch": 1.35, "learning_rate": 4.662297758316764e-05, "loss": 1.3206, "step": 2243500 }, { "epoch": 1.35, "learning_rate": 4.662088181753819e-05, "loss": 1.2842, "step": 2244000 }, { "epoch": 1.35, "learning_rate": 4.661878185197763e-05, "loss": 1.3035, "step": 2244500 }, { "epoch": 1.35, "learning_rate": 4.6616681886417064e-05, "loss": 1.3145, "step": 2245000 }, { "epoch": 1.35, "learning_rate": 4.66145819208565e-05, "loss": 1.3327, "step": 2245500 }, { "epoch": 1.35, "learning_rate": 4.661248195529594e-05, "loss": 1.2972, "step": 2246000 }, { "epoch": 1.35, "learning_rate": 4.661038198973537e-05, "loss": 1.3036, "step": 2246500 }, { "epoch": 1.35, "learning_rate": 4.66082820241748e-05, "loss": 1.2864, "step": 2247000 }, { "epoch": 1.35, "learning_rate": 4.660618205861424e-05, "loss": 1.3089, "step": 2247500 }, { "epoch": 1.35, "learning_rate": 4.66040862929848e-05, "loss": 1.2754, "step": 2248000 }, { "epoch": 1.35, "learning_rate": 4.660198632742423e-05, "loss": 1.3042, "step": 2248500 }, { "epoch": 1.35, "learning_rate": 4.6599886361863665e-05, "loss": 1.292, "step": 2249000 }, { "epoch": 1.35, "learning_rate": 4.65977863963031e-05, "loss": 1.2758, "step": 2249500 }, { "epoch": 1.35, "learning_rate": 4.659568643074253e-05, "loss": 1.3118, "step": 2250000 }, { "epoch": 1.35, "learning_rate": 4.6593586465181966e-05, "loss": 1.3109, "step": 2250500 }, { "epoch": 1.35, "learning_rate": 4.6591486499621406e-05, "loss": 1.3248, "step": 2251000 }, { "epoch": 1.35, "learning_rate": 4.658939073399196e-05, "loss": 1.2969, "step": 2251500 }, { "epoch": 1.35, "learning_rate": 4.658729076843139e-05, "loss": 1.2684, "step": 2252000 }, { "epoch": 1.35, "learning_rate": 4.658519500280195e-05, "loss": 1.3061, "step": 2252500 }, { "epoch": 1.35, "learning_rate": 4.6583095037241393e-05, "loss": 1.3306, "step": 2253000 }, { "epoch": 1.35, "learning_rate": 4.658099507168083e-05, "loss": 1.2987, "step": 2253500 }, { "epoch": 1.35, "learning_rate": 4.6578895106120254e-05, "loss": 1.3017, "step": 2254000 }, { "epoch": 1.35, "learning_rate": 4.6576795140559694e-05, "loss": 1.2799, "step": 2254500 }, { "epoch": 1.35, "learning_rate": 4.657469517499913e-05, "loss": 1.2884, "step": 2255000 }, { "epoch": 1.35, "learning_rate": 4.657259520943856e-05, "loss": 1.3019, "step": 2255500 }, { "epoch": 1.35, "learning_rate": 4.657049944380912e-05, "loss": 1.2894, "step": 2256000 }, { "epoch": 1.35, "learning_rate": 4.6568399478248555e-05, "loss": 1.2969, "step": 2256500 }, { "epoch": 1.35, "learning_rate": 4.656629951268799e-05, "loss": 1.2884, "step": 2257000 }, { "epoch": 1.35, "learning_rate": 4.656419954712742e-05, "loss": 1.2966, "step": 2257500 }, { "epoch": 1.35, "learning_rate": 4.656209958156686e-05, "loss": 1.2782, "step": 2258000 }, { "epoch": 1.35, "learning_rate": 4.6559999616006295e-05, "loss": 1.3302, "step": 2258500 }, { "epoch": 1.35, "learning_rate": 4.6557899650445735e-05, "loss": 1.2937, "step": 2259000 }, { "epoch": 1.35, "learning_rate": 4.655579968488517e-05, "loss": 1.2745, "step": 2259500 }, { "epoch": 1.35, "learning_rate": 4.65536997193246e-05, "loss": 1.3018, "step": 2260000 }, { "epoch": 1.36, "learning_rate": 4.6551603953695156e-05, "loss": 1.3177, "step": 2260500 }, { "epoch": 1.36, "learning_rate": 4.6549503988134596e-05, "loss": 1.2754, "step": 2261000 }, { "epoch": 1.36, "learning_rate": 4.654740402257403e-05, "loss": 1.3223, "step": 2261500 }, { "epoch": 1.36, "learning_rate": 4.654530405701346e-05, "loss": 1.2784, "step": 2262000 }, { "epoch": 1.36, "learning_rate": 4.65432040914529e-05, "loss": 1.2722, "step": 2262500 }, { "epoch": 1.36, "learning_rate": 4.654110412589234e-05, "loss": 1.3009, "step": 2263000 }, { "epoch": 1.36, "learning_rate": 4.653900836026289e-05, "loss": 1.3102, "step": 2263500 }, { "epoch": 1.36, "learning_rate": 4.6536908394702324e-05, "loss": 1.3325, "step": 2264000 }, { "epoch": 1.36, "learning_rate": 4.6534808429141764e-05, "loss": 1.2946, "step": 2264500 }, { "epoch": 1.36, "learning_rate": 4.65327084635812e-05, "loss": 1.284, "step": 2265000 }, { "epoch": 1.36, "learning_rate": 4.653060849802063e-05, "loss": 1.3078, "step": 2265500 }, { "epoch": 1.36, "learning_rate": 4.652850853246007e-05, "loss": 1.2931, "step": 2266000 }, { "epoch": 1.36, "learning_rate": 4.65264085668995e-05, "loss": 1.2773, "step": 2266500 }, { "epoch": 1.36, "learning_rate": 4.652430860133894e-05, "loss": 1.2688, "step": 2267000 }, { "epoch": 1.36, "learning_rate": 4.65222128357095e-05, "loss": 1.3088, "step": 2267500 }, { "epoch": 1.36, "learning_rate": 4.652011287014893e-05, "loss": 1.2823, "step": 2268000 }, { "epoch": 1.36, "learning_rate": 4.6518017104519486e-05, "loss": 1.2822, "step": 2268500 }, { "epoch": 1.36, "learning_rate": 4.651591713895892e-05, "loss": 1.302, "step": 2269000 }, { "epoch": 1.36, "learning_rate": 4.651381717339836e-05, "loss": 1.3335, "step": 2269500 }, { "epoch": 1.36, "learning_rate": 4.651171720783779e-05, "loss": 1.2964, "step": 2270000 }, { "epoch": 1.36, "learning_rate": 4.6509621442208346e-05, "loss": 1.3176, "step": 2270500 }, { "epoch": 1.36, "learning_rate": 4.650752147664778e-05, "loss": 1.3005, "step": 2271000 }, { "epoch": 1.36, "learning_rate": 4.650542151108722e-05, "loss": 1.2932, "step": 2271500 }, { "epoch": 1.36, "learning_rate": 4.650332154552665e-05, "loss": 1.2737, "step": 2272000 }, { "epoch": 1.36, "learning_rate": 4.650122157996609e-05, "loss": 1.2979, "step": 2272500 }, { "epoch": 1.36, "learning_rate": 4.649912161440553e-05, "loss": 1.3064, "step": 2273000 }, { "epoch": 1.36, "learning_rate": 4.649702164884496e-05, "loss": 1.3267, "step": 2273500 }, { "epoch": 1.36, "learning_rate": 4.6494921683284394e-05, "loss": 1.3039, "step": 2274000 }, { "epoch": 1.36, "learning_rate": 4.6492825917654954e-05, "loss": 1.2954, "step": 2274500 }, { "epoch": 1.36, "learning_rate": 4.649072595209439e-05, "loss": 1.2922, "step": 2275000 }, { "epoch": 1.36, "learning_rate": 4.648863018646494e-05, "loss": 1.3065, "step": 2275500 }, { "epoch": 1.36, "learning_rate": 4.6486534420835495e-05, "loss": 1.3031, "step": 2276000 }, { "epoch": 1.36, "learning_rate": 4.648443445527493e-05, "loss": 1.2907, "step": 2276500 }, { "epoch": 1.37, "learning_rate": 4.648233448971437e-05, "loss": 1.2877, "step": 2277000 }, { "epoch": 1.37, "learning_rate": 4.64802345241538e-05, "loss": 1.2736, "step": 2277500 }, { "epoch": 1.37, "learning_rate": 4.6478134558593236e-05, "loss": 1.2866, "step": 2278000 }, { "epoch": 1.37, "learning_rate": 4.6476034593032676e-05, "loss": 1.2804, "step": 2278500 }, { "epoch": 1.37, "learning_rate": 4.647393462747211e-05, "loss": 1.3138, "step": 2279000 }, { "epoch": 1.37, "learning_rate": 4.647183466191154e-05, "loss": 1.2649, "step": 2279500 }, { "epoch": 1.37, "learning_rate": 4.646973469635098e-05, "loss": 1.3112, "step": 2280000 }, { "epoch": 1.37, "learning_rate": 4.6467634730790416e-05, "loss": 1.3056, "step": 2280500 }, { "epoch": 1.37, "learning_rate": 4.646553476522985e-05, "loss": 1.3003, "step": 2281000 }, { "epoch": 1.37, "learning_rate": 4.646343479966928e-05, "loss": 1.2893, "step": 2281500 }, { "epoch": 1.37, "learning_rate": 4.6461339034039844e-05, "loss": 1.2977, "step": 2282000 }, { "epoch": 1.37, "learning_rate": 4.64592432684104e-05, "loss": 1.2905, "step": 2282500 }, { "epoch": 1.37, "learning_rate": 4.645714330284983e-05, "loss": 1.3007, "step": 2283000 }, { "epoch": 1.37, "learning_rate": 4.6455047537220384e-05, "loss": 1.2901, "step": 2283500 }, { "epoch": 1.37, "learning_rate": 4.6452947571659825e-05, "loss": 1.2815, "step": 2284000 }, { "epoch": 1.37, "learning_rate": 4.645084760609926e-05, "loss": 1.3198, "step": 2284500 }, { "epoch": 1.37, "learning_rate": 4.644874764053869e-05, "loss": 1.2933, "step": 2285000 }, { "epoch": 1.37, "learning_rate": 4.644664767497813e-05, "loss": 1.2763, "step": 2285500 }, { "epoch": 1.37, "learning_rate": 4.6444547709417565e-05, "loss": 1.2977, "step": 2286000 }, { "epoch": 1.37, "learning_rate": 4.644245194378812e-05, "loss": 1.3025, "step": 2286500 }, { "epoch": 1.37, "learning_rate": 4.644035197822756e-05, "loss": 1.2982, "step": 2287000 }, { "epoch": 1.37, "learning_rate": 4.643825201266699e-05, "loss": 1.2912, "step": 2287500 }, { "epoch": 1.37, "learning_rate": 4.6436152047106426e-05, "loss": 1.2818, "step": 2288000 }, { "epoch": 1.37, "learning_rate": 4.6434052081545866e-05, "loss": 1.3461, "step": 2288500 }, { "epoch": 1.37, "learning_rate": 4.64319521159853e-05, "loss": 1.2826, "step": 2289000 }, { "epoch": 1.37, "learning_rate": 4.642985215042473e-05, "loss": 1.3084, "step": 2289500 }, { "epoch": 1.37, "learning_rate": 4.642775218486417e-05, "loss": 1.2781, "step": 2290000 }, { "epoch": 1.37, "learning_rate": 4.64256522193036e-05, "loss": 1.2973, "step": 2290500 }, { "epoch": 1.37, "learning_rate": 4.642355645367416e-05, "loss": 1.2925, "step": 2291000 }, { "epoch": 1.37, "learning_rate": 4.6421456488113594e-05, "loss": 1.2886, "step": 2291500 }, { "epoch": 1.37, "learning_rate": 4.641936072248415e-05, "loss": 1.2997, "step": 2292000 }, { "epoch": 1.37, "learning_rate": 4.641726075692359e-05, "loss": 1.2762, "step": 2292500 }, { "epoch": 1.37, "learning_rate": 4.641516079136302e-05, "loss": 1.28, "step": 2293000 }, { "epoch": 1.38, "learning_rate": 4.6413060825802454e-05, "loss": 1.3279, "step": 2293500 }, { "epoch": 1.38, "learning_rate": 4.6410965060173015e-05, "loss": 1.2638, "step": 2294000 }, { "epoch": 1.38, "learning_rate": 4.640886509461245e-05, "loss": 1.2722, "step": 2294500 }, { "epoch": 1.38, "learning_rate": 4.640676512905188e-05, "loss": 1.3138, "step": 2295000 }, { "epoch": 1.38, "learning_rate": 4.640466516349132e-05, "loss": 1.2903, "step": 2295500 }, { "epoch": 1.38, "learning_rate": 4.6402565197930755e-05, "loss": 1.2959, "step": 2296000 }, { "epoch": 1.38, "learning_rate": 4.640046523237019e-05, "loss": 1.2991, "step": 2296500 }, { "epoch": 1.38, "learning_rate": 4.639836526680963e-05, "loss": 1.2732, "step": 2297000 }, { "epoch": 1.38, "learning_rate": 4.6396265301249056e-05, "loss": 1.3255, "step": 2297500 }, { "epoch": 1.38, "learning_rate": 4.639416533568849e-05, "loss": 1.3055, "step": 2298000 }, { "epoch": 1.38, "learning_rate": 4.639206537012793e-05, "loss": 1.3088, "step": 2298500 }, { "epoch": 1.38, "learning_rate": 4.638996540456736e-05, "loss": 1.2932, "step": 2299000 }, { "epoch": 1.38, "learning_rate": 4.6387865439006796e-05, "loss": 1.2821, "step": 2299500 }, { "epoch": 1.38, "learning_rate": 4.638576547344624e-05, "loss": 1.288, "step": 2300000 }, { "epoch": 1.38, "eval_loss": 1.2455518245697021, "eval_runtime": 1106.2825, "eval_samples_per_second": 476.117, "eval_steps_per_second": 79.353, "step": 2300000 }, { "epoch": 1.38, "learning_rate": 4.638366970781679e-05, "loss": 1.2798, "step": 2300500 }, { "epoch": 1.38, "learning_rate": 4.6381569742256224e-05, "loss": 1.3179, "step": 2301000 }, { "epoch": 1.38, "learning_rate": 4.637946977669566e-05, "loss": 1.3051, "step": 2301500 }, { "epoch": 1.38, "learning_rate": 4.63773698111351e-05, "loss": 1.2896, "step": 2302000 }, { "epoch": 1.38, "learning_rate": 4.637526984557453e-05, "loss": 1.2863, "step": 2302500 }, { "epoch": 1.38, "learning_rate": 4.6373169880013964e-05, "loss": 1.3223, "step": 2303000 }, { "epoch": 1.38, "learning_rate": 4.6371069914453405e-05, "loss": 1.3026, "step": 2303500 }, { "epoch": 1.38, "learning_rate": 4.636897414882396e-05, "loss": 1.2974, "step": 2304000 }, { "epoch": 1.38, "learning_rate": 4.636687418326339e-05, "loss": 1.302, "step": 2304500 }, { "epoch": 1.38, "learning_rate": 4.636477421770283e-05, "loss": 1.2879, "step": 2305000 }, { "epoch": 1.38, "learning_rate": 4.6362674252142265e-05, "loss": 1.2889, "step": 2305500 }, { "epoch": 1.38, "learning_rate": 4.636057848651282e-05, "loss": 1.2862, "step": 2306000 }, { "epoch": 1.38, "learning_rate": 4.635847852095225e-05, "loss": 1.2743, "step": 2306500 }, { "epoch": 1.38, "learning_rate": 4.635637855539169e-05, "loss": 1.2755, "step": 2307000 }, { "epoch": 1.38, "learning_rate": 4.6354278589831126e-05, "loss": 1.2925, "step": 2307500 }, { "epoch": 1.38, "learning_rate": 4.635217862427056e-05, "loss": 1.2772, "step": 2308000 }, { "epoch": 1.38, "learning_rate": 4.635007865871e-05, "loss": 1.2943, "step": 2308500 }, { "epoch": 1.38, "learning_rate": 4.634797869314943e-05, "loss": 1.3097, "step": 2309000 }, { "epoch": 1.38, "learning_rate": 4.634587872758887e-05, "loss": 1.2934, "step": 2309500 }, { "epoch": 1.38, "learning_rate": 4.634377876202831e-05, "loss": 1.2788, "step": 2310000 }, { "epoch": 1.39, "learning_rate": 4.634168299639886e-05, "loss": 1.304, "step": 2310500 }, { "epoch": 1.39, "learning_rate": 4.6339587230769414e-05, "loss": 1.2732, "step": 2311000 }, { "epoch": 1.39, "learning_rate": 4.633748726520885e-05, "loss": 1.3261, "step": 2311500 }, { "epoch": 1.39, "learning_rate": 4.633538729964829e-05, "loss": 1.3063, "step": 2312000 }, { "epoch": 1.39, "learning_rate": 4.633328733408772e-05, "loss": 1.3185, "step": 2312500 }, { "epoch": 1.39, "learning_rate": 4.6331187368527155e-05, "loss": 1.2942, "step": 2313000 }, { "epoch": 1.39, "learning_rate": 4.6329087402966595e-05, "loss": 1.2738, "step": 2313500 }, { "epoch": 1.39, "learning_rate": 4.632698743740603e-05, "loss": 1.3169, "step": 2314000 }, { "epoch": 1.39, "learning_rate": 4.632488747184546e-05, "loss": 1.2872, "step": 2314500 }, { "epoch": 1.39, "learning_rate": 4.6322791706216015e-05, "loss": 1.3158, "step": 2315000 }, { "epoch": 1.39, "learning_rate": 4.6320691740655456e-05, "loss": 1.2949, "step": 2315500 }, { "epoch": 1.39, "learning_rate": 4.631859177509489e-05, "loss": 1.2925, "step": 2316000 }, { "epoch": 1.39, "learning_rate": 4.631649180953432e-05, "loss": 1.3328, "step": 2316500 }, { "epoch": 1.39, "learning_rate": 4.631439184397376e-05, "loss": 1.296, "step": 2317000 }, { "epoch": 1.39, "learning_rate": 4.631229187841319e-05, "loss": 1.2819, "step": 2317500 }, { "epoch": 1.39, "learning_rate": 4.631019191285262e-05, "loss": 1.276, "step": 2318000 }, { "epoch": 1.39, "learning_rate": 4.630809194729206e-05, "loss": 1.2908, "step": 2318500 }, { "epoch": 1.39, "learning_rate": 4.6305996181662623e-05, "loss": 1.3047, "step": 2319000 }, { "epoch": 1.39, "learning_rate": 4.630389621610206e-05, "loss": 1.2927, "step": 2319500 }, { "epoch": 1.39, "learning_rate": 4.630180045047261e-05, "loss": 1.2969, "step": 2320000 }, { "epoch": 1.39, "learning_rate": 4.629970048491205e-05, "loss": 1.3405, "step": 2320500 }, { "epoch": 1.39, "learning_rate": 4.6297600519351484e-05, "loss": 1.3179, "step": 2321000 }, { "epoch": 1.39, "learning_rate": 4.629550055379092e-05, "loss": 1.3056, "step": 2321500 }, { "epoch": 1.39, "learning_rate": 4.629340058823036e-05, "loss": 1.2859, "step": 2322000 }, { "epoch": 1.39, "learning_rate": 4.6291300622669785e-05, "loss": 1.2527, "step": 2322500 }, { "epoch": 1.39, "learning_rate": 4.628920065710922e-05, "loss": 1.2885, "step": 2323000 }, { "epoch": 1.39, "learning_rate": 4.628710069154866e-05, "loss": 1.2838, "step": 2323500 }, { "epoch": 1.39, "learning_rate": 4.628500072598809e-05, "loss": 1.3033, "step": 2324000 }, { "epoch": 1.39, "learning_rate": 4.6282900760427525e-05, "loss": 1.2698, "step": 2324500 }, { "epoch": 1.39, "learning_rate": 4.6280800794866965e-05, "loss": 1.3107, "step": 2325000 }, { "epoch": 1.39, "learning_rate": 4.62787008293064e-05, "loss": 1.2818, "step": 2325500 }, { "epoch": 1.39, "learning_rate": 4.627660506367695e-05, "loss": 1.2776, "step": 2326000 }, { "epoch": 1.39, "learning_rate": 4.627450509811639e-05, "loss": 1.3122, "step": 2326500 }, { "epoch": 1.4, "learning_rate": 4.6272405132555826e-05, "loss": 1.2634, "step": 2327000 }, { "epoch": 1.4, "learning_rate": 4.627030516699526e-05, "loss": 1.2735, "step": 2327500 }, { "epoch": 1.4, "learning_rate": 4.626820940136581e-05, "loss": 1.3148, "step": 2328000 }, { "epoch": 1.4, "learning_rate": 4.6266109435805253e-05, "loss": 1.3328, "step": 2328500 }, { "epoch": 1.4, "learning_rate": 4.626400947024469e-05, "loss": 1.2974, "step": 2329000 }, { "epoch": 1.4, "learning_rate": 4.626190950468412e-05, "loss": 1.2861, "step": 2329500 }, { "epoch": 1.4, "learning_rate": 4.625980953912356e-05, "loss": 1.3123, "step": 2330000 }, { "epoch": 1.4, "learning_rate": 4.6257709573562994e-05, "loss": 1.2748, "step": 2330500 }, { "epoch": 1.4, "learning_rate": 4.625560960800243e-05, "loss": 1.2806, "step": 2331000 }, { "epoch": 1.4, "learning_rate": 4.625350964244187e-05, "loss": 1.3202, "step": 2331500 }, { "epoch": 1.4, "learning_rate": 4.625141387681242e-05, "loss": 1.2773, "step": 2332000 }, { "epoch": 1.4, "learning_rate": 4.6249313911251855e-05, "loss": 1.3239, "step": 2332500 }, { "epoch": 1.4, "learning_rate": 4.624721394569129e-05, "loss": 1.29, "step": 2333000 }, { "epoch": 1.4, "learning_rate": 4.624511818006185e-05, "loss": 1.3223, "step": 2333500 }, { "epoch": 1.4, "learning_rate": 4.624301821450128e-05, "loss": 1.3051, "step": 2334000 }, { "epoch": 1.4, "learning_rate": 4.6240918248940715e-05, "loss": 1.306, "step": 2334500 }, { "epoch": 1.4, "learning_rate": 4.6238818283380156e-05, "loss": 1.3051, "step": 2335000 }, { "epoch": 1.4, "learning_rate": 4.623671831781959e-05, "loss": 1.2673, "step": 2335500 }, { "epoch": 1.4, "learning_rate": 4.623461835225902e-05, "loss": 1.3023, "step": 2336000 }, { "epoch": 1.4, "learning_rate": 4.623251838669846e-05, "loss": 1.2617, "step": 2336500 }, { "epoch": 1.4, "learning_rate": 4.6230418421137896e-05, "loss": 1.285, "step": 2337000 }, { "epoch": 1.4, "learning_rate": 4.622831845557732e-05, "loss": 1.2997, "step": 2337500 }, { "epoch": 1.4, "learning_rate": 4.622621849001676e-05, "loss": 1.2867, "step": 2338000 }, { "epoch": 1.4, "learning_rate": 4.6224122724387324e-05, "loss": 1.3164, "step": 2338500 }, { "epoch": 1.4, "learning_rate": 4.622202695875788e-05, "loss": 1.2961, "step": 2339000 }, { "epoch": 1.4, "learning_rate": 4.621992699319731e-05, "loss": 1.323, "step": 2339500 }, { "epoch": 1.4, "learning_rate": 4.6217827027636744e-05, "loss": 1.3008, "step": 2340000 }, { "epoch": 1.4, "learning_rate": 4.6215731262007304e-05, "loss": 1.3047, "step": 2340500 }, { "epoch": 1.4, "learning_rate": 4.621363129644674e-05, "loss": 1.2989, "step": 2341000 }, { "epoch": 1.4, "learning_rate": 4.621153133088617e-05, "loss": 1.3063, "step": 2341500 }, { "epoch": 1.4, "learning_rate": 4.620943136532561e-05, "loss": 1.3167, "step": 2342000 }, { "epoch": 1.4, "learning_rate": 4.6207331399765045e-05, "loss": 1.3272, "step": 2342500 }, { "epoch": 1.4, "learning_rate": 4.620523143420448e-05, "loss": 1.3168, "step": 2343000 }, { "epoch": 1.41, "learning_rate": 4.620313566857503e-05, "loss": 1.308, "step": 2343500 }, { "epoch": 1.41, "learning_rate": 4.620103570301447e-05, "loss": 1.3284, "step": 2344000 }, { "epoch": 1.41, "learning_rate": 4.6198935737453906e-05, "loss": 1.3037, "step": 2344500 }, { "epoch": 1.41, "learning_rate": 4.619683997182446e-05, "loss": 1.2934, "step": 2345000 }, { "epoch": 1.41, "learning_rate": 4.619474000626389e-05, "loss": 1.2741, "step": 2345500 }, { "epoch": 1.41, "learning_rate": 4.619264004070333e-05, "loss": 1.2789, "step": 2346000 }, { "epoch": 1.41, "learning_rate": 4.6190540075142767e-05, "loss": 1.3141, "step": 2346500 }, { "epoch": 1.41, "learning_rate": 4.61884401095822e-05, "loss": 1.3117, "step": 2347000 }, { "epoch": 1.41, "learning_rate": 4.618634014402164e-05, "loss": 1.2914, "step": 2347500 }, { "epoch": 1.41, "learning_rate": 4.6184240178461074e-05, "loss": 1.2901, "step": 2348000 }, { "epoch": 1.41, "learning_rate": 4.618214021290051e-05, "loss": 1.2943, "step": 2348500 }, { "epoch": 1.41, "learning_rate": 4.618004024733995e-05, "loss": 1.2785, "step": 2349000 }, { "epoch": 1.41, "learning_rate": 4.6177940281779374e-05, "loss": 1.2823, "step": 2349500 }, { "epoch": 1.41, "learning_rate": 4.6175840316218814e-05, "loss": 1.2861, "step": 2350000 }, { "epoch": 1.41, "learning_rate": 4.617374035065825e-05, "loss": 1.2859, "step": 2350500 }, { "epoch": 1.41, "learning_rate": 4.617164038509768e-05, "loss": 1.3077, "step": 2351000 }, { "epoch": 1.41, "learning_rate": 4.616954041953712e-05, "loss": 1.2524, "step": 2351500 }, { "epoch": 1.41, "learning_rate": 4.6167440453976555e-05, "loss": 1.2938, "step": 2352000 }, { "epoch": 1.41, "learning_rate": 4.616534048841599e-05, "loss": 1.3115, "step": 2352500 }, { "epoch": 1.41, "learning_rate": 4.616324052285543e-05, "loss": 1.3106, "step": 2353000 }, { "epoch": 1.41, "learning_rate": 4.616114475722598e-05, "loss": 1.3199, "step": 2353500 }, { "epoch": 1.41, "learning_rate": 4.6159044791665416e-05, "loss": 1.2777, "step": 2354000 }, { "epoch": 1.41, "learning_rate": 4.615694482610485e-05, "loss": 1.2608, "step": 2354500 }, { "epoch": 1.41, "learning_rate": 4.615484486054429e-05, "loss": 1.3005, "step": 2355000 }, { "epoch": 1.41, "learning_rate": 4.615274909491484e-05, "loss": 1.2807, "step": 2355500 }, { "epoch": 1.41, "learning_rate": 4.6150649129354276e-05, "loss": 1.2883, "step": 2356000 }, { "epoch": 1.41, "learning_rate": 4.614854916379371e-05, "loss": 1.2886, "step": 2356500 }, { "epoch": 1.41, "learning_rate": 4.614644919823315e-05, "loss": 1.3124, "step": 2357000 }, { "epoch": 1.41, "learning_rate": 4.6144353432603704e-05, "loss": 1.2828, "step": 2357500 }, { "epoch": 1.41, "learning_rate": 4.614225346704314e-05, "loss": 1.253, "step": 2358000 }, { "epoch": 1.41, "learning_rate": 4.614015350148258e-05, "loss": 1.2813, "step": 2358500 }, { "epoch": 1.41, "learning_rate": 4.613805353592201e-05, "loss": 1.2631, "step": 2359000 }, { "epoch": 1.41, "learning_rate": 4.6135953570361444e-05, "loss": 1.2863, "step": 2359500 }, { "epoch": 1.41, "learning_rate": 4.6133857804732e-05, "loss": 1.3079, "step": 2360000 }, { "epoch": 1.42, "learning_rate": 4.613175783917144e-05, "loss": 1.294, "step": 2360500 }, { "epoch": 1.42, "learning_rate": 4.612965787361087e-05, "loss": 1.2917, "step": 2361000 }, { "epoch": 1.42, "learning_rate": 4.6127557908050305e-05, "loss": 1.289, "step": 2361500 }, { "epoch": 1.42, "learning_rate": 4.6125457942489745e-05, "loss": 1.2959, "step": 2362000 }, { "epoch": 1.42, "learning_rate": 4.612335797692918e-05, "loss": 1.3066, "step": 2362500 }, { "epoch": 1.42, "learning_rate": 4.612125801136861e-05, "loss": 1.3018, "step": 2363000 }, { "epoch": 1.42, "learning_rate": 4.611915804580805e-05, "loss": 1.2903, "step": 2363500 }, { "epoch": 1.42, "learning_rate": 4.6117058080247486e-05, "loss": 1.2832, "step": 2364000 }, { "epoch": 1.42, "learning_rate": 4.611496231461804e-05, "loss": 1.2831, "step": 2364500 }, { "epoch": 1.42, "learning_rate": 4.611286234905748e-05, "loss": 1.314, "step": 2365000 }, { "epoch": 1.42, "learning_rate": 4.611076238349691e-05, "loss": 1.2875, "step": 2365500 }, { "epoch": 1.42, "learning_rate": 4.6108662417936347e-05, "loss": 1.3031, "step": 2366000 }, { "epoch": 1.42, "learning_rate": 4.610656245237578e-05, "loss": 1.2956, "step": 2366500 }, { "epoch": 1.42, "learning_rate": 4.6104462486815213e-05, "loss": 1.3006, "step": 2367000 }, { "epoch": 1.42, "learning_rate": 4.610236252125465e-05, "loss": 1.2941, "step": 2367500 }, { "epoch": 1.42, "learning_rate": 4.610026675562521e-05, "loss": 1.3108, "step": 2368000 }, { "epoch": 1.42, "learning_rate": 4.609816679006465e-05, "loss": 1.2935, "step": 2368500 }, { "epoch": 1.42, "learning_rate": 4.6096066824504074e-05, "loss": 1.2873, "step": 2369000 }, { "epoch": 1.42, "learning_rate": 4.609396685894351e-05, "loss": 1.2912, "step": 2369500 }, { "epoch": 1.42, "learning_rate": 4.609186689338295e-05, "loss": 1.2927, "step": 2370000 }, { "epoch": 1.42, "learning_rate": 4.608976692782238e-05, "loss": 1.2987, "step": 2370500 }, { "epoch": 1.42, "learning_rate": 4.608767116219294e-05, "loss": 1.2951, "step": 2371000 }, { "epoch": 1.42, "learning_rate": 4.6085571196632375e-05, "loss": 1.301, "step": 2371500 }, { "epoch": 1.42, "learning_rate": 4.608347123107181e-05, "loss": 1.2768, "step": 2372000 }, { "epoch": 1.42, "learning_rate": 4.608137126551124e-05, "loss": 1.31, "step": 2372500 }, { "epoch": 1.42, "learning_rate": 4.60792754998818e-05, "loss": 1.3129, "step": 2373000 }, { "epoch": 1.42, "learning_rate": 4.607717553432124e-05, "loss": 1.3254, "step": 2373500 }, { "epoch": 1.42, "learning_rate": 4.6075079768691796e-05, "loss": 1.3078, "step": 2374000 }, { "epoch": 1.42, "learning_rate": 4.607297980313123e-05, "loss": 1.2997, "step": 2374500 }, { "epoch": 1.42, "learning_rate": 4.607087983757066e-05, "loss": 1.3192, "step": 2375000 }, { "epoch": 1.42, "learning_rate": 4.60687798720101e-05, "loss": 1.3055, "step": 2375500 }, { "epoch": 1.42, "learning_rate": 4.606667990644953e-05, "loss": 1.2904, "step": 2376000 }, { "epoch": 1.42, "learning_rate": 4.6064579940888964e-05, "loss": 1.2697, "step": 2376500 }, { "epoch": 1.43, "learning_rate": 4.6062479975328404e-05, "loss": 1.2983, "step": 2377000 }, { "epoch": 1.43, "learning_rate": 4.6060384209698964e-05, "loss": 1.2853, "step": 2377500 }, { "epoch": 1.43, "learning_rate": 4.60582842441384e-05, "loss": 1.2846, "step": 2378000 }, { "epoch": 1.43, "learning_rate": 4.605618427857783e-05, "loss": 1.2802, "step": 2378500 }, { "epoch": 1.43, "learning_rate": 4.6054084313017264e-05, "loss": 1.2968, "step": 2379000 }, { "epoch": 1.43, "learning_rate": 4.60519843474567e-05, "loss": 1.2991, "step": 2379500 }, { "epoch": 1.43, "learning_rate": 4.604988438189614e-05, "loss": 1.2938, "step": 2380000 }, { "epoch": 1.43, "learning_rate": 4.604778441633557e-05, "loss": 1.294, "step": 2380500 }, { "epoch": 1.43, "learning_rate": 4.6045684450775005e-05, "loss": 1.3013, "step": 2381000 }, { "epoch": 1.43, "learning_rate": 4.6043584485214445e-05, "loss": 1.2893, "step": 2381500 }, { "epoch": 1.43, "learning_rate": 4.6041488719585e-05, "loss": 1.2824, "step": 2382000 }, { "epoch": 1.43, "learning_rate": 4.603939715388667e-05, "loss": 1.2879, "step": 2382500 }, { "epoch": 1.43, "learning_rate": 4.603729718832611e-05, "loss": 1.2562, "step": 2383000 }, { "epoch": 1.43, "learning_rate": 4.6035197222765546e-05, "loss": 1.2687, "step": 2383500 }, { "epoch": 1.43, "learning_rate": 4.603309725720498e-05, "loss": 1.2971, "step": 2384000 }, { "epoch": 1.43, "learning_rate": 4.603099729164442e-05, "loss": 1.3108, "step": 2384500 }, { "epoch": 1.43, "learning_rate": 4.6028897326083853e-05, "loss": 1.2912, "step": 2385000 }, { "epoch": 1.43, "learning_rate": 4.602679736052329e-05, "loss": 1.292, "step": 2385500 }, { "epoch": 1.43, "learning_rate": 4.602469739496272e-05, "loss": 1.2852, "step": 2386000 }, { "epoch": 1.43, "learning_rate": 4.6022597429402154e-05, "loss": 1.2846, "step": 2386500 }, { "epoch": 1.43, "learning_rate": 4.6020497463841594e-05, "loss": 1.2607, "step": 2387000 }, { "epoch": 1.43, "learning_rate": 4.601839749828103e-05, "loss": 1.2958, "step": 2387500 }, { "epoch": 1.43, "learning_rate": 4.601629753272046e-05, "loss": 1.2838, "step": 2388000 }, { "epoch": 1.43, "learning_rate": 4.60141975671599e-05, "loss": 1.2743, "step": 2388500 }, { "epoch": 1.43, "learning_rate": 4.6012097601599335e-05, "loss": 1.2688, "step": 2389000 }, { "epoch": 1.43, "learning_rate": 4.600999763603877e-05, "loss": 1.2871, "step": 2389500 }, { "epoch": 1.43, "learning_rate": 4.600790187040932e-05, "loss": 1.3003, "step": 2390000 }, { "epoch": 1.43, "learning_rate": 4.600580190484876e-05, "loss": 1.2955, "step": 2390500 }, { "epoch": 1.43, "learning_rate": 4.6003701939288195e-05, "loss": 1.3004, "step": 2391000 }, { "epoch": 1.43, "learning_rate": 4.600160197372763e-05, "loss": 1.2977, "step": 2391500 }, { "epoch": 1.43, "learning_rate": 4.599950200816707e-05, "loss": 1.3495, "step": 2392000 }, { "epoch": 1.43, "learning_rate": 4.59974020426065e-05, "loss": 1.2895, "step": 2392500 }, { "epoch": 1.43, "learning_rate": 4.5995302077045936e-05, "loss": 1.2887, "step": 2393000 }, { "epoch": 1.44, "learning_rate": 4.599320631141649e-05, "loss": 1.2873, "step": 2393500 }, { "epoch": 1.44, "learning_rate": 4.599110634585593e-05, "loss": 1.2978, "step": 2394000 }, { "epoch": 1.44, "learning_rate": 4.598900638029536e-05, "loss": 1.2998, "step": 2394500 }, { "epoch": 1.44, "learning_rate": 4.59869064147348e-05, "loss": 1.2726, "step": 2395000 }, { "epoch": 1.44, "learning_rate": 4.598480644917424e-05, "loss": 1.3146, "step": 2395500 }, { "epoch": 1.44, "learning_rate": 4.5982706483613664e-05, "loss": 1.3099, "step": 2396000 }, { "epoch": 1.44, "learning_rate": 4.5980606518053104e-05, "loss": 1.293, "step": 2396500 }, { "epoch": 1.44, "learning_rate": 4.597850655249254e-05, "loss": 1.2994, "step": 2397000 }, { "epoch": 1.44, "learning_rate": 4.59764107868631e-05, "loss": 1.2754, "step": 2397500 }, { "epoch": 1.44, "learning_rate": 4.597431502123365e-05, "loss": 1.3155, "step": 2398000 }, { "epoch": 1.44, "learning_rate": 4.5972215055673085e-05, "loss": 1.2789, "step": 2398500 }, { "epoch": 1.44, "learning_rate": 4.5970115090112525e-05, "loss": 1.2833, "step": 2399000 }, { "epoch": 1.44, "learning_rate": 4.596801512455196e-05, "loss": 1.295, "step": 2399500 }, { "epoch": 1.44, "learning_rate": 4.596591515899139e-05, "loss": 1.2783, "step": 2400000 }, { "epoch": 1.44, "eval_loss": 1.2449374198913574, "eval_runtime": 1111.8994, "eval_samples_per_second": 473.712, "eval_steps_per_second": 78.952, "step": 2400000 }, { "epoch": 1.44, "learning_rate": 4.596381519343083e-05, "loss": 1.3102, "step": 2400500 }, { "epoch": 1.44, "learning_rate": 4.5961719427801386e-05, "loss": 1.279, "step": 2401000 }, { "epoch": 1.44, "learning_rate": 4.595961946224082e-05, "loss": 1.3264, "step": 2401500 }, { "epoch": 1.44, "learning_rate": 4.595751949668025e-05, "loss": 1.2797, "step": 2402000 }, { "epoch": 1.44, "learning_rate": 4.595541953111969e-05, "loss": 1.2995, "step": 2402500 }, { "epoch": 1.44, "learning_rate": 4.595331956555912e-05, "loss": 1.3128, "step": 2403000 }, { "epoch": 1.44, "learning_rate": 4.595121959999856e-05, "loss": 1.3285, "step": 2403500 }, { "epoch": 1.44, "learning_rate": 4.594911963443799e-05, "loss": 1.2796, "step": 2404000 }, { "epoch": 1.44, "learning_rate": 4.594701966887743e-05, "loss": 1.2816, "step": 2404500 }, { "epoch": 1.44, "learning_rate": 4.594492390324799e-05, "loss": 1.281, "step": 2405000 }, { "epoch": 1.44, "learning_rate": 4.594282393768742e-05, "loss": 1.2861, "step": 2405500 }, { "epoch": 1.44, "learning_rate": 4.5940723972126854e-05, "loss": 1.3202, "step": 2406000 }, { "epoch": 1.44, "learning_rate": 4.593862400656629e-05, "loss": 1.2889, "step": 2406500 }, { "epoch": 1.44, "learning_rate": 4.593652404100573e-05, "loss": 1.3099, "step": 2407000 }, { "epoch": 1.44, "learning_rate": 4.593442827537629e-05, "loss": 1.2828, "step": 2407500 }, { "epoch": 1.44, "learning_rate": 4.593233250974684e-05, "loss": 1.3041, "step": 2408000 }, { "epoch": 1.44, "learning_rate": 4.5930232544186275e-05, "loss": 1.29, "step": 2408500 }, { "epoch": 1.44, "learning_rate": 4.592813677855683e-05, "loss": 1.2607, "step": 2409000 }, { "epoch": 1.44, "learning_rate": 4.592603681299627e-05, "loss": 1.3062, "step": 2409500 }, { "epoch": 1.44, "learning_rate": 4.59239368474357e-05, "loss": 1.3198, "step": 2410000 }, { "epoch": 1.45, "learning_rate": 4.5921836881875136e-05, "loss": 1.3009, "step": 2410500 }, { "epoch": 1.45, "learning_rate": 4.5919736916314576e-05, "loss": 1.2754, "step": 2411000 }, { "epoch": 1.45, "learning_rate": 4.591763695075401e-05, "loss": 1.2774, "step": 2411500 }, { "epoch": 1.45, "learning_rate": 4.591553698519344e-05, "loss": 1.2868, "step": 2412000 }, { "epoch": 1.45, "learning_rate": 4.5913437019632876e-05, "loss": 1.2933, "step": 2412500 }, { "epoch": 1.45, "learning_rate": 4.591133705407231e-05, "loss": 1.2822, "step": 2413000 }, { "epoch": 1.45, "learning_rate": 4.590924128844287e-05, "loss": 1.3011, "step": 2413500 }, { "epoch": 1.45, "learning_rate": 4.5907141322882304e-05, "loss": 1.2893, "step": 2414000 }, { "epoch": 1.45, "learning_rate": 4.5905041357321744e-05, "loss": 1.3064, "step": 2414500 }, { "epoch": 1.45, "learning_rate": 4.590294139176117e-05, "loss": 1.2824, "step": 2415000 }, { "epoch": 1.45, "learning_rate": 4.5900841426200604e-05, "loss": 1.2849, "step": 2415500 }, { "epoch": 1.45, "learning_rate": 4.5898741460640044e-05, "loss": 1.3107, "step": 2416000 }, { "epoch": 1.45, "learning_rate": 4.589664149507948e-05, "loss": 1.2817, "step": 2416500 }, { "epoch": 1.45, "learning_rate": 4.589454572945004e-05, "loss": 1.328, "step": 2417000 }, { "epoch": 1.45, "learning_rate": 4.589244576388947e-05, "loss": 1.3029, "step": 2417500 }, { "epoch": 1.45, "learning_rate": 4.5890345798328905e-05, "loss": 1.2561, "step": 2418000 }, { "epoch": 1.45, "learning_rate": 4.588824583276834e-05, "loss": 1.2795, "step": 2418500 }, { "epoch": 1.45, "learning_rate": 4.588614586720778e-05, "loss": 1.297, "step": 2419000 }, { "epoch": 1.45, "learning_rate": 4.588404590164721e-05, "loss": 1.3036, "step": 2419500 }, { "epoch": 1.45, "learning_rate": 4.5881950136017766e-05, "loss": 1.3244, "step": 2420000 }, { "epoch": 1.45, "learning_rate": 4.58798501704572e-05, "loss": 1.2817, "step": 2420500 }, { "epoch": 1.45, "learning_rate": 4.587775020489664e-05, "loss": 1.2823, "step": 2421000 }, { "epoch": 1.45, "learning_rate": 4.587565023933607e-05, "loss": 1.2691, "step": 2421500 }, { "epoch": 1.45, "learning_rate": 4.5873550273775506e-05, "loss": 1.3004, "step": 2422000 }, { "epoch": 1.45, "learning_rate": 4.5871450308214947e-05, "loss": 1.2823, "step": 2422500 }, { "epoch": 1.45, "learning_rate": 4.586935034265438e-05, "loss": 1.2543, "step": 2423000 }, { "epoch": 1.45, "learning_rate": 4.5867250377093813e-05, "loss": 1.2933, "step": 2423500 }, { "epoch": 1.45, "learning_rate": 4.5865154611464374e-05, "loss": 1.279, "step": 2424000 }, { "epoch": 1.45, "learning_rate": 4.586305884583493e-05, "loss": 1.2722, "step": 2424500 }, { "epoch": 1.45, "learning_rate": 4.58609672801366e-05, "loss": 1.3164, "step": 2425000 }, { "epoch": 1.45, "learning_rate": 4.585886731457604e-05, "loss": 1.2859, "step": 2425500 }, { "epoch": 1.45, "learning_rate": 4.5856767349015475e-05, "loss": 1.3089, "step": 2426000 }, { "epoch": 1.45, "learning_rate": 4.585466738345491e-05, "loss": 1.2864, "step": 2426500 }, { "epoch": 1.46, "learning_rate": 4.585256741789435e-05, "loss": 1.2942, "step": 2427000 }, { "epoch": 1.46, "learning_rate": 4.585046745233378e-05, "loss": 1.2887, "step": 2427500 }, { "epoch": 1.46, "learning_rate": 4.5848367486773215e-05, "loss": 1.3303, "step": 2428000 }, { "epoch": 1.46, "learning_rate": 4.5846267521212656e-05, "loss": 1.2841, "step": 2428500 }, { "epoch": 1.46, "learning_rate": 4.584416755565209e-05, "loss": 1.285, "step": 2429000 }, { "epoch": 1.46, "learning_rate": 4.5842067590091516e-05, "loss": 1.2766, "step": 2429500 }, { "epoch": 1.46, "learning_rate": 4.5839967624530956e-05, "loss": 1.2875, "step": 2430000 }, { "epoch": 1.46, "learning_rate": 4.583786765897039e-05, "loss": 1.2884, "step": 2430500 }, { "epoch": 1.46, "learning_rate": 4.583576769340983e-05, "loss": 1.2892, "step": 2431000 }, { "epoch": 1.46, "learning_rate": 4.583366772784926e-05, "loss": 1.2817, "step": 2431500 }, { "epoch": 1.46, "learning_rate": 4.5831567762288697e-05, "loss": 1.2835, "step": 2432000 }, { "epoch": 1.46, "learning_rate": 4.582946779672814e-05, "loss": 1.2694, "step": 2432500 }, { "epoch": 1.46, "learning_rate": 4.582737203109869e-05, "loss": 1.3126, "step": 2433000 }, { "epoch": 1.46, "learning_rate": 4.5825272065538124e-05, "loss": 1.2653, "step": 2433500 }, { "epoch": 1.46, "learning_rate": 4.582317209997756e-05, "loss": 1.2622, "step": 2434000 }, { "epoch": 1.46, "learning_rate": 4.5821072134417e-05, "loss": 1.2731, "step": 2434500 }, { "epoch": 1.46, "learning_rate": 4.581897216885643e-05, "loss": 1.2944, "step": 2435000 }, { "epoch": 1.46, "learning_rate": 4.5816872203295864e-05, "loss": 1.2903, "step": 2435500 }, { "epoch": 1.46, "learning_rate": 4.581477643766642e-05, "loss": 1.2766, "step": 2436000 }, { "epoch": 1.46, "learning_rate": 4.581268067203697e-05, "loss": 1.2762, "step": 2436500 }, { "epoch": 1.46, "learning_rate": 4.581058070647641e-05, "loss": 1.3124, "step": 2437000 }, { "epoch": 1.46, "learning_rate": 4.5808480740915845e-05, "loss": 1.2866, "step": 2437500 }, { "epoch": 1.46, "learning_rate": 4.5806380775355286e-05, "loss": 1.2678, "step": 2438000 }, { "epoch": 1.46, "learning_rate": 4.580428080979472e-05, "loss": 1.2629, "step": 2438500 }, { "epoch": 1.46, "learning_rate": 4.580218084423415e-05, "loss": 1.2938, "step": 2439000 }, { "epoch": 1.46, "learning_rate": 4.580008087867359e-05, "loss": 1.2974, "step": 2439500 }, { "epoch": 1.46, "learning_rate": 4.5797980913113026e-05, "loss": 1.271, "step": 2440000 }, { "epoch": 1.46, "learning_rate": 4.579588094755246e-05, "loss": 1.3093, "step": 2440500 }, { "epoch": 1.46, "learning_rate": 4.579378518192301e-05, "loss": 1.3068, "step": 2441000 }, { "epoch": 1.46, "learning_rate": 4.5791685216362453e-05, "loss": 1.2741, "step": 2441500 }, { "epoch": 1.46, "learning_rate": 4.578958525080189e-05, "loss": 1.283, "step": 2442000 }, { "epoch": 1.46, "learning_rate": 4.578748948517244e-05, "loss": 1.2647, "step": 2442500 }, { "epoch": 1.46, "learning_rate": 4.5785389519611874e-05, "loss": 1.2893, "step": 2443000 }, { "epoch": 1.46, "learning_rate": 4.5783293753982434e-05, "loss": 1.2831, "step": 2443500 }, { "epoch": 1.47, "learning_rate": 4.578119378842187e-05, "loss": 1.2566, "step": 2444000 }, { "epoch": 1.47, "learning_rate": 4.57790938228613e-05, "loss": 1.2799, "step": 2444500 }, { "epoch": 1.47, "learning_rate": 4.577699385730074e-05, "loss": 1.3023, "step": 2445000 }, { "epoch": 1.47, "learning_rate": 4.5774893891740175e-05, "loss": 1.3199, "step": 2445500 }, { "epoch": 1.47, "learning_rate": 4.577279392617961e-05, "loss": 1.2825, "step": 2446000 }, { "epoch": 1.47, "learning_rate": 4.577069396061905e-05, "loss": 1.2875, "step": 2446500 }, { "epoch": 1.47, "learning_rate": 4.576859399505848e-05, "loss": 1.289, "step": 2447000 }, { "epoch": 1.47, "learning_rate": 4.5766494029497916e-05, "loss": 1.3108, "step": 2447500 }, { "epoch": 1.47, "learning_rate": 4.5764394063937356e-05, "loss": 1.2929, "step": 2448000 }, { "epoch": 1.47, "learning_rate": 4.576229409837679e-05, "loss": 1.2773, "step": 2448500 }, { "epoch": 1.47, "learning_rate": 4.5760194132816216e-05, "loss": 1.2939, "step": 2449000 }, { "epoch": 1.47, "learning_rate": 4.5758094167255656e-05, "loss": 1.2857, "step": 2449500 }, { "epoch": 1.47, "learning_rate": 4.575599420169509e-05, "loss": 1.2969, "step": 2450000 }, { "epoch": 1.47, "learning_rate": 4.575389423613452e-05, "loss": 1.2902, "step": 2450500 }, { "epoch": 1.47, "learning_rate": 4.575179427057396e-05, "loss": 1.2707, "step": 2451000 }, { "epoch": 1.47, "learning_rate": 4.574969850494452e-05, "loss": 1.3025, "step": 2451500 }, { "epoch": 1.47, "learning_rate": 4.574759853938395e-05, "loss": 1.3052, "step": 2452000 }, { "epoch": 1.47, "learning_rate": 4.574550277375451e-05, "loss": 1.2797, "step": 2452500 }, { "epoch": 1.47, "learning_rate": 4.5743402808193944e-05, "loss": 1.2865, "step": 2453000 }, { "epoch": 1.47, "learning_rate": 4.5741302842633384e-05, "loss": 1.2825, "step": 2453500 }, { "epoch": 1.47, "learning_rate": 4.573920287707281e-05, "loss": 1.3003, "step": 2454000 }, { "epoch": 1.47, "learning_rate": 4.573710711144337e-05, "loss": 1.316, "step": 2454500 }, { "epoch": 1.47, "learning_rate": 4.573500714588281e-05, "loss": 1.3039, "step": 2455000 }, { "epoch": 1.47, "learning_rate": 4.5732907180322245e-05, "loss": 1.2965, "step": 2455500 }, { "epoch": 1.47, "learning_rate": 4.573080721476168e-05, "loss": 1.2978, "step": 2456000 }, { "epoch": 1.47, "learning_rate": 4.572870724920111e-05, "loss": 1.2954, "step": 2456500 }, { "epoch": 1.47, "learning_rate": 4.5726607283640545e-05, "loss": 1.2858, "step": 2457000 }, { "epoch": 1.47, "learning_rate": 4.572450731807998e-05, "loss": 1.2854, "step": 2457500 }, { "epoch": 1.47, "learning_rate": 4.572240735251942e-05, "loss": 1.2753, "step": 2458000 }, { "epoch": 1.47, "learning_rate": 4.572030738695885e-05, "loss": 1.2986, "step": 2458500 }, { "epoch": 1.47, "learning_rate": 4.5718211621329406e-05, "loss": 1.283, "step": 2459000 }, { "epoch": 1.47, "learning_rate": 4.571611165576884e-05, "loss": 1.2909, "step": 2459500 }, { "epoch": 1.47, "learning_rate": 4.571401169020828e-05, "loss": 1.2943, "step": 2460000 }, { "epoch": 1.48, "learning_rate": 4.571191172464771e-05, "loss": 1.2873, "step": 2460500 }, { "epoch": 1.48, "learning_rate": 4.570981175908715e-05, "loss": 1.2884, "step": 2461000 }, { "epoch": 1.48, "learning_rate": 4.570771179352659e-05, "loss": 1.3129, "step": 2461500 }, { "epoch": 1.48, "learning_rate": 4.570561182796602e-05, "loss": 1.2774, "step": 2462000 }, { "epoch": 1.48, "learning_rate": 4.5703511862405454e-05, "loss": 1.2387, "step": 2462500 }, { "epoch": 1.48, "learning_rate": 4.5701416096776014e-05, "loss": 1.2889, "step": 2463000 }, { "epoch": 1.48, "learning_rate": 4.569932033114657e-05, "loss": 1.2891, "step": 2463500 }, { "epoch": 1.48, "learning_rate": 4.5697220365586e-05, "loss": 1.314, "step": 2464000 }, { "epoch": 1.48, "learning_rate": 4.5695120400025435e-05, "loss": 1.2999, "step": 2464500 }, { "epoch": 1.48, "learning_rate": 4.5693020434464875e-05, "loss": 1.2791, "step": 2465000 }, { "epoch": 1.48, "learning_rate": 4.569092046890431e-05, "loss": 1.2661, "step": 2465500 }, { "epoch": 1.48, "learning_rate": 4.568882470327486e-05, "loss": 1.2753, "step": 2466000 }, { "epoch": 1.48, "learning_rate": 4.5686724737714296e-05, "loss": 1.2481, "step": 2466500 }, { "epoch": 1.48, "learning_rate": 4.5684624772153736e-05, "loss": 1.3074, "step": 2467000 }, { "epoch": 1.48, "learning_rate": 4.568252480659317e-05, "loss": 1.2951, "step": 2467500 }, { "epoch": 1.48, "learning_rate": 4.568042904096373e-05, "loss": 1.2984, "step": 2468000 }, { "epoch": 1.48, "learning_rate": 4.567832907540316e-05, "loss": 1.2969, "step": 2468500 }, { "epoch": 1.48, "learning_rate": 4.5676229109842596e-05, "loss": 1.285, "step": 2469000 }, { "epoch": 1.48, "learning_rate": 4.567412914428203e-05, "loss": 1.2838, "step": 2469500 }, { "epoch": 1.48, "learning_rate": 4.567202917872147e-05, "loss": 1.31, "step": 2470000 }, { "epoch": 1.48, "learning_rate": 4.5669929213160904e-05, "loss": 1.3301, "step": 2470500 }, { "epoch": 1.48, "learning_rate": 4.566782924760034e-05, "loss": 1.3014, "step": 2471000 }, { "epoch": 1.48, "learning_rate": 4.566572928203978e-05, "loss": 1.2799, "step": 2471500 }, { "epoch": 1.48, "learning_rate": 4.566362931647921e-05, "loss": 1.3107, "step": 2472000 }, { "epoch": 1.48, "learning_rate": 4.5661529350918644e-05, "loss": 1.3026, "step": 2472500 }, { "epoch": 1.48, "learning_rate": 4.5659429385358084e-05, "loss": 1.2913, "step": 2473000 }, { "epoch": 1.48, "learning_rate": 4.565732941979752e-05, "loss": 1.3244, "step": 2473500 }, { "epoch": 1.48, "learning_rate": 4.565523365416807e-05, "loss": 1.3016, "step": 2474000 }, { "epoch": 1.48, "learning_rate": 4.5653133688607505e-05, "loss": 1.3108, "step": 2474500 }, { "epoch": 1.48, "learning_rate": 4.5651033723046945e-05, "loss": 1.2615, "step": 2475000 }, { "epoch": 1.48, "learning_rate": 4.564893375748638e-05, "loss": 1.3088, "step": 2475500 }, { "epoch": 1.48, "learning_rate": 4.564683799185693e-05, "loss": 1.302, "step": 2476000 }, { "epoch": 1.48, "learning_rate": 4.5644742226227486e-05, "loss": 1.271, "step": 2476500 }, { "epoch": 1.49, "learning_rate": 4.5642642260666926e-05, "loss": 1.2775, "step": 2477000 }, { "epoch": 1.49, "learning_rate": 4.564054229510636e-05, "loss": 1.2906, "step": 2477500 }, { "epoch": 1.49, "learning_rate": 4.563844232954579e-05, "loss": 1.2826, "step": 2478000 }, { "epoch": 1.49, "learning_rate": 4.563634236398523e-05, "loss": 1.3045, "step": 2478500 }, { "epoch": 1.49, "learning_rate": 4.563424659835579e-05, "loss": 1.2764, "step": 2479000 }, { "epoch": 1.49, "learning_rate": 4.563214663279522e-05, "loss": 1.2918, "step": 2479500 }, { "epoch": 1.49, "learning_rate": 4.5630046667234654e-05, "loss": 1.2914, "step": 2480000 }, { "epoch": 1.49, "learning_rate": 4.5627946701674094e-05, "loss": 1.2884, "step": 2480500 }, { "epoch": 1.49, "learning_rate": 4.562584673611353e-05, "loss": 1.3056, "step": 2481000 }, { "epoch": 1.49, "learning_rate": 4.562374677055296e-05, "loss": 1.2832, "step": 2481500 }, { "epoch": 1.49, "learning_rate": 4.56216468049924e-05, "loss": 1.2507, "step": 2482000 }, { "epoch": 1.49, "learning_rate": 4.5619546839431835e-05, "loss": 1.2789, "step": 2482500 }, { "epoch": 1.49, "learning_rate": 4.561745107380239e-05, "loss": 1.2784, "step": 2483000 }, { "epoch": 1.49, "learning_rate": 4.561535530817294e-05, "loss": 1.2693, "step": 2483500 }, { "epoch": 1.49, "learning_rate": 4.561325534261238e-05, "loss": 1.3018, "step": 2484000 }, { "epoch": 1.49, "learning_rate": 4.5611155377051815e-05, "loss": 1.2829, "step": 2484500 }, { "epoch": 1.49, "learning_rate": 4.560905541149125e-05, "loss": 1.2979, "step": 2485000 }, { "epoch": 1.49, "learning_rate": 4.560695544593069e-05, "loss": 1.2843, "step": 2485500 }, { "epoch": 1.49, "learning_rate": 4.560485548037012e-05, "loss": 1.2841, "step": 2486000 }, { "epoch": 1.49, "learning_rate": 4.5602759714740676e-05, "loss": 1.2954, "step": 2486500 }, { "epoch": 1.49, "learning_rate": 4.5600663949111236e-05, "loss": 1.2777, "step": 2487000 }, { "epoch": 1.49, "learning_rate": 4.559856398355066e-05, "loss": 1.3195, "step": 2487500 }, { "epoch": 1.49, "learning_rate": 4.5596464017990103e-05, "loss": 1.2912, "step": 2488000 }, { "epoch": 1.49, "learning_rate": 4.559436405242954e-05, "loss": 1.2836, "step": 2488500 }, { "epoch": 1.49, "learning_rate": 4.559226408686897e-05, "loss": 1.2996, "step": 2489000 }, { "epoch": 1.49, "learning_rate": 4.559016412130841e-05, "loss": 1.2988, "step": 2489500 }, { "epoch": 1.49, "learning_rate": 4.5588064155747844e-05, "loss": 1.2854, "step": 2490000 }, { "epoch": 1.49, "learning_rate": 4.5585964190187284e-05, "loss": 1.2872, "step": 2490500 }, { "epoch": 1.49, "learning_rate": 4.558386842455784e-05, "loss": 1.2494, "step": 2491000 }, { "epoch": 1.49, "learning_rate": 4.558176845899727e-05, "loss": 1.3069, "step": 2491500 }, { "epoch": 1.49, "learning_rate": 4.5579668493436705e-05, "loss": 1.2806, "step": 2492000 }, { "epoch": 1.49, "learning_rate": 4.5577568527876145e-05, "loss": 1.2924, "step": 2492500 }, { "epoch": 1.49, "learning_rate": 4.55754727622467e-05, "loss": 1.2733, "step": 2493000 }, { "epoch": 1.49, "learning_rate": 4.557337279668613e-05, "loss": 1.2912, "step": 2493500 }, { "epoch": 1.5, "learning_rate": 4.5571272831125565e-05, "loss": 1.3073, "step": 2494000 }, { "epoch": 1.5, "learning_rate": 4.5569172865565006e-05, "loss": 1.2972, "step": 2494500 }, { "epoch": 1.5, "learning_rate": 4.556707290000444e-05, "loss": 1.2806, "step": 2495000 }, { "epoch": 1.5, "learning_rate": 4.556497293444387e-05, "loss": 1.2528, "step": 2495500 }, { "epoch": 1.5, "learning_rate": 4.556287296888331e-05, "loss": 1.2791, "step": 2496000 }, { "epoch": 1.5, "learning_rate": 4.5560773003322746e-05, "loss": 1.2974, "step": 2496500 }, { "epoch": 1.5, "learning_rate": 4.555867303776218e-05, "loss": 1.2761, "step": 2497000 }, { "epoch": 1.5, "learning_rate": 4.555657727213274e-05, "loss": 1.2989, "step": 2497500 }, { "epoch": 1.5, "learning_rate": 4.5554477306572174e-05, "loss": 1.288, "step": 2498000 }, { "epoch": 1.5, "learning_rate": 4.555237734101161e-05, "loss": 1.2736, "step": 2498500 }, { "epoch": 1.5, "learning_rate": 4.555027737545105e-05, "loss": 1.2777, "step": 2499000 }, { "epoch": 1.5, "learning_rate": 4.55481816098216e-05, "loss": 1.2958, "step": 2499500 }, { "epoch": 1.5, "learning_rate": 4.5546081644261034e-05, "loss": 1.2887, "step": 2500000 }, { "epoch": 1.5, "eval_loss": 1.2355479001998901, "eval_runtime": 1109.1586, "eval_samples_per_second": 474.883, "eval_steps_per_second": 79.147, "step": 2500000 }, { "epoch": 1.5, "learning_rate": 4.554398167870047e-05, "loss": 1.2709, "step": 2500500 }, { "epoch": 1.5, "learning_rate": 4.554188171313991e-05, "loss": 1.2705, "step": 2501000 }, { "epoch": 1.5, "learning_rate": 4.553978174757934e-05, "loss": 1.2786, "step": 2501500 }, { "epoch": 1.5, "learning_rate": 4.5537681782018775e-05, "loss": 1.2982, "step": 2502000 }, { "epoch": 1.5, "learning_rate": 4.553558181645821e-05, "loss": 1.2999, "step": 2502500 }, { "epoch": 1.5, "learning_rate": 4.553348605082877e-05, "loss": 1.2726, "step": 2503000 }, { "epoch": 1.5, "learning_rate": 4.55313860852682e-05, "loss": 1.2899, "step": 2503500 }, { "epoch": 1.5, "learning_rate": 4.5529286119707636e-05, "loss": 1.2713, "step": 2504000 }, { "epoch": 1.5, "learning_rate": 4.5527186154147076e-05, "loss": 1.2629, "step": 2504500 }, { "epoch": 1.5, "learning_rate": 4.55250861885865e-05, "loss": 1.2921, "step": 2505000 }, { "epoch": 1.5, "learning_rate": 4.552298622302594e-05, "loss": 1.2762, "step": 2505500 }, { "epoch": 1.5, "learning_rate": 4.55208904573965e-05, "loss": 1.2605, "step": 2506000 }, { "epoch": 1.5, "learning_rate": 4.551879889169817e-05, "loss": 1.3046, "step": 2506500 }, { "epoch": 1.5, "learning_rate": 4.551669892613761e-05, "loss": 1.2998, "step": 2507000 }, { "epoch": 1.5, "learning_rate": 4.5514598960577044e-05, "loss": 1.3053, "step": 2507500 }, { "epoch": 1.5, "learning_rate": 4.551249899501648e-05, "loss": 1.2953, "step": 2508000 }, { "epoch": 1.5, "learning_rate": 4.551039902945592e-05, "loss": 1.2653, "step": 2508500 }, { "epoch": 1.5, "learning_rate": 4.550829906389535e-05, "loss": 1.301, "step": 2509000 }, { "epoch": 1.5, "learning_rate": 4.5506199098334784e-05, "loss": 1.2972, "step": 2509500 }, { "epoch": 1.5, "learning_rate": 4.5504099132774225e-05, "loss": 1.2996, "step": 2510000 }, { "epoch": 1.51, "learning_rate": 4.550199916721366e-05, "loss": 1.2793, "step": 2510500 }, { "epoch": 1.51, "learning_rate": 4.549989920165309e-05, "loss": 1.3054, "step": 2511000 }, { "epoch": 1.51, "learning_rate": 4.549779923609253e-05, "loss": 1.2679, "step": 2511500 }, { "epoch": 1.51, "learning_rate": 4.549569927053196e-05, "loss": 1.3086, "step": 2512000 }, { "epoch": 1.51, "learning_rate": 4.549360350490252e-05, "loss": 1.2989, "step": 2512500 }, { "epoch": 1.51, "learning_rate": 4.549150353934196e-05, "loss": 1.3138, "step": 2513000 }, { "epoch": 1.51, "learning_rate": 4.548940777371251e-05, "loss": 1.2683, "step": 2513500 }, { "epoch": 1.51, "learning_rate": 4.5487307808151946e-05, "loss": 1.3015, "step": 2514000 }, { "epoch": 1.51, "learning_rate": 4.548520784259138e-05, "loss": 1.3071, "step": 2514500 }, { "epoch": 1.51, "learning_rate": 4.548310787703082e-05, "loss": 1.283, "step": 2515000 }, { "epoch": 1.51, "learning_rate": 4.548100791147025e-05, "loss": 1.2628, "step": 2515500 }, { "epoch": 1.51, "learning_rate": 4.547890794590969e-05, "loss": 1.2838, "step": 2516000 }, { "epoch": 1.51, "learning_rate": 4.547680798034912e-05, "loss": 1.2543, "step": 2516500 }, { "epoch": 1.51, "learning_rate": 4.5474708014788554e-05, "loss": 1.2399, "step": 2517000 }, { "epoch": 1.51, "learning_rate": 4.547260804922799e-05, "loss": 1.3154, "step": 2517500 }, { "epoch": 1.51, "learning_rate": 4.547050808366743e-05, "loss": 1.277, "step": 2518000 }, { "epoch": 1.51, "learning_rate": 4.546841231803799e-05, "loss": 1.2831, "step": 2518500 }, { "epoch": 1.51, "learning_rate": 4.5466312352477414e-05, "loss": 1.3303, "step": 2519000 }, { "epoch": 1.51, "learning_rate": 4.5464212386916855e-05, "loss": 1.2873, "step": 2519500 }, { "epoch": 1.51, "learning_rate": 4.546211242135629e-05, "loss": 1.2857, "step": 2520000 }, { "epoch": 1.51, "learning_rate": 4.546001245579572e-05, "loss": 1.283, "step": 2520500 }, { "epoch": 1.51, "learning_rate": 4.545791249023516e-05, "loss": 1.2871, "step": 2521000 }, { "epoch": 1.51, "learning_rate": 4.5455816724605715e-05, "loss": 1.2865, "step": 2521500 }, { "epoch": 1.51, "learning_rate": 4.545371675904515e-05, "loss": 1.268, "step": 2522000 }, { "epoch": 1.51, "learning_rate": 4.545161679348458e-05, "loss": 1.2916, "step": 2522500 }, { "epoch": 1.51, "learning_rate": 4.544951682792402e-05, "loss": 1.3, "step": 2523000 }, { "epoch": 1.51, "learning_rate": 4.5447416862363456e-05, "loss": 1.3141, "step": 2523500 }, { "epoch": 1.51, "learning_rate": 4.544532109673401e-05, "loss": 1.2835, "step": 2524000 }, { "epoch": 1.51, "learning_rate": 4.544322113117344e-05, "loss": 1.2896, "step": 2524500 }, { "epoch": 1.51, "learning_rate": 4.544112116561288e-05, "loss": 1.2659, "step": 2525000 }, { "epoch": 1.51, "learning_rate": 4.543902120005232e-05, "loss": 1.276, "step": 2525500 }, { "epoch": 1.51, "learning_rate": 4.543692123449175e-05, "loss": 1.2937, "step": 2526000 }, { "epoch": 1.51, "learning_rate": 4.543482126893119e-05, "loss": 1.2792, "step": 2526500 }, { "epoch": 1.52, "learning_rate": 4.5432721303370624e-05, "loss": 1.2954, "step": 2527000 }, { "epoch": 1.52, "learning_rate": 4.543062133781006e-05, "loss": 1.2844, "step": 2527500 }, { "epoch": 1.52, "learning_rate": 4.542852557218062e-05, "loss": 1.2766, "step": 2528000 }, { "epoch": 1.52, "learning_rate": 4.542642560662005e-05, "loss": 1.3086, "step": 2528500 }, { "epoch": 1.52, "learning_rate": 4.5424325641059485e-05, "loss": 1.2646, "step": 2529000 }, { "epoch": 1.52, "learning_rate": 4.5422225675498925e-05, "loss": 1.2701, "step": 2529500 }, { "epoch": 1.52, "learning_rate": 4.542012990986948e-05, "loss": 1.2832, "step": 2530000 }, { "epoch": 1.52, "learning_rate": 4.541803414424004e-05, "loss": 1.2783, "step": 2530500 }, { "epoch": 1.52, "learning_rate": 4.5415934178679465e-05, "loss": 1.2968, "step": 2531000 }, { "epoch": 1.52, "learning_rate": 4.54138342131189e-05, "loss": 1.2805, "step": 2531500 }, { "epoch": 1.52, "learning_rate": 4.541173424755834e-05, "loss": 1.2604, "step": 2532000 }, { "epoch": 1.52, "learning_rate": 4.540963428199777e-05, "loss": 1.2809, "step": 2532500 }, { "epoch": 1.52, "learning_rate": 4.540753851636833e-05, "loss": 1.2952, "step": 2533000 }, { "epoch": 1.52, "learning_rate": 4.5405438550807766e-05, "loss": 1.3006, "step": 2533500 }, { "epoch": 1.52, "learning_rate": 4.54033385852472e-05, "loss": 1.2728, "step": 2534000 }, { "epoch": 1.52, "learning_rate": 4.540123861968663e-05, "loss": 1.2973, "step": 2534500 }, { "epoch": 1.52, "learning_rate": 4.5399138654126073e-05, "loss": 1.28, "step": 2535000 }, { "epoch": 1.52, "learning_rate": 4.539703868856551e-05, "loss": 1.2993, "step": 2535500 }, { "epoch": 1.52, "learning_rate": 4.539493872300494e-05, "loss": 1.276, "step": 2536000 }, { "epoch": 1.52, "learning_rate": 4.539283875744438e-05, "loss": 1.2823, "step": 2536500 }, { "epoch": 1.52, "learning_rate": 4.5390742991814934e-05, "loss": 1.262, "step": 2537000 }, { "epoch": 1.52, "learning_rate": 4.538864302625437e-05, "loss": 1.2664, "step": 2537500 }, { "epoch": 1.52, "learning_rate": 4.53865430606938e-05, "loss": 1.3091, "step": 2538000 }, { "epoch": 1.52, "learning_rate": 4.538444309513324e-05, "loss": 1.2859, "step": 2538500 }, { "epoch": 1.52, "learning_rate": 4.5382347329503795e-05, "loss": 1.2933, "step": 2539000 }, { "epoch": 1.52, "learning_rate": 4.538024736394323e-05, "loss": 1.2675, "step": 2539500 }, { "epoch": 1.52, "learning_rate": 4.537815159831379e-05, "loss": 1.2796, "step": 2540000 }, { "epoch": 1.52, "learning_rate": 4.537605163275322e-05, "loss": 1.3124, "step": 2540500 }, { "epoch": 1.52, "learning_rate": 4.5373951667192656e-05, "loss": 1.2767, "step": 2541000 }, { "epoch": 1.52, "learning_rate": 4.537185170163209e-05, "loss": 1.2893, "step": 2541500 }, { "epoch": 1.52, "learning_rate": 4.536975173607153e-05, "loss": 1.2997, "step": 2542000 }, { "epoch": 1.52, "learning_rate": 4.536765597044209e-05, "loss": 1.2888, "step": 2542500 }, { "epoch": 1.52, "learning_rate": 4.5365556004881516e-05, "loss": 1.3313, "step": 2543000 }, { "epoch": 1.52, "learning_rate": 4.536345603932095e-05, "loss": 1.2738, "step": 2543500 }, { "epoch": 1.53, "learning_rate": 4.536135607376039e-05, "loss": 1.2988, "step": 2544000 }, { "epoch": 1.53, "learning_rate": 4.535926030813095e-05, "loss": 1.2847, "step": 2544500 }, { "epoch": 1.53, "learning_rate": 4.5357160342570384e-05, "loss": 1.2844, "step": 2545000 }, { "epoch": 1.53, "learning_rate": 4.535506037700981e-05, "loss": 1.2749, "step": 2545500 }, { "epoch": 1.53, "learning_rate": 4.535296041144925e-05, "loss": 1.289, "step": 2546000 }, { "epoch": 1.53, "learning_rate": 4.5350860445888684e-05, "loss": 1.2605, "step": 2546500 }, { "epoch": 1.53, "learning_rate": 4.5348768880190365e-05, "loss": 1.324, "step": 2547000 }, { "epoch": 1.53, "learning_rate": 4.53466689146298e-05, "loss": 1.2942, "step": 2547500 }, { "epoch": 1.53, "learning_rate": 4.534456894906924e-05, "loss": 1.2843, "step": 2548000 }, { "epoch": 1.53, "learning_rate": 4.534246898350867e-05, "loss": 1.2994, "step": 2548500 }, { "epoch": 1.53, "learning_rate": 4.5340369017948105e-05, "loss": 1.2747, "step": 2549000 }, { "epoch": 1.53, "learning_rate": 4.5338269052387546e-05, "loss": 1.332, "step": 2549500 }, { "epoch": 1.53, "learning_rate": 4.533616908682697e-05, "loss": 1.2875, "step": 2550000 }, { "epoch": 1.53, "learning_rate": 4.5334069121266406e-05, "loss": 1.2939, "step": 2550500 }, { "epoch": 1.53, "learning_rate": 4.5331969155705846e-05, "loss": 1.2803, "step": 2551000 }, { "epoch": 1.53, "learning_rate": 4.5329873390076406e-05, "loss": 1.3065, "step": 2551500 }, { "epoch": 1.53, "learning_rate": 4.532777342451584e-05, "loss": 1.3055, "step": 2552000 }, { "epoch": 1.53, "learning_rate": 4.5325673458955266e-05, "loss": 1.2939, "step": 2552500 }, { "epoch": 1.53, "learning_rate": 4.532357349339471e-05, "loss": 1.3022, "step": 2553000 }, { "epoch": 1.53, "learning_rate": 4.532147772776527e-05, "loss": 1.2936, "step": 2553500 }, { "epoch": 1.53, "learning_rate": 4.531938196213582e-05, "loss": 1.2541, "step": 2554000 }, { "epoch": 1.53, "learning_rate": 4.5317281996575254e-05, "loss": 1.2981, "step": 2554500 }, { "epoch": 1.53, "learning_rate": 4.5315182031014694e-05, "loss": 1.2905, "step": 2555000 }, { "epoch": 1.53, "learning_rate": 4.531308206545413e-05, "loss": 1.3155, "step": 2555500 }, { "epoch": 1.53, "learning_rate": 4.531098629982468e-05, "loss": 1.2903, "step": 2556000 }, { "epoch": 1.53, "learning_rate": 4.5308886334264115e-05, "loss": 1.2703, "step": 2556500 }, { "epoch": 1.53, "learning_rate": 4.5306786368703555e-05, "loss": 1.2927, "step": 2557000 }, { "epoch": 1.53, "learning_rate": 4.530468640314299e-05, "loss": 1.2701, "step": 2557500 }, { "epoch": 1.53, "learning_rate": 4.530258643758242e-05, "loss": 1.289, "step": 2558000 }, { "epoch": 1.53, "learning_rate": 4.530048647202186e-05, "loss": 1.2629, "step": 2558500 }, { "epoch": 1.53, "learning_rate": 4.5298390706392416e-05, "loss": 1.2639, "step": 2559000 }, { "epoch": 1.53, "learning_rate": 4.529629074083185e-05, "loss": 1.3199, "step": 2559500 }, { "epoch": 1.53, "learning_rate": 4.529419077527128e-05, "loss": 1.2753, "step": 2560000 }, { "epoch": 1.54, "learning_rate": 4.529209080971072e-05, "loss": 1.2861, "step": 2560500 }, { "epoch": 1.54, "learning_rate": 4.5289990844150156e-05, "loss": 1.2496, "step": 2561000 }, { "epoch": 1.54, "learning_rate": 4.5287890878589597e-05, "loss": 1.2569, "step": 2561500 }, { "epoch": 1.54, "learning_rate": 4.528579091302902e-05, "loss": 1.3157, "step": 2562000 }, { "epoch": 1.54, "learning_rate": 4.5283695147399584e-05, "loss": 1.2619, "step": 2562500 }, { "epoch": 1.54, "learning_rate": 4.528159518183902e-05, "loss": 1.3042, "step": 2563000 }, { "epoch": 1.54, "learning_rate": 4.527949521627846e-05, "loss": 1.2966, "step": 2563500 }, { "epoch": 1.54, "learning_rate": 4.527739525071789e-05, "loss": 1.3004, "step": 2564000 }, { "epoch": 1.54, "learning_rate": 4.527529528515732e-05, "loss": 1.3088, "step": 2564500 }, { "epoch": 1.54, "learning_rate": 4.527319531959676e-05, "loss": 1.3073, "step": 2565000 }, { "epoch": 1.54, "learning_rate": 4.527109535403619e-05, "loss": 1.2696, "step": 2565500 }, { "epoch": 1.54, "learning_rate": 4.5268995388475625e-05, "loss": 1.294, "step": 2566000 }, { "epoch": 1.54, "learning_rate": 4.5266895422915065e-05, "loss": 1.2843, "step": 2566500 }, { "epoch": 1.54, "learning_rate": 4.52647954573545e-05, "loss": 1.2865, "step": 2567000 }, { "epoch": 1.54, "learning_rate": 4.526269549179393e-05, "loss": 1.2804, "step": 2567500 }, { "epoch": 1.54, "learning_rate": 4.526059972616449e-05, "loss": 1.2992, "step": 2568000 }, { "epoch": 1.54, "learning_rate": 4.5258499760603926e-05, "loss": 1.3119, "step": 2568500 }, { "epoch": 1.54, "learning_rate": 4.525639979504336e-05, "loss": 1.2871, "step": 2569000 }, { "epoch": 1.54, "learning_rate": 4.52542998294828e-05, "loss": 1.2824, "step": 2569500 }, { "epoch": 1.54, "learning_rate": 4.525219986392223e-05, "loss": 1.281, "step": 2570000 }, { "epoch": 1.54, "learning_rate": 4.5250099898361666e-05, "loss": 1.2778, "step": 2570500 }, { "epoch": 1.54, "learning_rate": 4.5247999932801106e-05, "loss": 1.2915, "step": 2571000 }, { "epoch": 1.54, "learning_rate": 4.524589996724054e-05, "loss": 1.2814, "step": 2571500 }, { "epoch": 1.54, "learning_rate": 4.524380000167997e-05, "loss": 1.2775, "step": 2572000 }, { "epoch": 1.54, "learning_rate": 4.524170003611941e-05, "loss": 1.2922, "step": 2572500 }, { "epoch": 1.54, "learning_rate": 4.523960007055884e-05, "loss": 1.302, "step": 2573000 }, { "epoch": 1.54, "learning_rate": 4.5237500104998274e-05, "loss": 1.324, "step": 2573500 }, { "epoch": 1.54, "learning_rate": 4.5235404339368834e-05, "loss": 1.3116, "step": 2574000 }, { "epoch": 1.54, "learning_rate": 4.523330437380827e-05, "loss": 1.2843, "step": 2574500 }, { "epoch": 1.54, "learning_rate": 4.52312044082477e-05, "loss": 1.2948, "step": 2575000 }, { "epoch": 1.54, "learning_rate": 4.5229104442687134e-05, "loss": 1.2783, "step": 2575500 }, { "epoch": 1.54, "learning_rate": 4.5227004477126575e-05, "loss": 1.2708, "step": 2576000 }, { "epoch": 1.54, "learning_rate": 4.522490451156601e-05, "loss": 1.2467, "step": 2576500 }, { "epoch": 1.55, "learning_rate": 4.522280454600544e-05, "loss": 1.2924, "step": 2577000 }, { "epoch": 1.55, "learning_rate": 4.5220708780376e-05, "loss": 1.3003, "step": 2577500 }, { "epoch": 1.55, "learning_rate": 4.521861301474656e-05, "loss": 1.2745, "step": 2578000 }, { "epoch": 1.55, "learning_rate": 4.5216513049185996e-05, "loss": 1.2873, "step": 2578500 }, { "epoch": 1.55, "learning_rate": 4.521441308362543e-05, "loss": 1.2833, "step": 2579000 }, { "epoch": 1.55, "learning_rate": 4.521231311806486e-05, "loss": 1.2841, "step": 2579500 }, { "epoch": 1.55, "learning_rate": 4.5210213152504296e-05, "loss": 1.3092, "step": 2580000 }, { "epoch": 1.55, "learning_rate": 4.520811318694373e-05, "loss": 1.2864, "step": 2580500 }, { "epoch": 1.55, "learning_rate": 4.520601322138317e-05, "loss": 1.2591, "step": 2581000 }, { "epoch": 1.55, "learning_rate": 4.52039132558226e-05, "loss": 1.2806, "step": 2581500 }, { "epoch": 1.55, "learning_rate": 4.520181329026204e-05, "loss": 1.2873, "step": 2582000 }, { "epoch": 1.55, "learning_rate": 4.519971752463259e-05, "loss": 1.2471, "step": 2582500 }, { "epoch": 1.55, "learning_rate": 4.519761755907203e-05, "loss": 1.2981, "step": 2583000 }, { "epoch": 1.55, "learning_rate": 4.5195517593511464e-05, "loss": 1.2825, "step": 2583500 }, { "epoch": 1.55, "learning_rate": 4.51934176279509e-05, "loss": 1.2977, "step": 2584000 }, { "epoch": 1.55, "learning_rate": 4.519131766239034e-05, "loss": 1.2983, "step": 2584500 }, { "epoch": 1.55, "learning_rate": 4.518921769682977e-05, "loss": 1.2695, "step": 2585000 }, { "epoch": 1.55, "learning_rate": 4.5187117731269205e-05, "loss": 1.2862, "step": 2585500 }, { "epoch": 1.55, "learning_rate": 4.5185017765708645e-05, "loss": 1.2896, "step": 2586000 }, { "epoch": 1.55, "learning_rate": 4.51829220000792e-05, "loss": 1.3089, "step": 2586500 }, { "epoch": 1.55, "learning_rate": 4.518082203451863e-05, "loss": 1.2799, "step": 2587000 }, { "epoch": 1.55, "learning_rate": 4.517872206895807e-05, "loss": 1.2714, "step": 2587500 }, { "epoch": 1.55, "learning_rate": 4.5176622103397506e-05, "loss": 1.2882, "step": 2588000 }, { "epoch": 1.55, "learning_rate": 4.517452213783694e-05, "loss": 1.3135, "step": 2588500 }, { "epoch": 1.55, "learning_rate": 4.517242217227638e-05, "loss": 1.2768, "step": 2589000 }, { "epoch": 1.55, "learning_rate": 4.5170322206715806e-05, "loss": 1.2649, "step": 2589500 }, { "epoch": 1.55, "learning_rate": 4.5168230641017486e-05, "loss": 1.2943, "step": 2590000 }, { "epoch": 1.55, "learning_rate": 4.516613067545692e-05, "loss": 1.2447, "step": 2590500 }, { "epoch": 1.55, "learning_rate": 4.516403070989635e-05, "loss": 1.2678, "step": 2591000 }, { "epoch": 1.55, "learning_rate": 4.5161930744335794e-05, "loss": 1.2842, "step": 2591500 }, { "epoch": 1.55, "learning_rate": 4.515983077877523e-05, "loss": 1.2881, "step": 2592000 }, { "epoch": 1.55, "learning_rate": 4.515773081321466e-05, "loss": 1.3077, "step": 2592500 }, { "epoch": 1.55, "learning_rate": 4.51556308476541e-05, "loss": 1.2986, "step": 2593000 }, { "epoch": 1.55, "learning_rate": 4.5153530882093534e-05, "loss": 1.2869, "step": 2593500 }, { "epoch": 1.56, "learning_rate": 4.515143091653297e-05, "loss": 1.2916, "step": 2594000 }, { "epoch": 1.56, "learning_rate": 4.51493309509724e-05, "loss": 1.3003, "step": 2594500 }, { "epoch": 1.56, "learning_rate": 4.5147230985411835e-05, "loss": 1.2656, "step": 2595000 }, { "epoch": 1.56, "learning_rate": 4.5145131019851275e-05, "loss": 1.2334, "step": 2595500 }, { "epoch": 1.56, "learning_rate": 4.5143035254221835e-05, "loss": 1.2851, "step": 2596000 }, { "epoch": 1.56, "learning_rate": 4.514093528866127e-05, "loss": 1.2723, "step": 2596500 }, { "epoch": 1.56, "learning_rate": 4.5138835323100695e-05, "loss": 1.2863, "step": 2597000 }, { "epoch": 1.56, "learning_rate": 4.5136735357540136e-05, "loss": 1.2901, "step": 2597500 }, { "epoch": 1.56, "learning_rate": 4.513463539197957e-05, "loss": 1.3008, "step": 2598000 }, { "epoch": 1.56, "learning_rate": 4.5132535426419e-05, "loss": 1.3031, "step": 2598500 }, { "epoch": 1.56, "learning_rate": 4.513043546085844e-05, "loss": 1.2627, "step": 2599000 }, { "epoch": 1.56, "learning_rate": 4.5128335495297876e-05, "loss": 1.2682, "step": 2599500 }, { "epoch": 1.56, "learning_rate": 4.512623972966843e-05, "loss": 1.3191, "step": 2600000 }, { "epoch": 1.56, "eval_loss": 1.2294633388519287, "eval_runtime": 1103.4979, "eval_samples_per_second": 477.319, "eval_steps_per_second": 79.553, "step": 2600000 }, { "epoch": 1.56, "learning_rate": 4.512413976410786e-05, "loss": 1.2858, "step": 2600500 }, { "epoch": 1.56, "learning_rate": 4.5122039798547303e-05, "loss": 1.2558, "step": 2601000 }, { "epoch": 1.56, "learning_rate": 4.511993983298674e-05, "loss": 1.2744, "step": 2601500 }, { "epoch": 1.56, "learning_rate": 4.511783986742618e-05, "loss": 1.2931, "step": 2602000 }, { "epoch": 1.56, "learning_rate": 4.511573990186561e-05, "loss": 1.2964, "step": 2602500 }, { "epoch": 1.56, "learning_rate": 4.5113639936305044e-05, "loss": 1.2838, "step": 2603000 }, { "epoch": 1.56, "learning_rate": 4.5111539970744484e-05, "loss": 1.3014, "step": 2603500 }, { "epoch": 1.56, "learning_rate": 4.510944420511504e-05, "loss": 1.2888, "step": 2604000 }, { "epoch": 1.56, "learning_rate": 4.510734843948559e-05, "loss": 1.2512, "step": 2604500 }, { "epoch": 1.56, "learning_rate": 4.5105248473925025e-05, "loss": 1.283, "step": 2605000 }, { "epoch": 1.56, "learning_rate": 4.510314850836446e-05, "loss": 1.3043, "step": 2605500 }, { "epoch": 1.56, "learning_rate": 4.51010485428039e-05, "loss": 1.2737, "step": 2606000 }, { "epoch": 1.56, "learning_rate": 4.509895277717445e-05, "loss": 1.277, "step": 2606500 }, { "epoch": 1.56, "learning_rate": 4.5096852811613886e-05, "loss": 1.3011, "step": 2607000 }, { "epoch": 1.56, "learning_rate": 4.509475284605332e-05, "loss": 1.3037, "step": 2607500 }, { "epoch": 1.56, "learning_rate": 4.5092657080423886e-05, "loss": 1.2544, "step": 2608000 }, { "epoch": 1.56, "learning_rate": 4.509055711486332e-05, "loss": 1.2733, "step": 2608500 }, { "epoch": 1.56, "learning_rate": 4.5088457149302746e-05, "loss": 1.2428, "step": 2609000 }, { "epoch": 1.56, "learning_rate": 4.5086357183742187e-05, "loss": 1.2869, "step": 2609500 }, { "epoch": 1.56, "learning_rate": 4.508425721818162e-05, "loss": 1.3026, "step": 2610000 }, { "epoch": 1.57, "learning_rate": 4.5082157252621054e-05, "loss": 1.2608, "step": 2610500 }, { "epoch": 1.57, "learning_rate": 4.5080057287060494e-05, "loss": 1.3032, "step": 2611000 }, { "epoch": 1.57, "learning_rate": 4.507795732149993e-05, "loss": 1.2973, "step": 2611500 }, { "epoch": 1.57, "learning_rate": 4.507585735593936e-05, "loss": 1.3113, "step": 2612000 }, { "epoch": 1.57, "learning_rate": 4.50737573903788e-05, "loss": 1.2821, "step": 2612500 }, { "epoch": 1.57, "learning_rate": 4.5071661624749354e-05, "loss": 1.2639, "step": 2613000 }, { "epoch": 1.57, "learning_rate": 4.506956165918879e-05, "loss": 1.2903, "step": 2613500 }, { "epoch": 1.57, "learning_rate": 4.506746169362822e-05, "loss": 1.2997, "step": 2614000 }, { "epoch": 1.57, "learning_rate": 4.506536172806766e-05, "loss": 1.2777, "step": 2614500 }, { "epoch": 1.57, "learning_rate": 4.5063265962438215e-05, "loss": 1.2692, "step": 2615000 }, { "epoch": 1.57, "learning_rate": 4.506116599687765e-05, "loss": 1.2957, "step": 2615500 }, { "epoch": 1.57, "learning_rate": 4.505906603131709e-05, "loss": 1.2923, "step": 2616000 }, { "epoch": 1.57, "learning_rate": 4.505696606575652e-05, "loss": 1.2605, "step": 2616500 }, { "epoch": 1.57, "learning_rate": 4.5054866100195956e-05, "loss": 1.3037, "step": 2617000 }, { "epoch": 1.57, "learning_rate": 4.5052766134635396e-05, "loss": 1.3016, "step": 2617500 }, { "epoch": 1.57, "learning_rate": 4.505067036900595e-05, "loss": 1.2866, "step": 2618000 }, { "epoch": 1.57, "learning_rate": 4.504857040344538e-05, "loss": 1.278, "step": 2618500 }, { "epoch": 1.57, "learning_rate": 4.5046470437884817e-05, "loss": 1.2771, "step": 2619000 }, { "epoch": 1.57, "learning_rate": 4.504437047232426e-05, "loss": 1.2907, "step": 2619500 }, { "epoch": 1.57, "learning_rate": 4.504227050676369e-05, "loss": 1.2826, "step": 2620000 }, { "epoch": 1.57, "learning_rate": 4.5040170541203124e-05, "loss": 1.3166, "step": 2620500 }, { "epoch": 1.57, "learning_rate": 4.503807477557368e-05, "loss": 1.3075, "step": 2621000 }, { "epoch": 1.57, "learning_rate": 4.503597481001312e-05, "loss": 1.2728, "step": 2621500 }, { "epoch": 1.57, "learning_rate": 4.503387484445255e-05, "loss": 1.312, "step": 2622000 }, { "epoch": 1.57, "learning_rate": 4.5031774878891984e-05, "loss": 1.2831, "step": 2622500 }, { "epoch": 1.57, "learning_rate": 4.5029674913331425e-05, "loss": 1.2878, "step": 2623000 }, { "epoch": 1.57, "learning_rate": 4.502757494777086e-05, "loss": 1.3032, "step": 2623500 }, { "epoch": 1.57, "learning_rate": 4.502547498221029e-05, "loss": 1.2846, "step": 2624000 }, { "epoch": 1.57, "learning_rate": 4.502337921658085e-05, "loss": 1.3033, "step": 2624500 }, { "epoch": 1.57, "learning_rate": 4.5021279251020285e-05, "loss": 1.3147, "step": 2625000 }, { "epoch": 1.57, "learning_rate": 4.501917928545972e-05, "loss": 1.2934, "step": 2625500 }, { "epoch": 1.57, "learning_rate": 4.501707931989915e-05, "loss": 1.282, "step": 2626000 }, { "epoch": 1.57, "learning_rate": 4.5014979354338586e-05, "loss": 1.2801, "step": 2626500 }, { "epoch": 1.57, "learning_rate": 4.5012883588709146e-05, "loss": 1.3081, "step": 2627000 }, { "epoch": 1.58, "learning_rate": 4.50107878230797e-05, "loss": 1.291, "step": 2627500 }, { "epoch": 1.58, "learning_rate": 4.500868785751913e-05, "loss": 1.2816, "step": 2628000 }, { "epoch": 1.58, "learning_rate": 4.500658789195857e-05, "loss": 1.3186, "step": 2628500 }, { "epoch": 1.58, "learning_rate": 4.500448792639801e-05, "loss": 1.2771, "step": 2629000 }, { "epoch": 1.58, "learning_rate": 4.500238796083744e-05, "loss": 1.281, "step": 2629500 }, { "epoch": 1.58, "learning_rate": 4.500028799527688e-05, "loss": 1.2775, "step": 2630000 }, { "epoch": 1.58, "learning_rate": 4.4998188029716314e-05, "loss": 1.2693, "step": 2630500 }, { "epoch": 1.58, "learning_rate": 4.499608806415575e-05, "loss": 1.2985, "step": 2631000 }, { "epoch": 1.58, "learning_rate": 4.499398809859518e-05, "loss": 1.2897, "step": 2631500 }, { "epoch": 1.58, "learning_rate": 4.4991888133034614e-05, "loss": 1.2609, "step": 2632000 }, { "epoch": 1.58, "learning_rate": 4.4989788167474055e-05, "loss": 1.2735, "step": 2632500 }, { "epoch": 1.58, "learning_rate": 4.4987692401844615e-05, "loss": 1.2951, "step": 2633000 }, { "epoch": 1.58, "learning_rate": 4.498559243628404e-05, "loss": 1.2729, "step": 2633500 }, { "epoch": 1.58, "learning_rate": 4.49834966706546e-05, "loss": 1.2747, "step": 2634000 }, { "epoch": 1.58, "learning_rate": 4.4981396705094035e-05, "loss": 1.2496, "step": 2634500 }, { "epoch": 1.58, "learning_rate": 4.497930093946459e-05, "loss": 1.2857, "step": 2635000 }, { "epoch": 1.58, "learning_rate": 4.497720097390403e-05, "loss": 1.252, "step": 2635500 }, { "epoch": 1.58, "learning_rate": 4.497510100834346e-05, "loss": 1.2492, "step": 2636000 }, { "epoch": 1.58, "learning_rate": 4.4973001042782896e-05, "loss": 1.2699, "step": 2636500 }, { "epoch": 1.58, "learning_rate": 4.4970901077222336e-05, "loss": 1.3128, "step": 2637000 }, { "epoch": 1.58, "learning_rate": 4.496880111166177e-05, "loss": 1.3217, "step": 2637500 }, { "epoch": 1.58, "learning_rate": 4.49667011461012e-05, "loss": 1.2634, "step": 2638000 }, { "epoch": 1.58, "learning_rate": 4.496460118054064e-05, "loss": 1.3003, "step": 2638500 }, { "epoch": 1.58, "learning_rate": 4.49625054149112e-05, "loss": 1.2762, "step": 2639000 }, { "epoch": 1.58, "learning_rate": 4.496040544935063e-05, "loss": 1.2938, "step": 2639500 }, { "epoch": 1.58, "learning_rate": 4.495830548379007e-05, "loss": 1.2752, "step": 2640000 }, { "epoch": 1.58, "learning_rate": 4.49562055182295e-05, "loss": 1.2598, "step": 2640500 }, { "epoch": 1.58, "learning_rate": 4.495410555266893e-05, "loss": 1.2733, "step": 2641000 }, { "epoch": 1.58, "learning_rate": 4.495200558710837e-05, "loss": 1.2814, "step": 2641500 }, { "epoch": 1.58, "learning_rate": 4.4949905621547805e-05, "loss": 1.2651, "step": 2642000 }, { "epoch": 1.58, "learning_rate": 4.494780565598724e-05, "loss": 1.2976, "step": 2642500 }, { "epoch": 1.58, "learning_rate": 4.494570569042668e-05, "loss": 1.2862, "step": 2643000 }, { "epoch": 1.58, "learning_rate": 4.494360572486611e-05, "loss": 1.3144, "step": 2643500 }, { "epoch": 1.59, "learning_rate": 4.4941509959236665e-05, "loss": 1.2564, "step": 2644000 }, { "epoch": 1.59, "learning_rate": 4.49394099936761e-05, "loss": 1.2608, "step": 2644500 }, { "epoch": 1.59, "learning_rate": 4.493731002811554e-05, "loss": 1.2598, "step": 2645000 }, { "epoch": 1.59, "learning_rate": 4.493521006255497e-05, "loss": 1.2775, "step": 2645500 }, { "epoch": 1.59, "learning_rate": 4.4933110096994406e-05, "loss": 1.2591, "step": 2646000 }, { "epoch": 1.59, "learning_rate": 4.4931014331364966e-05, "loss": 1.3068, "step": 2646500 }, { "epoch": 1.59, "learning_rate": 4.49289143658044e-05, "loss": 1.3016, "step": 2647000 }, { "epoch": 1.59, "learning_rate": 4.492681440024383e-05, "loss": 1.2448, "step": 2647500 }, { "epoch": 1.59, "learning_rate": 4.4924714434683274e-05, "loss": 1.3006, "step": 2648000 }, { "epoch": 1.59, "learning_rate": 4.492261446912271e-05, "loss": 1.258, "step": 2648500 }, { "epoch": 1.59, "learning_rate": 4.492051450356214e-05, "loss": 1.2787, "step": 2649000 }, { "epoch": 1.59, "learning_rate": 4.4918418737932694e-05, "loss": 1.2741, "step": 2649500 }, { "epoch": 1.59, "learning_rate": 4.4916318772372134e-05, "loss": 1.2789, "step": 2650000 }, { "epoch": 1.59, "learning_rate": 4.491421880681157e-05, "loss": 1.2427, "step": 2650500 }, { "epoch": 1.59, "learning_rate": 4.4912118841251e-05, "loss": 1.2882, "step": 2651000 }, { "epoch": 1.59, "learning_rate": 4.4910023075621555e-05, "loss": 1.2815, "step": 2651500 }, { "epoch": 1.59, "learning_rate": 4.4907923110060995e-05, "loss": 1.262, "step": 2652000 }, { "epoch": 1.59, "learning_rate": 4.490582314450043e-05, "loss": 1.3025, "step": 2652500 }, { "epoch": 1.59, "learning_rate": 4.490372317893986e-05, "loss": 1.2743, "step": 2653000 }, { "epoch": 1.59, "learning_rate": 4.490162741331042e-05, "loss": 1.3134, "step": 2653500 }, { "epoch": 1.59, "learning_rate": 4.4899527447749856e-05, "loss": 1.2732, "step": 2654000 }, { "epoch": 1.59, "learning_rate": 4.489742748218929e-05, "loss": 1.2708, "step": 2654500 }, { "epoch": 1.59, "learning_rate": 4.489532751662873e-05, "loss": 1.2945, "step": 2655000 }, { "epoch": 1.59, "learning_rate": 4.489322755106816e-05, "loss": 1.2918, "step": 2655500 }, { "epoch": 1.59, "learning_rate": 4.4891127585507596e-05, "loss": 1.2728, "step": 2656000 }, { "epoch": 1.59, "learning_rate": 4.488903181987815e-05, "loss": 1.2737, "step": 2656500 }, { "epoch": 1.59, "learning_rate": 4.488693185431759e-05, "loss": 1.2728, "step": 2657000 }, { "epoch": 1.59, "learning_rate": 4.4884831888757024e-05, "loss": 1.2816, "step": 2657500 }, { "epoch": 1.59, "learning_rate": 4.488273612312758e-05, "loss": 1.2608, "step": 2658000 }, { "epoch": 1.59, "learning_rate": 4.488063615756701e-05, "loss": 1.3047, "step": 2658500 }, { "epoch": 1.59, "learning_rate": 4.487853619200645e-05, "loss": 1.2897, "step": 2659000 }, { "epoch": 1.59, "learning_rate": 4.4876436226445884e-05, "loss": 1.2787, "step": 2659500 }, { "epoch": 1.59, "learning_rate": 4.487433626088532e-05, "loss": 1.2716, "step": 2660000 }, { "epoch": 1.6, "learning_rate": 4.487223629532476e-05, "loss": 1.296, "step": 2660500 }, { "epoch": 1.6, "learning_rate": 4.487013632976419e-05, "loss": 1.274, "step": 2661000 }, { "epoch": 1.6, "learning_rate": 4.486803636420363e-05, "loss": 1.2686, "step": 2661500 }, { "epoch": 1.6, "learning_rate": 4.4865936398643065e-05, "loss": 1.2757, "step": 2662000 }, { "epoch": 1.6, "learning_rate": 4.486383643308249e-05, "loss": 1.2919, "step": 2662500 }, { "epoch": 1.6, "learning_rate": 4.486173646752193e-05, "loss": 1.2609, "step": 2663000 }, { "epoch": 1.6, "learning_rate": 4.4859636501961366e-05, "loss": 1.2737, "step": 2663500 }, { "epoch": 1.6, "learning_rate": 4.4857544936263046e-05, "loss": 1.2938, "step": 2664000 }, { "epoch": 1.6, "learning_rate": 4.485544497070248e-05, "loss": 1.2725, "step": 2664500 }, { "epoch": 1.6, "learning_rate": 4.485334500514191e-05, "loss": 1.3034, "step": 2665000 }, { "epoch": 1.6, "learning_rate": 4.4851249239512466e-05, "loss": 1.256, "step": 2665500 }, { "epoch": 1.6, "learning_rate": 4.484914927395191e-05, "loss": 1.2894, "step": 2666000 }, { "epoch": 1.6, "learning_rate": 4.484704930839134e-05, "loss": 1.2734, "step": 2666500 }, { "epoch": 1.6, "learning_rate": 4.484494934283078e-05, "loss": 1.2838, "step": 2667000 }, { "epoch": 1.6, "learning_rate": 4.4842849377270214e-05, "loss": 1.2808, "step": 2667500 }, { "epoch": 1.6, "learning_rate": 4.484074941170965e-05, "loss": 1.2721, "step": 2668000 }, { "epoch": 1.6, "learning_rate": 4.483864944614909e-05, "loss": 1.2806, "step": 2668500 }, { "epoch": 1.6, "learning_rate": 4.483654948058852e-05, "loss": 1.2849, "step": 2669000 }, { "epoch": 1.6, "learning_rate": 4.4834453714959075e-05, "loss": 1.2884, "step": 2669500 }, { "epoch": 1.6, "learning_rate": 4.483235374939851e-05, "loss": 1.28, "step": 2670000 }, { "epoch": 1.6, "learning_rate": 4.483025378383795e-05, "loss": 1.2608, "step": 2670500 }, { "epoch": 1.6, "learning_rate": 4.482815381827738e-05, "loss": 1.2964, "step": 2671000 }, { "epoch": 1.6, "learning_rate": 4.4826053852716815e-05, "loss": 1.2929, "step": 2671500 }, { "epoch": 1.6, "learning_rate": 4.482395808708737e-05, "loss": 1.2757, "step": 2672000 }, { "epoch": 1.6, "learning_rate": 4.482185812152681e-05, "loss": 1.2923, "step": 2672500 }, { "epoch": 1.6, "learning_rate": 4.481975815596624e-05, "loss": 1.2737, "step": 2673000 }, { "epoch": 1.6, "learning_rate": 4.4817658190405676e-05, "loss": 1.2722, "step": 2673500 }, { "epoch": 1.6, "learning_rate": 4.4815558224845116e-05, "loss": 1.2952, "step": 2674000 }, { "epoch": 1.6, "learning_rate": 4.481345825928454e-05, "loss": 1.3059, "step": 2674500 }, { "epoch": 1.6, "learning_rate": 4.481135829372398e-05, "loss": 1.2653, "step": 2675000 }, { "epoch": 1.6, "learning_rate": 4.4809258328163417e-05, "loss": 1.2836, "step": 2675500 }, { "epoch": 1.6, "learning_rate": 4.480716256253398e-05, "loss": 1.298, "step": 2676000 }, { "epoch": 1.6, "learning_rate": 4.480506259697341e-05, "loss": 1.2785, "step": 2676500 }, { "epoch": 1.6, "learning_rate": 4.4802966831343964e-05, "loss": 1.2634, "step": 2677000 }, { "epoch": 1.61, "learning_rate": 4.4800866865783404e-05, "loss": 1.2827, "step": 2677500 }, { "epoch": 1.61, "learning_rate": 4.479877110015396e-05, "loss": 1.2763, "step": 2678000 }, { "epoch": 1.61, "learning_rate": 4.479667113459339e-05, "loss": 1.2864, "step": 2678500 }, { "epoch": 1.61, "learning_rate": 4.4794571169032825e-05, "loss": 1.2704, "step": 2679000 }, { "epoch": 1.61, "learning_rate": 4.4792471203472265e-05, "loss": 1.2752, "step": 2679500 }, { "epoch": 1.61, "learning_rate": 4.47903712379117e-05, "loss": 1.2607, "step": 2680000 }, { "epoch": 1.61, "learning_rate": 4.478827127235113e-05, "loss": 1.3022, "step": 2680500 }, { "epoch": 1.61, "learning_rate": 4.478617130679057e-05, "loss": 1.2871, "step": 2681000 }, { "epoch": 1.61, "learning_rate": 4.4784071341230006e-05, "loss": 1.2727, "step": 2681500 }, { "epoch": 1.61, "learning_rate": 4.478197137566944e-05, "loss": 1.276, "step": 2682000 }, { "epoch": 1.61, "learning_rate": 4.477987141010887e-05, "loss": 1.2688, "step": 2682500 }, { "epoch": 1.61, "learning_rate": 4.477777564447943e-05, "loss": 1.3122, "step": 2683000 }, { "epoch": 1.61, "learning_rate": 4.4775675678918866e-05, "loss": 1.2712, "step": 2683500 }, { "epoch": 1.61, "learning_rate": 4.47735757133583e-05, "loss": 1.2611, "step": 2684000 }, { "epoch": 1.61, "learning_rate": 4.477147574779773e-05, "loss": 1.2871, "step": 2684500 }, { "epoch": 1.61, "learning_rate": 4.476937578223717e-05, "loss": 1.2672, "step": 2685000 }, { "epoch": 1.61, "learning_rate": 4.476727581667661e-05, "loss": 1.2632, "step": 2685500 }, { "epoch": 1.61, "learning_rate": 4.476517585111604e-05, "loss": 1.2822, "step": 2686000 }, { "epoch": 1.61, "learning_rate": 4.4763075885555474e-05, "loss": 1.2799, "step": 2686500 }, { "epoch": 1.61, "learning_rate": 4.476098011992603e-05, "loss": 1.2917, "step": 2687000 }, { "epoch": 1.61, "learning_rate": 4.475888015436547e-05, "loss": 1.2349, "step": 2687500 }, { "epoch": 1.61, "learning_rate": 4.47567801888049e-05, "loss": 1.2994, "step": 2688000 }, { "epoch": 1.61, "learning_rate": 4.4754680223244335e-05, "loss": 1.2875, "step": 2688500 }, { "epoch": 1.61, "learning_rate": 4.4752584457614895e-05, "loss": 1.2925, "step": 2689000 }, { "epoch": 1.61, "learning_rate": 4.475048449205433e-05, "loss": 1.3024, "step": 2689500 }, { "epoch": 1.61, "learning_rate": 4.474838452649376e-05, "loss": 1.2844, "step": 2690000 }, { "epoch": 1.61, "learning_rate": 4.47462845609332e-05, "loss": 1.2586, "step": 2690500 }, { "epoch": 1.61, "learning_rate": 4.474418879530376e-05, "loss": 1.2971, "step": 2691000 }, { "epoch": 1.61, "learning_rate": 4.4742093029674316e-05, "loss": 1.2734, "step": 2691500 }, { "epoch": 1.61, "learning_rate": 4.473999306411375e-05, "loss": 1.2926, "step": 2692000 }, { "epoch": 1.61, "learning_rate": 4.473789309855318e-05, "loss": 1.2958, "step": 2692500 }, { "epoch": 1.61, "learning_rate": 4.473579313299262e-05, "loss": 1.2833, "step": 2693000 }, { "epoch": 1.61, "learning_rate": 4.473369736736318e-05, "loss": 1.2844, "step": 2693500 }, { "epoch": 1.62, "learning_rate": 4.473159740180261e-05, "loss": 1.2831, "step": 2694000 }, { "epoch": 1.62, "learning_rate": 4.4729497436242044e-05, "loss": 1.2842, "step": 2694500 }, { "epoch": 1.62, "learning_rate": 4.4727397470681484e-05, "loss": 1.2921, "step": 2695000 }, { "epoch": 1.62, "learning_rate": 4.472529750512092e-05, "loss": 1.2868, "step": 2695500 }, { "epoch": 1.62, "learning_rate": 4.472319753956035e-05, "loss": 1.3167, "step": 2696000 }, { "epoch": 1.62, "learning_rate": 4.4721097573999784e-05, "loss": 1.2865, "step": 2696500 }, { "epoch": 1.62, "learning_rate": 4.4719001808370345e-05, "loss": 1.2542, "step": 2697000 }, { "epoch": 1.62, "learning_rate": 4.47169060427409e-05, "loss": 1.2872, "step": 2697500 }, { "epoch": 1.62, "learning_rate": 4.471480607718033e-05, "loss": 1.2858, "step": 2698000 }, { "epoch": 1.62, "learning_rate": 4.471270611161977e-05, "loss": 1.2537, "step": 2698500 }, { "epoch": 1.62, "learning_rate": 4.4710606146059205e-05, "loss": 1.2622, "step": 2699000 }, { "epoch": 1.62, "learning_rate": 4.470850618049864e-05, "loss": 1.2992, "step": 2699500 }, { "epoch": 1.62, "learning_rate": 4.470640621493808e-05, "loss": 1.2761, "step": 2700000 }, { "epoch": 1.62, "eval_loss": 1.2280163764953613, "eval_runtime": 1111.7976, "eval_samples_per_second": 473.755, "eval_steps_per_second": 78.96, "step": 2700000 }, { "epoch": 1.62, "learning_rate": 4.470430624937751e-05, "loss": 1.281, "step": 2700500 }, { "epoch": 1.62, "learning_rate": 4.470220628381694e-05, "loss": 1.2715, "step": 2701000 }, { "epoch": 1.62, "learning_rate": 4.47001105181875e-05, "loss": 1.2663, "step": 2701500 }, { "epoch": 1.62, "learning_rate": 4.469801055262694e-05, "loss": 1.2829, "step": 2702000 }, { "epoch": 1.62, "learning_rate": 4.469591058706637e-05, "loss": 1.3043, "step": 2702500 }, { "epoch": 1.62, "learning_rate": 4.4693810621505807e-05, "loss": 1.3026, "step": 2703000 }, { "epoch": 1.62, "learning_rate": 4.469171065594524e-05, "loss": 1.288, "step": 2703500 }, { "epoch": 1.62, "learning_rate": 4.4689610690384674e-05, "loss": 1.2662, "step": 2704000 }, { "epoch": 1.62, "learning_rate": 4.4687514924755234e-05, "loss": 1.2436, "step": 2704500 }, { "epoch": 1.62, "learning_rate": 4.4685414959194674e-05, "loss": 1.2721, "step": 2705000 }, { "epoch": 1.62, "learning_rate": 4.46833149936341e-05, "loss": 1.2826, "step": 2705500 }, { "epoch": 1.62, "learning_rate": 4.4681215028073534e-05, "loss": 1.2634, "step": 2706000 }, { "epoch": 1.62, "learning_rate": 4.4679119262444095e-05, "loss": 1.2816, "step": 2706500 }, { "epoch": 1.62, "learning_rate": 4.4677019296883535e-05, "loss": 1.2865, "step": 2707000 }, { "epoch": 1.62, "learning_rate": 4.467491933132297e-05, "loss": 1.2966, "step": 2707500 }, { "epoch": 1.62, "learning_rate": 4.467282356569352e-05, "loss": 1.3025, "step": 2708000 }, { "epoch": 1.62, "learning_rate": 4.4670723600132955e-05, "loss": 1.2434, "step": 2708500 }, { "epoch": 1.62, "learning_rate": 4.4668623634572396e-05, "loss": 1.2705, "step": 2709000 }, { "epoch": 1.62, "learning_rate": 4.466652366901183e-05, "loss": 1.2885, "step": 2709500 }, { "epoch": 1.62, "learning_rate": 4.466442370345126e-05, "loss": 1.2539, "step": 2710000 }, { "epoch": 1.63, "learning_rate": 4.4662323737890696e-05, "loss": 1.2856, "step": 2710500 }, { "epoch": 1.63, "learning_rate": 4.466022377233013e-05, "loss": 1.2649, "step": 2711000 }, { "epoch": 1.63, "learning_rate": 4.465812380676957e-05, "loss": 1.2792, "step": 2711500 }, { "epoch": 1.63, "learning_rate": 4.4656023841209e-05, "loss": 1.2829, "step": 2712000 }, { "epoch": 1.63, "learning_rate": 4.4653928075579563e-05, "loss": 1.2671, "step": 2712500 }, { "epoch": 1.63, "learning_rate": 4.465182811001899e-05, "loss": 1.2669, "step": 2713000 }, { "epoch": 1.63, "learning_rate": 4.464972814445843e-05, "loss": 1.2659, "step": 2713500 }, { "epoch": 1.63, "learning_rate": 4.4647628178897864e-05, "loss": 1.2569, "step": 2714000 }, { "epoch": 1.63, "learning_rate": 4.4645532413268424e-05, "loss": 1.2823, "step": 2714500 }, { "epoch": 1.63, "learning_rate": 4.464343244770785e-05, "loss": 1.2627, "step": 2715000 }, { "epoch": 1.63, "learning_rate": 4.464133248214729e-05, "loss": 1.2723, "step": 2715500 }, { "epoch": 1.63, "learning_rate": 4.4639232516586725e-05, "loss": 1.282, "step": 2716000 }, { "epoch": 1.63, "learning_rate": 4.463713255102616e-05, "loss": 1.3103, "step": 2716500 }, { "epoch": 1.63, "learning_rate": 4.463503678539672e-05, "loss": 1.2907, "step": 2717000 }, { "epoch": 1.63, "learning_rate": 4.463293681983615e-05, "loss": 1.2643, "step": 2717500 }, { "epoch": 1.63, "learning_rate": 4.4630836854275585e-05, "loss": 1.299, "step": 2718000 }, { "epoch": 1.63, "learning_rate": 4.4628736888715026e-05, "loss": 1.2953, "step": 2718500 }, { "epoch": 1.63, "learning_rate": 4.462663692315446e-05, "loss": 1.2915, "step": 2719000 }, { "epoch": 1.63, "learning_rate": 4.462453695759389e-05, "loss": 1.2624, "step": 2719500 }, { "epoch": 1.63, "learning_rate": 4.462243699203333e-05, "loss": 1.2789, "step": 2720000 }, { "epoch": 1.63, "learning_rate": 4.4620337026472766e-05, "loss": 1.283, "step": 2720500 }, { "epoch": 1.63, "learning_rate": 4.46182370609122e-05, "loss": 1.2595, "step": 2721000 }, { "epoch": 1.63, "learning_rate": 4.461614129528275e-05, "loss": 1.2692, "step": 2721500 }, { "epoch": 1.63, "learning_rate": 4.4614041329722193e-05, "loss": 1.2892, "step": 2722000 }, { "epoch": 1.63, "learning_rate": 4.461194556409275e-05, "loss": 1.2747, "step": 2722500 }, { "epoch": 1.63, "learning_rate": 4.460984559853218e-05, "loss": 1.2646, "step": 2723000 }, { "epoch": 1.63, "learning_rate": 4.4607745632971614e-05, "loss": 1.2666, "step": 2723500 }, { "epoch": 1.63, "learning_rate": 4.4605645667411054e-05, "loss": 1.2622, "step": 2724000 }, { "epoch": 1.63, "learning_rate": 4.460354570185049e-05, "loss": 1.2708, "step": 2724500 }, { "epoch": 1.63, "learning_rate": 4.460144993622104e-05, "loss": 1.2832, "step": 2725000 }, { "epoch": 1.63, "learning_rate": 4.459934997066048e-05, "loss": 1.2835, "step": 2725500 }, { "epoch": 1.63, "learning_rate": 4.4597250005099915e-05, "loss": 1.2776, "step": 2726000 }, { "epoch": 1.63, "learning_rate": 4.459515003953935e-05, "loss": 1.2853, "step": 2726500 }, { "epoch": 1.63, "learning_rate": 4.459305007397879e-05, "loss": 1.3128, "step": 2727000 }, { "epoch": 1.64, "learning_rate": 4.459095010841822e-05, "loss": 1.252, "step": 2727500 }, { "epoch": 1.64, "learning_rate": 4.4588850142857655e-05, "loss": 1.2898, "step": 2728000 }, { "epoch": 1.64, "learning_rate": 4.458675437722821e-05, "loss": 1.2854, "step": 2728500 }, { "epoch": 1.64, "learning_rate": 4.458465441166765e-05, "loss": 1.2467, "step": 2729000 }, { "epoch": 1.64, "learning_rate": 4.458255444610708e-05, "loss": 1.2758, "step": 2729500 }, { "epoch": 1.64, "learning_rate": 4.4580454480546516e-05, "loss": 1.294, "step": 2730000 }, { "epoch": 1.64, "learning_rate": 4.4578354514985956e-05, "loss": 1.2922, "step": 2730500 }, { "epoch": 1.64, "learning_rate": 4.457625454942539e-05, "loss": 1.2706, "step": 2731000 }, { "epoch": 1.64, "learning_rate": 4.457415458386482e-05, "loss": 1.2872, "step": 2731500 }, { "epoch": 1.64, "learning_rate": 4.457205881823538e-05, "loss": 1.2639, "step": 2732000 }, { "epoch": 1.64, "learning_rate": 4.456995885267482e-05, "loss": 1.2723, "step": 2732500 }, { "epoch": 1.64, "learning_rate": 4.456785888711425e-05, "loss": 1.2789, "step": 2733000 }, { "epoch": 1.64, "learning_rate": 4.456575892155369e-05, "loss": 1.2482, "step": 2733500 }, { "epoch": 1.64, "learning_rate": 4.4563663155924244e-05, "loss": 1.2766, "step": 2734000 }, { "epoch": 1.64, "learning_rate": 4.45615673902948e-05, "loss": 1.2582, "step": 2734500 }, { "epoch": 1.64, "learning_rate": 4.455946742473423e-05, "loss": 1.2885, "step": 2735000 }, { "epoch": 1.64, "learning_rate": 4.4557367459173665e-05, "loss": 1.2748, "step": 2735500 }, { "epoch": 1.64, "learning_rate": 4.4555267493613105e-05, "loss": 1.2998, "step": 2736000 }, { "epoch": 1.64, "learning_rate": 4.455316752805254e-05, "loss": 1.309, "step": 2736500 }, { "epoch": 1.64, "learning_rate": 4.455106756249197e-05, "loss": 1.2683, "step": 2737000 }, { "epoch": 1.64, "learning_rate": 4.454896759693141e-05, "loss": 1.2889, "step": 2737500 }, { "epoch": 1.64, "learning_rate": 4.4546867631370846e-05, "loss": 1.2577, "step": 2738000 }, { "epoch": 1.64, "learning_rate": 4.454476766581028e-05, "loss": 1.2825, "step": 2738500 }, { "epoch": 1.64, "learning_rate": 4.454266770024972e-05, "loss": 1.2675, "step": 2739000 }, { "epoch": 1.64, "learning_rate": 4.454056773468915e-05, "loss": 1.2951, "step": 2739500 }, { "epoch": 1.64, "learning_rate": 4.4538467769128586e-05, "loss": 1.2578, "step": 2740000 }, { "epoch": 1.64, "learning_rate": 4.453637200349915e-05, "loss": 1.2886, "step": 2740500 }, { "epoch": 1.64, "learning_rate": 4.453427203793858e-05, "loss": 1.2841, "step": 2741000 }, { "epoch": 1.64, "learning_rate": 4.4532176272309134e-05, "loss": 1.2679, "step": 2741500 }, { "epoch": 1.64, "learning_rate": 4.453007630674857e-05, "loss": 1.2747, "step": 2742000 }, { "epoch": 1.64, "learning_rate": 4.452798054111912e-05, "loss": 1.2718, "step": 2742500 }, { "epoch": 1.64, "learning_rate": 4.452588057555856e-05, "loss": 1.2925, "step": 2743000 }, { "epoch": 1.64, "learning_rate": 4.4523780609997994e-05, "loss": 1.2809, "step": 2743500 }, { "epoch": 1.65, "learning_rate": 4.452168064443743e-05, "loss": 1.2814, "step": 2744000 }, { "epoch": 1.65, "learning_rate": 4.451958067887687e-05, "loss": 1.2981, "step": 2744500 }, { "epoch": 1.65, "learning_rate": 4.45174807133163e-05, "loss": 1.2569, "step": 2745000 }, { "epoch": 1.65, "learning_rate": 4.4515380747755735e-05, "loss": 1.2387, "step": 2745500 }, { "epoch": 1.65, "learning_rate": 4.4513280782195175e-05, "loss": 1.2727, "step": 2746000 }, { "epoch": 1.65, "learning_rate": 4.451118081663461e-05, "loss": 1.3173, "step": 2746500 }, { "epoch": 1.65, "learning_rate": 4.450908085107404e-05, "loss": 1.2991, "step": 2747000 }, { "epoch": 1.65, "learning_rate": 4.45069850854446e-05, "loss": 1.2762, "step": 2747500 }, { "epoch": 1.65, "learning_rate": 4.4504885119884036e-05, "loss": 1.2756, "step": 2748000 }, { "epoch": 1.65, "learning_rate": 4.450278515432347e-05, "loss": 1.2669, "step": 2748500 }, { "epoch": 1.65, "learning_rate": 4.450068518876291e-05, "loss": 1.2771, "step": 2749000 }, { "epoch": 1.65, "learning_rate": 4.4498585223202336e-05, "loss": 1.3009, "step": 2749500 }, { "epoch": 1.65, "learning_rate": 4.449648525764177e-05, "loss": 1.283, "step": 2750000 }, { "epoch": 1.65, "learning_rate": 4.449438529208121e-05, "loss": 1.2454, "step": 2750500 }, { "epoch": 1.65, "learning_rate": 4.449228952645177e-05, "loss": 1.2566, "step": 2751000 }, { "epoch": 1.65, "learning_rate": 4.44901895608912e-05, "loss": 1.3038, "step": 2751500 }, { "epoch": 1.65, "learning_rate": 4.448808959533063e-05, "loss": 1.3018, "step": 2752000 }, { "epoch": 1.65, "learning_rate": 4.448598962977007e-05, "loss": 1.2697, "step": 2752500 }, { "epoch": 1.65, "learning_rate": 4.448389386414063e-05, "loss": 1.2759, "step": 2753000 }, { "epoch": 1.65, "learning_rate": 4.4481793898580065e-05, "loss": 1.2569, "step": 2753500 }, { "epoch": 1.65, "learning_rate": 4.44796939330195e-05, "loss": 1.2954, "step": 2754000 }, { "epoch": 1.65, "learning_rate": 4.447759396745893e-05, "loss": 1.2901, "step": 2754500 }, { "epoch": 1.65, "learning_rate": 4.4475494001898365e-05, "loss": 1.2459, "step": 2755000 }, { "epoch": 1.65, "learning_rate": 4.4473394036337805e-05, "loss": 1.2932, "step": 2755500 }, { "epoch": 1.65, "learning_rate": 4.447129407077724e-05, "loss": 1.3017, "step": 2756000 }, { "epoch": 1.65, "learning_rate": 4.446919410521667e-05, "loss": 1.2887, "step": 2756500 }, { "epoch": 1.65, "learning_rate": 4.446710253951835e-05, "loss": 1.2613, "step": 2757000 }, { "epoch": 1.65, "learning_rate": 4.4465006773888906e-05, "loss": 1.2699, "step": 2757500 }, { "epoch": 1.65, "learning_rate": 4.446290680832834e-05, "loss": 1.2701, "step": 2758000 }, { "epoch": 1.65, "learning_rate": 4.446080684276778e-05, "loss": 1.2729, "step": 2758500 }, { "epoch": 1.65, "learning_rate": 4.4458706877207213e-05, "loss": 1.2623, "step": 2759000 }, { "epoch": 1.65, "learning_rate": 4.445660691164665e-05, "loss": 1.2595, "step": 2759500 }, { "epoch": 1.65, "learning_rate": 4.445450694608609e-05, "loss": 1.2573, "step": 2760000 }, { "epoch": 1.66, "learning_rate": 4.445240698052552e-05, "loss": 1.2304, "step": 2760500 }, { "epoch": 1.66, "learning_rate": 4.4450307014964954e-05, "loss": 1.2761, "step": 2761000 }, { "epoch": 1.66, "learning_rate": 4.444820704940439e-05, "loss": 1.2885, "step": 2761500 }, { "epoch": 1.66, "learning_rate": 4.444611128377495e-05, "loss": 1.2958, "step": 2762000 }, { "epoch": 1.66, "learning_rate": 4.444401131821438e-05, "loss": 1.2791, "step": 2762500 }, { "epoch": 1.66, "learning_rate": 4.444191135265382e-05, "loss": 1.2702, "step": 2763000 }, { "epoch": 1.66, "learning_rate": 4.443981138709325e-05, "loss": 1.2463, "step": 2763500 }, { "epoch": 1.66, "learning_rate": 4.443771142153268e-05, "loss": 1.2878, "step": 2764000 }, { "epoch": 1.66, "learning_rate": 4.443561145597212e-05, "loss": 1.3098, "step": 2764500 }, { "epoch": 1.66, "learning_rate": 4.4433511490411555e-05, "loss": 1.295, "step": 2765000 }, { "epoch": 1.66, "learning_rate": 4.443141152485099e-05, "loss": 1.2844, "step": 2765500 }, { "epoch": 1.66, "learning_rate": 4.442931575922154e-05, "loss": 1.2834, "step": 2766000 }, { "epoch": 1.66, "learning_rate": 4.442721579366098e-05, "loss": 1.3025, "step": 2766500 }, { "epoch": 1.66, "learning_rate": 4.4425115828100416e-05, "loss": 1.2601, "step": 2767000 }, { "epoch": 1.66, "learning_rate": 4.442301586253985e-05, "loss": 1.2807, "step": 2767500 }, { "epoch": 1.66, "learning_rate": 4.442092009691041e-05, "loss": 1.2902, "step": 2768000 }, { "epoch": 1.66, "learning_rate": 4.441882013134984e-05, "loss": 1.2739, "step": 2768500 }, { "epoch": 1.66, "learning_rate": 4.441672016578928e-05, "loss": 1.249, "step": 2769000 }, { "epoch": 1.66, "learning_rate": 4.441462020022872e-05, "loss": 1.2955, "step": 2769500 }, { "epoch": 1.66, "learning_rate": 4.441252443459928e-05, "loss": 1.2952, "step": 2770000 }, { "epoch": 1.66, "learning_rate": 4.4410424469038704e-05, "loss": 1.2536, "step": 2770500 }, { "epoch": 1.66, "learning_rate": 4.440832450347814e-05, "loss": 1.284, "step": 2771000 }, { "epoch": 1.66, "learning_rate": 4.440622453791758e-05, "loss": 1.2813, "step": 2771500 }, { "epoch": 1.66, "learning_rate": 4.440412457235701e-05, "loss": 1.2933, "step": 2772000 }, { "epoch": 1.66, "learning_rate": 4.4402024606796445e-05, "loss": 1.2654, "step": 2772500 }, { "epoch": 1.66, "learning_rate": 4.4399924641235885e-05, "loss": 1.2739, "step": 2773000 }, { "epoch": 1.66, "learning_rate": 4.439782887560644e-05, "loss": 1.2778, "step": 2773500 }, { "epoch": 1.66, "learning_rate": 4.439572891004587e-05, "loss": 1.2719, "step": 2774000 }, { "epoch": 1.66, "learning_rate": 4.4393628944485305e-05, "loss": 1.3077, "step": 2774500 }, { "epoch": 1.66, "learning_rate": 4.4391528978924746e-05, "loss": 1.2711, "step": 2775000 }, { "epoch": 1.66, "learning_rate": 4.438942901336418e-05, "loss": 1.2713, "step": 2775500 }, { "epoch": 1.66, "learning_rate": 4.438732904780361e-05, "loss": 1.2693, "step": 2776000 }, { "epoch": 1.66, "learning_rate": 4.438522908224305e-05, "loss": 1.2937, "step": 2776500 }, { "epoch": 1.66, "learning_rate": 4.4383129116682486e-05, "loss": 1.265, "step": 2777000 }, { "epoch": 1.67, "learning_rate": 4.438103755098417e-05, "loss": 1.2554, "step": 2777500 }, { "epoch": 1.67, "learning_rate": 4.4378937585423593e-05, "loss": 1.2583, "step": 2778000 }, { "epoch": 1.67, "learning_rate": 4.4376837619863034e-05, "loss": 1.3052, "step": 2778500 }, { "epoch": 1.67, "learning_rate": 4.437473765430247e-05, "loss": 1.29, "step": 2779000 }, { "epoch": 1.67, "learning_rate": 4.43726376887419e-05, "loss": 1.2633, "step": 2779500 }, { "epoch": 1.67, "learning_rate": 4.437054192311246e-05, "loss": 1.2774, "step": 2780000 }, { "epoch": 1.67, "learning_rate": 4.4368441957551894e-05, "loss": 1.2989, "step": 2780500 }, { "epoch": 1.67, "learning_rate": 4.436634199199133e-05, "loss": 1.2444, "step": 2781000 }, { "epoch": 1.67, "learning_rate": 4.436424202643076e-05, "loss": 1.2488, "step": 2781500 }, { "epoch": 1.67, "learning_rate": 4.436214626080132e-05, "loss": 1.2693, "step": 2782000 }, { "epoch": 1.67, "learning_rate": 4.4360046295240755e-05, "loss": 1.2537, "step": 2782500 }, { "epoch": 1.67, "learning_rate": 4.435794632968019e-05, "loss": 1.2616, "step": 2783000 }, { "epoch": 1.67, "learning_rate": 4.435584636411963e-05, "loss": 1.2884, "step": 2783500 }, { "epoch": 1.67, "learning_rate": 4.435374639855906e-05, "loss": 1.2757, "step": 2784000 }, { "epoch": 1.67, "learning_rate": 4.435165063292962e-05, "loss": 1.2979, "step": 2784500 }, { "epoch": 1.67, "learning_rate": 4.434955066736905e-05, "loss": 1.2576, "step": 2785000 }, { "epoch": 1.67, "learning_rate": 4.434745070180849e-05, "loss": 1.3013, "step": 2785500 }, { "epoch": 1.67, "learning_rate": 4.434535073624792e-05, "loss": 1.2771, "step": 2786000 }, { "epoch": 1.67, "learning_rate": 4.4343250770687356e-05, "loss": 1.2668, "step": 2786500 }, { "epoch": 1.67, "learning_rate": 4.43411508051268e-05, "loss": 1.2539, "step": 2787000 }, { "epoch": 1.67, "learning_rate": 4.433905503949735e-05, "loss": 1.3054, "step": 2787500 }, { "epoch": 1.67, "learning_rate": 4.4336955073936784e-05, "loss": 1.2678, "step": 2788000 }, { "epoch": 1.67, "learning_rate": 4.433485510837622e-05, "loss": 1.2617, "step": 2788500 }, { "epoch": 1.67, "learning_rate": 4.433275514281566e-05, "loss": 1.248, "step": 2789000 }, { "epoch": 1.67, "learning_rate": 4.433065937718622e-05, "loss": 1.268, "step": 2789500 }, { "epoch": 1.67, "learning_rate": 4.432856361155677e-05, "loss": 1.2703, "step": 2790000 }, { "epoch": 1.67, "learning_rate": 4.4326463645996205e-05, "loss": 1.2807, "step": 2790500 }, { "epoch": 1.67, "learning_rate": 4.4324363680435645e-05, "loss": 1.2749, "step": 2791000 }, { "epoch": 1.67, "learning_rate": 4.432226371487508e-05, "loss": 1.262, "step": 2791500 }, { "epoch": 1.67, "learning_rate": 4.4320163749314505e-05, "loss": 1.2939, "step": 2792000 }, { "epoch": 1.67, "learning_rate": 4.4318063783753945e-05, "loss": 1.2863, "step": 2792500 }, { "epoch": 1.67, "learning_rate": 4.431596381819338e-05, "loss": 1.2631, "step": 2793000 }, { "epoch": 1.67, "learning_rate": 4.431386385263281e-05, "loss": 1.292, "step": 2793500 }, { "epoch": 1.68, "learning_rate": 4.431176388707225e-05, "loss": 1.2809, "step": 2794000 }, { "epoch": 1.68, "learning_rate": 4.4309663921511686e-05, "loss": 1.2875, "step": 2794500 }, { "epoch": 1.68, "learning_rate": 4.430756815588224e-05, "loss": 1.2951, "step": 2795000 }, { "epoch": 1.68, "learning_rate": 4.430546819032167e-05, "loss": 1.2674, "step": 2795500 }, { "epoch": 1.68, "learning_rate": 4.430336822476111e-05, "loss": 1.2705, "step": 2796000 }, { "epoch": 1.68, "learning_rate": 4.430126825920055e-05, "loss": 1.2844, "step": 2796500 }, { "epoch": 1.68, "learning_rate": 4.42991724935711e-05, "loss": 1.2606, "step": 2797000 }, { "epoch": 1.68, "learning_rate": 4.429707252801054e-05, "loss": 1.2584, "step": 2797500 }, { "epoch": 1.68, "learning_rate": 4.4294972562449974e-05, "loss": 1.2507, "step": 2798000 }, { "epoch": 1.68, "learning_rate": 4.429287259688941e-05, "loss": 1.2705, "step": 2798500 }, { "epoch": 1.68, "learning_rate": 4.429077263132885e-05, "loss": 1.2818, "step": 2799000 }, { "epoch": 1.68, "learning_rate": 4.42886768656994e-05, "loss": 1.2817, "step": 2799500 }, { "epoch": 1.68, "learning_rate": 4.4286576900138835e-05, "loss": 1.2715, "step": 2800000 }, { "epoch": 1.68, "eval_loss": 1.2226179838180542, "eval_runtime": 1103.9672, "eval_samples_per_second": 477.116, "eval_steps_per_second": 79.52, "step": 2800000 }, { "epoch": 1.68, "learning_rate": 4.428447693457827e-05, "loss": 1.294, "step": 2800500 }, { "epoch": 1.68, "learning_rate": 4.428237696901771e-05, "loss": 1.2937, "step": 2801000 }, { "epoch": 1.68, "learning_rate": 4.428028120338826e-05, "loss": 1.3009, "step": 2801500 }, { "epoch": 1.68, "learning_rate": 4.4278181237827695e-05, "loss": 1.2765, "step": 2802000 }, { "epoch": 1.68, "learning_rate": 4.427608127226713e-05, "loss": 1.2804, "step": 2802500 }, { "epoch": 1.68, "learning_rate": 4.427398130670657e-05, "loss": 1.2421, "step": 2803000 }, { "epoch": 1.68, "learning_rate": 4.427188554107713e-05, "loss": 1.2775, "step": 2803500 }, { "epoch": 1.68, "learning_rate": 4.4269785575516556e-05, "loss": 1.2546, "step": 2804000 }, { "epoch": 1.68, "learning_rate": 4.4267685609955996e-05, "loss": 1.2518, "step": 2804500 }, { "epoch": 1.68, "learning_rate": 4.426558564439543e-05, "loss": 1.2727, "step": 2805000 }, { "epoch": 1.68, "learning_rate": 4.426348567883486e-05, "loss": 1.2878, "step": 2805500 }, { "epoch": 1.68, "learning_rate": 4.4261385713274304e-05, "loss": 1.2705, "step": 2806000 }, { "epoch": 1.68, "learning_rate": 4.425928994764486e-05, "loss": 1.2553, "step": 2806500 }, { "epoch": 1.68, "learning_rate": 4.425718998208429e-05, "loss": 1.2811, "step": 2807000 }, { "epoch": 1.68, "learning_rate": 4.4255090016523724e-05, "loss": 1.2443, "step": 2807500 }, { "epoch": 1.68, "learning_rate": 4.4252990050963164e-05, "loss": 1.296, "step": 2808000 }, { "epoch": 1.68, "learning_rate": 4.4250894285333725e-05, "loss": 1.2692, "step": 2808500 }, { "epoch": 1.68, "learning_rate": 4.424879851970428e-05, "loss": 1.2825, "step": 2809000 }, { "epoch": 1.68, "learning_rate": 4.424669855414371e-05, "loss": 1.3024, "step": 2809500 }, { "epoch": 1.68, "learning_rate": 4.4244598588583145e-05, "loss": 1.2659, "step": 2810000 }, { "epoch": 1.69, "learning_rate": 4.4242498623022585e-05, "loss": 1.274, "step": 2810500 }, { "epoch": 1.69, "learning_rate": 4.424039865746202e-05, "loss": 1.2771, "step": 2811000 }, { "epoch": 1.69, "learning_rate": 4.423829869190145e-05, "loss": 1.2545, "step": 2811500 }, { "epoch": 1.69, "learning_rate": 4.4236198726340886e-05, "loss": 1.3149, "step": 2812000 }, { "epoch": 1.69, "learning_rate": 4.4234102960711446e-05, "loss": 1.2534, "step": 2812500 }, { "epoch": 1.69, "learning_rate": 4.423200299515088e-05, "loss": 1.2642, "step": 2813000 }, { "epoch": 1.69, "learning_rate": 4.422990722952143e-05, "loss": 1.2629, "step": 2813500 }, { "epoch": 1.69, "learning_rate": 4.4227807263960873e-05, "loss": 1.2385, "step": 2814000 }, { "epoch": 1.69, "learning_rate": 4.422570729840031e-05, "loss": 1.2798, "step": 2814500 }, { "epoch": 1.69, "learning_rate": 4.422361153277086e-05, "loss": 1.2952, "step": 2815000 }, { "epoch": 1.69, "learning_rate": 4.4221511567210294e-05, "loss": 1.261, "step": 2815500 }, { "epoch": 1.69, "learning_rate": 4.4219411601649734e-05, "loss": 1.2523, "step": 2816000 }, { "epoch": 1.69, "learning_rate": 4.421731163608917e-05, "loss": 1.2845, "step": 2816500 }, { "epoch": 1.69, "learning_rate": 4.421521167052861e-05, "loss": 1.2959, "step": 2817000 }, { "epoch": 1.69, "learning_rate": 4.421311170496804e-05, "loss": 1.2691, "step": 2817500 }, { "epoch": 1.69, "learning_rate": 4.4211011739407475e-05, "loss": 1.2564, "step": 2818000 }, { "epoch": 1.69, "learning_rate": 4.420891177384691e-05, "loss": 1.2879, "step": 2818500 }, { "epoch": 1.69, "learning_rate": 4.420681180828634e-05, "loss": 1.2757, "step": 2819000 }, { "epoch": 1.69, "learning_rate": 4.4204711842725775e-05, "loss": 1.2955, "step": 2819500 }, { "epoch": 1.69, "learning_rate": 4.4202611877165215e-05, "loss": 1.2819, "step": 2820000 }, { "epoch": 1.69, "learning_rate": 4.420051191160465e-05, "loss": 1.264, "step": 2820500 }, { "epoch": 1.69, "learning_rate": 4.41984161459752e-05, "loss": 1.2737, "step": 2821000 }, { "epoch": 1.69, "learning_rate": 4.4196316180414636e-05, "loss": 1.2553, "step": 2821500 }, { "epoch": 1.69, "learning_rate": 4.4194216214854076e-05, "loss": 1.2577, "step": 2822000 }, { "epoch": 1.69, "learning_rate": 4.419211624929351e-05, "loss": 1.2754, "step": 2822500 }, { "epoch": 1.69, "learning_rate": 4.419001628373294e-05, "loss": 1.2544, "step": 2823000 }, { "epoch": 1.69, "learning_rate": 4.41879205181035e-05, "loss": 1.279, "step": 2823500 }, { "epoch": 1.69, "learning_rate": 4.418582055254294e-05, "loss": 1.2617, "step": 2824000 }, { "epoch": 1.69, "learning_rate": 4.418372058698237e-05, "loss": 1.2698, "step": 2824500 }, { "epoch": 1.69, "learning_rate": 4.418162062142181e-05, "loss": 1.2622, "step": 2825000 }, { "epoch": 1.69, "learning_rate": 4.4179520655861244e-05, "loss": 1.2624, "step": 2825500 }, { "epoch": 1.69, "learning_rate": 4.417742069030068e-05, "loss": 1.2539, "step": 2826000 }, { "epoch": 1.69, "learning_rate": 4.417532072474012e-05, "loss": 1.2666, "step": 2826500 }, { "epoch": 1.69, "learning_rate": 4.417322075917955e-05, "loss": 1.3247, "step": 2827000 }, { "epoch": 1.7, "learning_rate": 4.4171120793618985e-05, "loss": 1.3033, "step": 2827500 }, { "epoch": 1.7, "learning_rate": 4.416902502798954e-05, "loss": 1.2853, "step": 2828000 }, { "epoch": 1.7, "learning_rate": 4.416692506242898e-05, "loss": 1.2842, "step": 2828500 }, { "epoch": 1.7, "learning_rate": 4.416482509686841e-05, "loss": 1.2583, "step": 2829000 }, { "epoch": 1.7, "learning_rate": 4.4162725131307845e-05, "loss": 1.2951, "step": 2829500 }, { "epoch": 1.7, "learning_rate": 4.41606293656784e-05, "loss": 1.2889, "step": 2830000 }, { "epoch": 1.7, "learning_rate": 4.415852940011784e-05, "loss": 1.296, "step": 2830500 }, { "epoch": 1.7, "learning_rate": 4.415642943455727e-05, "loss": 1.2616, "step": 2831000 }, { "epoch": 1.7, "learning_rate": 4.4154329468996706e-05, "loss": 1.2624, "step": 2831500 }, { "epoch": 1.7, "learning_rate": 4.4152229503436146e-05, "loss": 1.2785, "step": 2832000 }, { "epoch": 1.7, "learning_rate": 4.415012953787558e-05, "loss": 1.2784, "step": 2832500 }, { "epoch": 1.7, "learning_rate": 4.414803377224613e-05, "loss": 1.2761, "step": 2833000 }, { "epoch": 1.7, "learning_rate": 4.4145933806685574e-05, "loss": 1.2756, "step": 2833500 }, { "epoch": 1.7, "learning_rate": 4.414383384112501e-05, "loss": 1.2913, "step": 2834000 }, { "epoch": 1.7, "learning_rate": 4.414173387556444e-05, "loss": 1.2844, "step": 2834500 }, { "epoch": 1.7, "learning_rate": 4.4139638109934994e-05, "loss": 1.2809, "step": 2835000 }, { "epoch": 1.7, "learning_rate": 4.413754234430555e-05, "loss": 1.2538, "step": 2835500 }, { "epoch": 1.7, "learning_rate": 4.413544237874499e-05, "loss": 1.2484, "step": 2836000 }, { "epoch": 1.7, "learning_rate": 4.413334241318442e-05, "loss": 1.2803, "step": 2836500 }, { "epoch": 1.7, "learning_rate": 4.4131242447623855e-05, "loss": 1.2876, "step": 2837000 }, { "epoch": 1.7, "learning_rate": 4.4129142482063295e-05, "loss": 1.2538, "step": 2837500 }, { "epoch": 1.7, "learning_rate": 4.412704251650273e-05, "loss": 1.2659, "step": 2838000 }, { "epoch": 1.7, "learning_rate": 4.412494255094216e-05, "loss": 1.3031, "step": 2838500 }, { "epoch": 1.7, "learning_rate": 4.41228425853816e-05, "loss": 1.285, "step": 2839000 }, { "epoch": 1.7, "learning_rate": 4.4120742619821036e-05, "loss": 1.2525, "step": 2839500 }, { "epoch": 1.7, "learning_rate": 4.411864265426047e-05, "loss": 1.2602, "step": 2840000 }, { "epoch": 1.7, "learning_rate": 4.41165426886999e-05, "loss": 1.297, "step": 2840500 }, { "epoch": 1.7, "learning_rate": 4.4114442723139336e-05, "loss": 1.2712, "step": 2841000 }, { "epoch": 1.7, "learning_rate": 4.4112346957509896e-05, "loss": 1.2449, "step": 2841500 }, { "epoch": 1.7, "learning_rate": 4.411025119188045e-05, "loss": 1.2277, "step": 2842000 }, { "epoch": 1.7, "learning_rate": 4.4108155426251003e-05, "loss": 1.2735, "step": 2842500 }, { "epoch": 1.7, "learning_rate": 4.4106055460690444e-05, "loss": 1.3002, "step": 2843000 }, { "epoch": 1.7, "learning_rate": 4.410395549512988e-05, "loss": 1.2743, "step": 2843500 }, { "epoch": 1.71, "learning_rate": 4.410185552956931e-05, "loss": 1.2622, "step": 2844000 }, { "epoch": 1.71, "learning_rate": 4.409975556400875e-05, "loss": 1.2638, "step": 2844500 }, { "epoch": 1.71, "learning_rate": 4.4097655598448184e-05, "loss": 1.2857, "step": 2845000 }, { "epoch": 1.71, "learning_rate": 4.409555563288762e-05, "loss": 1.2835, "step": 2845500 }, { "epoch": 1.71, "learning_rate": 4.409345566732706e-05, "loss": 1.2801, "step": 2846000 }, { "epoch": 1.71, "learning_rate": 4.409135570176649e-05, "loss": 1.2927, "step": 2846500 }, { "epoch": 1.71, "learning_rate": 4.4089255736205925e-05, "loss": 1.3046, "step": 2847000 }, { "epoch": 1.71, "learning_rate": 4.4087159970576485e-05, "loss": 1.2726, "step": 2847500 }, { "epoch": 1.71, "learning_rate": 4.408506000501592e-05, "loss": 1.2844, "step": 2848000 }, { "epoch": 1.71, "learning_rate": 4.408296003945535e-05, "loss": 1.2841, "step": 2848500 }, { "epoch": 1.71, "learning_rate": 4.408086007389479e-05, "loss": 1.283, "step": 2849000 }, { "epoch": 1.71, "learning_rate": 4.4078760108334226e-05, "loss": 1.2581, "step": 2849500 }, { "epoch": 1.71, "learning_rate": 4.407666434270478e-05, "loss": 1.287, "step": 2850000 }, { "epoch": 1.71, "learning_rate": 4.407456437714421e-05, "loss": 1.276, "step": 2850500 }, { "epoch": 1.71, "learning_rate": 4.407246441158365e-05, "loss": 1.2657, "step": 2851000 }, { "epoch": 1.71, "learning_rate": 4.407036444602309e-05, "loss": 1.274, "step": 2851500 }, { "epoch": 1.71, "learning_rate": 4.406826868039364e-05, "loss": 1.2848, "step": 2852000 }, { "epoch": 1.71, "learning_rate": 4.4066168714833074e-05, "loss": 1.2651, "step": 2852500 }, { "epoch": 1.71, "learning_rate": 4.4064068749272514e-05, "loss": 1.2794, "step": 2853000 }, { "epoch": 1.71, "learning_rate": 4.406196878371195e-05, "loss": 1.3028, "step": 2853500 }, { "epoch": 1.71, "learning_rate": 4.405986881815138e-05, "loss": 1.3084, "step": 2854000 }, { "epoch": 1.71, "learning_rate": 4.405777305252194e-05, "loss": 1.2676, "step": 2854500 }, { "epoch": 1.71, "learning_rate": 4.4055673086961375e-05, "loss": 1.2972, "step": 2855000 }, { "epoch": 1.71, "learning_rate": 4.405357312140081e-05, "loss": 1.2309, "step": 2855500 }, { "epoch": 1.71, "learning_rate": 4.405147315584025e-05, "loss": 1.2657, "step": 2856000 }, { "epoch": 1.71, "learning_rate": 4.40493773902108e-05, "loss": 1.2729, "step": 2856500 }, { "epoch": 1.71, "learning_rate": 4.4047277424650235e-05, "loss": 1.2866, "step": 2857000 }, { "epoch": 1.71, "learning_rate": 4.404517745908967e-05, "loss": 1.2765, "step": 2857500 }, { "epoch": 1.71, "learning_rate": 4.404307749352911e-05, "loss": 1.2708, "step": 2858000 }, { "epoch": 1.71, "learning_rate": 4.404097752796854e-05, "loss": 1.2642, "step": 2858500 }, { "epoch": 1.71, "learning_rate": 4.4038877562407976e-05, "loss": 1.2957, "step": 2859000 }, { "epoch": 1.71, "learning_rate": 4.403677759684741e-05, "loss": 1.2841, "step": 2859500 }, { "epoch": 1.71, "learning_rate": 4.403468183121797e-05, "loss": 1.2551, "step": 2860000 }, { "epoch": 1.71, "learning_rate": 4.40325818656574e-05, "loss": 1.2733, "step": 2860500 }, { "epoch": 1.72, "learning_rate": 4.403048190009684e-05, "loss": 1.2524, "step": 2861000 }, { "epoch": 1.72, "learning_rate": 4.402838193453628e-05, "loss": 1.2945, "step": 2861500 }, { "epoch": 1.72, "learning_rate": 4.4026281968975704e-05, "loss": 1.2743, "step": 2862000 }, { "epoch": 1.72, "learning_rate": 4.4024182003415144e-05, "loss": 1.2744, "step": 2862500 }, { "epoch": 1.72, "learning_rate": 4.402208203785458e-05, "loss": 1.2906, "step": 2863000 }, { "epoch": 1.72, "learning_rate": 4.401998627222514e-05, "loss": 1.2877, "step": 2863500 }, { "epoch": 1.72, "learning_rate": 4.401788630666457e-05, "loss": 1.2748, "step": 2864000 }, { "epoch": 1.72, "learning_rate": 4.4015786341104005e-05, "loss": 1.244, "step": 2864500 }, { "epoch": 1.72, "learning_rate": 4.401368637554344e-05, "loss": 1.2454, "step": 2865000 }, { "epoch": 1.72, "learning_rate": 4.401158640998287e-05, "loss": 1.2843, "step": 2865500 }, { "epoch": 1.72, "learning_rate": 4.400948644442231e-05, "loss": 1.2632, "step": 2866000 }, { "epoch": 1.72, "learning_rate": 4.4007386478861745e-05, "loss": 1.2558, "step": 2866500 }, { "epoch": 1.72, "learning_rate": 4.400528651330118e-05, "loss": 1.2715, "step": 2867000 }, { "epoch": 1.72, "learning_rate": 4.400319074767173e-05, "loss": 1.2719, "step": 2867500 }, { "epoch": 1.72, "learning_rate": 4.400109078211117e-05, "loss": 1.299, "step": 2868000 }, { "epoch": 1.72, "learning_rate": 4.3998990816550606e-05, "loss": 1.2808, "step": 2868500 }, { "epoch": 1.72, "learning_rate": 4.399689505092116e-05, "loss": 1.2802, "step": 2869000 }, { "epoch": 1.72, "learning_rate": 4.39947950853606e-05, "loss": 1.2669, "step": 2869500 }, { "epoch": 1.72, "learning_rate": 4.399269511980003e-05, "loss": 1.2659, "step": 2870000 }, { "epoch": 1.72, "learning_rate": 4.399059515423947e-05, "loss": 1.2638, "step": 2870500 }, { "epoch": 1.72, "learning_rate": 4.398849518867891e-05, "loss": 1.2692, "step": 2871000 }, { "epoch": 1.72, "learning_rate": 4.398639522311834e-05, "loss": 1.2922, "step": 2871500 }, { "epoch": 1.72, "learning_rate": 4.3984295257557774e-05, "loss": 1.2768, "step": 2872000 }, { "epoch": 1.72, "learning_rate": 4.3982195291997214e-05, "loss": 1.2906, "step": 2872500 }, { "epoch": 1.72, "learning_rate": 4.398009952636777e-05, "loss": 1.2858, "step": 2873000 }, { "epoch": 1.72, "learning_rate": 4.39779995608072e-05, "loss": 1.2792, "step": 2873500 }, { "epoch": 1.72, "learning_rate": 4.3975903795177755e-05, "loss": 1.292, "step": 2874000 }, { "epoch": 1.72, "learning_rate": 4.397380382961719e-05, "loss": 1.2844, "step": 2874500 }, { "epoch": 1.72, "learning_rate": 4.397170386405663e-05, "loss": 1.2774, "step": 2875000 }, { "epoch": 1.72, "learning_rate": 4.396960389849606e-05, "loss": 1.3109, "step": 2875500 }, { "epoch": 1.72, "learning_rate": 4.396750813286662e-05, "loss": 1.2479, "step": 2876000 }, { "epoch": 1.72, "learning_rate": 4.3965408167306056e-05, "loss": 1.2731, "step": 2876500 }, { "epoch": 1.72, "learning_rate": 4.396330820174549e-05, "loss": 1.2692, "step": 2877000 }, { "epoch": 1.73, "learning_rate": 4.396121243611605e-05, "loss": 1.2584, "step": 2877500 }, { "epoch": 1.73, "learning_rate": 4.395911247055548e-05, "loss": 1.2503, "step": 2878000 }, { "epoch": 1.73, "learning_rate": 4.395701250499492e-05, "loss": 1.2696, "step": 2878500 }, { "epoch": 1.73, "learning_rate": 4.395491253943435e-05, "loss": 1.2599, "step": 2879000 }, { "epoch": 1.73, "learning_rate": 4.395281257387378e-05, "loss": 1.279, "step": 2879500 }, { "epoch": 1.73, "learning_rate": 4.3950716808244344e-05, "loss": 1.2657, "step": 2880000 }, { "epoch": 1.73, "learning_rate": 4.3948616842683784e-05, "loss": 1.2749, "step": 2880500 }, { "epoch": 1.73, "learning_rate": 4.394651687712321e-05, "loss": 1.2687, "step": 2881000 }, { "epoch": 1.73, "learning_rate": 4.394442111149377e-05, "loss": 1.241, "step": 2881500 }, { "epoch": 1.73, "learning_rate": 4.3942321145933204e-05, "loss": 1.2776, "step": 2882000 }, { "epoch": 1.73, "learning_rate": 4.3940221180372645e-05, "loss": 1.2631, "step": 2882500 }, { "epoch": 1.73, "learning_rate": 4.393812121481208e-05, "loss": 1.2985, "step": 2883000 }, { "epoch": 1.73, "learning_rate": 4.393602124925151e-05, "loss": 1.3127, "step": 2883500 }, { "epoch": 1.73, "learning_rate": 4.3933921283690945e-05, "loss": 1.2485, "step": 2884000 }, { "epoch": 1.73, "learning_rate": 4.393182131813038e-05, "loss": 1.2889, "step": 2884500 }, { "epoch": 1.73, "learning_rate": 4.392972135256982e-05, "loss": 1.2742, "step": 2885000 }, { "epoch": 1.73, "learning_rate": 4.392762138700925e-05, "loss": 1.2753, "step": 2885500 }, { "epoch": 1.73, "learning_rate": 4.3925525621379806e-05, "loss": 1.252, "step": 2886000 }, { "epoch": 1.73, "learning_rate": 4.392342565581924e-05, "loss": 1.2911, "step": 2886500 }, { "epoch": 1.73, "learning_rate": 4.392132569025868e-05, "loss": 1.2844, "step": 2887000 }, { "epoch": 1.73, "learning_rate": 4.391922572469811e-05, "loss": 1.2594, "step": 2887500 }, { "epoch": 1.73, "learning_rate": 4.3917125759137546e-05, "loss": 1.2723, "step": 2888000 }, { "epoch": 1.73, "learning_rate": 4.3915025793576987e-05, "loss": 1.2955, "step": 2888500 }, { "epoch": 1.73, "learning_rate": 4.391293002794754e-05, "loss": 1.2501, "step": 2889000 }, { "epoch": 1.73, "learning_rate": 4.3910830062386974e-05, "loss": 1.2553, "step": 2889500 }, { "epoch": 1.73, "learning_rate": 4.3908730096826414e-05, "loss": 1.2625, "step": 2890000 }, { "epoch": 1.73, "learning_rate": 4.390663013126585e-05, "loss": 1.3029, "step": 2890500 }, { "epoch": 1.73, "learning_rate": 4.390453016570528e-05, "loss": 1.2679, "step": 2891000 }, { "epoch": 1.73, "learning_rate": 4.390243020014472e-05, "loss": 1.2776, "step": 2891500 }, { "epoch": 1.73, "learning_rate": 4.3900330234584154e-05, "loss": 1.2888, "step": 2892000 }, { "epoch": 1.73, "learning_rate": 4.389823026902359e-05, "loss": 1.2898, "step": 2892500 }, { "epoch": 1.73, "learning_rate": 4.389613030346303e-05, "loss": 1.3113, "step": 2893000 }, { "epoch": 1.73, "learning_rate": 4.389403033790246e-05, "loss": 1.268, "step": 2893500 }, { "epoch": 1.74, "learning_rate": 4.389193037234189e-05, "loss": 1.296, "step": 2894000 }, { "epoch": 1.74, "learning_rate": 4.388983040678133e-05, "loss": 1.2824, "step": 2894500 }, { "epoch": 1.74, "learning_rate": 4.388773464115189e-05, "loss": 1.2835, "step": 2895000 }, { "epoch": 1.74, "learning_rate": 4.388563467559132e-05, "loss": 1.2843, "step": 2895500 }, { "epoch": 1.74, "learning_rate": 4.3883538909961876e-05, "loss": 1.2494, "step": 2896000 }, { "epoch": 1.74, "learning_rate": 4.388143894440131e-05, "loss": 1.2731, "step": 2896500 }, { "epoch": 1.74, "learning_rate": 4.387933897884075e-05, "loss": 1.2725, "step": 2897000 }, { "epoch": 1.74, "learning_rate": 4.387723901328018e-05, "loss": 1.2774, "step": 2897500 }, { "epoch": 1.74, "learning_rate": 4.3875139047719616e-05, "loss": 1.2902, "step": 2898000 }, { "epoch": 1.74, "learning_rate": 4.387303908215905e-05, "loss": 1.2858, "step": 2898500 }, { "epoch": 1.74, "learning_rate": 4.387093911659848e-05, "loss": 1.2664, "step": 2899000 }, { "epoch": 1.74, "learning_rate": 4.3868839151037924e-05, "loss": 1.3078, "step": 2899500 }, { "epoch": 1.74, "learning_rate": 4.3866743385408484e-05, "loss": 1.2707, "step": 2900000 }, { "epoch": 1.74, "eval_loss": 1.2214449644088745, "eval_runtime": 1124.1633, "eval_samples_per_second": 468.544, "eval_steps_per_second": 78.091, "step": 2900000 }, { "epoch": 1.74, "learning_rate": 4.386464341984792e-05, "loss": 1.2383, "step": 2900500 }, { "epoch": 1.74, "learning_rate": 4.3862543454287344e-05, "loss": 1.2507, "step": 2901000 }, { "epoch": 1.74, "learning_rate": 4.3860443488726784e-05, "loss": 1.2754, "step": 2901500 }, { "epoch": 1.74, "learning_rate": 4.385834352316622e-05, "loss": 1.2775, "step": 2902000 }, { "epoch": 1.74, "learning_rate": 4.385624355760565e-05, "loss": 1.286, "step": 2902500 }, { "epoch": 1.74, "learning_rate": 4.385414359204509e-05, "loss": 1.2687, "step": 2903000 }, { "epoch": 1.74, "learning_rate": 4.3852043626484525e-05, "loss": 1.2695, "step": 2903500 }, { "epoch": 1.74, "learning_rate": 4.384994786085508e-05, "loss": 1.2489, "step": 2904000 }, { "epoch": 1.74, "learning_rate": 4.384785209522564e-05, "loss": 1.285, "step": 2904500 }, { "epoch": 1.74, "learning_rate": 4.384575212966507e-05, "loss": 1.2849, "step": 2905000 }, { "epoch": 1.74, "learning_rate": 4.3843652164104506e-05, "loss": 1.2667, "step": 2905500 }, { "epoch": 1.74, "learning_rate": 4.384155219854394e-05, "loss": 1.253, "step": 2906000 }, { "epoch": 1.74, "learning_rate": 4.383945223298338e-05, "loss": 1.2526, "step": 2906500 }, { "epoch": 1.74, "learning_rate": 4.383735226742281e-05, "loss": 1.2656, "step": 2907000 }, { "epoch": 1.74, "learning_rate": 4.3835252301862246e-05, "loss": 1.2558, "step": 2907500 }, { "epoch": 1.74, "learning_rate": 4.38331565362328e-05, "loss": 1.2571, "step": 2908000 }, { "epoch": 1.74, "learning_rate": 4.383105657067224e-05, "loss": 1.2562, "step": 2908500 }, { "epoch": 1.74, "learning_rate": 4.3828956605111674e-05, "loss": 1.272, "step": 2909000 }, { "epoch": 1.74, "learning_rate": 4.382685663955111e-05, "loss": 1.241, "step": 2909500 }, { "epoch": 1.74, "learning_rate": 4.382475667399055e-05, "loss": 1.2726, "step": 2910000 }, { "epoch": 1.74, "learning_rate": 4.38226609083611e-05, "loss": 1.2858, "step": 2910500 }, { "epoch": 1.75, "learning_rate": 4.3820560942800534e-05, "loss": 1.2924, "step": 2911000 }, { "epoch": 1.75, "learning_rate": 4.381846097723997e-05, "loss": 1.2769, "step": 2911500 }, { "epoch": 1.75, "learning_rate": 4.381636101167941e-05, "loss": 1.2921, "step": 2912000 }, { "epoch": 1.75, "learning_rate": 4.381426104611884e-05, "loss": 1.2759, "step": 2912500 }, { "epoch": 1.75, "learning_rate": 4.3812161080558275e-05, "loss": 1.2482, "step": 2913000 }, { "epoch": 1.75, "learning_rate": 4.3810061114997715e-05, "loss": 1.2885, "step": 2913500 }, { "epoch": 1.75, "learning_rate": 4.3807969549299396e-05, "loss": 1.2662, "step": 2914000 }, { "epoch": 1.75, "learning_rate": 4.380586958373883e-05, "loss": 1.2619, "step": 2914500 }, { "epoch": 1.75, "learning_rate": 4.380376961817826e-05, "loss": 1.2657, "step": 2915000 }, { "epoch": 1.75, "learning_rate": 4.3801669652617696e-05, "loss": 1.2536, "step": 2915500 }, { "epoch": 1.75, "learning_rate": 4.379956968705713e-05, "loss": 1.267, "step": 2916000 }, { "epoch": 1.75, "learning_rate": 4.379746972149656e-05, "loss": 1.2333, "step": 2916500 }, { "epoch": 1.75, "learning_rate": 4.3795369755936e-05, "loss": 1.2517, "step": 2917000 }, { "epoch": 1.75, "learning_rate": 4.379326979037544e-05, "loss": 1.2552, "step": 2917500 }, { "epoch": 1.75, "learning_rate": 4.379116982481487e-05, "loss": 1.2464, "step": 2918000 }, { "epoch": 1.75, "learning_rate": 4.378906985925431e-05, "loss": 1.2496, "step": 2918500 }, { "epoch": 1.75, "learning_rate": 4.3786969893693744e-05, "loss": 1.2622, "step": 2919000 }, { "epoch": 1.75, "learning_rate": 4.37848741280643e-05, "loss": 1.2754, "step": 2919500 }, { "epoch": 1.75, "learning_rate": 4.378277416250373e-05, "loss": 1.2571, "step": 2920000 }, { "epoch": 1.75, "learning_rate": 4.378067419694317e-05, "loss": 1.2673, "step": 2920500 }, { "epoch": 1.75, "learning_rate": 4.3778574231382605e-05, "loss": 1.2946, "step": 2921000 }, { "epoch": 1.75, "learning_rate": 4.377647426582204e-05, "loss": 1.2752, "step": 2921500 }, { "epoch": 1.75, "learning_rate": 4.37743785001926e-05, "loss": 1.2904, "step": 2922000 }, { "epoch": 1.75, "learning_rate": 4.377228273456315e-05, "loss": 1.2781, "step": 2922500 }, { "epoch": 1.75, "learning_rate": 4.3770182769002585e-05, "loss": 1.2698, "step": 2923000 }, { "epoch": 1.75, "learning_rate": 4.376808280344202e-05, "loss": 1.2699, "step": 2923500 }, { "epoch": 1.75, "learning_rate": 4.376598283788146e-05, "loss": 1.2648, "step": 2924000 }, { "epoch": 1.75, "learning_rate": 4.376388287232089e-05, "loss": 1.2872, "step": 2924500 }, { "epoch": 1.75, "learning_rate": 4.3761782906760326e-05, "loss": 1.2993, "step": 2925000 }, { "epoch": 1.75, "learning_rate": 4.3759682941199766e-05, "loss": 1.2596, "step": 2925500 }, { "epoch": 1.75, "learning_rate": 4.37575829756392e-05, "loss": 1.244, "step": 2926000 }, { "epoch": 1.75, "learning_rate": 4.375548301007863e-05, "loss": 1.2727, "step": 2926500 }, { "epoch": 1.75, "learning_rate": 4.375338724444919e-05, "loss": 1.2797, "step": 2927000 }, { "epoch": 1.76, "learning_rate": 4.375128727888863e-05, "loss": 1.2758, "step": 2927500 }, { "epoch": 1.76, "learning_rate": 4.374918731332806e-05, "loss": 1.237, "step": 2928000 }, { "epoch": 1.76, "learning_rate": 4.3747087347767494e-05, "loss": 1.2569, "step": 2928500 }, { "epoch": 1.76, "learning_rate": 4.3744987382206934e-05, "loss": 1.2649, "step": 2929000 }, { "epoch": 1.76, "learning_rate": 4.374288741664637e-05, "loss": 1.2924, "step": 2929500 }, { "epoch": 1.76, "learning_rate": 4.374079165101692e-05, "loss": 1.2451, "step": 2930000 }, { "epoch": 1.76, "learning_rate": 4.373869168545636e-05, "loss": 1.3132, "step": 2930500 }, { "epoch": 1.76, "learning_rate": 4.3736591719895795e-05, "loss": 1.2719, "step": 2931000 }, { "epoch": 1.76, "learning_rate": 4.373449175433523e-05, "loss": 1.2473, "step": 2931500 }, { "epoch": 1.76, "learning_rate": 4.373239178877467e-05, "loss": 1.2582, "step": 2932000 }, { "epoch": 1.76, "learning_rate": 4.3730291823214095e-05, "loss": 1.2387, "step": 2932500 }, { "epoch": 1.76, "learning_rate": 4.372819185765353e-05, "loss": 1.3085, "step": 2933000 }, { "epoch": 1.76, "learning_rate": 4.372609189209297e-05, "loss": 1.2424, "step": 2933500 }, { "epoch": 1.76, "learning_rate": 4.372399612646353e-05, "loss": 1.2901, "step": 2934000 }, { "epoch": 1.76, "learning_rate": 4.372189616090296e-05, "loss": 1.2433, "step": 2934500 }, { "epoch": 1.76, "learning_rate": 4.371979619534239e-05, "loss": 1.2585, "step": 2935000 }, { "epoch": 1.76, "learning_rate": 4.371769622978183e-05, "loss": 1.273, "step": 2935500 }, { "epoch": 1.76, "learning_rate": 4.371560046415239e-05, "loss": 1.2416, "step": 2936000 }, { "epoch": 1.76, "learning_rate": 4.3713500498591824e-05, "loss": 1.2494, "step": 2936500 }, { "epoch": 1.76, "learning_rate": 4.3711400533031264e-05, "loss": 1.2569, "step": 2937000 }, { "epoch": 1.76, "learning_rate": 4.370930056747069e-05, "loss": 1.2505, "step": 2937500 }, { "epoch": 1.76, "learning_rate": 4.370720480184125e-05, "loss": 1.2539, "step": 2938000 }, { "epoch": 1.76, "learning_rate": 4.3705104836280684e-05, "loss": 1.252, "step": 2938500 }, { "epoch": 1.76, "learning_rate": 4.3703004870720124e-05, "loss": 1.2508, "step": 2939000 }, { "epoch": 1.76, "learning_rate": 4.370090490515956e-05, "loss": 1.2588, "step": 2939500 }, { "epoch": 1.76, "learning_rate": 4.3698804939598985e-05, "loss": 1.2657, "step": 2940000 }, { "epoch": 1.76, "learning_rate": 4.3696709173969545e-05, "loss": 1.2612, "step": 2940500 }, { "epoch": 1.76, "learning_rate": 4.3694609208408985e-05, "loss": 1.2671, "step": 2941000 }, { "epoch": 1.76, "learning_rate": 4.369250924284842e-05, "loss": 1.2621, "step": 2941500 }, { "epoch": 1.76, "learning_rate": 4.369040927728785e-05, "loss": 1.3084, "step": 2942000 }, { "epoch": 1.76, "learning_rate": 4.3688309311727286e-05, "loss": 1.2611, "step": 2942500 }, { "epoch": 1.76, "learning_rate": 4.3686213546097846e-05, "loss": 1.2815, "step": 2943000 }, { "epoch": 1.76, "learning_rate": 4.368411358053728e-05, "loss": 1.2471, "step": 2943500 }, { "epoch": 1.77, "learning_rate": 4.368201361497672e-05, "loss": 1.2752, "step": 2944000 }, { "epoch": 1.77, "learning_rate": 4.3679913649416146e-05, "loss": 1.2855, "step": 2944500 }, { "epoch": 1.77, "learning_rate": 4.367781368385558e-05, "loss": 1.2629, "step": 2945000 }, { "epoch": 1.77, "learning_rate": 4.367571371829502e-05, "loss": 1.2581, "step": 2945500 }, { "epoch": 1.77, "learning_rate": 4.3673613752734453e-05, "loss": 1.2516, "step": 2946000 }, { "epoch": 1.77, "learning_rate": 4.367151378717389e-05, "loss": 1.2788, "step": 2946500 }, { "epoch": 1.77, "learning_rate": 4.366941802154444e-05, "loss": 1.281, "step": 2947000 }, { "epoch": 1.77, "learning_rate": 4.366731805598388e-05, "loss": 1.2421, "step": 2947500 }, { "epoch": 1.77, "learning_rate": 4.366522229035444e-05, "loss": 1.2779, "step": 2948000 }, { "epoch": 1.77, "learning_rate": 4.3663122324793875e-05, "loss": 1.2748, "step": 2948500 }, { "epoch": 1.77, "learning_rate": 4.366102235923331e-05, "loss": 1.2938, "step": 2949000 }, { "epoch": 1.77, "learning_rate": 4.365892239367274e-05, "loss": 1.2804, "step": 2949500 }, { "epoch": 1.77, "learning_rate": 4.3656822428112175e-05, "loss": 1.2656, "step": 2950000 }, { "epoch": 1.77, "learning_rate": 4.3654722462551615e-05, "loss": 1.2637, "step": 2950500 }, { "epoch": 1.77, "learning_rate": 4.365262249699105e-05, "loss": 1.2759, "step": 2951000 }, { "epoch": 1.77, "learning_rate": 4.365052253143048e-05, "loss": 1.3018, "step": 2951500 }, { "epoch": 1.77, "learning_rate": 4.3648426765801036e-05, "loss": 1.2506, "step": 2952000 }, { "epoch": 1.77, "learning_rate": 4.3646326800240476e-05, "loss": 1.2807, "step": 2952500 }, { "epoch": 1.77, "learning_rate": 4.364422683467991e-05, "loss": 1.2556, "step": 2953000 }, { "epoch": 1.77, "learning_rate": 4.364212686911934e-05, "loss": 1.2495, "step": 2953500 }, { "epoch": 1.77, "learning_rate": 4.364002690355878e-05, "loss": 1.2583, "step": 2954000 }, { "epoch": 1.77, "learning_rate": 4.3637926937998216e-05, "loss": 1.2634, "step": 2954500 }, { "epoch": 1.77, "learning_rate": 4.363582697243765e-05, "loss": 1.2818, "step": 2955000 }, { "epoch": 1.77, "learning_rate": 4.363372700687709e-05, "loss": 1.2658, "step": 2955500 }, { "epoch": 1.77, "learning_rate": 4.3631627041316524e-05, "loss": 1.2508, "step": 2956000 }, { "epoch": 1.77, "learning_rate": 4.362952707575596e-05, "loss": 1.2946, "step": 2956500 }, { "epoch": 1.77, "learning_rate": 4.362743131012651e-05, "loss": 1.2787, "step": 2957000 }, { "epoch": 1.77, "learning_rate": 4.362533134456595e-05, "loss": 1.2815, "step": 2957500 }, { "epoch": 1.77, "learning_rate": 4.3623231379005384e-05, "loss": 1.2718, "step": 2958000 }, { "epoch": 1.77, "learning_rate": 4.362113141344482e-05, "loss": 1.2848, "step": 2958500 }, { "epoch": 1.77, "learning_rate": 4.361903144788426e-05, "loss": 1.2711, "step": 2959000 }, { "epoch": 1.77, "learning_rate": 4.361693568225481e-05, "loss": 1.2805, "step": 2959500 }, { "epoch": 1.77, "learning_rate": 4.3614835716694245e-05, "loss": 1.2787, "step": 2960000 }, { "epoch": 1.77, "learning_rate": 4.3612735751133685e-05, "loss": 1.235, "step": 2960500 }, { "epoch": 1.78, "learning_rate": 4.361063578557312e-05, "loss": 1.2918, "step": 2961000 }, { "epoch": 1.78, "learning_rate": 4.360853582001255e-05, "loss": 1.3014, "step": 2961500 }, { "epoch": 1.78, "learning_rate": 4.3606435854451986e-05, "loss": 1.2773, "step": 2962000 }, { "epoch": 1.78, "learning_rate": 4.3604340088822546e-05, "loss": 1.2502, "step": 2962500 }, { "epoch": 1.78, "learning_rate": 4.360224012326198e-05, "loss": 1.2761, "step": 2963000 }, { "epoch": 1.78, "learning_rate": 4.360014015770141e-05, "loss": 1.2191, "step": 2963500 }, { "epoch": 1.78, "learning_rate": 4.359804019214085e-05, "loss": 1.2778, "step": 2964000 }, { "epoch": 1.78, "learning_rate": 4.359594022658028e-05, "loss": 1.2618, "step": 2964500 }, { "epoch": 1.78, "learning_rate": 4.359384026101971e-05, "loss": 1.2734, "step": 2965000 }, { "epoch": 1.78, "learning_rate": 4.3591740295459154e-05, "loss": 1.2641, "step": 2965500 }, { "epoch": 1.78, "learning_rate": 4.358964032989859e-05, "loss": 1.274, "step": 2966000 }, { "epoch": 1.78, "learning_rate": 4.358754036433802e-05, "loss": 1.2451, "step": 2966500 }, { "epoch": 1.78, "learning_rate": 4.35854487986397e-05, "loss": 1.2825, "step": 2967000 }, { "epoch": 1.78, "learning_rate": 4.358334883307914e-05, "loss": 1.2664, "step": 2967500 }, { "epoch": 1.78, "learning_rate": 4.3581248867518575e-05, "loss": 1.2477, "step": 2968000 }, { "epoch": 1.78, "learning_rate": 4.357914890195801e-05, "loss": 1.279, "step": 2968500 }, { "epoch": 1.78, "learning_rate": 4.357704893639744e-05, "loss": 1.2749, "step": 2969000 }, { "epoch": 1.78, "learning_rate": 4.3574948970836875e-05, "loss": 1.2427, "step": 2969500 }, { "epoch": 1.78, "learning_rate": 4.357284900527631e-05, "loss": 1.2901, "step": 2970000 }, { "epoch": 1.78, "learning_rate": 4.357074903971575e-05, "loss": 1.2501, "step": 2970500 }, { "epoch": 1.78, "learning_rate": 4.356865327408631e-05, "loss": 1.269, "step": 2971000 }, { "epoch": 1.78, "learning_rate": 4.3566553308525736e-05, "loss": 1.2711, "step": 2971500 }, { "epoch": 1.78, "learning_rate": 4.3564457542896296e-05, "loss": 1.2674, "step": 2972000 }, { "epoch": 1.78, "learning_rate": 4.356235757733573e-05, "loss": 1.2632, "step": 2972500 }, { "epoch": 1.78, "learning_rate": 4.356025761177517e-05, "loss": 1.2817, "step": 2973000 }, { "epoch": 1.78, "learning_rate": 4.35581576462146e-05, "loss": 1.24, "step": 2973500 }, { "epoch": 1.78, "learning_rate": 4.355605768065404e-05, "loss": 1.2769, "step": 2974000 }, { "epoch": 1.78, "learning_rate": 4.355395771509347e-05, "loss": 1.2673, "step": 2974500 }, { "epoch": 1.78, "learning_rate": 4.3551857749532904e-05, "loss": 1.2529, "step": 2975000 }, { "epoch": 1.78, "learning_rate": 4.3549757783972344e-05, "loss": 1.2799, "step": 2975500 }, { "epoch": 1.78, "learning_rate": 4.3547662018342904e-05, "loss": 1.2601, "step": 2976000 }, { "epoch": 1.78, "learning_rate": 4.354556205278233e-05, "loss": 1.2751, "step": 2976500 }, { "epoch": 1.78, "learning_rate": 4.3543462087221764e-05, "loss": 1.2592, "step": 2977000 }, { "epoch": 1.79, "learning_rate": 4.3541362121661205e-05, "loss": 1.2692, "step": 2977500 }, { "epoch": 1.79, "learning_rate": 4.3539266356031765e-05, "loss": 1.2813, "step": 2978000 }, { "epoch": 1.79, "learning_rate": 4.353717059040232e-05, "loss": 1.2671, "step": 2978500 }, { "epoch": 1.79, "learning_rate": 4.353507062484175e-05, "loss": 1.2601, "step": 2979000 }, { "epoch": 1.79, "learning_rate": 4.3532970659281185e-05, "loss": 1.2517, "step": 2979500 }, { "epoch": 1.79, "learning_rate": 4.3530870693720626e-05, "loss": 1.2983, "step": 2980000 }, { "epoch": 1.79, "learning_rate": 4.352877072816006e-05, "loss": 1.2892, "step": 2980500 }, { "epoch": 1.79, "learning_rate": 4.352667076259949e-05, "loss": 1.2624, "step": 2981000 }, { "epoch": 1.79, "learning_rate": 4.3524570797038926e-05, "loss": 1.2708, "step": 2981500 }, { "epoch": 1.79, "learning_rate": 4.352247083147836e-05, "loss": 1.2479, "step": 2982000 }, { "epoch": 1.79, "learning_rate": 4.352037506584892e-05, "loss": 1.2673, "step": 2982500 }, { "epoch": 1.79, "learning_rate": 4.351827510028836e-05, "loss": 1.2788, "step": 2983000 }, { "epoch": 1.79, "learning_rate": 4.351617513472779e-05, "loss": 1.2673, "step": 2983500 }, { "epoch": 1.79, "learning_rate": 4.351407936909835e-05, "loss": 1.262, "step": 2984000 }, { "epoch": 1.79, "learning_rate": 4.351197940353778e-05, "loss": 1.2664, "step": 2984500 }, { "epoch": 1.79, "learning_rate": 4.350987943797722e-05, "loss": 1.2552, "step": 2985000 }, { "epoch": 1.79, "learning_rate": 4.3507779472416654e-05, "loss": 1.2421, "step": 2985500 }, { "epoch": 1.79, "learning_rate": 4.350567950685608e-05, "loss": 1.2544, "step": 2986000 }, { "epoch": 1.79, "learning_rate": 4.350357954129552e-05, "loss": 1.2573, "step": 2986500 }, { "epoch": 1.79, "learning_rate": 4.3501479575734955e-05, "loss": 1.276, "step": 2987000 }, { "epoch": 1.79, "learning_rate": 4.3499383810105515e-05, "loss": 1.2476, "step": 2987500 }, { "epoch": 1.79, "learning_rate": 4.3497283844544955e-05, "loss": 1.2706, "step": 2988000 }, { "epoch": 1.79, "learning_rate": 4.349518387898438e-05, "loss": 1.2864, "step": 2988500 }, { "epoch": 1.79, "learning_rate": 4.3493083913423815e-05, "loss": 1.2899, "step": 2989000 }, { "epoch": 1.79, "learning_rate": 4.3490983947863256e-05, "loss": 1.2559, "step": 2989500 }, { "epoch": 1.79, "learning_rate": 4.348888398230269e-05, "loss": 1.2899, "step": 2990000 }, { "epoch": 1.79, "learning_rate": 4.348678401674212e-05, "loss": 1.2857, "step": 2990500 }, { "epoch": 1.79, "learning_rate": 4.348468405118156e-05, "loss": 1.2683, "step": 2991000 }, { "epoch": 1.79, "learning_rate": 4.3482584085620996e-05, "loss": 1.2764, "step": 2991500 }, { "epoch": 1.79, "learning_rate": 4.348048412006043e-05, "loss": 1.2795, "step": 2992000 }, { "epoch": 1.79, "learning_rate": 4.347838415449987e-05, "loss": 1.2778, "step": 2992500 }, { "epoch": 1.79, "learning_rate": 4.3476284188939303e-05, "loss": 1.2778, "step": 2993000 }, { "epoch": 1.79, "learning_rate": 4.347418842330986e-05, "loss": 1.2904, "step": 2993500 }, { "epoch": 1.8, "learning_rate": 4.347208845774929e-05, "loss": 1.2703, "step": 2994000 }, { "epoch": 1.8, "learning_rate": 4.346998849218873e-05, "loss": 1.2864, "step": 2994500 }, { "epoch": 1.8, "learning_rate": 4.3467892726559284e-05, "loss": 1.2469, "step": 2995000 }, { "epoch": 1.8, "learning_rate": 4.346579276099872e-05, "loss": 1.2605, "step": 2995500 }, { "epoch": 1.8, "learning_rate": 4.346369279543816e-05, "loss": 1.2682, "step": 2996000 }, { "epoch": 1.8, "learning_rate": 4.346159282987759e-05, "loss": 1.2911, "step": 2996500 }, { "epoch": 1.8, "learning_rate": 4.3459497064248145e-05, "loss": 1.2421, "step": 2997000 }, { "epoch": 1.8, "learning_rate": 4.345739709868758e-05, "loss": 1.2776, "step": 2997500 }, { "epoch": 1.8, "learning_rate": 4.345529713312702e-05, "loss": 1.2784, "step": 2998000 }, { "epoch": 1.8, "learning_rate": 4.345319716756645e-05, "loss": 1.2667, "step": 2998500 }, { "epoch": 1.8, "learning_rate": 4.3451097202005886e-05, "loss": 1.2566, "step": 2999000 }, { "epoch": 1.8, "learning_rate": 4.3448997236445326e-05, "loss": 1.2322, "step": 2999500 }, { "epoch": 1.8, "learning_rate": 4.344690147081588e-05, "loss": 1.2769, "step": 3000000 }, { "epoch": 1.8, "eval_loss": 1.2166739702224731, "eval_runtime": 1103.532, "eval_samples_per_second": 477.304, "eval_steps_per_second": 79.551, "step": 3000000 }, { "epoch": 1.8, "learning_rate": 4.344480150525531e-05, "loss": 1.285, "step": 3000500 }, { "epoch": 1.8, "learning_rate": 4.3442701539694746e-05, "loss": 1.2474, "step": 3001000 }, { "epoch": 1.8, "learning_rate": 4.3440601574134187e-05, "loss": 1.2763, "step": 3001500 }, { "epoch": 1.8, "learning_rate": 4.343850580850474e-05, "loss": 1.2567, "step": 3002000 }, { "epoch": 1.8, "learning_rate": 4.3436405842944174e-05, "loss": 1.2724, "step": 3002500 }, { "epoch": 1.8, "learning_rate": 4.3434305877383614e-05, "loss": 1.2459, "step": 3003000 }, { "epoch": 1.8, "learning_rate": 4.343220591182305e-05, "loss": 1.2671, "step": 3003500 }, { "epoch": 1.8, "learning_rate": 4.34301101461936e-05, "loss": 1.2762, "step": 3004000 }, { "epoch": 1.8, "learning_rate": 4.3428010180633034e-05, "loss": 1.2305, "step": 3004500 }, { "epoch": 1.8, "learning_rate": 4.3425910215072475e-05, "loss": 1.2917, "step": 3005000 }, { "epoch": 1.8, "learning_rate": 4.342381024951191e-05, "loss": 1.245, "step": 3005500 }, { "epoch": 1.8, "learning_rate": 4.342171028395134e-05, "loss": 1.246, "step": 3006000 }, { "epoch": 1.8, "learning_rate": 4.341961031839078e-05, "loss": 1.2648, "step": 3006500 }, { "epoch": 1.8, "learning_rate": 4.3417510352830215e-05, "loss": 1.2847, "step": 3007000 }, { "epoch": 1.8, "learning_rate": 4.341541038726965e-05, "loss": 1.2734, "step": 3007500 }, { "epoch": 1.8, "learning_rate": 4.341331042170908e-05, "loss": 1.2603, "step": 3008000 }, { "epoch": 1.8, "learning_rate": 4.3411210456148516e-05, "loss": 1.244, "step": 3008500 }, { "epoch": 1.8, "learning_rate": 4.3409114690519076e-05, "loss": 1.262, "step": 3009000 }, { "epoch": 1.8, "learning_rate": 4.340701472495851e-05, "loss": 1.2634, "step": 3009500 }, { "epoch": 1.8, "learning_rate": 4.340491475939795e-05, "loss": 1.2499, "step": 3010000 }, { "epoch": 1.8, "learning_rate": 4.3402814793837376e-05, "loss": 1.2837, "step": 3010500 }, { "epoch": 1.81, "learning_rate": 4.3400714828276817e-05, "loss": 1.2401, "step": 3011000 }, { "epoch": 1.81, "learning_rate": 4.339861906264738e-05, "loss": 1.2328, "step": 3011500 }, { "epoch": 1.81, "learning_rate": 4.339651909708681e-05, "loss": 1.2649, "step": 3012000 }, { "epoch": 1.81, "learning_rate": 4.3394419131526244e-05, "loss": 1.2763, "step": 3012500 }, { "epoch": 1.81, "learning_rate": 4.339231916596568e-05, "loss": 1.2608, "step": 3013000 }, { "epoch": 1.81, "learning_rate": 4.339022340033624e-05, "loss": 1.2733, "step": 3013500 }, { "epoch": 1.81, "learning_rate": 4.338812343477567e-05, "loss": 1.2699, "step": 3014000 }, { "epoch": 1.81, "learning_rate": 4.3386023469215105e-05, "loss": 1.2651, "step": 3014500 }, { "epoch": 1.81, "learning_rate": 4.338392350365454e-05, "loss": 1.2768, "step": 3015000 }, { "epoch": 1.81, "learning_rate": 4.33818277380251e-05, "loss": 1.26, "step": 3015500 }, { "epoch": 1.81, "learning_rate": 4.337972777246453e-05, "loss": 1.2685, "step": 3016000 }, { "epoch": 1.81, "learning_rate": 4.3377627806903965e-05, "loss": 1.2775, "step": 3016500 }, { "epoch": 1.81, "learning_rate": 4.3375532041274526e-05, "loss": 1.2761, "step": 3017000 }, { "epoch": 1.81, "learning_rate": 4.337343207571396e-05, "loss": 1.2665, "step": 3017500 }, { "epoch": 1.81, "learning_rate": 4.337133211015339e-05, "loss": 1.2405, "step": 3018000 }, { "epoch": 1.81, "learning_rate": 4.336923214459283e-05, "loss": 1.2575, "step": 3018500 }, { "epoch": 1.81, "learning_rate": 4.3367132179032266e-05, "loss": 1.2699, "step": 3019000 }, { "epoch": 1.81, "learning_rate": 4.33650322134717e-05, "loss": 1.2717, "step": 3019500 }, { "epoch": 1.81, "learning_rate": 4.336293224791113e-05, "loss": 1.2667, "step": 3020000 }, { "epoch": 1.81, "learning_rate": 4.3360832282350567e-05, "loss": 1.2688, "step": 3020500 }, { "epoch": 1.81, "learning_rate": 4.335873651672113e-05, "loss": 1.2488, "step": 3021000 }, { "epoch": 1.81, "learning_rate": 4.335664075109168e-05, "loss": 1.2516, "step": 3021500 }, { "epoch": 1.81, "learning_rate": 4.3354540785531114e-05, "loss": 1.2612, "step": 3022000 }, { "epoch": 1.81, "learning_rate": 4.3352440819970554e-05, "loss": 1.2439, "step": 3022500 }, { "epoch": 1.81, "learning_rate": 4.335034085440999e-05, "loss": 1.2796, "step": 3023000 }, { "epoch": 1.81, "learning_rate": 4.334824088884942e-05, "loss": 1.2655, "step": 3023500 }, { "epoch": 1.81, "learning_rate": 4.334614092328886e-05, "loss": 1.2487, "step": 3024000 }, { "epoch": 1.81, "learning_rate": 4.3344040957728295e-05, "loss": 1.2884, "step": 3024500 }, { "epoch": 1.81, "learning_rate": 4.334194099216773e-05, "loss": 1.2638, "step": 3025000 }, { "epoch": 1.81, "learning_rate": 4.333984102660716e-05, "loss": 1.2818, "step": 3025500 }, { "epoch": 1.81, "learning_rate": 4.3337741061046595e-05, "loss": 1.2924, "step": 3026000 }, { "epoch": 1.81, "learning_rate": 4.3335641095486035e-05, "loss": 1.2554, "step": 3026500 }, { "epoch": 1.81, "learning_rate": 4.333354112992547e-05, "loss": 1.279, "step": 3027000 }, { "epoch": 1.82, "learning_rate": 4.333144536429602e-05, "loss": 1.2362, "step": 3027500 }, { "epoch": 1.82, "learning_rate": 4.332934959866658e-05, "loss": 1.2775, "step": 3028000 }, { "epoch": 1.82, "learning_rate": 4.3327249633106016e-05, "loss": 1.252, "step": 3028500 }, { "epoch": 1.82, "learning_rate": 4.3325149667545456e-05, "loss": 1.2668, "step": 3029000 }, { "epoch": 1.82, "learning_rate": 4.332304970198488e-05, "loss": 1.2745, "step": 3029500 }, { "epoch": 1.82, "learning_rate": 4.332094973642432e-05, "loss": 1.2744, "step": 3030000 }, { "epoch": 1.82, "learning_rate": 4.331885397079488e-05, "loss": 1.2852, "step": 3030500 }, { "epoch": 1.82, "learning_rate": 4.331675400523432e-05, "loss": 1.2807, "step": 3031000 }, { "epoch": 1.82, "learning_rate": 4.331465403967375e-05, "loss": 1.2418, "step": 3031500 }, { "epoch": 1.82, "learning_rate": 4.3312554074113184e-05, "loss": 1.2644, "step": 3032000 }, { "epoch": 1.82, "learning_rate": 4.331045410855262e-05, "loss": 1.225, "step": 3032500 }, { "epoch": 1.82, "learning_rate": 4.330835414299205e-05, "loss": 1.2714, "step": 3033000 }, { "epoch": 1.82, "learning_rate": 4.330625417743149e-05, "loss": 1.2526, "step": 3033500 }, { "epoch": 1.82, "learning_rate": 4.330415841180205e-05, "loss": 1.2309, "step": 3034000 }, { "epoch": 1.82, "learning_rate": 4.330205844624148e-05, "loss": 1.2763, "step": 3034500 }, { "epoch": 1.82, "learning_rate": 4.329995848068091e-05, "loss": 1.2683, "step": 3035000 }, { "epoch": 1.82, "learning_rate": 4.329785851512035e-05, "loss": 1.2836, "step": 3035500 }, { "epoch": 1.82, "learning_rate": 4.3295758549559785e-05, "loss": 1.2767, "step": 3036000 }, { "epoch": 1.82, "learning_rate": 4.329365858399922e-05, "loss": 1.2516, "step": 3036500 }, { "epoch": 1.82, "learning_rate": 4.329155861843866e-05, "loss": 1.2932, "step": 3037000 }, { "epoch": 1.82, "learning_rate": 4.328945865287809e-05, "loss": 1.2552, "step": 3037500 }, { "epoch": 1.82, "learning_rate": 4.3287362887248646e-05, "loss": 1.2731, "step": 3038000 }, { "epoch": 1.82, "learning_rate": 4.328526292168808e-05, "loss": 1.272, "step": 3038500 }, { "epoch": 1.82, "learning_rate": 4.328316295612752e-05, "loss": 1.2585, "step": 3039000 }, { "epoch": 1.82, "learning_rate": 4.328106299056695e-05, "loss": 1.269, "step": 3039500 }, { "epoch": 1.82, "learning_rate": 4.3278971424868634e-05, "loss": 1.265, "step": 3040000 }, { "epoch": 1.82, "learning_rate": 4.327687145930807e-05, "loss": 1.2741, "step": 3040500 }, { "epoch": 1.82, "learning_rate": 4.327477149374751e-05, "loss": 1.2566, "step": 3041000 }, { "epoch": 1.82, "learning_rate": 4.3272671528186934e-05, "loss": 1.2786, "step": 3041500 }, { "epoch": 1.82, "learning_rate": 4.327057156262637e-05, "loss": 1.2618, "step": 3042000 }, { "epoch": 1.82, "learning_rate": 4.326847159706581e-05, "loss": 1.2394, "step": 3042500 }, { "epoch": 1.82, "learning_rate": 4.326637163150524e-05, "loss": 1.279, "step": 3043000 }, { "epoch": 1.82, "learning_rate": 4.3264271665944675e-05, "loss": 1.29, "step": 3043500 }, { "epoch": 1.83, "learning_rate": 4.3262171700384115e-05, "loss": 1.2744, "step": 3044000 }, { "epoch": 1.83, "learning_rate": 4.326007593475467e-05, "loss": 1.268, "step": 3044500 }, { "epoch": 1.83, "learning_rate": 4.32579759691941e-05, "loss": 1.2659, "step": 3045000 }, { "epoch": 1.83, "learning_rate": 4.3255876003633536e-05, "loss": 1.2498, "step": 3045500 }, { "epoch": 1.83, "learning_rate": 4.3253776038072976e-05, "loss": 1.2531, "step": 3046000 }, { "epoch": 1.83, "learning_rate": 4.325167607251241e-05, "loss": 1.2629, "step": 3046500 }, { "epoch": 1.83, "learning_rate": 4.324957610695184e-05, "loss": 1.2382, "step": 3047000 }, { "epoch": 1.83, "learning_rate": 4.32474803413224e-05, "loss": 1.2571, "step": 3047500 }, { "epoch": 1.83, "learning_rate": 4.3245380375761837e-05, "loss": 1.2552, "step": 3048000 }, { "epoch": 1.83, "learning_rate": 4.324328041020127e-05, "loss": 1.2866, "step": 3048500 }, { "epoch": 1.83, "learning_rate": 4.324118044464071e-05, "loss": 1.278, "step": 3049000 }, { "epoch": 1.83, "learning_rate": 4.3239080479080144e-05, "loss": 1.2773, "step": 3049500 }, { "epoch": 1.83, "learning_rate": 4.323698051351958e-05, "loss": 1.2513, "step": 3050000 }, { "epoch": 1.83, "learning_rate": 4.323488054795902e-05, "loss": 1.2832, "step": 3050500 }, { "epoch": 1.83, "learning_rate": 4.323278058239845e-05, "loss": 1.2647, "step": 3051000 }, { "epoch": 1.83, "learning_rate": 4.3230684816769004e-05, "loss": 1.2592, "step": 3051500 }, { "epoch": 1.83, "learning_rate": 4.322858485120844e-05, "loss": 1.2351, "step": 3052000 }, { "epoch": 1.83, "learning_rate": 4.322648488564788e-05, "loss": 1.2721, "step": 3052500 }, { "epoch": 1.83, "learning_rate": 4.322438492008731e-05, "loss": 1.2302, "step": 3053000 }, { "epoch": 1.83, "learning_rate": 4.3222289154457865e-05, "loss": 1.2492, "step": 3053500 }, { "epoch": 1.83, "learning_rate": 4.3220189188897305e-05, "loss": 1.2827, "step": 3054000 }, { "epoch": 1.83, "learning_rate": 4.321808922333674e-05, "loss": 1.264, "step": 3054500 }, { "epoch": 1.83, "learning_rate": 4.321598925777617e-05, "loss": 1.2814, "step": 3055000 }, { "epoch": 1.83, "learning_rate": 4.321389769207785e-05, "loss": 1.261, "step": 3055500 }, { "epoch": 1.83, "learning_rate": 4.321179772651728e-05, "loss": 1.2875, "step": 3056000 }, { "epoch": 1.83, "learning_rate": 4.320969776095672e-05, "loss": 1.2747, "step": 3056500 }, { "epoch": 1.83, "learning_rate": 4.320759779539615e-05, "loss": 1.2918, "step": 3057000 }, { "epoch": 1.83, "learning_rate": 4.3205497829835587e-05, "loss": 1.256, "step": 3057500 }, { "epoch": 1.83, "learning_rate": 4.320339786427503e-05, "loss": 1.2927, "step": 3058000 }, { "epoch": 1.83, "learning_rate": 4.320129789871446e-05, "loss": 1.2408, "step": 3058500 }, { "epoch": 1.83, "learning_rate": 4.3199197933153894e-05, "loss": 1.2707, "step": 3059000 }, { "epoch": 1.83, "learning_rate": 4.319710216752445e-05, "loss": 1.2634, "step": 3059500 }, { "epoch": 1.83, "learning_rate": 4.319500220196389e-05, "loss": 1.2753, "step": 3060000 }, { "epoch": 1.83, "learning_rate": 4.319290643633444e-05, "loss": 1.2585, "step": 3060500 }, { "epoch": 1.84, "learning_rate": 4.3190806470773875e-05, "loss": 1.2788, "step": 3061000 }, { "epoch": 1.84, "learning_rate": 4.3188706505213315e-05, "loss": 1.2403, "step": 3061500 }, { "epoch": 1.84, "learning_rate": 4.318660653965275e-05, "loss": 1.2662, "step": 3062000 }, { "epoch": 1.84, "learning_rate": 4.318451077402331e-05, "loss": 1.2324, "step": 3062500 }, { "epoch": 1.84, "learning_rate": 4.3182410808462735e-05, "loss": 1.2866, "step": 3063000 }, { "epoch": 1.84, "learning_rate": 4.3180310842902176e-05, "loss": 1.2782, "step": 3063500 }, { "epoch": 1.84, "learning_rate": 4.3178215077272736e-05, "loss": 1.251, "step": 3064000 }, { "epoch": 1.84, "learning_rate": 4.317611511171217e-05, "loss": 1.2853, "step": 3064500 }, { "epoch": 1.84, "learning_rate": 4.31740151461516e-05, "loss": 1.2673, "step": 3065000 }, { "epoch": 1.84, "learning_rate": 4.3171919380522156e-05, "loss": 1.254, "step": 3065500 }, { "epoch": 1.84, "learning_rate": 4.31698194149616e-05, "loss": 1.3198, "step": 3066000 }, { "epoch": 1.84, "learning_rate": 4.316771944940103e-05, "loss": 1.253, "step": 3066500 }, { "epoch": 1.84, "learning_rate": 4.316561948384047e-05, "loss": 1.2679, "step": 3067000 }, { "epoch": 1.84, "learning_rate": 4.31635195182799e-05, "loss": 1.2806, "step": 3067500 }, { "epoch": 1.84, "learning_rate": 4.316141955271933e-05, "loss": 1.2512, "step": 3068000 }, { "epoch": 1.84, "learning_rate": 4.315931958715877e-05, "loss": 1.2734, "step": 3068500 }, { "epoch": 1.84, "learning_rate": 4.3157219621598204e-05, "loss": 1.2736, "step": 3069000 }, { "epoch": 1.84, "learning_rate": 4.315511965603764e-05, "loss": 1.2782, "step": 3069500 }, { "epoch": 1.84, "learning_rate": 4.315301969047708e-05, "loss": 1.2426, "step": 3070000 }, { "epoch": 1.84, "learning_rate": 4.315091972491651e-05, "loss": 1.2562, "step": 3070500 }, { "epoch": 1.84, "learning_rate": 4.3148819759355945e-05, "loss": 1.2655, "step": 3071000 }, { "epoch": 1.84, "learning_rate": 4.3146719793795385e-05, "loss": 1.2834, "step": 3071500 }, { "epoch": 1.84, "learning_rate": 4.314461982823482e-05, "loss": 1.2869, "step": 3072000 }, { "epoch": 1.84, "learning_rate": 4.314251986267425e-05, "loss": 1.2349, "step": 3072500 }, { "epoch": 1.84, "learning_rate": 4.3140419897113685e-05, "loss": 1.2657, "step": 3073000 }, { "epoch": 1.84, "learning_rate": 4.3138324131484246e-05, "loss": 1.2556, "step": 3073500 }, { "epoch": 1.84, "learning_rate": 4.313622416592368e-05, "loss": 1.2601, "step": 3074000 }, { "epoch": 1.84, "learning_rate": 4.313412840029423e-05, "loss": 1.2684, "step": 3074500 }, { "epoch": 1.84, "learning_rate": 4.313202843473367e-05, "loss": 1.2592, "step": 3075000 }, { "epoch": 1.84, "learning_rate": 4.3129928469173106e-05, "loss": 1.2785, "step": 3075500 }, { "epoch": 1.84, "learning_rate": 4.312782850361254e-05, "loss": 1.2401, "step": 3076000 }, { "epoch": 1.84, "learning_rate": 4.312572853805198e-05, "loss": 1.2561, "step": 3076500 }, { "epoch": 1.84, "learning_rate": 4.3123628572491414e-05, "loss": 1.2457, "step": 3077000 }, { "epoch": 1.85, "learning_rate": 4.312152860693085e-05, "loss": 1.2543, "step": 3077500 }, { "epoch": 1.85, "learning_rate": 4.31194328413014e-05, "loss": 1.2555, "step": 3078000 }, { "epoch": 1.85, "learning_rate": 4.311733287574084e-05, "loss": 1.2487, "step": 3078500 }, { "epoch": 1.85, "learning_rate": 4.3115232910180274e-05, "loss": 1.2905, "step": 3079000 }, { "epoch": 1.85, "learning_rate": 4.311313294461971e-05, "loss": 1.242, "step": 3079500 }, { "epoch": 1.85, "learning_rate": 4.311103297905915e-05, "loss": 1.2668, "step": 3080000 }, { "epoch": 1.85, "learning_rate": 4.3108933013498575e-05, "loss": 1.2938, "step": 3080500 }, { "epoch": 1.85, "learning_rate": 4.310683304793801e-05, "loss": 1.2484, "step": 3081000 }, { "epoch": 1.85, "learning_rate": 4.310473308237745e-05, "loss": 1.2249, "step": 3081500 }, { "epoch": 1.85, "learning_rate": 4.310263731674801e-05, "loss": 1.2519, "step": 3082000 }, { "epoch": 1.85, "learning_rate": 4.3100537351187435e-05, "loss": 1.2675, "step": 3082500 }, { "epoch": 1.85, "learning_rate": 4.3098437385626876e-05, "loss": 1.2619, "step": 3083000 }, { "epoch": 1.85, "learning_rate": 4.3096341619997436e-05, "loss": 1.2876, "step": 3083500 }, { "epoch": 1.85, "learning_rate": 4.309424165443687e-05, "loss": 1.2662, "step": 3084000 }, { "epoch": 1.85, "learning_rate": 4.30921416888763e-05, "loss": 1.2465, "step": 3084500 }, { "epoch": 1.85, "learning_rate": 4.3090041723315736e-05, "loss": 1.2585, "step": 3085000 }, { "epoch": 1.85, "learning_rate": 4.308794175775517e-05, "loss": 1.2758, "step": 3085500 }, { "epoch": 1.85, "learning_rate": 4.30858417921946e-05, "loss": 1.2778, "step": 3086000 }, { "epoch": 1.85, "learning_rate": 4.3083741826634044e-05, "loss": 1.2478, "step": 3086500 }, { "epoch": 1.85, "learning_rate": 4.308164186107348e-05, "loss": 1.2621, "step": 3087000 }, { "epoch": 1.85, "learning_rate": 4.307955029537516e-05, "loss": 1.2696, "step": 3087500 }, { "epoch": 1.85, "learning_rate": 4.307745032981459e-05, "loss": 1.2838, "step": 3088000 }, { "epoch": 1.85, "learning_rate": 4.3075350364254024e-05, "loss": 1.2666, "step": 3088500 }, { "epoch": 1.85, "learning_rate": 4.3073250398693465e-05, "loss": 1.2525, "step": 3089000 }, { "epoch": 1.85, "learning_rate": 4.30711504331329e-05, "loss": 1.257, "step": 3089500 }, { "epoch": 1.85, "learning_rate": 4.306905046757233e-05, "loss": 1.2452, "step": 3090000 }, { "epoch": 1.85, "learning_rate": 4.3066950502011765e-05, "loss": 1.2539, "step": 3090500 }, { "epoch": 1.85, "learning_rate": 4.30648505364512e-05, "loss": 1.2632, "step": 3091000 }, { "epoch": 1.85, "learning_rate": 4.306275057089064e-05, "loss": 1.2429, "step": 3091500 }, { "epoch": 1.85, "learning_rate": 4.30606548052612e-05, "loss": 1.2784, "step": 3092000 }, { "epoch": 1.85, "learning_rate": 4.3058554839700626e-05, "loss": 1.2769, "step": 3092500 }, { "epoch": 1.85, "learning_rate": 4.305645487414006e-05, "loss": 1.272, "step": 3093000 }, { "epoch": 1.85, "learning_rate": 4.30543549085795e-05, "loss": 1.2478, "step": 3093500 }, { "epoch": 1.85, "learning_rate": 4.305225494301893e-05, "loss": 1.2507, "step": 3094000 }, { "epoch": 1.86, "learning_rate": 4.3050159177389486e-05, "loss": 1.2705, "step": 3094500 }, { "epoch": 1.86, "learning_rate": 4.304805921182892e-05, "loss": 1.2597, "step": 3095000 }, { "epoch": 1.86, "learning_rate": 4.304595924626836e-05, "loss": 1.2493, "step": 3095500 }, { "epoch": 1.86, "learning_rate": 4.3043859280707794e-05, "loss": 1.2844, "step": 3096000 }, { "epoch": 1.86, "learning_rate": 4.304175931514723e-05, "loss": 1.2512, "step": 3096500 }, { "epoch": 1.86, "learning_rate": 4.303965934958667e-05, "loss": 1.2771, "step": 3097000 }, { "epoch": 1.86, "learning_rate": 4.30375593840261e-05, "loss": 1.2674, "step": 3097500 }, { "epoch": 1.86, "learning_rate": 4.3035459418465534e-05, "loss": 1.2445, "step": 3098000 }, { "epoch": 1.86, "learning_rate": 4.3033359452904974e-05, "loss": 1.2614, "step": 3098500 }, { "epoch": 1.86, "learning_rate": 4.303126368727553e-05, "loss": 1.253, "step": 3099000 }, { "epoch": 1.86, "learning_rate": 4.302916372171496e-05, "loss": 1.2486, "step": 3099500 }, { "epoch": 1.86, "learning_rate": 4.30270637561544e-05, "loss": 1.2472, "step": 3100000 }, { "epoch": 1.86, "eval_loss": 1.209123134613037, "eval_runtime": 1124.5793, "eval_samples_per_second": 468.371, "eval_steps_per_second": 78.062, "step": 3100000 }, { "epoch": 1.86, "learning_rate": 4.3024963790593835e-05, "loss": 1.2343, "step": 3100500 }, { "epoch": 1.86, "learning_rate": 4.302286802496439e-05, "loss": 1.224, "step": 3101000 }, { "epoch": 1.86, "learning_rate": 4.302076805940382e-05, "loss": 1.26, "step": 3101500 }, { "epoch": 1.86, "learning_rate": 4.301866809384326e-05, "loss": 1.2775, "step": 3102000 }, { "epoch": 1.86, "learning_rate": 4.3016568128282696e-05, "loss": 1.2587, "step": 3102500 }, { "epoch": 1.86, "learning_rate": 4.301447236265325e-05, "loss": 1.2448, "step": 3103000 }, { "epoch": 1.86, "learning_rate": 4.301237659702381e-05, "loss": 1.2553, "step": 3103500 }, { "epoch": 1.86, "learning_rate": 4.3010280831394363e-05, "loss": 1.2446, "step": 3104000 }, { "epoch": 1.86, "learning_rate": 4.3008180865833804e-05, "loss": 1.29, "step": 3104500 }, { "epoch": 1.86, "learning_rate": 4.300608090027324e-05, "loss": 1.2545, "step": 3105000 }, { "epoch": 1.86, "learning_rate": 4.300398093471267e-05, "loss": 1.2646, "step": 3105500 }, { "epoch": 1.86, "learning_rate": 4.300188096915211e-05, "loss": 1.2599, "step": 3106000 }, { "epoch": 1.86, "learning_rate": 4.299978100359154e-05, "loss": 1.2752, "step": 3106500 }, { "epoch": 1.86, "learning_rate": 4.299768103803097e-05, "loss": 1.276, "step": 3107000 }, { "epoch": 1.86, "learning_rate": 4.299558107247041e-05, "loss": 1.2405, "step": 3107500 }, { "epoch": 1.86, "learning_rate": 4.2993481106909845e-05, "loss": 1.2672, "step": 3108000 }, { "epoch": 1.86, "learning_rate": 4.299138114134928e-05, "loss": 1.2679, "step": 3108500 }, { "epoch": 1.86, "learning_rate": 4.298928537571983e-05, "loss": 1.2633, "step": 3109000 }, { "epoch": 1.86, "learning_rate": 4.298718541015927e-05, "loss": 1.2684, "step": 3109500 }, { "epoch": 1.86, "learning_rate": 4.2985085444598705e-05, "loss": 1.2687, "step": 3110000 }, { "epoch": 1.86, "learning_rate": 4.298298547903814e-05, "loss": 1.2805, "step": 3110500 }, { "epoch": 1.87, "learning_rate": 4.298088551347758e-05, "loss": 1.2818, "step": 3111000 }, { "epoch": 1.87, "learning_rate": 4.297878974784813e-05, "loss": 1.2645, "step": 3111500 }, { "epoch": 1.87, "learning_rate": 4.2976689782287566e-05, "loss": 1.2534, "step": 3112000 }, { "epoch": 1.87, "learning_rate": 4.2974589816727006e-05, "loss": 1.2574, "step": 3112500 }, { "epoch": 1.87, "learning_rate": 4.297248985116644e-05, "loss": 1.2234, "step": 3113000 }, { "epoch": 1.87, "learning_rate": 4.297038988560587e-05, "loss": 1.2387, "step": 3113500 }, { "epoch": 1.87, "learning_rate": 4.2968289920045313e-05, "loss": 1.2629, "step": 3114000 }, { "epoch": 1.87, "learning_rate": 4.296619415441587e-05, "loss": 1.2619, "step": 3114500 }, { "epoch": 1.87, "learning_rate": 4.29640941888553e-05, "loss": 1.282, "step": 3115000 }, { "epoch": 1.87, "learning_rate": 4.2961994223294734e-05, "loss": 1.2529, "step": 3115500 }, { "epoch": 1.87, "learning_rate": 4.2959894257734174e-05, "loss": 1.2745, "step": 3116000 }, { "epoch": 1.87, "learning_rate": 4.295779429217361e-05, "loss": 1.2258, "step": 3116500 }, { "epoch": 1.87, "learning_rate": 4.295569432661304e-05, "loss": 1.2688, "step": 3117000 }, { "epoch": 1.87, "learning_rate": 4.2953598560983595e-05, "loss": 1.2593, "step": 3117500 }, { "epoch": 1.87, "learning_rate": 4.2951498595423035e-05, "loss": 1.2499, "step": 3118000 }, { "epoch": 1.87, "learning_rate": 4.294939862986247e-05, "loss": 1.2717, "step": 3118500 }, { "epoch": 1.87, "learning_rate": 4.294729866430191e-05, "loss": 1.2887, "step": 3119000 }, { "epoch": 1.87, "learning_rate": 4.294519869874134e-05, "loss": 1.2704, "step": 3119500 }, { "epoch": 1.87, "learning_rate": 4.2943098733180776e-05, "loss": 1.2706, "step": 3120000 }, { "epoch": 1.87, "learning_rate": 4.294100296755133e-05, "loss": 1.2656, "step": 3120500 }, { "epoch": 1.87, "learning_rate": 4.293890300199077e-05, "loss": 1.2632, "step": 3121000 }, { "epoch": 1.87, "learning_rate": 4.29368030364302e-05, "loss": 1.3073, "step": 3121500 }, { "epoch": 1.87, "learning_rate": 4.2934703070869636e-05, "loss": 1.2898, "step": 3122000 }, { "epoch": 1.87, "learning_rate": 4.2932603105309077e-05, "loss": 1.2434, "step": 3122500 }, { "epoch": 1.87, "learning_rate": 4.293050733967963e-05, "loss": 1.2646, "step": 3123000 }, { "epoch": 1.87, "learning_rate": 4.2928407374119064e-05, "loss": 1.2725, "step": 3123500 }, { "epoch": 1.87, "learning_rate": 4.29263074085585e-05, "loss": 1.2688, "step": 3124000 }, { "epoch": 1.87, "learning_rate": 4.292420744299794e-05, "loss": 1.2712, "step": 3124500 }, { "epoch": 1.87, "learning_rate": 4.292211167736849e-05, "loss": 1.2777, "step": 3125000 }, { "epoch": 1.87, "learning_rate": 4.2920011711807924e-05, "loss": 1.2765, "step": 3125500 }, { "epoch": 1.87, "learning_rate": 4.2917911746247365e-05, "loss": 1.2509, "step": 3126000 }, { "epoch": 1.87, "learning_rate": 4.29158117806868e-05, "loss": 1.2731, "step": 3126500 }, { "epoch": 1.87, "learning_rate": 4.291371181512623e-05, "loss": 1.2582, "step": 3127000 }, { "epoch": 1.88, "learning_rate": 4.2911616049496785e-05, "loss": 1.2426, "step": 3127500 }, { "epoch": 1.88, "learning_rate": 4.2909516083936225e-05, "loss": 1.2886, "step": 3128000 }, { "epoch": 1.88, "learning_rate": 4.290741611837566e-05, "loss": 1.2447, "step": 3128500 }, { "epoch": 1.88, "learning_rate": 4.290531615281509e-05, "loss": 1.2977, "step": 3129000 }, { "epoch": 1.88, "learning_rate": 4.290321618725453e-05, "loss": 1.292, "step": 3129500 }, { "epoch": 1.88, "learning_rate": 4.2901116221693966e-05, "loss": 1.2684, "step": 3130000 }, { "epoch": 1.88, "learning_rate": 4.28990162561334e-05, "loss": 1.2826, "step": 3130500 }, { "epoch": 1.88, "learning_rate": 4.289691629057283e-05, "loss": 1.2486, "step": 3131000 }, { "epoch": 1.88, "learning_rate": 4.289482052494339e-05, "loss": 1.2773, "step": 3131500 }, { "epoch": 1.88, "learning_rate": 4.2892720559382827e-05, "loss": 1.2591, "step": 3132000 }, { "epoch": 1.88, "learning_rate": 4.289062059382226e-05, "loss": 1.2362, "step": 3132500 }, { "epoch": 1.88, "learning_rate": 4.288852482819282e-05, "loss": 1.2681, "step": 3133000 }, { "epoch": 1.88, "learning_rate": 4.2886429062563374e-05, "loss": 1.2386, "step": 3133500 }, { "epoch": 1.88, "learning_rate": 4.288432909700281e-05, "loss": 1.2811, "step": 3134000 }, { "epoch": 1.88, "learning_rate": 4.288223333137337e-05, "loss": 1.2685, "step": 3134500 }, { "epoch": 1.88, "learning_rate": 4.2880133365812794e-05, "loss": 1.2437, "step": 3135000 }, { "epoch": 1.88, "learning_rate": 4.2878033400252235e-05, "loss": 1.2618, "step": 3135500 }, { "epoch": 1.88, "learning_rate": 4.287593343469167e-05, "loss": 1.2622, "step": 3136000 }, { "epoch": 1.88, "learning_rate": 4.28738334691311e-05, "loss": 1.2779, "step": 3136500 }, { "epoch": 1.88, "learning_rate": 4.287173350357054e-05, "loss": 1.2474, "step": 3137000 }, { "epoch": 1.88, "learning_rate": 4.2869633538009975e-05, "loss": 1.2797, "step": 3137500 }, { "epoch": 1.88, "learning_rate": 4.286753357244941e-05, "loss": 1.2861, "step": 3138000 }, { "epoch": 1.88, "learning_rate": 4.286543360688885e-05, "loss": 1.2461, "step": 3138500 }, { "epoch": 1.88, "learning_rate": 4.286333364132828e-05, "loss": 1.2612, "step": 3139000 }, { "epoch": 1.88, "learning_rate": 4.2861233675767716e-05, "loss": 1.2474, "step": 3139500 }, { "epoch": 1.88, "learning_rate": 4.2859133710207156e-05, "loss": 1.2752, "step": 3140000 }, { "epoch": 1.88, "learning_rate": 4.285703374464658e-05, "loss": 1.2569, "step": 3140500 }, { "epoch": 1.88, "learning_rate": 4.285493377908602e-05, "loss": 1.2494, "step": 3141000 }, { "epoch": 1.88, "learning_rate": 4.2852833813525457e-05, "loss": 1.2851, "step": 3141500 }, { "epoch": 1.88, "learning_rate": 4.285073384796489e-05, "loss": 1.2685, "step": 3142000 }, { "epoch": 1.88, "learning_rate": 4.284863388240433e-05, "loss": 1.2703, "step": 3142500 }, { "epoch": 1.88, "learning_rate": 4.2846542316706004e-05, "loss": 1.2539, "step": 3143000 }, { "epoch": 1.88, "learning_rate": 4.2844442351145444e-05, "loss": 1.2712, "step": 3143500 }, { "epoch": 1.88, "learning_rate": 4.284234238558488e-05, "loss": 1.2302, "step": 3144000 }, { "epoch": 1.89, "learning_rate": 4.284024661995543e-05, "loss": 1.2721, "step": 3144500 }, { "epoch": 1.89, "learning_rate": 4.2838146654394865e-05, "loss": 1.2504, "step": 3145000 }, { "epoch": 1.89, "learning_rate": 4.2836046688834305e-05, "loss": 1.2896, "step": 3145500 }, { "epoch": 1.89, "learning_rate": 4.283394672327374e-05, "loss": 1.2657, "step": 3146000 }, { "epoch": 1.89, "learning_rate": 4.283184675771317e-05, "loss": 1.2266, "step": 3146500 }, { "epoch": 1.89, "learning_rate": 4.282974679215261e-05, "loss": 1.2686, "step": 3147000 }, { "epoch": 1.89, "learning_rate": 4.2827646826592045e-05, "loss": 1.2506, "step": 3147500 }, { "epoch": 1.89, "learning_rate": 4.282554686103148e-05, "loss": 1.2483, "step": 3148000 }, { "epoch": 1.89, "learning_rate": 4.282344689547091e-05, "loss": 1.2456, "step": 3148500 }, { "epoch": 1.89, "learning_rate": 4.2821346929910346e-05, "loss": 1.2521, "step": 3149000 }, { "epoch": 1.89, "learning_rate": 4.2819251164280906e-05, "loss": 1.2561, "step": 3149500 }, { "epoch": 1.89, "learning_rate": 4.281715539865146e-05, "loss": 1.25, "step": 3150000 }, { "epoch": 1.89, "learning_rate": 4.281505963302201e-05, "loss": 1.2924, "step": 3150500 }, { "epoch": 1.89, "learning_rate": 4.2812959667461454e-05, "loss": 1.243, "step": 3151000 }, { "epoch": 1.89, "learning_rate": 4.281085970190089e-05, "loss": 1.2674, "step": 3151500 }, { "epoch": 1.89, "learning_rate": 4.280875973634032e-05, "loss": 1.2568, "step": 3152000 }, { "epoch": 1.89, "learning_rate": 4.280665977077976e-05, "loss": 1.2808, "step": 3152500 }, { "epoch": 1.89, "learning_rate": 4.2804559805219194e-05, "loss": 1.2469, "step": 3153000 }, { "epoch": 1.89, "learning_rate": 4.280245983965863e-05, "loss": 1.2478, "step": 3153500 }, { "epoch": 1.89, "learning_rate": 4.280035987409807e-05, "loss": 1.275, "step": 3154000 }, { "epoch": 1.89, "learning_rate": 4.27982599085375e-05, "loss": 1.2479, "step": 3154500 }, { "epoch": 1.89, "learning_rate": 4.2796159942976935e-05, "loss": 1.2588, "step": 3155000 }, { "epoch": 1.89, "learning_rate": 4.279405997741637e-05, "loss": 1.2645, "step": 3155500 }, { "epoch": 1.89, "learning_rate": 4.27919600118558e-05, "loss": 1.2647, "step": 3156000 }, { "epoch": 1.89, "learning_rate": 4.278986004629524e-05, "loss": 1.266, "step": 3156500 }, { "epoch": 1.89, "learning_rate": 4.27877642806658e-05, "loss": 1.2583, "step": 3157000 }, { "epoch": 1.89, "learning_rate": 4.278566431510523e-05, "loss": 1.2339, "step": 3157500 }, { "epoch": 1.89, "learning_rate": 4.278356434954466e-05, "loss": 1.299, "step": 3158000 }, { "epoch": 1.89, "learning_rate": 4.27814643839841e-05, "loss": 1.2854, "step": 3158500 }, { "epoch": 1.89, "learning_rate": 4.2779364418423536e-05, "loss": 1.2619, "step": 3159000 }, { "epoch": 1.89, "learning_rate": 4.277726445286297e-05, "loss": 1.2424, "step": 3159500 }, { "epoch": 1.89, "learning_rate": 4.277516448730241e-05, "loss": 1.2615, "step": 3160000 }, { "epoch": 1.89, "learning_rate": 4.277306452174184e-05, "loss": 1.2373, "step": 3160500 }, { "epoch": 1.9, "learning_rate": 4.27709687561124e-05, "loss": 1.2556, "step": 3161000 }, { "epoch": 1.9, "learning_rate": 4.276886879055183e-05, "loss": 1.229, "step": 3161500 }, { "epoch": 1.9, "learning_rate": 4.276676882499127e-05, "loss": 1.2517, "step": 3162000 }, { "epoch": 1.9, "learning_rate": 4.2764668859430704e-05, "loss": 1.266, "step": 3162500 }, { "epoch": 1.9, "learning_rate": 4.276256889387014e-05, "loss": 1.2544, "step": 3163000 }, { "epoch": 1.9, "learning_rate": 4.27604731282407e-05, "loss": 1.2833, "step": 3163500 }, { "epoch": 1.9, "learning_rate": 4.275837316268013e-05, "loss": 1.2409, "step": 3164000 }, { "epoch": 1.9, "learning_rate": 4.2756273197119565e-05, "loss": 1.3014, "step": 3164500 }, { "epoch": 1.9, "learning_rate": 4.2754173231559005e-05, "loss": 1.2382, "step": 3165000 }, { "epoch": 1.9, "learning_rate": 4.275207326599844e-05, "loss": 1.2918, "step": 3165500 }, { "epoch": 1.9, "learning_rate": 4.274997330043787e-05, "loss": 1.2665, "step": 3166000 }, { "epoch": 1.9, "learning_rate": 4.274787333487731e-05, "loss": 1.2504, "step": 3166500 }, { "epoch": 1.9, "learning_rate": 4.2745773369316746e-05, "loss": 1.2563, "step": 3167000 }, { "epoch": 1.9, "learning_rate": 4.27436776036873e-05, "loss": 1.2843, "step": 3167500 }, { "epoch": 1.9, "learning_rate": 4.274157763812673e-05, "loss": 1.256, "step": 3168000 }, { "epoch": 1.9, "learning_rate": 4.273947767256617e-05, "loss": 1.2548, "step": 3168500 }, { "epoch": 1.9, "learning_rate": 4.2737377707005606e-05, "loss": 1.2641, "step": 3169000 }, { "epoch": 1.9, "learning_rate": 4.273528194137616e-05, "loss": 1.2389, "step": 3169500 }, { "epoch": 1.9, "learning_rate": 4.273318197581559e-05, "loss": 1.2574, "step": 3170000 }, { "epoch": 1.9, "learning_rate": 4.2731082010255034e-05, "loss": 1.2612, "step": 3170500 }, { "epoch": 1.9, "learning_rate": 4.272898204469447e-05, "loss": 1.2486, "step": 3171000 }, { "epoch": 1.9, "learning_rate": 4.272688627906502e-05, "loss": 1.218, "step": 3171500 }, { "epoch": 1.9, "learning_rate": 4.2724790513435574e-05, "loss": 1.3072, "step": 3172000 }, { "epoch": 1.9, "learning_rate": 4.2722690547875014e-05, "loss": 1.2725, "step": 3172500 }, { "epoch": 1.9, "learning_rate": 4.272059058231445e-05, "loss": 1.2654, "step": 3173000 }, { "epoch": 1.9, "learning_rate": 4.271849061675388e-05, "loss": 1.2903, "step": 3173500 }, { "epoch": 1.9, "learning_rate": 4.271639065119332e-05, "loss": 1.2635, "step": 3174000 }, { "epoch": 1.9, "learning_rate": 4.2714290685632755e-05, "loss": 1.2605, "step": 3174500 }, { "epoch": 1.9, "learning_rate": 4.271219072007219e-05, "loss": 1.2348, "step": 3175000 }, { "epoch": 1.9, "learning_rate": 4.271009075451163e-05, "loss": 1.2429, "step": 3175500 }, { "epoch": 1.9, "learning_rate": 4.270799498888218e-05, "loss": 1.2244, "step": 3176000 }, { "epoch": 1.9, "learning_rate": 4.2705895023321616e-05, "loss": 1.2714, "step": 3176500 }, { "epoch": 1.9, "learning_rate": 4.270379925769217e-05, "loss": 1.2345, "step": 3177000 }, { "epoch": 1.91, "learning_rate": 4.270169929213161e-05, "loss": 1.2598, "step": 3177500 }, { "epoch": 1.91, "learning_rate": 4.269959932657104e-05, "loss": 1.2585, "step": 3178000 }, { "epoch": 1.91, "learning_rate": 4.2697499361010477e-05, "loss": 1.2597, "step": 3178500 }, { "epoch": 1.91, "learning_rate": 4.269539939544992e-05, "loss": 1.2622, "step": 3179000 }, { "epoch": 1.91, "learning_rate": 4.269330362982047e-05, "loss": 1.2243, "step": 3179500 }, { "epoch": 1.91, "learning_rate": 4.2691203664259904e-05, "loss": 1.2413, "step": 3180000 }, { "epoch": 1.91, "learning_rate": 4.268910369869934e-05, "loss": 1.2682, "step": 3180500 }, { "epoch": 1.91, "learning_rate": 4.268700373313878e-05, "loss": 1.2487, "step": 3181000 }, { "epoch": 1.91, "learning_rate": 4.268490376757821e-05, "loss": 1.2745, "step": 3181500 }, { "epoch": 1.91, "learning_rate": 4.2682808001948765e-05, "loss": 1.2335, "step": 3182000 }, { "epoch": 1.91, "learning_rate": 4.26807080363882e-05, "loss": 1.2597, "step": 3182500 }, { "epoch": 1.91, "learning_rate": 4.267860807082764e-05, "loss": 1.2745, "step": 3183000 }, { "epoch": 1.91, "learning_rate": 4.267651230519819e-05, "loss": 1.2564, "step": 3183500 }, { "epoch": 1.91, "learning_rate": 4.2674412339637625e-05, "loss": 1.2421, "step": 3184000 }, { "epoch": 1.91, "learning_rate": 4.2672312374077065e-05, "loss": 1.253, "step": 3184500 }, { "epoch": 1.91, "learning_rate": 4.26702124085165e-05, "loss": 1.2387, "step": 3185000 }, { "epoch": 1.91, "learning_rate": 4.266811244295593e-05, "loss": 1.2667, "step": 3185500 }, { "epoch": 1.91, "learning_rate": 4.266601247739537e-05, "loss": 1.2406, "step": 3186000 }, { "epoch": 1.91, "learning_rate": 4.2663912511834806e-05, "loss": 1.2563, "step": 3186500 }, { "epoch": 1.91, "learning_rate": 4.266181254627424e-05, "loss": 1.2614, "step": 3187000 }, { "epoch": 1.91, "learning_rate": 4.265971258071368e-05, "loss": 1.2685, "step": 3187500 }, { "epoch": 1.91, "learning_rate": 4.265761681508423e-05, "loss": 1.2725, "step": 3188000 }, { "epoch": 1.91, "learning_rate": 4.265551684952367e-05, "loss": 1.2487, "step": 3188500 }, { "epoch": 1.91, "learning_rate": 4.26534168839631e-05, "loss": 1.2513, "step": 3189000 }, { "epoch": 1.91, "learning_rate": 4.265131691840254e-05, "loss": 1.2562, "step": 3189500 }, { "epoch": 1.91, "learning_rate": 4.2649216952841974e-05, "loss": 1.2393, "step": 3190000 }, { "epoch": 1.91, "learning_rate": 4.264711698728141e-05, "loss": 1.2384, "step": 3190500 }, { "epoch": 1.91, "learning_rate": 4.264502122165197e-05, "loss": 1.2399, "step": 3191000 }, { "epoch": 1.91, "learning_rate": 4.26429212560914e-05, "loss": 1.2503, "step": 3191500 }, { "epoch": 1.91, "learning_rate": 4.2640821290530835e-05, "loss": 1.2649, "step": 3192000 }, { "epoch": 1.91, "learning_rate": 4.2638721324970275e-05, "loss": 1.248, "step": 3192500 }, { "epoch": 1.91, "learning_rate": 4.263662135940971e-05, "loss": 1.2149, "step": 3193000 }, { "epoch": 1.91, "learning_rate": 4.263452139384914e-05, "loss": 1.2251, "step": 3193500 }, { "epoch": 1.91, "learning_rate": 4.2632421428288575e-05, "loss": 1.2697, "step": 3194000 }, { "epoch": 1.92, "learning_rate": 4.263032146272801e-05, "loss": 1.2772, "step": 3194500 }, { "epoch": 1.92, "learning_rate": 4.262822989702969e-05, "loss": 1.2437, "step": 3195000 }, { "epoch": 1.92, "learning_rate": 4.262612993146912e-05, "loss": 1.2492, "step": 3195500 }, { "epoch": 1.92, "learning_rate": 4.2624029965908556e-05, "loss": 1.2316, "step": 3196000 }, { "epoch": 1.92, "learning_rate": 4.2621930000347996e-05, "loss": 1.2439, "step": 3196500 }, { "epoch": 1.92, "learning_rate": 4.261983003478743e-05, "loss": 1.2871, "step": 3197000 }, { "epoch": 1.92, "learning_rate": 4.261773006922686e-05, "loss": 1.2511, "step": 3197500 }, { "epoch": 1.92, "learning_rate": 4.2615630103666304e-05, "loss": 1.2308, "step": 3198000 }, { "epoch": 1.92, "learning_rate": 4.261353013810573e-05, "loss": 1.2586, "step": 3198500 }, { "epoch": 1.92, "learning_rate": 4.261143437247629e-05, "loss": 1.253, "step": 3199000 }, { "epoch": 1.92, "learning_rate": 4.2609338606846844e-05, "loss": 1.2774, "step": 3199500 }, { "epoch": 1.92, "learning_rate": 4.2607238641286284e-05, "loss": 1.2039, "step": 3200000 }, { "epoch": 1.92, "eval_loss": 1.2074214220046997, "eval_runtime": 1108.4818, "eval_samples_per_second": 475.172, "eval_steps_per_second": 79.196, "step": 3200000 }, { "epoch": 1.92, "learning_rate": 4.260513867572572e-05, "loss": 1.2663, "step": 3200500 }, { "epoch": 1.92, "learning_rate": 4.260303871016515e-05, "loss": 1.253, "step": 3201000 }, { "epoch": 1.92, "learning_rate": 4.260093874460459e-05, "loss": 1.2607, "step": 3201500 }, { "epoch": 1.92, "learning_rate": 4.2598838779044025e-05, "loss": 1.2493, "step": 3202000 }, { "epoch": 1.92, "learning_rate": 4.259674301341458e-05, "loss": 1.2476, "step": 3202500 }, { "epoch": 1.92, "learning_rate": 4.259464304785401e-05, "loss": 1.2731, "step": 3203000 }, { "epoch": 1.92, "learning_rate": 4.259254308229345e-05, "loss": 1.2501, "step": 3203500 }, { "epoch": 1.92, "learning_rate": 4.2590443116732886e-05, "loss": 1.2505, "step": 3204000 }, { "epoch": 1.92, "learning_rate": 4.258834315117232e-05, "loss": 1.2692, "step": 3204500 }, { "epoch": 1.92, "learning_rate": 4.258624318561176e-05, "loss": 1.245, "step": 3205000 }, { "epoch": 1.92, "learning_rate": 4.258414741998231e-05, "loss": 1.2613, "step": 3205500 }, { "epoch": 1.92, "learning_rate": 4.2582047454421746e-05, "loss": 1.2524, "step": 3206000 }, { "epoch": 1.92, "learning_rate": 4.257994748886119e-05, "loss": 1.2695, "step": 3206500 }, { "epoch": 1.92, "learning_rate": 4.257784752330062e-05, "loss": 1.2779, "step": 3207000 }, { "epoch": 1.92, "learning_rate": 4.2575747557740054e-05, "loss": 1.2607, "step": 3207500 }, { "epoch": 1.92, "learning_rate": 4.257364759217949e-05, "loss": 1.2864, "step": 3208000 }, { "epoch": 1.92, "learning_rate": 4.257155602648116e-05, "loss": 1.2378, "step": 3208500 }, { "epoch": 1.92, "learning_rate": 4.25694560609206e-05, "loss": 1.2748, "step": 3209000 }, { "epoch": 1.92, "learning_rate": 4.2567356095360034e-05, "loss": 1.3, "step": 3209500 }, { "epoch": 1.92, "learning_rate": 4.256525612979947e-05, "loss": 1.2768, "step": 3210000 }, { "epoch": 1.92, "learning_rate": 4.256315616423891e-05, "loss": 1.2322, "step": 3210500 }, { "epoch": 1.93, "learning_rate": 4.256105619867834e-05, "loss": 1.2621, "step": 3211000 }, { "epoch": 1.93, "learning_rate": 4.2558956233117775e-05, "loss": 1.2457, "step": 3211500 }, { "epoch": 1.93, "learning_rate": 4.2556860467488335e-05, "loss": 1.2913, "step": 3212000 }, { "epoch": 1.93, "learning_rate": 4.255476050192777e-05, "loss": 1.2595, "step": 3212500 }, { "epoch": 1.93, "learning_rate": 4.25526605363672e-05, "loss": 1.3004, "step": 3213000 }, { "epoch": 1.93, "learning_rate": 4.255056057080664e-05, "loss": 1.2453, "step": 3213500 }, { "epoch": 1.93, "learning_rate": 4.2548460605246076e-05, "loss": 1.2538, "step": 3214000 }, { "epoch": 1.93, "learning_rate": 4.254636063968551e-05, "loss": 1.2464, "step": 3214500 }, { "epoch": 1.93, "learning_rate": 4.254426067412495e-05, "loss": 1.28, "step": 3215000 }, { "epoch": 1.93, "learning_rate": 4.2542160708564376e-05, "loss": 1.2426, "step": 3215500 }, { "epoch": 1.93, "learning_rate": 4.254006074300381e-05, "loss": 1.2328, "step": 3216000 }, { "epoch": 1.93, "learning_rate": 4.253796077744325e-05, "loss": 1.2959, "step": 3216500 }, { "epoch": 1.93, "learning_rate": 4.2535860811882684e-05, "loss": 1.2548, "step": 3217000 }, { "epoch": 1.93, "learning_rate": 4.253376504625324e-05, "loss": 1.2665, "step": 3217500 }, { "epoch": 1.93, "learning_rate": 4.253166508069267e-05, "loss": 1.2709, "step": 3218000 }, { "epoch": 1.93, "learning_rate": 4.252956511513211e-05, "loss": 1.2179, "step": 3218500 }, { "epoch": 1.93, "learning_rate": 4.2527465149571544e-05, "loss": 1.2469, "step": 3219000 }, { "epoch": 1.93, "learning_rate": 4.252536518401098e-05, "loss": 1.2408, "step": 3219500 }, { "epoch": 1.93, "learning_rate": 4.252326941838154e-05, "loss": 1.2658, "step": 3220000 }, { "epoch": 1.93, "learning_rate": 4.252116945282097e-05, "loss": 1.2749, "step": 3220500 }, { "epoch": 1.93, "learning_rate": 4.2519069487260405e-05, "loss": 1.259, "step": 3221000 }, { "epoch": 1.93, "learning_rate": 4.2516969521699845e-05, "loss": 1.2758, "step": 3221500 }, { "epoch": 1.93, "learning_rate": 4.251486955613928e-05, "loss": 1.2234, "step": 3222000 }, { "epoch": 1.93, "learning_rate": 4.251276959057871e-05, "loss": 1.258, "step": 3222500 }, { "epoch": 1.93, "learning_rate": 4.251066962501815e-05, "loss": 1.258, "step": 3223000 }, { "epoch": 1.93, "learning_rate": 4.2508569659457586e-05, "loss": 1.2398, "step": 3223500 }, { "epoch": 1.93, "learning_rate": 4.250647389382814e-05, "loss": 1.228, "step": 3224000 }, { "epoch": 1.93, "learning_rate": 4.250437392826757e-05, "loss": 1.2617, "step": 3224500 }, { "epoch": 1.93, "learning_rate": 4.250227396270701e-05, "loss": 1.2707, "step": 3225000 }, { "epoch": 1.93, "learning_rate": 4.2500173997146447e-05, "loss": 1.2718, "step": 3225500 }, { "epoch": 1.93, "learning_rate": 4.2498078231517e-05, "loss": 1.2469, "step": 3226000 }, { "epoch": 1.93, "learning_rate": 4.249598246588756e-05, "loss": 1.2453, "step": 3226500 }, { "epoch": 1.93, "learning_rate": 4.2493882500326994e-05, "loss": 1.2633, "step": 3227000 }, { "epoch": 1.94, "learning_rate": 4.249178253476643e-05, "loss": 1.2703, "step": 3227500 }, { "epoch": 1.94, "learning_rate": 4.248968256920586e-05, "loss": 1.2821, "step": 3228000 }, { "epoch": 1.94, "learning_rate": 4.24875826036453e-05, "loss": 1.2644, "step": 3228500 }, { "epoch": 1.94, "learning_rate": 4.248548683801586e-05, "loss": 1.245, "step": 3229000 }, { "epoch": 1.94, "learning_rate": 4.248338687245529e-05, "loss": 1.2813, "step": 3229500 }, { "epoch": 1.94, "learning_rate": 4.248128690689472e-05, "loss": 1.241, "step": 3230000 }, { "epoch": 1.94, "learning_rate": 4.247918694133416e-05, "loss": 1.2352, "step": 3230500 }, { "epoch": 1.94, "learning_rate": 4.2477086975773595e-05, "loss": 1.2492, "step": 3231000 }, { "epoch": 1.94, "learning_rate": 4.247498701021303e-05, "loss": 1.2615, "step": 3231500 }, { "epoch": 1.94, "learning_rate": 4.247288704465247e-05, "loss": 1.2399, "step": 3232000 }, { "epoch": 1.94, "learning_rate": 4.24707870790919e-05, "loss": 1.2631, "step": 3232500 }, { "epoch": 1.94, "learning_rate": 4.2468691313462456e-05, "loss": 1.2476, "step": 3233000 }, { "epoch": 1.94, "learning_rate": 4.2466595547833016e-05, "loss": 1.2551, "step": 3233500 }, { "epoch": 1.94, "learning_rate": 4.246449558227245e-05, "loss": 1.2667, "step": 3234000 }, { "epoch": 1.94, "learning_rate": 4.246239561671188e-05, "loss": 1.2636, "step": 3234500 }, { "epoch": 1.94, "learning_rate": 4.246029565115132e-05, "loss": 1.219, "step": 3235000 }, { "epoch": 1.94, "learning_rate": 4.245819568559076e-05, "loss": 1.2677, "step": 3235500 }, { "epoch": 1.94, "learning_rate": 4.245609572003019e-05, "loss": 1.2509, "step": 3236000 }, { "epoch": 1.94, "learning_rate": 4.2453995754469624e-05, "loss": 1.2561, "step": 3236500 }, { "epoch": 1.94, "learning_rate": 4.2451895788909064e-05, "loss": 1.2597, "step": 3237000 }, { "epoch": 1.94, "learning_rate": 4.24497958233485e-05, "loss": 1.2341, "step": 3237500 }, { "epoch": 1.94, "learning_rate": 4.244769585778793e-05, "loss": 1.2512, "step": 3238000 }, { "epoch": 1.94, "learning_rate": 4.244559589222737e-05, "loss": 1.2507, "step": 3238500 }, { "epoch": 1.94, "learning_rate": 4.2443495926666805e-05, "loss": 1.2449, "step": 3239000 }, { "epoch": 1.94, "learning_rate": 4.244139596110624e-05, "loss": 1.2726, "step": 3239500 }, { "epoch": 1.94, "learning_rate": 4.243930019547679e-05, "loss": 1.2887, "step": 3240000 }, { "epoch": 1.94, "learning_rate": 4.243720022991623e-05, "loss": 1.2233, "step": 3240500 }, { "epoch": 1.94, "learning_rate": 4.2435100264355666e-05, "loss": 1.2724, "step": 3241000 }, { "epoch": 1.94, "learning_rate": 4.24330002987951e-05, "loss": 1.2573, "step": 3241500 }, { "epoch": 1.94, "learning_rate": 4.243090453316565e-05, "loss": 1.2553, "step": 3242000 }, { "epoch": 1.94, "learning_rate": 4.242880876753621e-05, "loss": 1.2407, "step": 3242500 }, { "epoch": 1.94, "learning_rate": 4.2426708801975646e-05, "loss": 1.286, "step": 3243000 }, { "epoch": 1.94, "learning_rate": 4.242460883641508e-05, "loss": 1.2374, "step": 3243500 }, { "epoch": 1.94, "learning_rate": 4.2422513070785633e-05, "loss": 1.2559, "step": 3244000 }, { "epoch": 1.95, "learning_rate": 4.2420413105225074e-05, "loss": 1.2762, "step": 3244500 }, { "epoch": 1.95, "learning_rate": 4.241831313966451e-05, "loss": 1.2666, "step": 3245000 }, { "epoch": 1.95, "learning_rate": 4.241621317410394e-05, "loss": 1.28, "step": 3245500 }, { "epoch": 1.95, "learning_rate": 4.24141174084745e-05, "loss": 1.2758, "step": 3246000 }, { "epoch": 1.95, "learning_rate": 4.2412017442913934e-05, "loss": 1.2608, "step": 3246500 }, { "epoch": 1.95, "learning_rate": 4.240991747735337e-05, "loss": 1.2418, "step": 3247000 }, { "epoch": 1.95, "learning_rate": 4.24078175117928e-05, "loss": 1.2353, "step": 3247500 }, { "epoch": 1.95, "learning_rate": 4.240571754623224e-05, "loss": 1.2717, "step": 3248000 }, { "epoch": 1.95, "learning_rate": 4.2403617580671675e-05, "loss": 1.2264, "step": 3248500 }, { "epoch": 1.95, "learning_rate": 4.240152181504223e-05, "loss": 1.2419, "step": 3249000 }, { "epoch": 1.95, "learning_rate": 4.239942184948167e-05, "loss": 1.2698, "step": 3249500 }, { "epoch": 1.95, "learning_rate": 4.23973218839211e-05, "loss": 1.2194, "step": 3250000 }, { "epoch": 1.95, "learning_rate": 4.2395221918360536e-05, "loss": 1.2654, "step": 3250500 }, { "epoch": 1.95, "learning_rate": 4.239312615273109e-05, "loss": 1.2396, "step": 3251000 }, { "epoch": 1.95, "learning_rate": 4.239102618717053e-05, "loss": 1.2614, "step": 3251500 }, { "epoch": 1.95, "learning_rate": 4.238892622160996e-05, "loss": 1.2914, "step": 3252000 }, { "epoch": 1.95, "learning_rate": 4.2386826256049396e-05, "loss": 1.2677, "step": 3252500 }, { "epoch": 1.95, "learning_rate": 4.238472629048884e-05, "loss": 1.2343, "step": 3253000 }, { "epoch": 1.95, "learning_rate": 4.238262632492827e-05, "loss": 1.2466, "step": 3253500 }, { "epoch": 1.95, "learning_rate": 4.2380526359367704e-05, "loss": 1.2603, "step": 3254000 }, { "epoch": 1.95, "learning_rate": 4.2378426393807144e-05, "loss": 1.249, "step": 3254500 }, { "epoch": 1.95, "learning_rate": 4.237632642824658e-05, "loss": 1.262, "step": 3255000 }, { "epoch": 1.95, "learning_rate": 4.237422646268601e-05, "loss": 1.2699, "step": 3255500 }, { "epoch": 1.95, "learning_rate": 4.237212649712545e-05, "loss": 1.2444, "step": 3256000 }, { "epoch": 1.95, "learning_rate": 4.237002653156488e-05, "loss": 1.2671, "step": 3256500 }, { "epoch": 1.95, "learning_rate": 4.236793076593544e-05, "loss": 1.2631, "step": 3257000 }, { "epoch": 1.95, "learning_rate": 4.236583080037488e-05, "loss": 1.2807, "step": 3257500 }, { "epoch": 1.95, "learning_rate": 4.236373083481431e-05, "loss": 1.2529, "step": 3258000 }, { "epoch": 1.95, "learning_rate": 4.2361630869253745e-05, "loss": 1.2813, "step": 3258500 }, { "epoch": 1.95, "learning_rate": 4.235953090369318e-05, "loss": 1.2389, "step": 3259000 }, { "epoch": 1.95, "learning_rate": 4.235743093813261e-05, "loss": 1.2514, "step": 3259500 }, { "epoch": 1.95, "learning_rate": 4.235533517250317e-05, "loss": 1.2191, "step": 3260000 }, { "epoch": 1.95, "learning_rate": 4.2353235206942606e-05, "loss": 1.2344, "step": 3260500 }, { "epoch": 1.96, "learning_rate": 4.2351135241382046e-05, "loss": 1.2406, "step": 3261000 }, { "epoch": 1.96, "learning_rate": 4.234903527582147e-05, "loss": 1.2533, "step": 3261500 }, { "epoch": 1.96, "learning_rate": 4.2346935310260906e-05, "loss": 1.2339, "step": 3262000 }, { "epoch": 1.96, "learning_rate": 4.2344835344700346e-05, "loss": 1.256, "step": 3262500 }, { "epoch": 1.96, "learning_rate": 4.234273537913978e-05, "loss": 1.231, "step": 3263000 }, { "epoch": 1.96, "learning_rate": 4.2340635413579213e-05, "loss": 1.2715, "step": 3263500 }, { "epoch": 1.96, "learning_rate": 4.2338539647949774e-05, "loss": 1.2553, "step": 3264000 }, { "epoch": 1.96, "learning_rate": 4.233643968238921e-05, "loss": 1.2449, "step": 3264500 }, { "epoch": 1.96, "learning_rate": 4.233433971682864e-05, "loss": 1.2567, "step": 3265000 }, { "epoch": 1.96, "learning_rate": 4.233223975126808e-05, "loss": 1.2715, "step": 3265500 }, { "epoch": 1.96, "learning_rate": 4.2330143985638634e-05, "loss": 1.2638, "step": 3266000 }, { "epoch": 1.96, "learning_rate": 4.232804402007807e-05, "loss": 1.2496, "step": 3266500 }, { "epoch": 1.96, "learning_rate": 4.23259440545175e-05, "loss": 1.2638, "step": 3267000 }, { "epoch": 1.96, "learning_rate": 4.232384408895694e-05, "loss": 1.2339, "step": 3267500 }, { "epoch": 1.96, "learning_rate": 4.23217483233275e-05, "loss": 1.2569, "step": 3268000 }, { "epoch": 1.96, "learning_rate": 4.231964835776693e-05, "loss": 1.2453, "step": 3268500 }, { "epoch": 1.96, "learning_rate": 4.231755259213749e-05, "loss": 1.2279, "step": 3269000 }, { "epoch": 1.96, "learning_rate": 4.231545262657692e-05, "loss": 1.2554, "step": 3269500 }, { "epoch": 1.96, "learning_rate": 4.231335266101636e-05, "loss": 1.2364, "step": 3270000 }, { "epoch": 1.96, "learning_rate": 4.2311252695455796e-05, "loss": 1.2518, "step": 3270500 }, { "epoch": 1.96, "learning_rate": 4.230915272989523e-05, "loss": 1.2416, "step": 3271000 }, { "epoch": 1.96, "learning_rate": 4.230705276433466e-05, "loss": 1.2475, "step": 3271500 }, { "epoch": 1.96, "learning_rate": 4.2304956998705223e-05, "loss": 1.2499, "step": 3272000 }, { "epoch": 1.96, "learning_rate": 4.230285703314466e-05, "loss": 1.2287, "step": 3272500 }, { "epoch": 1.96, "learning_rate": 4.230076126751521e-05, "loss": 1.2467, "step": 3273000 }, { "epoch": 1.96, "learning_rate": 4.229866130195465e-05, "loss": 1.2494, "step": 3273500 }, { "epoch": 1.96, "learning_rate": 4.2296561336394084e-05, "loss": 1.2477, "step": 3274000 }, { "epoch": 1.96, "learning_rate": 4.229446137083352e-05, "loss": 1.257, "step": 3274500 }, { "epoch": 1.96, "learning_rate": 4.229236140527296e-05, "loss": 1.2359, "step": 3275000 }, { "epoch": 1.96, "learning_rate": 4.229026563964351e-05, "loss": 1.259, "step": 3275500 }, { "epoch": 1.96, "learning_rate": 4.2288165674082945e-05, "loss": 1.2504, "step": 3276000 }, { "epoch": 1.96, "learning_rate": 4.228606570852238e-05, "loss": 1.2543, "step": 3276500 }, { "epoch": 1.96, "learning_rate": 4.228396994289294e-05, "loss": 1.2482, "step": 3277000 }, { "epoch": 1.96, "learning_rate": 4.228186997733237e-05, "loss": 1.2365, "step": 3277500 }, { "epoch": 1.97, "learning_rate": 4.2279770011771806e-05, "loss": 1.273, "step": 3278000 }, { "epoch": 1.97, "learning_rate": 4.2277670046211246e-05, "loss": 1.2801, "step": 3278500 }, { "epoch": 1.97, "learning_rate": 4.227557008065068e-05, "loss": 1.2342, "step": 3279000 }, { "epoch": 1.97, "learning_rate": 4.227347011509011e-05, "loss": 1.2276, "step": 3279500 }, { "epoch": 1.97, "learning_rate": 4.227137014952955e-05, "loss": 1.262, "step": 3280000 }, { "epoch": 1.97, "learning_rate": 4.226927018396898e-05, "loss": 1.2404, "step": 3280500 }, { "epoch": 1.97, "learning_rate": 4.226717021840841e-05, "loss": 1.2392, "step": 3281000 }, { "epoch": 1.97, "learning_rate": 4.2265070252847853e-05, "loss": 1.2491, "step": 3281500 }, { "epoch": 1.97, "learning_rate": 4.226297028728729e-05, "loss": 1.2557, "step": 3282000 }, { "epoch": 1.97, "learning_rate": 4.226087032172672e-05, "loss": 1.2356, "step": 3282500 }, { "epoch": 1.97, "learning_rate": 4.225877035616616e-05, "loss": 1.252, "step": 3283000 }, { "epoch": 1.97, "learning_rate": 4.2256670390605594e-05, "loss": 1.272, "step": 3283500 }, { "epoch": 1.97, "learning_rate": 4.225457462497615e-05, "loss": 1.2863, "step": 3284000 }, { "epoch": 1.97, "learning_rate": 4.225247885934671e-05, "loss": 1.2522, "step": 3284500 }, { "epoch": 1.97, "learning_rate": 4.225037889378614e-05, "loss": 1.2599, "step": 3285000 }, { "epoch": 1.97, "learning_rate": 4.2248278928225575e-05, "loss": 1.2393, "step": 3285500 }, { "epoch": 1.97, "learning_rate": 4.224617896266501e-05, "loss": 1.2691, "step": 3286000 }, { "epoch": 1.97, "learning_rate": 4.224407899710445e-05, "loss": 1.2463, "step": 3286500 }, { "epoch": 1.97, "learning_rate": 4.224197903154388e-05, "loss": 1.2306, "step": 3287000 }, { "epoch": 1.97, "learning_rate": 4.2239879065983315e-05, "loss": 1.2334, "step": 3287500 }, { "epoch": 1.97, "learning_rate": 4.2237779100422756e-05, "loss": 1.2817, "step": 3288000 }, { "epoch": 1.97, "learning_rate": 4.223567913486219e-05, "loss": 1.261, "step": 3288500 }, { "epoch": 1.97, "learning_rate": 4.223357916930162e-05, "loss": 1.2443, "step": 3289000 }, { "epoch": 1.97, "learning_rate": 4.2231483403672176e-05, "loss": 1.2694, "step": 3289500 }, { "epoch": 1.97, "learning_rate": 4.2229383438111616e-05, "loss": 1.2468, "step": 3290000 }, { "epoch": 1.97, "learning_rate": 4.222728347255105e-05, "loss": 1.2669, "step": 3290500 }, { "epoch": 1.97, "learning_rate": 4.222518350699048e-05, "loss": 1.2399, "step": 3291000 }, { "epoch": 1.97, "learning_rate": 4.2223083541429924e-05, "loss": 1.2915, "step": 3291500 }, { "epoch": 1.97, "learning_rate": 4.222098357586936e-05, "loss": 1.2601, "step": 3292000 }, { "epoch": 1.97, "learning_rate": 4.221888361030879e-05, "loss": 1.2774, "step": 3292500 }, { "epoch": 1.97, "learning_rate": 4.2216783644748224e-05, "loss": 1.2671, "step": 3293000 }, { "epoch": 1.97, "learning_rate": 4.2214687879118784e-05, "loss": 1.2529, "step": 3293500 }, { "epoch": 1.97, "learning_rate": 4.221259211348934e-05, "loss": 1.2297, "step": 3294000 }, { "epoch": 1.98, "learning_rate": 4.221049214792877e-05, "loss": 1.249, "step": 3294500 }, { "epoch": 1.98, "learning_rate": 4.220839218236821e-05, "loss": 1.215, "step": 3295000 }, { "epoch": 1.98, "learning_rate": 4.2206292216807645e-05, "loss": 1.2759, "step": 3295500 }, { "epoch": 1.98, "learning_rate": 4.220419225124708e-05, "loss": 1.2869, "step": 3296000 }, { "epoch": 1.98, "learning_rate": 4.220209228568652e-05, "loss": 1.2703, "step": 3296500 }, { "epoch": 1.98, "learning_rate": 4.219999652005707e-05, "loss": 1.2599, "step": 3297000 }, { "epoch": 1.98, "learning_rate": 4.2197896554496506e-05, "loss": 1.251, "step": 3297500 }, { "epoch": 1.98, "learning_rate": 4.219579658893594e-05, "loss": 1.2561, "step": 3298000 }, { "epoch": 1.98, "learning_rate": 4.219369662337538e-05, "loss": 1.2464, "step": 3298500 }, { "epoch": 1.98, "learning_rate": 4.219160085774593e-05, "loss": 1.2714, "step": 3299000 }, { "epoch": 1.98, "learning_rate": 4.218950509211649e-05, "loss": 1.2492, "step": 3299500 }, { "epoch": 1.98, "learning_rate": 4.218740512655592e-05, "loss": 1.2767, "step": 3300000 }, { "epoch": 1.98, "eval_loss": 1.2033525705337524, "eval_runtime": 1102.9314, "eval_samples_per_second": 477.564, "eval_steps_per_second": 79.594, "step": 3300000 }, { "epoch": 1.98, "learning_rate": 4.218530516099536e-05, "loss": 1.2708, "step": 3300500 }, { "epoch": 1.98, "learning_rate": 4.2183205195434794e-05, "loss": 1.2561, "step": 3301000 }, { "epoch": 1.98, "learning_rate": 4.218110522987423e-05, "loss": 1.2472, "step": 3301500 }, { "epoch": 1.98, "learning_rate": 4.217900526431367e-05, "loss": 1.2677, "step": 3302000 }, { "epoch": 1.98, "learning_rate": 4.21769052987531e-05, "loss": 1.2653, "step": 3302500 }, { "epoch": 1.98, "learning_rate": 4.2174805333192534e-05, "loss": 1.2271, "step": 3303000 }, { "epoch": 1.98, "learning_rate": 4.217270956756309e-05, "loss": 1.2333, "step": 3303500 }, { "epoch": 1.98, "learning_rate": 4.217060960200253e-05, "loss": 1.2417, "step": 3304000 }, { "epoch": 1.98, "learning_rate": 4.216850963644196e-05, "loss": 1.2627, "step": 3304500 }, { "epoch": 1.98, "learning_rate": 4.2166409670881395e-05, "loss": 1.257, "step": 3305000 }, { "epoch": 1.98, "learning_rate": 4.2164309705320835e-05, "loss": 1.2694, "step": 3305500 }, { "epoch": 1.98, "learning_rate": 4.216220973976027e-05, "loss": 1.2381, "step": 3306000 }, { "epoch": 1.98, "learning_rate": 4.21601097741997e-05, "loss": 1.2477, "step": 3306500 }, { "epoch": 1.98, "learning_rate": 4.215800980863914e-05, "loss": 1.2748, "step": 3307000 }, { "epoch": 1.98, "learning_rate": 4.215590984307857e-05, "loss": 1.2409, "step": 3307500 }, { "epoch": 1.98, "learning_rate": 4.215381407744913e-05, "loss": 1.2652, "step": 3308000 }, { "epoch": 1.98, "learning_rate": 4.215171411188856e-05, "loss": 1.2544, "step": 3308500 }, { "epoch": 1.98, "learning_rate": 4.2149614146328e-05, "loss": 1.2546, "step": 3309000 }, { "epoch": 1.98, "learning_rate": 4.214751418076744e-05, "loss": 1.2503, "step": 3309500 }, { "epoch": 1.98, "learning_rate": 4.214541421520687e-05, "loss": 1.2463, "step": 3310000 }, { "epoch": 1.98, "learning_rate": 4.2143314249646304e-05, "loss": 1.2593, "step": 3310500 }, { "epoch": 1.99, "learning_rate": 4.214121428408574e-05, "loss": 1.2696, "step": 3311000 }, { "epoch": 1.99, "learning_rate": 4.213911431852518e-05, "loss": 1.2522, "step": 3311500 }, { "epoch": 1.99, "learning_rate": 4.213701855289573e-05, "loss": 1.2546, "step": 3312000 }, { "epoch": 1.99, "learning_rate": 4.213492278726629e-05, "loss": 1.2457, "step": 3312500 }, { "epoch": 1.99, "learning_rate": 4.2132822821705725e-05, "loss": 1.2387, "step": 3313000 }, { "epoch": 1.99, "learning_rate": 4.213072285614516e-05, "loss": 1.2456, "step": 3313500 }, { "epoch": 1.99, "learning_rate": 4.21286228905846e-05, "loss": 1.2579, "step": 3314000 }, { "epoch": 1.99, "learning_rate": 4.2126522925024025e-05, "loss": 1.2592, "step": 3314500 }, { "epoch": 1.99, "learning_rate": 4.212442295946346e-05, "loss": 1.2537, "step": 3315000 }, { "epoch": 1.99, "learning_rate": 4.212233139376514e-05, "loss": 1.2857, "step": 3315500 }, { "epoch": 1.99, "learning_rate": 4.212023142820458e-05, "loss": 1.2259, "step": 3316000 }, { "epoch": 1.99, "learning_rate": 4.211813146264401e-05, "loss": 1.27, "step": 3316500 }, { "epoch": 1.99, "learning_rate": 4.2116031497083446e-05, "loss": 1.2551, "step": 3317000 }, { "epoch": 1.99, "learning_rate": 4.2113931531522886e-05, "loss": 1.2545, "step": 3317500 }, { "epoch": 1.99, "learning_rate": 4.211183156596232e-05, "loss": 1.2653, "step": 3318000 }, { "epoch": 1.99, "learning_rate": 4.210973160040175e-05, "loss": 1.2679, "step": 3318500 }, { "epoch": 1.99, "learning_rate": 4.2107631634841194e-05, "loss": 1.2723, "step": 3319000 }, { "epoch": 1.99, "learning_rate": 4.210553166928062e-05, "loss": 1.2721, "step": 3319500 }, { "epoch": 1.99, "learning_rate": 4.2103431703720054e-05, "loss": 1.2557, "step": 3320000 }, { "epoch": 1.99, "learning_rate": 4.2101331738159494e-05, "loss": 1.2296, "step": 3320500 }, { "epoch": 1.99, "learning_rate": 4.209923177259893e-05, "loss": 1.281, "step": 3321000 }, { "epoch": 1.99, "learning_rate": 4.209713600696948e-05, "loss": 1.2263, "step": 3321500 }, { "epoch": 1.99, "learning_rate": 4.209503604140892e-05, "loss": 1.2488, "step": 3322000 }, { "epoch": 1.99, "learning_rate": 4.2092936075848355e-05, "loss": 1.2555, "step": 3322500 }, { "epoch": 1.99, "learning_rate": 4.209083611028779e-05, "loss": 1.238, "step": 3323000 }, { "epoch": 1.99, "learning_rate": 4.208874034465835e-05, "loss": 1.2498, "step": 3323500 }, { "epoch": 1.99, "learning_rate": 4.208664037909778e-05, "loss": 1.2839, "step": 3324000 }, { "epoch": 1.99, "learning_rate": 4.2084540413537215e-05, "loss": 1.2234, "step": 3324500 }, { "epoch": 1.99, "learning_rate": 4.2082453047770016e-05, "loss": 1.2577, "step": 3325000 }, { "epoch": 1.99, "learning_rate": 4.208035308220945e-05, "loss": 1.2438, "step": 3325500 }, { "epoch": 1.99, "learning_rate": 4.207825311664888e-05, "loss": 1.2413, "step": 3326000 }, { "epoch": 1.99, "learning_rate": 4.2076153151088316e-05, "loss": 1.2751, "step": 3326500 }, { "epoch": 1.99, "learning_rate": 4.2074053185527757e-05, "loss": 1.2194, "step": 3327000 }, { "epoch": 1.99, "learning_rate": 4.207195321996719e-05, "loss": 1.2353, "step": 3327500 }, { "epoch": 2.0, "learning_rate": 4.2069853254406623e-05, "loss": 1.2602, "step": 3328000 }, { "epoch": 2.0, "learning_rate": 4.2067753288846064e-05, "loss": 1.236, "step": 3328500 }, { "epoch": 2.0, "learning_rate": 4.20656533232855e-05, "loss": 1.2609, "step": 3329000 }, { "epoch": 2.0, "learning_rate": 4.206355335772494e-05, "loss": 1.2641, "step": 3329500 }, { "epoch": 2.0, "learning_rate": 4.206145339216437e-05, "loss": 1.2569, "step": 3330000 }, { "epoch": 2.0, "learning_rate": 4.2059353426603804e-05, "loss": 1.2481, "step": 3330500 }, { "epoch": 2.0, "learning_rate": 4.205725346104324e-05, "loss": 1.2419, "step": 3331000 }, { "epoch": 2.0, "learning_rate": 4.205515349548267e-05, "loss": 1.2608, "step": 3331500 }, { "epoch": 2.0, "learning_rate": 4.2053053529922105e-05, "loss": 1.2305, "step": 3332000 }, { "epoch": 2.0, "learning_rate": 4.2050953564361545e-05, "loss": 1.2409, "step": 3332500 }, { "epoch": 2.0, "learning_rate": 4.2048857798732105e-05, "loss": 1.2405, "step": 3333000 }, { "epoch": 2.0, "learning_rate": 4.204676203310266e-05, "loss": 1.2761, "step": 3333500 }, { "epoch": 2.0, "learning_rate": 4.204466206754209e-05, "loss": 1.2441, "step": 3334000 }, { "epoch": 2.0, "learning_rate": 4.2042562101981526e-05, "loss": 1.2889, "step": 3334500 }, { "epoch": 2.0, "learning_rate": 4.2040462136420966e-05, "loss": 1.2861, "step": 3335000 }, { "epoch": 2.0, "learning_rate": 4.20383621708604e-05, "loss": 1.2382, "step": 3335500 }, { "epoch": 2.0, "learning_rate": 4.203627060516207e-05, "loss": 1.2626, "step": 3336000 }, { "epoch": 2.0, "learning_rate": 4.203417063960151e-05, "loss": 1.2198, "step": 3336500 }, { "epoch": 2.0, "learning_rate": 4.203207067404095e-05, "loss": 1.2162, "step": 3337000 }, { "epoch": 2.0, "learning_rate": 4.202997070848038e-05, "loss": 1.2305, "step": 3337500 }, { "epoch": 2.0, "learning_rate": 4.2027870742919814e-05, "loss": 1.2166, "step": 3338000 }, { "epoch": 2.0, "learning_rate": 4.2025770777359254e-05, "loss": 1.2541, "step": 3338500 }, { "epoch": 2.0, "learning_rate": 4.202367081179869e-05, "loss": 1.2032, "step": 3339000 }, { "epoch": 2.0, "learning_rate": 4.202157084623812e-05, "loss": 1.209, "step": 3339500 }, { "epoch": 2.0, "learning_rate": 4.201947088067756e-05, "loss": 1.1905, "step": 3340000 }, { "epoch": 2.0, "learning_rate": 4.2017370915116995e-05, "loss": 1.2406, "step": 3340500 }, { "epoch": 2.0, "learning_rate": 4.201527094955642e-05, "loss": 1.1992, "step": 3341000 }, { "epoch": 2.0, "learning_rate": 4.201317518392698e-05, "loss": 1.2321, "step": 3341500 }, { "epoch": 2.0, "learning_rate": 4.201107521836642e-05, "loss": 1.2198, "step": 3342000 }, { "epoch": 2.0, "learning_rate": 4.2008975252805855e-05, "loss": 1.2011, "step": 3342500 }, { "epoch": 2.0, "learning_rate": 4.200687528724529e-05, "loss": 1.2217, "step": 3343000 }, { "epoch": 2.0, "learning_rate": 4.200477952161585e-05, "loss": 1.2124, "step": 3343500 }, { "epoch": 2.0, "learning_rate": 4.200267955605528e-05, "loss": 1.2352, "step": 3344000 }, { "epoch": 2.01, "learning_rate": 4.2000579590494716e-05, "loss": 1.2127, "step": 3344500 }, { "epoch": 2.01, "learning_rate": 4.1998479624934156e-05, "loss": 1.22, "step": 3345000 }, { "epoch": 2.01, "learning_rate": 4.199638385930471e-05, "loss": 1.237, "step": 3345500 }, { "epoch": 2.01, "learning_rate": 4.199428389374414e-05, "loss": 1.2126, "step": 3346000 }, { "epoch": 2.01, "learning_rate": 4.199218392818358e-05, "loss": 1.2094, "step": 3346500 }, { "epoch": 2.01, "learning_rate": 4.199008396262302e-05, "loss": 1.231, "step": 3347000 }, { "epoch": 2.01, "learning_rate": 4.198798399706245e-05, "loss": 1.2546, "step": 3347500 }, { "epoch": 2.01, "learning_rate": 4.1985888231433004e-05, "loss": 1.2435, "step": 3348000 }, { "epoch": 2.01, "learning_rate": 4.198378826587244e-05, "loss": 1.2206, "step": 3348500 }, { "epoch": 2.01, "learning_rate": 4.1981692500243e-05, "loss": 1.1801, "step": 3349000 }, { "epoch": 2.01, "learning_rate": 4.197959253468243e-05, "loss": 1.2382, "step": 3349500 }, { "epoch": 2.01, "learning_rate": 4.1977492569121865e-05, "loss": 1.2125, "step": 3350000 }, { "epoch": 2.01, "learning_rate": 4.1975392603561305e-05, "loss": 1.2069, "step": 3350500 }, { "epoch": 2.01, "learning_rate": 4.197329263800074e-05, "loss": 1.1793, "step": 3351000 }, { "epoch": 2.01, "learning_rate": 4.197119267244017e-05, "loss": 1.2416, "step": 3351500 }, { "epoch": 2.01, "learning_rate": 4.196909270687961e-05, "loss": 1.2327, "step": 3352000 }, { "epoch": 2.01, "learning_rate": 4.196699274131904e-05, "loss": 1.2248, "step": 3352500 }, { "epoch": 2.01, "learning_rate": 4.196489277575847e-05, "loss": 1.2351, "step": 3353000 }, { "epoch": 2.01, "learning_rate": 4.196279281019791e-05, "loss": 1.2355, "step": 3353500 }, { "epoch": 2.01, "learning_rate": 4.1960692844637346e-05, "loss": 1.1975, "step": 3354000 }, { "epoch": 2.01, "learning_rate": 4.195859287907678e-05, "loss": 1.2151, "step": 3354500 }, { "epoch": 2.01, "learning_rate": 4.195650131337846e-05, "loss": 1.2216, "step": 3355000 }, { "epoch": 2.01, "learning_rate": 4.1954401347817893e-05, "loss": 1.2075, "step": 3355500 }, { "epoch": 2.01, "learning_rate": 4.1952301382257334e-05, "loss": 1.2321, "step": 3356000 }, { "epoch": 2.01, "learning_rate": 4.195020141669677e-05, "loss": 1.2194, "step": 3356500 }, { "epoch": 2.01, "learning_rate": 4.19481014511362e-05, "loss": 1.2372, "step": 3357000 }, { "epoch": 2.01, "learning_rate": 4.1946001485575634e-05, "loss": 1.2411, "step": 3357500 }, { "epoch": 2.01, "learning_rate": 4.194390152001507e-05, "loss": 1.2438, "step": 3358000 }, { "epoch": 2.01, "learning_rate": 4.194180155445451e-05, "loss": 1.2259, "step": 3358500 }, { "epoch": 2.01, "learning_rate": 4.193970578882507e-05, "loss": 1.2535, "step": 3359000 }, { "epoch": 2.01, "learning_rate": 4.19376058232645e-05, "loss": 1.2145, "step": 3359500 }, { "epoch": 2.01, "learning_rate": 4.1935510057635055e-05, "loss": 1.2192, "step": 3360000 }, { "epoch": 2.01, "learning_rate": 4.193341009207449e-05, "loss": 1.2145, "step": 3360500 }, { "epoch": 2.02, "learning_rate": 4.193131012651393e-05, "loss": 1.2104, "step": 3361000 }, { "epoch": 2.02, "learning_rate": 4.192921016095336e-05, "loss": 1.2195, "step": 3361500 }, { "epoch": 2.02, "learning_rate": 4.1927114395323916e-05, "loss": 1.1939, "step": 3362000 }, { "epoch": 2.02, "learning_rate": 4.192501442976335e-05, "loss": 1.2449, "step": 3362500 }, { "epoch": 2.02, "learning_rate": 4.192291446420279e-05, "loss": 1.2233, "step": 3363000 }, { "epoch": 2.02, "learning_rate": 4.192081449864222e-05, "loss": 1.2038, "step": 3363500 }, { "epoch": 2.02, "learning_rate": 4.1918714533081656e-05, "loss": 1.2076, "step": 3364000 }, { "epoch": 2.02, "learning_rate": 4.191661456752109e-05, "loss": 1.2652, "step": 3364500 }, { "epoch": 2.02, "learning_rate": 4.191451460196052e-05, "loss": 1.219, "step": 3365000 }, { "epoch": 2.02, "learning_rate": 4.1912414636399964e-05, "loss": 1.1913, "step": 3365500 }, { "epoch": 2.02, "learning_rate": 4.19103146708394e-05, "loss": 1.2265, "step": 3366000 }, { "epoch": 2.02, "learning_rate": 4.190821470527883e-05, "loss": 1.2151, "step": 3366500 }, { "epoch": 2.02, "learning_rate": 4.190611473971827e-05, "loss": 1.2252, "step": 3367000 }, { "epoch": 2.02, "learning_rate": 4.1904014774157704e-05, "loss": 1.2086, "step": 3367500 }, { "epoch": 2.02, "learning_rate": 4.1901923208459385e-05, "loss": 1.215, "step": 3368000 }, { "epoch": 2.02, "learning_rate": 4.189982324289882e-05, "loss": 1.2167, "step": 3368500 }, { "epoch": 2.02, "learning_rate": 4.189772327733825e-05, "loss": 1.2496, "step": 3369000 }, { "epoch": 2.02, "learning_rate": 4.1895623311777685e-05, "loss": 1.2426, "step": 3369500 }, { "epoch": 2.02, "learning_rate": 4.189352334621712e-05, "loss": 1.2192, "step": 3370000 }, { "epoch": 2.02, "learning_rate": 4.189142338065655e-05, "loss": 1.2157, "step": 3370500 }, { "epoch": 2.02, "learning_rate": 4.188932341509599e-05, "loss": 1.2326, "step": 3371000 }, { "epoch": 2.02, "learning_rate": 4.1887227649466546e-05, "loss": 1.2165, "step": 3371500 }, { "epoch": 2.02, "learning_rate": 4.188512768390598e-05, "loss": 1.2458, "step": 3372000 }, { "epoch": 2.02, "learning_rate": 4.188302771834542e-05, "loss": 1.2398, "step": 3372500 }, { "epoch": 2.02, "learning_rate": 4.188092775278485e-05, "loss": 1.2033, "step": 3373000 }, { "epoch": 2.02, "learning_rate": 4.1878827787224286e-05, "loss": 1.2292, "step": 3373500 }, { "epoch": 2.02, "learning_rate": 4.187673202159484e-05, "loss": 1.2279, "step": 3374000 }, { "epoch": 2.02, "learning_rate": 4.187463205603428e-05, "loss": 1.2338, "step": 3374500 }, { "epoch": 2.02, "learning_rate": 4.1872532090473714e-05, "loss": 1.1969, "step": 3375000 }, { "epoch": 2.02, "learning_rate": 4.187043212491315e-05, "loss": 1.1877, "step": 3375500 }, { "epoch": 2.02, "learning_rate": 4.186833215935259e-05, "loss": 1.2651, "step": 3376000 }, { "epoch": 2.02, "learning_rate": 4.186623639372314e-05, "loss": 1.2203, "step": 3376500 }, { "epoch": 2.02, "learning_rate": 4.18641406280937e-05, "loss": 1.2326, "step": 3377000 }, { "epoch": 2.02, "learning_rate": 4.1862040662533135e-05, "loss": 1.226, "step": 3377500 }, { "epoch": 2.03, "learning_rate": 4.185994069697257e-05, "loss": 1.2216, "step": 3378000 }, { "epoch": 2.03, "learning_rate": 4.185784073141201e-05, "loss": 1.2174, "step": 3378500 }, { "epoch": 2.03, "learning_rate": 4.1855740765851435e-05, "loss": 1.2111, "step": 3379000 }, { "epoch": 2.03, "learning_rate": 4.1853640800290875e-05, "loss": 1.2416, "step": 3379500 }, { "epoch": 2.03, "learning_rate": 4.185154083473031e-05, "loss": 1.2204, "step": 3380000 }, { "epoch": 2.03, "learning_rate": 4.184944086916974e-05, "loss": 1.2264, "step": 3380500 }, { "epoch": 2.03, "learning_rate": 4.184734090360918e-05, "loss": 1.224, "step": 3381000 }, { "epoch": 2.03, "learning_rate": 4.1845245137979736e-05, "loss": 1.199, "step": 3381500 }, { "epoch": 2.03, "learning_rate": 4.184314517241917e-05, "loss": 1.2083, "step": 3382000 }, { "epoch": 2.03, "learning_rate": 4.18410452068586e-05, "loss": 1.2069, "step": 3382500 }, { "epoch": 2.03, "learning_rate": 4.183894524129804e-05, "loss": 1.2393, "step": 3383000 }, { "epoch": 2.03, "learning_rate": 4.183684527573748e-05, "loss": 1.2264, "step": 3383500 }, { "epoch": 2.03, "learning_rate": 4.183474531017691e-05, "loss": 1.2552, "step": 3384000 }, { "epoch": 2.03, "learning_rate": 4.1832649544547464e-05, "loss": 1.2178, "step": 3384500 }, { "epoch": 2.03, "learning_rate": 4.1830549578986904e-05, "loss": 1.2385, "step": 3385000 }, { "epoch": 2.03, "learning_rate": 4.182844961342634e-05, "loss": 1.2373, "step": 3385500 }, { "epoch": 2.03, "learning_rate": 4.182634964786577e-05, "loss": 1.198, "step": 3386000 }, { "epoch": 2.03, "learning_rate": 4.182424968230521e-05, "loss": 1.2394, "step": 3386500 }, { "epoch": 2.03, "learning_rate": 4.1822149716744645e-05, "loss": 1.2254, "step": 3387000 }, { "epoch": 2.03, "learning_rate": 4.1820049751184085e-05, "loss": 1.2216, "step": 3387500 }, { "epoch": 2.03, "learning_rate": 4.181794978562352e-05, "loss": 1.2435, "step": 3388000 }, { "epoch": 2.03, "learning_rate": 4.181585401999407e-05, "loss": 1.1905, "step": 3388500 }, { "epoch": 2.03, "learning_rate": 4.1813758254364625e-05, "loss": 1.2216, "step": 3389000 }, { "epoch": 2.03, "learning_rate": 4.181165828880406e-05, "loss": 1.2113, "step": 3389500 }, { "epoch": 2.03, "learning_rate": 4.18095583232435e-05, "loss": 1.2289, "step": 3390000 }, { "epoch": 2.03, "learning_rate": 4.180745835768293e-05, "loss": 1.2403, "step": 3390500 }, { "epoch": 2.03, "learning_rate": 4.1805358392122366e-05, "loss": 1.2307, "step": 3391000 }, { "epoch": 2.03, "learning_rate": 4.1803258426561806e-05, "loss": 1.2269, "step": 3391500 }, { "epoch": 2.03, "learning_rate": 4.180115846100124e-05, "loss": 1.2084, "step": 3392000 }, { "epoch": 2.03, "learning_rate": 4.179905849544067e-05, "loss": 1.2356, "step": 3392500 }, { "epoch": 2.03, "learning_rate": 4.179696272981123e-05, "loss": 1.2204, "step": 3393000 }, { "epoch": 2.03, "learning_rate": 4.179486276425067e-05, "loss": 1.2372, "step": 3393500 }, { "epoch": 2.03, "learning_rate": 4.179276699862122e-05, "loss": 1.2304, "step": 3394000 }, { "epoch": 2.04, "learning_rate": 4.1790667033060654e-05, "loss": 1.2305, "step": 3394500 }, { "epoch": 2.04, "learning_rate": 4.1788567067500094e-05, "loss": 1.2504, "step": 3395000 }, { "epoch": 2.04, "learning_rate": 4.178646710193953e-05, "loss": 1.2269, "step": 3395500 }, { "epoch": 2.04, "learning_rate": 4.178437133631008e-05, "loss": 1.2155, "step": 3396000 }, { "epoch": 2.04, "learning_rate": 4.1782271370749515e-05, "loss": 1.2351, "step": 3396500 }, { "epoch": 2.04, "learning_rate": 4.1780171405188955e-05, "loss": 1.2375, "step": 3397000 }, { "epoch": 2.04, "learning_rate": 4.177807143962839e-05, "loss": 1.2498, "step": 3397500 }, { "epoch": 2.04, "learning_rate": 4.177597567399894e-05, "loss": 1.2327, "step": 3398000 }, { "epoch": 2.04, "learning_rate": 4.1773875708438375e-05, "loss": 1.2327, "step": 3398500 }, { "epoch": 2.04, "learning_rate": 4.1771775742877816e-05, "loss": 1.2099, "step": 3399000 }, { "epoch": 2.04, "learning_rate": 4.176967577731725e-05, "loss": 1.232, "step": 3399500 }, { "epoch": 2.04, "learning_rate": 4.176757581175669e-05, "loss": 1.2407, "step": 3400000 }, { "epoch": 2.04, "eval_loss": 1.1995346546173096, "eval_runtime": 1104.2629, "eval_samples_per_second": 476.988, "eval_steps_per_second": 79.498, "step": 3400000 }, { "epoch": 2.04, "learning_rate": 4.176547584619612e-05, "loss": 1.2394, "step": 3400500 }, { "epoch": 2.04, "learning_rate": 4.1763375880635556e-05, "loss": 1.2466, "step": 3401000 }, { "epoch": 2.04, "learning_rate": 4.1761275915074997e-05, "loss": 1.2169, "step": 3401500 }, { "epoch": 2.04, "learning_rate": 4.175917594951443e-05, "loss": 1.2255, "step": 3402000 }, { "epoch": 2.04, "learning_rate": 4.1757075983953863e-05, "loss": 1.2384, "step": 3402500 }, { "epoch": 2.04, "learning_rate": 4.175498441825554e-05, "loss": 1.2456, "step": 3403000 }, { "epoch": 2.04, "learning_rate": 4.17528886526261e-05, "loss": 1.248, "step": 3403500 }, { "epoch": 2.04, "learning_rate": 4.175078868706553e-05, "loss": 1.2369, "step": 3404000 }, { "epoch": 2.04, "learning_rate": 4.174868872150497e-05, "loss": 1.2359, "step": 3404500 }, { "epoch": 2.04, "learning_rate": 4.17465887559444e-05, "loss": 1.2531, "step": 3405000 }, { "epoch": 2.04, "learning_rate": 4.174448879038383e-05, "loss": 1.2493, "step": 3405500 }, { "epoch": 2.04, "learning_rate": 4.174238882482327e-05, "loss": 1.2164, "step": 3406000 }, { "epoch": 2.04, "learning_rate": 4.1740288859262705e-05, "loss": 1.2284, "step": 3406500 }, { "epoch": 2.04, "learning_rate": 4.1738188893702145e-05, "loss": 1.2662, "step": 3407000 }, { "epoch": 2.04, "learning_rate": 4.173608892814158e-05, "loss": 1.2377, "step": 3407500 }, { "epoch": 2.04, "learning_rate": 4.173398896258101e-05, "loss": 1.223, "step": 3408000 }, { "epoch": 2.04, "learning_rate": 4.173188899702045e-05, "loss": 1.2476, "step": 3408500 }, { "epoch": 2.04, "learning_rate": 4.1729789031459886e-05, "loss": 1.2279, "step": 3409000 }, { "epoch": 2.04, "learning_rate": 4.172768906589932e-05, "loss": 1.2808, "step": 3409500 }, { "epoch": 2.04, "learning_rate": 4.172558910033876e-05, "loss": 1.2192, "step": 3410000 }, { "epoch": 2.04, "learning_rate": 4.172349333470931e-05, "loss": 1.2598, "step": 3410500 }, { "epoch": 2.05, "learning_rate": 4.172139336914875e-05, "loss": 1.2182, "step": 3411000 }, { "epoch": 2.05, "learning_rate": 4.17192976035193e-05, "loss": 1.2103, "step": 3411500 }, { "epoch": 2.05, "learning_rate": 4.1717197637958734e-05, "loss": 1.2197, "step": 3412000 }, { "epoch": 2.05, "learning_rate": 4.1715097672398174e-05, "loss": 1.2106, "step": 3412500 }, { "epoch": 2.05, "learning_rate": 4.171299770683761e-05, "loss": 1.2403, "step": 3413000 }, { "epoch": 2.05, "learning_rate": 4.171090194120816e-05, "loss": 1.2443, "step": 3413500 }, { "epoch": 2.05, "learning_rate": 4.17088019756476e-05, "loss": 1.1998, "step": 3414000 }, { "epoch": 2.05, "learning_rate": 4.1706706210018155e-05, "loss": 1.231, "step": 3414500 }, { "epoch": 2.05, "learning_rate": 4.170460624445759e-05, "loss": 1.2469, "step": 3415000 }, { "epoch": 2.05, "learning_rate": 4.170250627889702e-05, "loss": 1.2463, "step": 3415500 }, { "epoch": 2.05, "learning_rate": 4.170040631333646e-05, "loss": 1.2177, "step": 3416000 }, { "epoch": 2.05, "learning_rate": 4.1698306347775895e-05, "loss": 1.2238, "step": 3416500 }, { "epoch": 2.05, "learning_rate": 4.169620638221533e-05, "loss": 1.2112, "step": 3417000 }, { "epoch": 2.05, "learning_rate": 4.169410641665477e-05, "loss": 1.2427, "step": 3417500 }, { "epoch": 2.05, "learning_rate": 4.16920064510942e-05, "loss": 1.2545, "step": 3418000 }, { "epoch": 2.05, "learning_rate": 4.1689906485533636e-05, "loss": 1.2304, "step": 3418500 }, { "epoch": 2.05, "learning_rate": 4.1687806519973076e-05, "loss": 1.2084, "step": 3419000 }, { "epoch": 2.05, "learning_rate": 4.168570655441251e-05, "loss": 1.2293, "step": 3419500 }, { "epoch": 2.05, "learning_rate": 4.1683606588851936e-05, "loss": 1.222, "step": 3420000 }, { "epoch": 2.05, "learning_rate": 4.1681506623291377e-05, "loss": 1.2257, "step": 3420500 }, { "epoch": 2.05, "learning_rate": 4.167940665773081e-05, "loss": 1.1888, "step": 3421000 }, { "epoch": 2.05, "learning_rate": 4.1677306692170243e-05, "loss": 1.2141, "step": 3421500 }, { "epoch": 2.05, "learning_rate": 4.1675210926540804e-05, "loss": 1.2359, "step": 3422000 }, { "epoch": 2.05, "learning_rate": 4.167311096098024e-05, "loss": 1.2536, "step": 3422500 }, { "epoch": 2.05, "learning_rate": 4.167101099541967e-05, "loss": 1.2324, "step": 3423000 }, { "epoch": 2.05, "learning_rate": 4.166891102985911e-05, "loss": 1.222, "step": 3423500 }, { "epoch": 2.05, "learning_rate": 4.1666811064298544e-05, "loss": 1.2551, "step": 3424000 }, { "epoch": 2.05, "learning_rate": 4.166471109873798e-05, "loss": 1.2209, "step": 3424500 }, { "epoch": 2.05, "learning_rate": 4.166261113317742e-05, "loss": 1.2397, "step": 3425000 }, { "epoch": 2.05, "learning_rate": 4.166051956747909e-05, "loss": 1.2731, "step": 3425500 }, { "epoch": 2.05, "learning_rate": 4.165841960191853e-05, "loss": 1.2385, "step": 3426000 }, { "epoch": 2.05, "learning_rate": 4.1656319636357966e-05, "loss": 1.242, "step": 3426500 }, { "epoch": 2.05, "learning_rate": 4.16542196707974e-05, "loss": 1.2372, "step": 3427000 }, { "epoch": 2.05, "learning_rate": 4.165211970523683e-05, "loss": 1.1994, "step": 3427500 }, { "epoch": 2.06, "learning_rate": 4.1650019739676266e-05, "loss": 1.2346, "step": 3428000 }, { "epoch": 2.06, "learning_rate": 4.16479197741157e-05, "loss": 1.2372, "step": 3428500 }, { "epoch": 2.06, "learning_rate": 4.164581980855514e-05, "loss": 1.2293, "step": 3429000 }, { "epoch": 2.06, "learning_rate": 4.164371984299457e-05, "loss": 1.2457, "step": 3429500 }, { "epoch": 2.06, "learning_rate": 4.1641619877434007e-05, "loss": 1.2465, "step": 3430000 }, { "epoch": 2.06, "learning_rate": 4.163951991187345e-05, "loss": 1.2104, "step": 3430500 }, { "epoch": 2.06, "learning_rate": 4.163741994631288e-05, "loss": 1.2047, "step": 3431000 }, { "epoch": 2.06, "learning_rate": 4.1635324180683434e-05, "loss": 1.2, "step": 3431500 }, { "epoch": 2.06, "learning_rate": 4.1633224215122874e-05, "loss": 1.2296, "step": 3432000 }, { "epoch": 2.06, "learning_rate": 4.163112424956231e-05, "loss": 1.2321, "step": 3432500 }, { "epoch": 2.06, "learning_rate": 4.162902428400174e-05, "loss": 1.2377, "step": 3433000 }, { "epoch": 2.06, "learning_rate": 4.162692431844118e-05, "loss": 1.228, "step": 3433500 }, { "epoch": 2.06, "learning_rate": 4.1624828552811735e-05, "loss": 1.2278, "step": 3434000 }, { "epoch": 2.06, "learning_rate": 4.162272858725117e-05, "loss": 1.2536, "step": 3434500 }, { "epoch": 2.06, "learning_rate": 4.16206286216906e-05, "loss": 1.2067, "step": 3435000 }, { "epoch": 2.06, "learning_rate": 4.161852865613004e-05, "loss": 1.2149, "step": 3435500 }, { "epoch": 2.06, "learning_rate": 4.1616428690569475e-05, "loss": 1.2283, "step": 3436000 }, { "epoch": 2.06, "learning_rate": 4.161433292494003e-05, "loss": 1.2661, "step": 3436500 }, { "epoch": 2.06, "learning_rate": 4.161223295937946e-05, "loss": 1.2302, "step": 3437000 }, { "epoch": 2.06, "learning_rate": 4.16101329938189e-05, "loss": 1.2363, "step": 3437500 }, { "epoch": 2.06, "learning_rate": 4.1608033028258336e-05, "loss": 1.2443, "step": 3438000 }, { "epoch": 2.06, "learning_rate": 4.160593306269777e-05, "loss": 1.2265, "step": 3438500 }, { "epoch": 2.06, "learning_rate": 4.160383729706833e-05, "loss": 1.2134, "step": 3439000 }, { "epoch": 2.06, "learning_rate": 4.160173733150776e-05, "loss": 1.2167, "step": 3439500 }, { "epoch": 2.06, "learning_rate": 4.15996373659472e-05, "loss": 1.2021, "step": 3440000 }, { "epoch": 2.06, "learning_rate": 4.159753740038664e-05, "loss": 1.2388, "step": 3440500 }, { "epoch": 2.06, "learning_rate": 4.159544163475719e-05, "loss": 1.2328, "step": 3441000 }, { "epoch": 2.06, "learning_rate": 4.1593341669196624e-05, "loss": 1.2251, "step": 3441500 }, { "epoch": 2.06, "learning_rate": 4.159124170363606e-05, "loss": 1.2456, "step": 3442000 }, { "epoch": 2.06, "learning_rate": 4.15891417380755e-05, "loss": 1.2272, "step": 3442500 }, { "epoch": 2.06, "learning_rate": 4.158704597244605e-05, "loss": 1.1844, "step": 3443000 }, { "epoch": 2.06, "learning_rate": 4.1584946006885485e-05, "loss": 1.2297, "step": 3443500 }, { "epoch": 2.06, "learning_rate": 4.158284604132492e-05, "loss": 1.2249, "step": 3444000 }, { "epoch": 2.07, "learning_rate": 4.158075027569548e-05, "loss": 1.254, "step": 3444500 }, { "epoch": 2.07, "learning_rate": 4.157865031013491e-05, "loss": 1.2145, "step": 3445000 }, { "epoch": 2.07, "learning_rate": 4.1576550344574346e-05, "loss": 1.2321, "step": 3445500 }, { "epoch": 2.07, "learning_rate": 4.1574450379013786e-05, "loss": 1.2322, "step": 3446000 }, { "epoch": 2.07, "learning_rate": 4.157235041345322e-05, "loss": 1.2327, "step": 3446500 }, { "epoch": 2.07, "learning_rate": 4.157025044789265e-05, "loss": 1.2095, "step": 3447000 }, { "epoch": 2.07, "learning_rate": 4.156815048233209e-05, "loss": 1.2214, "step": 3447500 }, { "epoch": 2.07, "learning_rate": 4.1566050516771526e-05, "loss": 1.262, "step": 3448000 }, { "epoch": 2.07, "learning_rate": 4.156395475114208e-05, "loss": 1.2392, "step": 3448500 }, { "epoch": 2.07, "learning_rate": 4.1561854785581513e-05, "loss": 1.1848, "step": 3449000 }, { "epoch": 2.07, "learning_rate": 4.1559754820020954e-05, "loss": 1.2267, "step": 3449500 }, { "epoch": 2.07, "learning_rate": 4.155765485446039e-05, "loss": 1.2404, "step": 3450000 }, { "epoch": 2.07, "learning_rate": 4.155555488889982e-05, "loss": 1.221, "step": 3450500 }, { "epoch": 2.07, "learning_rate": 4.1553459123270374e-05, "loss": 1.2428, "step": 3451000 }, { "epoch": 2.07, "learning_rate": 4.1551359157709814e-05, "loss": 1.2228, "step": 3451500 }, { "epoch": 2.07, "learning_rate": 4.154925919214925e-05, "loss": 1.2029, "step": 3452000 }, { "epoch": 2.07, "learning_rate": 4.154715922658868e-05, "loss": 1.2183, "step": 3452500 }, { "epoch": 2.07, "learning_rate": 4.154505926102812e-05, "loss": 1.2054, "step": 3453000 }, { "epoch": 2.07, "learning_rate": 4.1542959295467555e-05, "loss": 1.2406, "step": 3453500 }, { "epoch": 2.07, "learning_rate": 4.154086352983811e-05, "loss": 1.2163, "step": 3454000 }, { "epoch": 2.07, "learning_rate": 4.153876356427755e-05, "loss": 1.2391, "step": 3454500 }, { "epoch": 2.07, "learning_rate": 4.153666359871698e-05, "loss": 1.2256, "step": 3455000 }, { "epoch": 2.07, "learning_rate": 4.1534563633156416e-05, "loss": 1.2156, "step": 3455500 }, { "epoch": 2.07, "learning_rate": 4.1532463667595856e-05, "loss": 1.2426, "step": 3456000 }, { "epoch": 2.07, "learning_rate": 4.153036370203528e-05, "loss": 1.2507, "step": 3456500 }, { "epoch": 2.07, "learning_rate": 4.1528263736474716e-05, "loss": 1.2142, "step": 3457000 }, { "epoch": 2.07, "learning_rate": 4.1526163770914156e-05, "loss": 1.2403, "step": 3457500 }, { "epoch": 2.07, "learning_rate": 4.152406800528472e-05, "loss": 1.2124, "step": 3458000 }, { "epoch": 2.07, "learning_rate": 4.152196803972415e-05, "loss": 1.2475, "step": 3458500 }, { "epoch": 2.07, "learning_rate": 4.1519872274094704e-05, "loss": 1.2159, "step": 3459000 }, { "epoch": 2.07, "learning_rate": 4.1517772308534144e-05, "loss": 1.2058, "step": 3459500 }, { "epoch": 2.07, "learning_rate": 4.151567234297358e-05, "loss": 1.2364, "step": 3460000 }, { "epoch": 2.07, "learning_rate": 4.151357237741301e-05, "loss": 1.1895, "step": 3460500 }, { "epoch": 2.08, "learning_rate": 4.151147241185245e-05, "loss": 1.2301, "step": 3461000 }, { "epoch": 2.08, "learning_rate": 4.150937244629188e-05, "loss": 1.2055, "step": 3461500 }, { "epoch": 2.08, "learning_rate": 4.150727248073131e-05, "loss": 1.2452, "step": 3462000 }, { "epoch": 2.08, "learning_rate": 4.150517251517075e-05, "loss": 1.2129, "step": 3462500 }, { "epoch": 2.08, "learning_rate": 4.1503080949472425e-05, "loss": 1.2424, "step": 3463000 }, { "epoch": 2.08, "learning_rate": 4.1500980983911865e-05, "loss": 1.2232, "step": 3463500 }, { "epoch": 2.08, "learning_rate": 4.14988810183513e-05, "loss": 1.2265, "step": 3464000 }, { "epoch": 2.08, "learning_rate": 4.149678105279073e-05, "loss": 1.2374, "step": 3464500 }, { "epoch": 2.08, "learning_rate": 4.149468948709241e-05, "loss": 1.2224, "step": 3465000 }, { "epoch": 2.08, "learning_rate": 4.1492589521531846e-05, "loss": 1.2308, "step": 3465500 }, { "epoch": 2.08, "learning_rate": 4.149048955597128e-05, "loss": 1.2467, "step": 3466000 }, { "epoch": 2.08, "learning_rate": 4.148838959041071e-05, "loss": 1.2269, "step": 3466500 }, { "epoch": 2.08, "learning_rate": 4.1486289624850153e-05, "loss": 1.2319, "step": 3467000 }, { "epoch": 2.08, "learning_rate": 4.148418965928959e-05, "loss": 1.222, "step": 3467500 }, { "epoch": 2.08, "learning_rate": 4.148208969372902e-05, "loss": 1.231, "step": 3468000 }, { "epoch": 2.08, "learning_rate": 4.147998972816846e-05, "loss": 1.2495, "step": 3468500 }, { "epoch": 2.08, "learning_rate": 4.1477889762607894e-05, "loss": 1.2213, "step": 3469000 }, { "epoch": 2.08, "learning_rate": 4.147578979704733e-05, "loss": 1.235, "step": 3469500 }, { "epoch": 2.08, "learning_rate": 4.147368983148677e-05, "loss": 1.2433, "step": 3470000 }, { "epoch": 2.08, "learning_rate": 4.14715898659262e-05, "loss": 1.2384, "step": 3470500 }, { "epoch": 2.08, "learning_rate": 4.146948990036563e-05, "loss": 1.2304, "step": 3471000 }, { "epoch": 2.08, "learning_rate": 4.146738993480507e-05, "loss": 1.2309, "step": 3471500 }, { "epoch": 2.08, "learning_rate": 4.146529416917563e-05, "loss": 1.2203, "step": 3472000 }, { "epoch": 2.08, "learning_rate": 4.146319420361506e-05, "loss": 1.2341, "step": 3472500 }, { "epoch": 2.08, "learning_rate": 4.1461094238054495e-05, "loss": 1.2289, "step": 3473000 }, { "epoch": 2.08, "learning_rate": 4.145899427249393e-05, "loss": 1.2271, "step": 3473500 }, { "epoch": 2.08, "learning_rate": 4.145689430693336e-05, "loss": 1.2185, "step": 3474000 }, { "epoch": 2.08, "learning_rate": 4.14547943413728e-05, "loss": 1.2475, "step": 3474500 }, { "epoch": 2.08, "learning_rate": 4.1452694375812236e-05, "loss": 1.2306, "step": 3475000 }, { "epoch": 2.08, "learning_rate": 4.145059441025167e-05, "loss": 1.2225, "step": 3475500 }, { "epoch": 2.08, "learning_rate": 4.144849864462222e-05, "loss": 1.2461, "step": 3476000 }, { "epoch": 2.08, "learning_rate": 4.144639867906166e-05, "loss": 1.244, "step": 3476500 }, { "epoch": 2.08, "learning_rate": 4.1444302913432224e-05, "loss": 1.2447, "step": 3477000 }, { "epoch": 2.08, "learning_rate": 4.144220294787166e-05, "loss": 1.2357, "step": 3477500 }, { "epoch": 2.09, "learning_rate": 4.1440102982311084e-05, "loss": 1.2141, "step": 3478000 }, { "epoch": 2.09, "learning_rate": 4.1438003016750524e-05, "loss": 1.2331, "step": 3478500 }, { "epoch": 2.09, "learning_rate": 4.143590305118996e-05, "loss": 1.2179, "step": 3479000 }, { "epoch": 2.09, "learning_rate": 4.143380728556052e-05, "loss": 1.234, "step": 3479500 }, { "epoch": 2.09, "learning_rate": 4.143170731999995e-05, "loss": 1.2484, "step": 3480000 }, { "epoch": 2.09, "learning_rate": 4.1429607354439385e-05, "loss": 1.2231, "step": 3480500 }, { "epoch": 2.09, "learning_rate": 4.142750738887882e-05, "loss": 1.228, "step": 3481000 }, { "epoch": 2.09, "learning_rate": 4.142540742331826e-05, "loss": 1.2168, "step": 3481500 }, { "epoch": 2.09, "learning_rate": 4.142330745775769e-05, "loss": 1.2402, "step": 3482000 }, { "epoch": 2.09, "learning_rate": 4.1421207492197125e-05, "loss": 1.2272, "step": 3482500 }, { "epoch": 2.09, "learning_rate": 4.1419107526636566e-05, "loss": 1.2321, "step": 3483000 }, { "epoch": 2.09, "learning_rate": 4.1417007561076e-05, "loss": 1.2245, "step": 3483500 }, { "epoch": 2.09, "learning_rate": 4.141490759551543e-05, "loss": 1.227, "step": 3484000 }, { "epoch": 2.09, "learning_rate": 4.141280762995487e-05, "loss": 1.244, "step": 3484500 }, { "epoch": 2.09, "learning_rate": 4.1410707664394306e-05, "loss": 1.2173, "step": 3485000 }, { "epoch": 2.09, "learning_rate": 4.140861189876486e-05, "loss": 1.2061, "step": 3485500 }, { "epoch": 2.09, "learning_rate": 4.140651613313541e-05, "loss": 1.235, "step": 3486000 }, { "epoch": 2.09, "learning_rate": 4.140441616757485e-05, "loss": 1.2211, "step": 3486500 }, { "epoch": 2.09, "learning_rate": 4.140231620201429e-05, "loss": 1.2069, "step": 3487000 }, { "epoch": 2.09, "learning_rate": 4.140021623645372e-05, "loss": 1.2444, "step": 3487500 }, { "epoch": 2.09, "learning_rate": 4.1398120470824274e-05, "loss": 1.2429, "step": 3488000 }, { "epoch": 2.09, "learning_rate": 4.1396020505263714e-05, "loss": 1.2227, "step": 3488500 }, { "epoch": 2.09, "learning_rate": 4.139392053970315e-05, "loss": 1.2269, "step": 3489000 }, { "epoch": 2.09, "learning_rate": 4.139182057414258e-05, "loss": 1.2472, "step": 3489500 }, { "epoch": 2.09, "learning_rate": 4.138972060858202e-05, "loss": 1.2061, "step": 3490000 }, { "epoch": 2.09, "learning_rate": 4.1387620643021455e-05, "loss": 1.24, "step": 3490500 }, { "epoch": 2.09, "learning_rate": 4.138552487739201e-05, "loss": 1.2378, "step": 3491000 }, { "epoch": 2.09, "learning_rate": 4.138342491183144e-05, "loss": 1.2219, "step": 3491500 }, { "epoch": 2.09, "learning_rate": 4.138132494627088e-05, "loss": 1.2121, "step": 3492000 }, { "epoch": 2.09, "learning_rate": 4.1379224980710316e-05, "loss": 1.2498, "step": 3492500 }, { "epoch": 2.09, "learning_rate": 4.137712501514975e-05, "loss": 1.2114, "step": 3493000 }, { "epoch": 2.09, "learning_rate": 4.137502504958919e-05, "loss": 1.1985, "step": 3493500 }, { "epoch": 2.09, "learning_rate": 4.137293348389086e-05, "loss": 1.2494, "step": 3494000 }, { "epoch": 2.1, "learning_rate": 4.13708335183303e-05, "loss": 1.2352, "step": 3494500 }, { "epoch": 2.1, "learning_rate": 4.136873355276973e-05, "loss": 1.2027, "step": 3495000 }, { "epoch": 2.1, "learning_rate": 4.136663358720917e-05, "loss": 1.2658, "step": 3495500 }, { "epoch": 2.1, "learning_rate": 4.1364533621648604e-05, "loss": 1.2395, "step": 3496000 }, { "epoch": 2.1, "learning_rate": 4.136243365608804e-05, "loss": 1.2204, "step": 3496500 }, { "epoch": 2.1, "learning_rate": 4.136033369052748e-05, "loss": 1.2042, "step": 3497000 }, { "epoch": 2.1, "learning_rate": 4.135823372496691e-05, "loss": 1.2156, "step": 3497500 }, { "epoch": 2.1, "learning_rate": 4.1356133759406344e-05, "loss": 1.2516, "step": 3498000 }, { "epoch": 2.1, "learning_rate": 4.13540379937769e-05, "loss": 1.2226, "step": 3498500 }, { "epoch": 2.1, "learning_rate": 4.135193802821634e-05, "loss": 1.2301, "step": 3499000 }, { "epoch": 2.1, "learning_rate": 4.134983806265577e-05, "loss": 1.2171, "step": 3499500 }, { "epoch": 2.1, "learning_rate": 4.1347738097095205e-05, "loss": 1.2252, "step": 3500000 }, { "epoch": 2.1, "eval_loss": 1.1996427774429321, "eval_runtime": 1100.4525, "eval_samples_per_second": 478.639, "eval_steps_per_second": 79.774, "step": 3500000 }, { "epoch": 2.1, "learning_rate": 4.1345638131534645e-05, "loss": 1.248, "step": 3500500 }, { "epoch": 2.1, "learning_rate": 4.13435423659052e-05, "loss": 1.235, "step": 3501000 }, { "epoch": 2.1, "learning_rate": 4.134144240034463e-05, "loss": 1.2172, "step": 3501500 }, { "epoch": 2.1, "learning_rate": 4.1339342434784066e-05, "loss": 1.2192, "step": 3502000 }, { "epoch": 2.1, "learning_rate": 4.1337242469223506e-05, "loss": 1.2215, "step": 3502500 }, { "epoch": 2.1, "learning_rate": 4.133514250366294e-05, "loss": 1.2154, "step": 3503000 }, { "epoch": 2.1, "learning_rate": 4.133304253810237e-05, "loss": 1.255, "step": 3503500 }, { "epoch": 2.1, "learning_rate": 4.133094677247293e-05, "loss": 1.2364, "step": 3504000 }, { "epoch": 2.1, "learning_rate": 4.132884680691237e-05, "loss": 1.2171, "step": 3504500 }, { "epoch": 2.1, "learning_rate": 4.13267468413518e-05, "loss": 1.2397, "step": 3505000 }, { "epoch": 2.1, "learning_rate": 4.132464687579124e-05, "loss": 1.226, "step": 3505500 }, { "epoch": 2.1, "learning_rate": 4.1322546910230674e-05, "loss": 1.2268, "step": 3506000 }, { "epoch": 2.1, "learning_rate": 4.132045114460123e-05, "loss": 1.2206, "step": 3506500 }, { "epoch": 2.1, "learning_rate": 4.131835537897178e-05, "loss": 1.2344, "step": 3507000 }, { "epoch": 2.1, "learning_rate": 4.1316255413411214e-05, "loss": 1.2253, "step": 3507500 }, { "epoch": 2.1, "learning_rate": 4.1314155447850655e-05, "loss": 1.2526, "step": 3508000 }, { "epoch": 2.1, "learning_rate": 4.131205548229009e-05, "loss": 1.2111, "step": 3508500 }, { "epoch": 2.1, "learning_rate": 4.130995551672952e-05, "loss": 1.2455, "step": 3509000 }, { "epoch": 2.1, "learning_rate": 4.130785555116896e-05, "loss": 1.2818, "step": 3509500 }, { "epoch": 2.1, "learning_rate": 4.1305755585608395e-05, "loss": 1.2294, "step": 3510000 }, { "epoch": 2.1, "learning_rate": 4.130365562004783e-05, "loss": 1.1845, "step": 3510500 }, { "epoch": 2.1, "learning_rate": 4.130155565448727e-05, "loss": 1.2215, "step": 3511000 }, { "epoch": 2.11, "learning_rate": 4.12994556889267e-05, "loss": 1.2234, "step": 3511500 }, { "epoch": 2.11, "learning_rate": 4.1297355723366136e-05, "loss": 1.2079, "step": 3512000 }, { "epoch": 2.11, "learning_rate": 4.1295259957736696e-05, "loss": 1.2202, "step": 3512500 }, { "epoch": 2.11, "learning_rate": 4.129315999217613e-05, "loss": 1.2175, "step": 3513000 }, { "epoch": 2.11, "learning_rate": 4.129106002661556e-05, "loss": 1.2392, "step": 3513500 }, { "epoch": 2.11, "learning_rate": 4.1288960061055e-05, "loss": 1.2231, "step": 3514000 }, { "epoch": 2.11, "learning_rate": 4.128686009549443e-05, "loss": 1.2203, "step": 3514500 }, { "epoch": 2.11, "learning_rate": 4.1284760129933864e-05, "loss": 1.2551, "step": 3515000 }, { "epoch": 2.11, "learning_rate": 4.1282660164373304e-05, "loss": 1.2142, "step": 3515500 }, { "epoch": 2.11, "learning_rate": 4.128056019881274e-05, "loss": 1.2432, "step": 3516000 }, { "epoch": 2.11, "learning_rate": 4.127846863311442e-05, "loss": 1.2502, "step": 3516500 }, { "epoch": 2.11, "learning_rate": 4.127636866755385e-05, "loss": 1.2369, "step": 3517000 }, { "epoch": 2.11, "learning_rate": 4.1274268701993285e-05, "loss": 1.2499, "step": 3517500 }, { "epoch": 2.11, "learning_rate": 4.1272168736432725e-05, "loss": 1.2356, "step": 3518000 }, { "epoch": 2.11, "learning_rate": 4.127006877087216e-05, "loss": 1.2287, "step": 3518500 }, { "epoch": 2.11, "learning_rate": 4.12679688053116e-05, "loss": 1.2071, "step": 3519000 }, { "epoch": 2.11, "learning_rate": 4.126587303968215e-05, "loss": 1.2358, "step": 3519500 }, { "epoch": 2.11, "learning_rate": 4.1263773074121586e-05, "loss": 1.2269, "step": 3520000 }, { "epoch": 2.11, "learning_rate": 4.126167310856102e-05, "loss": 1.2406, "step": 3520500 }, { "epoch": 2.11, "learning_rate": 4.125957314300046e-05, "loss": 1.2383, "step": 3521000 }, { "epoch": 2.11, "learning_rate": 4.125747317743989e-05, "loss": 1.2097, "step": 3521500 }, { "epoch": 2.11, "learning_rate": 4.1255377411810446e-05, "loss": 1.2258, "step": 3522000 }, { "epoch": 2.11, "learning_rate": 4.125327744624988e-05, "loss": 1.2323, "step": 3522500 }, { "epoch": 2.11, "learning_rate": 4.125117748068932e-05, "loss": 1.2419, "step": 3523000 }, { "epoch": 2.11, "learning_rate": 4.1249077515128753e-05, "loss": 1.2142, "step": 3523500 }, { "epoch": 2.11, "learning_rate": 4.124698174949931e-05, "loss": 1.2518, "step": 3524000 }, { "epoch": 2.11, "learning_rate": 4.124488178393875e-05, "loss": 1.2314, "step": 3524500 }, { "epoch": 2.11, "learning_rate": 4.124278181837818e-05, "loss": 1.2163, "step": 3525000 }, { "epoch": 2.11, "learning_rate": 4.1240681852817614e-05, "loss": 1.2036, "step": 3525500 }, { "epoch": 2.11, "learning_rate": 4.1238581887257054e-05, "loss": 1.2061, "step": 3526000 }, { "epoch": 2.11, "learning_rate": 4.123648192169648e-05, "loss": 1.24, "step": 3526500 }, { "epoch": 2.11, "learning_rate": 4.1234381956135915e-05, "loss": 1.2288, "step": 3527000 }, { "epoch": 2.11, "learning_rate": 4.1232281990575355e-05, "loss": 1.2306, "step": 3527500 }, { "epoch": 2.12, "learning_rate": 4.1230186224945915e-05, "loss": 1.2451, "step": 3528000 }, { "epoch": 2.12, "learning_rate": 4.122809045931647e-05, "loss": 1.2354, "step": 3528500 }, { "epoch": 2.12, "learning_rate": 4.12259904937559e-05, "loss": 1.2141, "step": 3529000 }, { "epoch": 2.12, "learning_rate": 4.1223890528195336e-05, "loss": 1.233, "step": 3529500 }, { "epoch": 2.12, "learning_rate": 4.1221790562634776e-05, "loss": 1.2152, "step": 3530000 }, { "epoch": 2.12, "learning_rate": 4.121969059707421e-05, "loss": 1.2528, "step": 3530500 }, { "epoch": 2.12, "learning_rate": 4.121759063151364e-05, "loss": 1.2356, "step": 3531000 }, { "epoch": 2.12, "learning_rate": 4.1215490665953076e-05, "loss": 1.2097, "step": 3531500 }, { "epoch": 2.12, "learning_rate": 4.1213394900323637e-05, "loss": 1.2307, "step": 3532000 }, { "epoch": 2.12, "learning_rate": 4.121129493476307e-05, "loss": 1.2151, "step": 3532500 }, { "epoch": 2.12, "learning_rate": 4.120919496920251e-05, "loss": 1.2007, "step": 3533000 }, { "epoch": 2.12, "learning_rate": 4.120709500364194e-05, "loss": 1.2277, "step": 3533500 }, { "epoch": 2.12, "learning_rate": 4.12049992380125e-05, "loss": 1.225, "step": 3534000 }, { "epoch": 2.12, "learning_rate": 4.120289927245193e-05, "loss": 1.2351, "step": 3534500 }, { "epoch": 2.12, "learning_rate": 4.120079930689137e-05, "loss": 1.2407, "step": 3535000 }, { "epoch": 2.12, "learning_rate": 4.1198699341330804e-05, "loss": 1.2415, "step": 3535500 }, { "epoch": 2.12, "learning_rate": 4.119659937577023e-05, "loss": 1.2279, "step": 3536000 }, { "epoch": 2.12, "learning_rate": 4.119449941020967e-05, "loss": 1.2467, "step": 3536500 }, { "epoch": 2.12, "learning_rate": 4.119240364458023e-05, "loss": 1.2529, "step": 3537000 }, { "epoch": 2.12, "learning_rate": 4.1190303679019665e-05, "loss": 1.2579, "step": 3537500 }, { "epoch": 2.12, "learning_rate": 4.11882037134591e-05, "loss": 1.243, "step": 3538000 }, { "epoch": 2.12, "learning_rate": 4.118610374789853e-05, "loss": 1.2212, "step": 3538500 }, { "epoch": 2.12, "learning_rate": 4.1184003782337966e-05, "loss": 1.2285, "step": 3539000 }, { "epoch": 2.12, "learning_rate": 4.1181903816777406e-05, "loss": 1.2538, "step": 3539500 }, { "epoch": 2.12, "learning_rate": 4.117980385121684e-05, "loss": 1.2445, "step": 3540000 }, { "epoch": 2.12, "learning_rate": 4.11777080855874e-05, "loss": 1.2141, "step": 3540500 }, { "epoch": 2.12, "learning_rate": 4.1175608120026826e-05, "loss": 1.2616, "step": 3541000 }, { "epoch": 2.12, "learning_rate": 4.1173508154466267e-05, "loss": 1.2623, "step": 3541500 }, { "epoch": 2.12, "learning_rate": 4.11714081889057e-05, "loss": 1.221, "step": 3542000 }, { "epoch": 2.12, "learning_rate": 4.1169308223345133e-05, "loss": 1.241, "step": 3542500 }, { "epoch": 2.12, "learning_rate": 4.116721245771569e-05, "loss": 1.2284, "step": 3543000 }, { "epoch": 2.12, "learning_rate": 4.116511249215513e-05, "loss": 1.2147, "step": 3543500 }, { "epoch": 2.12, "learning_rate": 4.116301252659456e-05, "loss": 1.2416, "step": 3544000 }, { "epoch": 2.13, "learning_rate": 4.1160912561033994e-05, "loss": 1.2338, "step": 3544500 }, { "epoch": 2.13, "learning_rate": 4.1158812595473434e-05, "loss": 1.2398, "step": 3545000 }, { "epoch": 2.13, "learning_rate": 4.115671682984399e-05, "loss": 1.2359, "step": 3545500 }, { "epoch": 2.13, "learning_rate": 4.115461686428342e-05, "loss": 1.2084, "step": 3546000 }, { "epoch": 2.13, "learning_rate": 4.115251689872286e-05, "loss": 1.2191, "step": 3546500 }, { "epoch": 2.13, "learning_rate": 4.1150416933162295e-05, "loss": 1.2479, "step": 3547000 }, { "epoch": 2.13, "learning_rate": 4.114831696760173e-05, "loss": 1.2123, "step": 3547500 }, { "epoch": 2.13, "learning_rate": 4.114621700204117e-05, "loss": 1.2314, "step": 3548000 }, { "epoch": 2.13, "learning_rate": 4.114412123641172e-05, "loss": 1.2204, "step": 3548500 }, { "epoch": 2.13, "learning_rate": 4.1142021270851156e-05, "loss": 1.2198, "step": 3549000 }, { "epoch": 2.13, "learning_rate": 4.113992130529059e-05, "loss": 1.21, "step": 3549500 }, { "epoch": 2.13, "learning_rate": 4.113782133973003e-05, "loss": 1.2273, "step": 3550000 }, { "epoch": 2.13, "learning_rate": 4.113572137416946e-05, "loss": 1.2323, "step": 3550500 }, { "epoch": 2.13, "learning_rate": 4.1133625608540017e-05, "loss": 1.2359, "step": 3551000 }, { "epoch": 2.13, "learning_rate": 4.113152564297945e-05, "loss": 1.2505, "step": 3551500 }, { "epoch": 2.13, "learning_rate": 4.112942987735001e-05, "loss": 1.2459, "step": 3552000 }, { "epoch": 2.13, "learning_rate": 4.112732991178945e-05, "loss": 1.2046, "step": 3552500 }, { "epoch": 2.13, "learning_rate": 4.1125234146160004e-05, "loss": 1.2202, "step": 3553000 }, { "epoch": 2.13, "learning_rate": 4.112313418059944e-05, "loss": 1.2222, "step": 3553500 }, { "epoch": 2.13, "learning_rate": 4.112103421503888e-05, "loss": 1.251, "step": 3554000 }, { "epoch": 2.13, "learning_rate": 4.111893424947831e-05, "loss": 1.2437, "step": 3554500 }, { "epoch": 2.13, "learning_rate": 4.111683428391774e-05, "loss": 1.2565, "step": 3555000 }, { "epoch": 2.13, "learning_rate": 4.111473431835718e-05, "loss": 1.2552, "step": 3555500 }, { "epoch": 2.13, "learning_rate": 4.111263435279661e-05, "loss": 1.2169, "step": 3556000 }, { "epoch": 2.13, "learning_rate": 4.1110534387236045e-05, "loss": 1.2241, "step": 3556500 }, { "epoch": 2.13, "learning_rate": 4.1108434421675485e-05, "loss": 1.239, "step": 3557000 }, { "epoch": 2.13, "learning_rate": 4.110633445611492e-05, "loss": 1.2272, "step": 3557500 }, { "epoch": 2.13, "learning_rate": 4.110423449055435e-05, "loss": 1.2569, "step": 3558000 }, { "epoch": 2.13, "learning_rate": 4.110213452499379e-05, "loss": 1.2195, "step": 3558500 }, { "epoch": 2.13, "learning_rate": 4.1100034559433226e-05, "loss": 1.2572, "step": 3559000 }, { "epoch": 2.13, "learning_rate": 4.109793879380378e-05, "loss": 1.2303, "step": 3559500 }, { "epoch": 2.13, "learning_rate": 4.109583882824321e-05, "loss": 1.2465, "step": 3560000 }, { "epoch": 2.13, "learning_rate": 4.109373886268265e-05, "loss": 1.2202, "step": 3560500 }, { "epoch": 2.13, "learning_rate": 4.109163889712209e-05, "loss": 1.239, "step": 3561000 }, { "epoch": 2.14, "learning_rate": 4.108954313149264e-05, "loss": 1.2675, "step": 3561500 }, { "epoch": 2.14, "learning_rate": 4.108744316593208e-05, "loss": 1.2567, "step": 3562000 }, { "epoch": 2.14, "learning_rate": 4.1085343200371514e-05, "loss": 1.2044, "step": 3562500 }, { "epoch": 2.14, "learning_rate": 4.108324323481095e-05, "loss": 1.2437, "step": 3563000 }, { "epoch": 2.14, "learning_rate": 4.108114326925039e-05, "loss": 1.2077, "step": 3563500 }, { "epoch": 2.14, "learning_rate": 4.107904330368982e-05, "loss": 1.2339, "step": 3564000 }, { "epoch": 2.14, "learning_rate": 4.1076943338129255e-05, "loss": 1.2318, "step": 3564500 }, { "epoch": 2.14, "learning_rate": 4.1074843372568695e-05, "loss": 1.2257, "step": 3565000 }, { "epoch": 2.14, "learning_rate": 4.107275180687036e-05, "loss": 1.2363, "step": 3565500 }, { "epoch": 2.14, "learning_rate": 4.10706518413098e-05, "loss": 1.234, "step": 3566000 }, { "epoch": 2.14, "learning_rate": 4.1068551875749236e-05, "loss": 1.2525, "step": 3566500 }, { "epoch": 2.14, "learning_rate": 4.106645191018867e-05, "loss": 1.2372, "step": 3567000 }, { "epoch": 2.14, "learning_rate": 4.106435194462811e-05, "loss": 1.2331, "step": 3567500 }, { "epoch": 2.14, "learning_rate": 4.106225197906754e-05, "loss": 1.2273, "step": 3568000 }, { "epoch": 2.14, "learning_rate": 4.1060156213438096e-05, "loss": 1.2108, "step": 3568500 }, { "epoch": 2.14, "learning_rate": 4.1058056247877536e-05, "loss": 1.2362, "step": 3569000 }, { "epoch": 2.14, "learning_rate": 4.105596048224809e-05, "loss": 1.2525, "step": 3569500 }, { "epoch": 2.14, "learning_rate": 4.1053860516687524e-05, "loss": 1.2313, "step": 3570000 }, { "epoch": 2.14, "learning_rate": 4.105176055112696e-05, "loss": 1.2501, "step": 3570500 }, { "epoch": 2.14, "learning_rate": 4.10496605855664e-05, "loss": 1.2583, "step": 3571000 }, { "epoch": 2.14, "learning_rate": 4.104756062000583e-05, "loss": 1.2126, "step": 3571500 }, { "epoch": 2.14, "learning_rate": 4.1045460654445264e-05, "loss": 1.2128, "step": 3572000 }, { "epoch": 2.14, "learning_rate": 4.1043360688884704e-05, "loss": 1.2059, "step": 3572500 }, { "epoch": 2.14, "learning_rate": 4.104126072332414e-05, "loss": 1.1771, "step": 3573000 }, { "epoch": 2.14, "learning_rate": 4.103916075776357e-05, "loss": 1.2223, "step": 3573500 }, { "epoch": 2.14, "learning_rate": 4.103706079220301e-05, "loss": 1.2126, "step": 3574000 }, { "epoch": 2.14, "learning_rate": 4.1034960826642445e-05, "loss": 1.2197, "step": 3574500 }, { "epoch": 2.14, "learning_rate": 4.103286086108187e-05, "loss": 1.229, "step": 3575000 }, { "epoch": 2.14, "learning_rate": 4.103076509545243e-05, "loss": 1.1865, "step": 3575500 }, { "epoch": 2.14, "learning_rate": 4.102866512989187e-05, "loss": 1.2268, "step": 3576000 }, { "epoch": 2.14, "learning_rate": 4.1026569364262426e-05, "loss": 1.2483, "step": 3576500 }, { "epoch": 2.14, "learning_rate": 4.102446939870186e-05, "loss": 1.2043, "step": 3577000 }, { "epoch": 2.14, "learning_rate": 4.10223694331413e-05, "loss": 1.2436, "step": 3577500 }, { "epoch": 2.15, "learning_rate": 4.102026946758073e-05, "loss": 1.2041, "step": 3578000 }, { "epoch": 2.15, "learning_rate": 4.1018173701951287e-05, "loss": 1.1972, "step": 3578500 }, { "epoch": 2.15, "learning_rate": 4.101607373639072e-05, "loss": 1.2306, "step": 3579000 }, { "epoch": 2.15, "learning_rate": 4.101397377083016e-05, "loss": 1.2229, "step": 3579500 }, { "epoch": 2.15, "learning_rate": 4.1011873805269594e-05, "loss": 1.2086, "step": 3580000 }, { "epoch": 2.15, "learning_rate": 4.100977803964015e-05, "loss": 1.2452, "step": 3580500 }, { "epoch": 2.15, "learning_rate": 4.100767807407958e-05, "loss": 1.2577, "step": 3581000 }, { "epoch": 2.15, "learning_rate": 4.100557810851902e-05, "loss": 1.2387, "step": 3581500 }, { "epoch": 2.15, "learning_rate": 4.1003478142958454e-05, "loss": 1.2188, "step": 3582000 }, { "epoch": 2.15, "learning_rate": 4.100137817739789e-05, "loss": 1.2117, "step": 3582500 }, { "epoch": 2.15, "learning_rate": 4.099927821183733e-05, "loss": 1.2442, "step": 3583000 }, { "epoch": 2.15, "learning_rate": 4.099717824627676e-05, "loss": 1.2268, "step": 3583500 }, { "epoch": 2.15, "learning_rate": 4.09950782807162e-05, "loss": 1.1932, "step": 3584000 }, { "epoch": 2.15, "learning_rate": 4.099297831515563e-05, "loss": 1.2111, "step": 3584500 }, { "epoch": 2.15, "learning_rate": 4.099089094938843e-05, "loss": 1.2372, "step": 3585000 }, { "epoch": 2.15, "learning_rate": 4.098879098382787e-05, "loss": 1.2462, "step": 3585500 }, { "epoch": 2.15, "learning_rate": 4.0986691018267296e-05, "loss": 1.2332, "step": 3586000 }, { "epoch": 2.15, "learning_rate": 4.098459105270673e-05, "loss": 1.2067, "step": 3586500 }, { "epoch": 2.15, "learning_rate": 4.098249108714617e-05, "loss": 1.2383, "step": 3587000 }, { "epoch": 2.15, "learning_rate": 4.09803911215856e-05, "loss": 1.2644, "step": 3587500 }, { "epoch": 2.15, "learning_rate": 4.0978291156025037e-05, "loss": 1.2061, "step": 3588000 }, { "epoch": 2.15, "learning_rate": 4.097619119046448e-05, "loss": 1.2285, "step": 3588500 }, { "epoch": 2.15, "learning_rate": 4.097409122490391e-05, "loss": 1.2196, "step": 3589000 }, { "epoch": 2.15, "learning_rate": 4.0971991259343344e-05, "loss": 1.2348, "step": 3589500 }, { "epoch": 2.15, "learning_rate": 4.0969891293782784e-05, "loss": 1.202, "step": 3590000 }, { "epoch": 2.15, "learning_rate": 4.096779132822222e-05, "loss": 1.2456, "step": 3590500 }, { "epoch": 2.15, "learning_rate": 4.096569136266166e-05, "loss": 1.1937, "step": 3591000 }, { "epoch": 2.15, "learning_rate": 4.096359559703221e-05, "loss": 1.1942, "step": 3591500 }, { "epoch": 2.15, "learning_rate": 4.0961495631471645e-05, "loss": 1.2394, "step": 3592000 }, { "epoch": 2.15, "learning_rate": 4.095939566591108e-05, "loss": 1.2142, "step": 3592500 }, { "epoch": 2.15, "learning_rate": 4.095729570035052e-05, "loss": 1.244, "step": 3593000 }, { "epoch": 2.15, "learning_rate": 4.095519993472107e-05, "loss": 1.2574, "step": 3593500 }, { "epoch": 2.15, "learning_rate": 4.0953104169091626e-05, "loss": 1.2328, "step": 3594000 }, { "epoch": 2.16, "learning_rate": 4.095100420353106e-05, "loss": 1.2613, "step": 3594500 }, { "epoch": 2.16, "learning_rate": 4.094890423797049e-05, "loss": 1.2338, "step": 3595000 }, { "epoch": 2.16, "learning_rate": 4.094680427240993e-05, "loss": 1.2463, "step": 3595500 }, { "epoch": 2.16, "learning_rate": 4.0944704306849366e-05, "loss": 1.2282, "step": 3596000 }, { "epoch": 2.16, "learning_rate": 4.0942604341288806e-05, "loss": 1.1977, "step": 3596500 }, { "epoch": 2.16, "learning_rate": 4.094050437572824e-05, "loss": 1.2276, "step": 3597000 }, { "epoch": 2.16, "learning_rate": 4.093840441016767e-05, "loss": 1.2254, "step": 3597500 }, { "epoch": 2.16, "learning_rate": 4.0936304444607114e-05, "loss": 1.2333, "step": 3598000 }, { "epoch": 2.16, "learning_rate": 4.093420867897767e-05, "loss": 1.2006, "step": 3598500 }, { "epoch": 2.16, "learning_rate": 4.09321087134171e-05, "loss": 1.2201, "step": 3599000 }, { "epoch": 2.16, "learning_rate": 4.0930012947787654e-05, "loss": 1.2306, "step": 3599500 }, { "epoch": 2.16, "learning_rate": 4.092791298222709e-05, "loss": 1.2166, "step": 3600000 }, { "epoch": 2.16, "eval_loss": 1.1922718286514282, "eval_runtime": 1098.7908, "eval_samples_per_second": 479.363, "eval_steps_per_second": 79.894, "step": 3600000 }, { "epoch": 2.16, "learning_rate": 4.092581301666653e-05, "loss": 1.2243, "step": 3600500 }, { "epoch": 2.16, "learning_rate": 4.092371305110596e-05, "loss": 1.2198, "step": 3601000 }, { "epoch": 2.16, "learning_rate": 4.0921613085545395e-05, "loss": 1.2253, "step": 3601500 }, { "epoch": 2.16, "learning_rate": 4.0919513119984835e-05, "loss": 1.2125, "step": 3602000 }, { "epoch": 2.16, "learning_rate": 4.091741315442427e-05, "loss": 1.2157, "step": 3602500 }, { "epoch": 2.16, "learning_rate": 4.09153131888637e-05, "loss": 1.2382, "step": 3603000 }, { "epoch": 2.16, "learning_rate": 4.091321742323426e-05, "loss": 1.2563, "step": 3603500 }, { "epoch": 2.16, "learning_rate": 4.0911121657604816e-05, "loss": 1.1968, "step": 3604000 }, { "epoch": 2.16, "learning_rate": 4.090902169204425e-05, "loss": 1.2192, "step": 3604500 }, { "epoch": 2.16, "learning_rate": 4.090692172648368e-05, "loss": 1.2573, "step": 3605000 }, { "epoch": 2.16, "learning_rate": 4.090482176092312e-05, "loss": 1.2138, "step": 3605500 }, { "epoch": 2.16, "learning_rate": 4.0902721795362556e-05, "loss": 1.2522, "step": 3606000 }, { "epoch": 2.16, "learning_rate": 4.090062182980199e-05, "loss": 1.2643, "step": 3606500 }, { "epoch": 2.16, "learning_rate": 4.089852186424143e-05, "loss": 1.2317, "step": 3607000 }, { "epoch": 2.16, "learning_rate": 4.0896421898680864e-05, "loss": 1.2595, "step": 3607500 }, { "epoch": 2.16, "learning_rate": 4.089432613305142e-05, "loss": 1.2503, "step": 3608000 }, { "epoch": 2.16, "learning_rate": 4.089222616749085e-05, "loss": 1.2369, "step": 3608500 }, { "epoch": 2.16, "learning_rate": 4.0890130401861404e-05, "loss": 1.2174, "step": 3609000 }, { "epoch": 2.16, "learning_rate": 4.0888030436300844e-05, "loss": 1.1942, "step": 3609500 }, { "epoch": 2.16, "learning_rate": 4.088593047074028e-05, "loss": 1.1876, "step": 3610000 }, { "epoch": 2.16, "learning_rate": 4.088383050517972e-05, "loss": 1.2287, "step": 3610500 }, { "epoch": 2.16, "learning_rate": 4.088173053961915e-05, "loss": 1.234, "step": 3611000 }, { "epoch": 2.17, "learning_rate": 4.0879630574058585e-05, "loss": 1.2279, "step": 3611500 }, { "epoch": 2.17, "learning_rate": 4.0877530608498025e-05, "loss": 1.2328, "step": 3612000 }, { "epoch": 2.17, "learning_rate": 4.087543484286858e-05, "loss": 1.2313, "step": 3612500 }, { "epoch": 2.17, "learning_rate": 4.087333487730801e-05, "loss": 1.2286, "step": 3613000 }, { "epoch": 2.17, "learning_rate": 4.0871234911747446e-05, "loss": 1.2097, "step": 3613500 }, { "epoch": 2.17, "learning_rate": 4.0869134946186886e-05, "loss": 1.2304, "step": 3614000 }, { "epoch": 2.17, "learning_rate": 4.086703498062632e-05, "loss": 1.2479, "step": 3614500 }, { "epoch": 2.17, "learning_rate": 4.086493501506575e-05, "loss": 1.24, "step": 3615000 }, { "epoch": 2.17, "learning_rate": 4.0862835049505186e-05, "loss": 1.1988, "step": 3615500 }, { "epoch": 2.17, "learning_rate": 4.086073508394462e-05, "loss": 1.2198, "step": 3616000 }, { "epoch": 2.17, "learning_rate": 4.085863511838405e-05, "loss": 1.2425, "step": 3616500 }, { "epoch": 2.17, "learning_rate": 4.0856535152823494e-05, "loss": 1.2301, "step": 3617000 }, { "epoch": 2.17, "learning_rate": 4.085443518726293e-05, "loss": 1.2253, "step": 3617500 }, { "epoch": 2.17, "learning_rate": 4.085233522170236e-05, "loss": 1.228, "step": 3618000 }, { "epoch": 2.17, "learning_rate": 4.085023945607292e-05, "loss": 1.215, "step": 3618500 }, { "epoch": 2.17, "learning_rate": 4.0848139490512354e-05, "loss": 1.205, "step": 3619000 }, { "epoch": 2.17, "learning_rate": 4.084603952495179e-05, "loss": 1.229, "step": 3619500 }, { "epoch": 2.17, "learning_rate": 4.084394375932235e-05, "loss": 1.2065, "step": 3620000 }, { "epoch": 2.17, "learning_rate": 4.08418479936929e-05, "loss": 1.2249, "step": 3620500 }, { "epoch": 2.17, "learning_rate": 4.083974802813234e-05, "loss": 1.2448, "step": 3621000 }, { "epoch": 2.17, "learning_rate": 4.0837648062571775e-05, "loss": 1.2269, "step": 3621500 }, { "epoch": 2.17, "learning_rate": 4.083554809701121e-05, "loss": 1.2452, "step": 3622000 }, { "epoch": 2.17, "learning_rate": 4.083344813145064e-05, "loss": 1.2182, "step": 3622500 }, { "epoch": 2.17, "learning_rate": 4.0831348165890076e-05, "loss": 1.2113, "step": 3623000 }, { "epoch": 2.17, "learning_rate": 4.082924820032951e-05, "loss": 1.2451, "step": 3623500 }, { "epoch": 2.17, "learning_rate": 4.082714823476895e-05, "loss": 1.2117, "step": 3624000 }, { "epoch": 2.17, "learning_rate": 4.082505246913951e-05, "loss": 1.2248, "step": 3624500 }, { "epoch": 2.17, "learning_rate": 4.0822952503578936e-05, "loss": 1.2089, "step": 3625000 }, { "epoch": 2.17, "learning_rate": 4.082085253801838e-05, "loss": 1.2073, "step": 3625500 }, { "epoch": 2.17, "learning_rate": 4.081875257245781e-05, "loss": 1.2196, "step": 3626000 }, { "epoch": 2.17, "learning_rate": 4.0816652606897244e-05, "loss": 1.2505, "step": 3626500 }, { "epoch": 2.17, "learning_rate": 4.0814552641336684e-05, "loss": 1.2404, "step": 3627000 }, { "epoch": 2.17, "learning_rate": 4.081245267577612e-05, "loss": 1.2212, "step": 3627500 }, { "epoch": 2.18, "learning_rate": 4.081035271021555e-05, "loss": 1.2246, "step": 3628000 }, { "epoch": 2.18, "learning_rate": 4.080825274465499e-05, "loss": 1.2115, "step": 3628500 }, { "epoch": 2.18, "learning_rate": 4.0806156979025545e-05, "loss": 1.232, "step": 3629000 }, { "epoch": 2.18, "learning_rate": 4.080405701346498e-05, "loss": 1.2346, "step": 3629500 }, { "epoch": 2.18, "learning_rate": 4.080195704790441e-05, "loss": 1.2611, "step": 3630000 }, { "epoch": 2.18, "learning_rate": 4.079985708234385e-05, "loss": 1.2433, "step": 3630500 }, { "epoch": 2.18, "learning_rate": 4.0797757116783285e-05, "loss": 1.2363, "step": 3631000 }, { "epoch": 2.18, "learning_rate": 4.079565715122272e-05, "loss": 1.2324, "step": 3631500 }, { "epoch": 2.18, "learning_rate": 4.079355718566216e-05, "loss": 1.2324, "step": 3632000 }, { "epoch": 2.18, "learning_rate": 4.079145722010159e-05, "loss": 1.2142, "step": 3632500 }, { "epoch": 2.18, "learning_rate": 4.0789361454472146e-05, "loss": 1.2238, "step": 3633000 }, { "epoch": 2.18, "learning_rate": 4.078726148891158e-05, "loss": 1.2173, "step": 3633500 }, { "epoch": 2.18, "learning_rate": 4.078516152335102e-05, "loss": 1.2172, "step": 3634000 }, { "epoch": 2.18, "learning_rate": 4.078306155779045e-05, "loss": 1.2255, "step": 3634500 }, { "epoch": 2.18, "learning_rate": 4.078096579216101e-05, "loss": 1.2241, "step": 3635000 }, { "epoch": 2.18, "learning_rate": 4.077887002653156e-05, "loss": 1.2359, "step": 3635500 }, { "epoch": 2.18, "learning_rate": 4.0776770060971e-05, "loss": 1.1963, "step": 3636000 }, { "epoch": 2.18, "learning_rate": 4.0774670095410434e-05, "loss": 1.2616, "step": 3636500 }, { "epoch": 2.18, "learning_rate": 4.077257012984987e-05, "loss": 1.2506, "step": 3637000 }, { "epoch": 2.18, "learning_rate": 4.077047016428931e-05, "loss": 1.2387, "step": 3637500 }, { "epoch": 2.18, "learning_rate": 4.076837019872874e-05, "loss": 1.2228, "step": 3638000 }, { "epoch": 2.18, "learning_rate": 4.0766270233168175e-05, "loss": 1.2354, "step": 3638500 }, { "epoch": 2.18, "learning_rate": 4.0764170267607615e-05, "loss": 1.2344, "step": 3639000 }, { "epoch": 2.18, "learning_rate": 4.076207030204705e-05, "loss": 1.2388, "step": 3639500 }, { "epoch": 2.18, "learning_rate": 4.07599745364176e-05, "loss": 1.2291, "step": 3640000 }, { "epoch": 2.18, "learning_rate": 4.0757874570857035e-05, "loss": 1.21, "step": 3640500 }, { "epoch": 2.18, "learning_rate": 4.0755774605296476e-05, "loss": 1.2431, "step": 3641000 }, { "epoch": 2.18, "learning_rate": 4.075367463973591e-05, "loss": 1.2163, "step": 3641500 }, { "epoch": 2.18, "learning_rate": 4.075158307403758e-05, "loss": 1.2121, "step": 3642000 }, { "epoch": 2.18, "learning_rate": 4.0749483108477016e-05, "loss": 1.2476, "step": 3642500 }, { "epoch": 2.18, "learning_rate": 4.0747383142916456e-05, "loss": 1.268, "step": 3643000 }, { "epoch": 2.18, "learning_rate": 4.074528737728702e-05, "loss": 1.2429, "step": 3643500 }, { "epoch": 2.18, "learning_rate": 4.0743187411726443e-05, "loss": 1.2271, "step": 3644000 }, { "epoch": 2.19, "learning_rate": 4.074108744616588e-05, "loss": 1.2201, "step": 3644500 }, { "epoch": 2.19, "learning_rate": 4.073898748060532e-05, "loss": 1.2605, "step": 3645000 }, { "epoch": 2.19, "learning_rate": 4.073688751504475e-05, "loss": 1.2273, "step": 3645500 }, { "epoch": 2.19, "learning_rate": 4.0734787549484184e-05, "loss": 1.2194, "step": 3646000 }, { "epoch": 2.19, "learning_rate": 4.0732687583923624e-05, "loss": 1.2482, "step": 3646500 }, { "epoch": 2.19, "learning_rate": 4.073058761836306e-05, "loss": 1.2196, "step": 3647000 }, { "epoch": 2.19, "learning_rate": 4.072848765280249e-05, "loss": 1.2508, "step": 3647500 }, { "epoch": 2.19, "learning_rate": 4.072638768724193e-05, "loss": 1.2245, "step": 3648000 }, { "epoch": 2.19, "learning_rate": 4.0724287721681365e-05, "loss": 1.2326, "step": 3648500 }, { "epoch": 2.19, "learning_rate": 4.072219195605192e-05, "loss": 1.2206, "step": 3649000 }, { "epoch": 2.19, "learning_rate": 4.072009199049136e-05, "loss": 1.234, "step": 3649500 }, { "epoch": 2.19, "learning_rate": 4.071799202493079e-05, "loss": 1.2378, "step": 3650000 }, { "epoch": 2.19, "learning_rate": 4.0715892059370226e-05, "loss": 1.2241, "step": 3650500 }, { "epoch": 2.19, "learning_rate": 4.0713792093809666e-05, "loss": 1.2061, "step": 3651000 }, { "epoch": 2.19, "learning_rate": 4.07116921282491e-05, "loss": 1.2387, "step": 3651500 }, { "epoch": 2.19, "learning_rate": 4.070959636261965e-05, "loss": 1.2423, "step": 3652000 }, { "epoch": 2.19, "learning_rate": 4.0707496397059086e-05, "loss": 1.2047, "step": 3652500 }, { "epoch": 2.19, "learning_rate": 4.0705396431498527e-05, "loss": 1.2417, "step": 3653000 }, { "epoch": 2.19, "learning_rate": 4.070329646593796e-05, "loss": 1.2263, "step": 3653500 }, { "epoch": 2.19, "learning_rate": 4.0701196500377393e-05, "loss": 1.2074, "step": 3654000 }, { "epoch": 2.19, "learning_rate": 4.069910073474795e-05, "loss": 1.2336, "step": 3654500 }, { "epoch": 2.19, "learning_rate": 4.069700076918739e-05, "loss": 1.2786, "step": 3655000 }, { "epoch": 2.19, "learning_rate": 4.069490080362682e-05, "loss": 1.2572, "step": 3655500 }, { "epoch": 2.19, "learning_rate": 4.069280083806626e-05, "loss": 1.2065, "step": 3656000 }, { "epoch": 2.19, "learning_rate": 4.0690700872505694e-05, "loss": 1.2142, "step": 3656500 }, { "epoch": 2.19, "learning_rate": 4.068860090694512e-05, "loss": 1.2379, "step": 3657000 }, { "epoch": 2.19, "learning_rate": 4.068650094138456e-05, "loss": 1.2094, "step": 3657500 }, { "epoch": 2.19, "learning_rate": 4.0684400975823995e-05, "loss": 1.2192, "step": 3658000 }, { "epoch": 2.19, "learning_rate": 4.068230101026343e-05, "loss": 1.2333, "step": 3658500 }, { "epoch": 2.19, "learning_rate": 4.068020944456511e-05, "loss": 1.1983, "step": 3659000 }, { "epoch": 2.19, "learning_rate": 4.067810947900454e-05, "loss": 1.2629, "step": 3659500 }, { "epoch": 2.19, "learning_rate": 4.067600951344398e-05, "loss": 1.2069, "step": 3660000 }, { "epoch": 2.19, "learning_rate": 4.0673909547883416e-05, "loss": 1.2524, "step": 3660500 }, { "epoch": 2.19, "learning_rate": 4.067180958232285e-05, "loss": 1.246, "step": 3661000 }, { "epoch": 2.2, "learning_rate": 4.06697138166934e-05, "loss": 1.2532, "step": 3661500 }, { "epoch": 2.2, "learning_rate": 4.066761385113284e-05, "loss": 1.212, "step": 3662000 }, { "epoch": 2.2, "learning_rate": 4.066551388557228e-05, "loss": 1.2123, "step": 3662500 }, { "epoch": 2.2, "learning_rate": 4.066341392001172e-05, "loss": 1.2382, "step": 3663000 }, { "epoch": 2.2, "learning_rate": 4.066131395445115e-05, "loss": 1.2281, "step": 3663500 }, { "epoch": 2.2, "learning_rate": 4.0659222388752824e-05, "loss": 1.2386, "step": 3664000 }, { "epoch": 2.2, "learning_rate": 4.065712242319226e-05, "loss": 1.2523, "step": 3664500 }, { "epoch": 2.2, "learning_rate": 4.065502665756282e-05, "loss": 1.2435, "step": 3665000 }, { "epoch": 2.2, "learning_rate": 4.0652926692002244e-05, "loss": 1.2438, "step": 3665500 }, { "epoch": 2.2, "learning_rate": 4.0650826726441685e-05, "loss": 1.2506, "step": 3666000 }, { "epoch": 2.2, "learning_rate": 4.064872676088112e-05, "loss": 1.2182, "step": 3666500 }, { "epoch": 2.2, "learning_rate": 4.064662679532055e-05, "loss": 1.2312, "step": 3667000 }, { "epoch": 2.2, "learning_rate": 4.064452682975999e-05, "loss": 1.2317, "step": 3667500 }, { "epoch": 2.2, "learning_rate": 4.0642426864199425e-05, "loss": 1.2195, "step": 3668000 }, { "epoch": 2.2, "learning_rate": 4.0640326898638866e-05, "loss": 1.1908, "step": 3668500 }, { "epoch": 2.2, "learning_rate": 4.06382269330783e-05, "loss": 1.2278, "step": 3669000 }, { "epoch": 2.2, "learning_rate": 4.063612696751773e-05, "loss": 1.2565, "step": 3669500 }, { "epoch": 2.2, "learning_rate": 4.063402700195717e-05, "loss": 1.242, "step": 3670000 }, { "epoch": 2.2, "learning_rate": 4.0631927036396606e-05, "loss": 1.2207, "step": 3670500 }, { "epoch": 2.2, "learning_rate": 4.062983127076716e-05, "loss": 1.2385, "step": 3671000 }, { "epoch": 2.2, "learning_rate": 4.062773130520659e-05, "loss": 1.2234, "step": 3671500 }, { "epoch": 2.2, "learning_rate": 4.0625631339646033e-05, "loss": 1.2211, "step": 3672000 }, { "epoch": 2.2, "learning_rate": 4.062353137408547e-05, "loss": 1.2357, "step": 3672500 }, { "epoch": 2.2, "learning_rate": 4.062143560845602e-05, "loss": 1.2332, "step": 3673000 }, { "epoch": 2.2, "learning_rate": 4.0619339842826574e-05, "loss": 1.2096, "step": 3673500 }, { "epoch": 2.2, "learning_rate": 4.061723987726601e-05, "loss": 1.2311, "step": 3674000 }, { "epoch": 2.2, "learning_rate": 4.061513991170545e-05, "loss": 1.2103, "step": 3674500 }, { "epoch": 2.2, "learning_rate": 4.061303994614488e-05, "loss": 1.2281, "step": 3675000 }, { "epoch": 2.2, "learning_rate": 4.061093998058432e-05, "loss": 1.2381, "step": 3675500 }, { "epoch": 2.2, "learning_rate": 4.0608840015023755e-05, "loss": 1.2298, "step": 3676000 }, { "epoch": 2.2, "learning_rate": 4.060674004946319e-05, "loss": 1.2287, "step": 3676500 }, { "epoch": 2.2, "learning_rate": 4.060464008390263e-05, "loss": 1.2298, "step": 3677000 }, { "epoch": 2.2, "learning_rate": 4.060254011834206e-05, "loss": 1.2294, "step": 3677500 }, { "epoch": 2.21, "learning_rate": 4.060044015278149e-05, "loss": 1.1901, "step": 3678000 }, { "epoch": 2.21, "learning_rate": 4.059834018722093e-05, "loss": 1.2617, "step": 3678500 }, { "epoch": 2.21, "learning_rate": 4.059624442159149e-05, "loss": 1.2458, "step": 3679000 }, { "epoch": 2.21, "learning_rate": 4.059414445603092e-05, "loss": 1.2122, "step": 3679500 }, { "epoch": 2.21, "learning_rate": 4.0592044490470356e-05, "loss": 1.2364, "step": 3680000 }, { "epoch": 2.21, "learning_rate": 4.058994872484091e-05, "loss": 1.2302, "step": 3680500 }, { "epoch": 2.21, "learning_rate": 4.058784875928035e-05, "loss": 1.2124, "step": 3681000 }, { "epoch": 2.21, "learning_rate": 4.0585748793719784e-05, "loss": 1.2392, "step": 3681500 }, { "epoch": 2.21, "learning_rate": 4.058364882815922e-05, "loss": 1.2562, "step": 3682000 }, { "epoch": 2.21, "learning_rate": 4.058154886259866e-05, "loss": 1.2149, "step": 3682500 }, { "epoch": 2.21, "learning_rate": 4.0579448897038084e-05, "loss": 1.2255, "step": 3683000 }, { "epoch": 2.21, "learning_rate": 4.0577348931477524e-05, "loss": 1.209, "step": 3683500 }, { "epoch": 2.21, "learning_rate": 4.057524896591696e-05, "loss": 1.225, "step": 3684000 }, { "epoch": 2.21, "learning_rate": 4.057314900035639e-05, "loss": 1.2127, "step": 3684500 }, { "epoch": 2.21, "learning_rate": 4.057104903479583e-05, "loss": 1.2378, "step": 3685000 }, { "epoch": 2.21, "learning_rate": 4.0568953269166385e-05, "loss": 1.2231, "step": 3685500 }, { "epoch": 2.21, "learning_rate": 4.056685330360582e-05, "loss": 1.1944, "step": 3686000 }, { "epoch": 2.21, "learning_rate": 4.056475333804525e-05, "loss": 1.2109, "step": 3686500 }, { "epoch": 2.21, "learning_rate": 4.056265757241581e-05, "loss": 1.2287, "step": 3687000 }, { "epoch": 2.21, "learning_rate": 4.0560557606855246e-05, "loss": 1.2089, "step": 3687500 }, { "epoch": 2.21, "learning_rate": 4.055845764129468e-05, "loss": 1.2439, "step": 3688000 }, { "epoch": 2.21, "learning_rate": 4.055635767573411e-05, "loss": 1.2123, "step": 3688500 }, { "epoch": 2.21, "learning_rate": 4.055425771017355e-05, "loss": 1.2368, "step": 3689000 }, { "epoch": 2.21, "learning_rate": 4.055216194454411e-05, "loss": 1.2622, "step": 3689500 }, { "epoch": 2.21, "learning_rate": 4.055006197898354e-05, "loss": 1.2529, "step": 3690000 }, { "epoch": 2.21, "learning_rate": 4.054796201342298e-05, "loss": 1.2587, "step": 3690500 }, { "epoch": 2.21, "learning_rate": 4.0545862047862413e-05, "loss": 1.2493, "step": 3691000 }, { "epoch": 2.21, "learning_rate": 4.054376208230185e-05, "loss": 1.1992, "step": 3691500 }, { "epoch": 2.21, "learning_rate": 4.054166211674129e-05, "loss": 1.2125, "step": 3692000 }, { "epoch": 2.21, "learning_rate": 4.053956215118072e-05, "loss": 1.2267, "step": 3692500 }, { "epoch": 2.21, "learning_rate": 4.0537462185620154e-05, "loss": 1.2221, "step": 3693000 }, { "epoch": 2.21, "learning_rate": 4.0535362220059594e-05, "loss": 1.2125, "step": 3693500 }, { "epoch": 2.21, "learning_rate": 4.053326645443015e-05, "loss": 1.2395, "step": 3694000 }, { "epoch": 2.22, "learning_rate": 4.053116648886958e-05, "loss": 1.2272, "step": 3694500 }, { "epoch": 2.22, "learning_rate": 4.0529066523309015e-05, "loss": 1.2237, "step": 3695000 }, { "epoch": 2.22, "learning_rate": 4.0526966557748455e-05, "loss": 1.2141, "step": 3695500 }, { "epoch": 2.22, "learning_rate": 4.052486659218789e-05, "loss": 1.2262, "step": 3696000 }, { "epoch": 2.22, "learning_rate": 4.052276662662732e-05, "loss": 1.2308, "step": 3696500 }, { "epoch": 2.22, "learning_rate": 4.052066666106676e-05, "loss": 1.2152, "step": 3697000 }, { "epoch": 2.22, "learning_rate": 4.0518566695506196e-05, "loss": 1.2367, "step": 3697500 }, { "epoch": 2.22, "learning_rate": 4.051646672994562e-05, "loss": 1.2155, "step": 3698000 }, { "epoch": 2.22, "learning_rate": 4.051437096431618e-05, "loss": 1.2354, "step": 3698500 }, { "epoch": 2.22, "learning_rate": 4.051227519868674e-05, "loss": 1.2028, "step": 3699000 }, { "epoch": 2.22, "learning_rate": 4.0510175233126177e-05, "loss": 1.2355, "step": 3699500 }, { "epoch": 2.22, "learning_rate": 4.050807526756561e-05, "loss": 1.2172, "step": 3700000 }, { "epoch": 2.22, "eval_loss": 1.192676305770874, "eval_runtime": 1108.3091, "eval_samples_per_second": 475.246, "eval_steps_per_second": 79.208, "step": 3700000 }, { "epoch": 2.22, "learning_rate": 4.050597530200505e-05, "loss": 1.2434, "step": 3700500 }, { "epoch": 2.22, "learning_rate": 4.0503875336444484e-05, "loss": 1.2336, "step": 3701000 }, { "epoch": 2.22, "learning_rate": 4.050177537088392e-05, "loss": 1.2125, "step": 3701500 }, { "epoch": 2.22, "learning_rate": 4.049967540532336e-05, "loss": 1.2513, "step": 3702000 }, { "epoch": 2.22, "learning_rate": 4.049757543976279e-05, "loss": 1.2471, "step": 3702500 }, { "epoch": 2.22, "learning_rate": 4.0495479674133344e-05, "loss": 1.2444, "step": 3703000 }, { "epoch": 2.22, "learning_rate": 4.049337970857278e-05, "loss": 1.2351, "step": 3703500 }, { "epoch": 2.22, "learning_rate": 4.049128394294333e-05, "loss": 1.2162, "step": 3704000 }, { "epoch": 2.22, "learning_rate": 4.048918397738277e-05, "loss": 1.2194, "step": 3704500 }, { "epoch": 2.22, "learning_rate": 4.0487084011822205e-05, "loss": 1.2149, "step": 3705000 }, { "epoch": 2.22, "learning_rate": 4.048498404626164e-05, "loss": 1.2341, "step": 3705500 }, { "epoch": 2.22, "learning_rate": 4.04828882806322e-05, "loss": 1.2217, "step": 3706000 }, { "epoch": 2.22, "learning_rate": 4.048078831507163e-05, "loss": 1.2377, "step": 3706500 }, { "epoch": 2.22, "learning_rate": 4.0478688349511066e-05, "loss": 1.1999, "step": 3707000 }, { "epoch": 2.22, "learning_rate": 4.0476588383950506e-05, "loss": 1.2312, "step": 3707500 }, { "epoch": 2.22, "learning_rate": 4.047448841838994e-05, "loss": 1.2233, "step": 3708000 }, { "epoch": 2.22, "learning_rate": 4.047238845282937e-05, "loss": 1.2217, "step": 3708500 }, { "epoch": 2.22, "learning_rate": 4.047028848726881e-05, "loss": 1.2382, "step": 3709000 }, { "epoch": 2.22, "learning_rate": 4.046818852170825e-05, "loss": 1.2415, "step": 3709500 }, { "epoch": 2.22, "learning_rate": 4.04660927560788e-05, "loss": 1.209, "step": 3710000 }, { "epoch": 2.22, "learning_rate": 4.0463992790518234e-05, "loss": 1.2346, "step": 3710500 }, { "epoch": 2.22, "learning_rate": 4.0461892824957674e-05, "loss": 1.2342, "step": 3711000 }, { "epoch": 2.23, "learning_rate": 4.045979285939711e-05, "loss": 1.236, "step": 3711500 }, { "epoch": 2.23, "learning_rate": 4.045769289383654e-05, "loss": 1.2323, "step": 3712000 }, { "epoch": 2.23, "learning_rate": 4.0455597128207094e-05, "loss": 1.2306, "step": 3712500 }, { "epoch": 2.23, "learning_rate": 4.0453497162646535e-05, "loss": 1.2228, "step": 3713000 }, { "epoch": 2.23, "learning_rate": 4.045139719708597e-05, "loss": 1.2014, "step": 3713500 }, { "epoch": 2.23, "learning_rate": 4.04492972315254e-05, "loss": 1.2309, "step": 3714000 }, { "epoch": 2.23, "learning_rate": 4.044720146589596e-05, "loss": 1.2202, "step": 3714500 }, { "epoch": 2.23, "learning_rate": 4.0445101500335395e-05, "loss": 1.2246, "step": 3715000 }, { "epoch": 2.23, "learning_rate": 4.044300573470595e-05, "loss": 1.234, "step": 3715500 }, { "epoch": 2.23, "learning_rate": 4.044090576914538e-05, "loss": 1.2274, "step": 3716000 }, { "epoch": 2.23, "learning_rate": 4.043880580358482e-05, "loss": 1.2261, "step": 3716500 }, { "epoch": 2.23, "learning_rate": 4.0436705838024256e-05, "loss": 1.2294, "step": 3717000 }, { "epoch": 2.23, "learning_rate": 4.043460587246369e-05, "loss": 1.2285, "step": 3717500 }, { "epoch": 2.23, "learning_rate": 4.043250590690313e-05, "loss": 1.2242, "step": 3718000 }, { "epoch": 2.23, "learning_rate": 4.043040594134256e-05, "loss": 1.2183, "step": 3718500 }, { "epoch": 2.23, "learning_rate": 4.0428305975782e-05, "loss": 1.2699, "step": 3719000 }, { "epoch": 2.23, "learning_rate": 4.042620601022143e-05, "loss": 1.2293, "step": 3719500 }, { "epoch": 2.23, "learning_rate": 4.0424106044660864e-05, "loss": 1.2611, "step": 3720000 }, { "epoch": 2.23, "learning_rate": 4.04220060791003e-05, "loss": 1.2013, "step": 3720500 }, { "epoch": 2.23, "learning_rate": 4.041990611353974e-05, "loss": 1.2404, "step": 3721000 }, { "epoch": 2.23, "learning_rate": 4.04178103479103e-05, "loss": 1.234, "step": 3721500 }, { "epoch": 2.23, "learning_rate": 4.0415710382349724e-05, "loss": 1.2356, "step": 3722000 }, { "epoch": 2.23, "learning_rate": 4.0413610416789165e-05, "loss": 1.239, "step": 3722500 }, { "epoch": 2.23, "learning_rate": 4.04115104512286e-05, "loss": 1.2559, "step": 3723000 }, { "epoch": 2.23, "learning_rate": 4.040941048566803e-05, "loss": 1.2246, "step": 3723500 }, { "epoch": 2.23, "learning_rate": 4.040731052010747e-05, "loss": 1.2373, "step": 3724000 }, { "epoch": 2.23, "learning_rate": 4.0405210554546905e-05, "loss": 1.2451, "step": 3724500 }, { "epoch": 2.23, "learning_rate": 4.040311058898634e-05, "loss": 1.2407, "step": 3725000 }, { "epoch": 2.23, "learning_rate": 4.040101482335689e-05, "loss": 1.2441, "step": 3725500 }, { "epoch": 2.23, "learning_rate": 4.039891485779633e-05, "loss": 1.209, "step": 3726000 }, { "epoch": 2.23, "learning_rate": 4.0396814892235766e-05, "loss": 1.2323, "step": 3726500 }, { "epoch": 2.23, "learning_rate": 4.03947149266752e-05, "loss": 1.2416, "step": 3727000 }, { "epoch": 2.23, "learning_rate": 4.039261496111464e-05, "loss": 1.2182, "step": 3727500 }, { "epoch": 2.24, "learning_rate": 4.039051919548519e-05, "loss": 1.2359, "step": 3728000 }, { "epoch": 2.24, "learning_rate": 4.038841922992463e-05, "loss": 1.2215, "step": 3728500 }, { "epoch": 2.24, "learning_rate": 4.038631926436407e-05, "loss": 1.2037, "step": 3729000 }, { "epoch": 2.24, "learning_rate": 4.03842192988035e-05, "loss": 1.2159, "step": 3729500 }, { "epoch": 2.24, "learning_rate": 4.0382119333242934e-05, "loss": 1.2106, "step": 3730000 }, { "epoch": 2.24, "learning_rate": 4.038002356761349e-05, "loss": 1.2295, "step": 3730500 }, { "epoch": 2.24, "learning_rate": 4.037792360205293e-05, "loss": 1.2282, "step": 3731000 }, { "epoch": 2.24, "learning_rate": 4.037582363649236e-05, "loss": 1.2091, "step": 3731500 }, { "epoch": 2.24, "learning_rate": 4.0373723670931795e-05, "loss": 1.2213, "step": 3732000 }, { "epoch": 2.24, "learning_rate": 4.037162790530235e-05, "loss": 1.2201, "step": 3732500 }, { "epoch": 2.24, "learning_rate": 4.036952793974179e-05, "loss": 1.2254, "step": 3733000 }, { "epoch": 2.24, "learning_rate": 4.036742797418122e-05, "loss": 1.2457, "step": 3733500 }, { "epoch": 2.24, "learning_rate": 4.0365332208551775e-05, "loss": 1.1988, "step": 3734000 }, { "epoch": 2.24, "learning_rate": 4.0363232242991216e-05, "loss": 1.2321, "step": 3734500 }, { "epoch": 2.24, "learning_rate": 4.036113227743065e-05, "loss": 1.2142, "step": 3735000 }, { "epoch": 2.24, "learning_rate": 4.035903231187008e-05, "loss": 1.2166, "step": 3735500 }, { "epoch": 2.24, "learning_rate": 4.035693234630952e-05, "loss": 1.2453, "step": 3736000 }, { "epoch": 2.24, "learning_rate": 4.0354832380748956e-05, "loss": 1.252, "step": 3736500 }, { "epoch": 2.24, "learning_rate": 4.035273661511951e-05, "loss": 1.2193, "step": 3737000 }, { "epoch": 2.24, "learning_rate": 4.035063664955894e-05, "loss": 1.2318, "step": 3737500 }, { "epoch": 2.24, "learning_rate": 4.0348536683998384e-05, "loss": 1.1904, "step": 3738000 }, { "epoch": 2.24, "learning_rate": 4.034643671843782e-05, "loss": 1.2344, "step": 3738500 }, { "epoch": 2.24, "learning_rate": 4.034433675287725e-05, "loss": 1.2372, "step": 3739000 }, { "epoch": 2.24, "learning_rate": 4.034223678731669e-05, "loss": 1.251, "step": 3739500 }, { "epoch": 2.24, "learning_rate": 4.0340136821756124e-05, "loss": 1.1963, "step": 3740000 }, { "epoch": 2.24, "learning_rate": 4.033804105612668e-05, "loss": 1.1995, "step": 3740500 }, { "epoch": 2.24, "learning_rate": 4.033594109056611e-05, "loss": 1.2382, "step": 3741000 }, { "epoch": 2.24, "learning_rate": 4.033384112500555e-05, "loss": 1.2309, "step": 3741500 }, { "epoch": 2.24, "learning_rate": 4.0331741159444985e-05, "loss": 1.2407, "step": 3742000 }, { "epoch": 2.24, "learning_rate": 4.032964119388442e-05, "loss": 1.2217, "step": 3742500 }, { "epoch": 2.24, "learning_rate": 4.032754122832386e-05, "loss": 1.2138, "step": 3743000 }, { "epoch": 2.24, "learning_rate": 4.032544126276329e-05, "loss": 1.2027, "step": 3743500 }, { "epoch": 2.24, "learning_rate": 4.0323341297202725e-05, "loss": 1.2205, "step": 3744000 }, { "epoch": 2.24, "learning_rate": 4.032124133164216e-05, "loss": 1.2238, "step": 3744500 }, { "epoch": 2.25, "learning_rate": 4.031914136608159e-05, "loss": 1.2236, "step": 3745000 }, { "epoch": 2.25, "learning_rate": 4.031704140052103e-05, "loss": 1.2203, "step": 3745500 }, { "epoch": 2.25, "learning_rate": 4.0314941434960466e-05, "loss": 1.2014, "step": 3746000 }, { "epoch": 2.25, "learning_rate": 4.031284566933102e-05, "loss": 1.2485, "step": 3746500 }, { "epoch": 2.25, "learning_rate": 4.031074990370158e-05, "loss": 1.2402, "step": 3747000 }, { "epoch": 2.25, "learning_rate": 4.0308649938141013e-05, "loss": 1.2122, "step": 3747500 }, { "epoch": 2.25, "learning_rate": 4.0306549972580454e-05, "loss": 1.2152, "step": 3748000 }, { "epoch": 2.25, "learning_rate": 4.030445000701989e-05, "loss": 1.2247, "step": 3748500 }, { "epoch": 2.25, "learning_rate": 4.0302350041459314e-05, "loss": 1.2051, "step": 3749000 }, { "epoch": 2.25, "learning_rate": 4.0300254275829874e-05, "loss": 1.2276, "step": 3749500 }, { "epoch": 2.25, "learning_rate": 4.0298154310269314e-05, "loss": 1.2253, "step": 3750000 }, { "epoch": 2.25, "learning_rate": 4.029605434470875e-05, "loss": 1.2133, "step": 3750500 }, { "epoch": 2.25, "learning_rate": 4.029395437914818e-05, "loss": 1.2134, "step": 3751000 }, { "epoch": 2.25, "learning_rate": 4.0291854413587615e-05, "loss": 1.221, "step": 3751500 }, { "epoch": 2.25, "learning_rate": 4.0289758647958175e-05, "loss": 1.2105, "step": 3752000 }, { "epoch": 2.25, "learning_rate": 4.028765868239761e-05, "loss": 1.2212, "step": 3752500 }, { "epoch": 2.25, "learning_rate": 4.028555871683705e-05, "loss": 1.2855, "step": 3753000 }, { "epoch": 2.25, "learning_rate": 4.0283458751276476e-05, "loss": 1.2226, "step": 3753500 }, { "epoch": 2.25, "learning_rate": 4.0281362985647036e-05, "loss": 1.1972, "step": 3754000 }, { "epoch": 2.25, "learning_rate": 4.027926302008647e-05, "loss": 1.2402, "step": 3754500 }, { "epoch": 2.25, "learning_rate": 4.027716305452591e-05, "loss": 1.254, "step": 3755000 }, { "epoch": 2.25, "learning_rate": 4.027506308896534e-05, "loss": 1.2116, "step": 3755500 }, { "epoch": 2.25, "learning_rate": 4.02729673233359e-05, "loss": 1.2319, "step": 3756000 }, { "epoch": 2.25, "learning_rate": 4.027086735777533e-05, "loss": 1.2095, "step": 3756500 }, { "epoch": 2.25, "learning_rate": 4.026876739221477e-05, "loss": 1.2341, "step": 3757000 }, { "epoch": 2.25, "learning_rate": 4.0266671626585324e-05, "loss": 1.2214, "step": 3757500 }, { "epoch": 2.25, "learning_rate": 4.026457166102476e-05, "loss": 1.2548, "step": 3758000 }, { "epoch": 2.25, "learning_rate": 4.02624716954642e-05, "loss": 1.2287, "step": 3758500 }, { "epoch": 2.25, "learning_rate": 4.026037172990363e-05, "loss": 1.2418, "step": 3759000 }, { "epoch": 2.25, "learning_rate": 4.0258271764343065e-05, "loss": 1.2027, "step": 3759500 }, { "epoch": 2.25, "learning_rate": 4.025617599871362e-05, "loss": 1.2405, "step": 3760000 }, { "epoch": 2.25, "learning_rate": 4.025407603315306e-05, "loss": 1.1929, "step": 3760500 }, { "epoch": 2.25, "learning_rate": 4.025197606759249e-05, "loss": 1.2297, "step": 3761000 }, { "epoch": 2.26, "learning_rate": 4.0249876102031925e-05, "loss": 1.229, "step": 3761500 }, { "epoch": 2.26, "learning_rate": 4.024778033640248e-05, "loss": 1.2265, "step": 3762000 }, { "epoch": 2.26, "learning_rate": 4.024568037084192e-05, "loss": 1.2253, "step": 3762500 }, { "epoch": 2.26, "learning_rate": 4.024358040528135e-05, "loss": 1.1993, "step": 3763000 }, { "epoch": 2.26, "learning_rate": 4.0241480439720786e-05, "loss": 1.2261, "step": 3763500 }, { "epoch": 2.26, "learning_rate": 4.0239380474160226e-05, "loss": 1.2344, "step": 3764000 }, { "epoch": 2.26, "learning_rate": 4.023728050859966e-05, "loss": 1.2172, "step": 3764500 }, { "epoch": 2.26, "learning_rate": 4.023518054303909e-05, "loss": 1.2214, "step": 3765000 }, { "epoch": 2.26, "learning_rate": 4.0233080577478527e-05, "loss": 1.208, "step": 3765500 }, { "epoch": 2.26, "learning_rate": 4.023098061191796e-05, "loss": 1.206, "step": 3766000 }, { "epoch": 2.26, "learning_rate": 4.022888484628852e-05, "loss": 1.2395, "step": 3766500 }, { "epoch": 2.26, "learning_rate": 4.0226789080659074e-05, "loss": 1.2404, "step": 3767000 }, { "epoch": 2.26, "learning_rate": 4.0224689115098514e-05, "loss": 1.2381, "step": 3767500 }, { "epoch": 2.26, "learning_rate": 4.022258914953795e-05, "loss": 1.224, "step": 3768000 }, { "epoch": 2.26, "learning_rate": 4.022048918397738e-05, "loss": 1.2424, "step": 3768500 }, { "epoch": 2.26, "learning_rate": 4.021838921841682e-05, "loss": 1.2153, "step": 3769000 }, { "epoch": 2.26, "learning_rate": 4.0216289252856255e-05, "loss": 1.225, "step": 3769500 }, { "epoch": 2.26, "learning_rate": 4.021419348722681e-05, "loss": 1.2436, "step": 3770000 }, { "epoch": 2.26, "learning_rate": 4.021209352166624e-05, "loss": 1.2217, "step": 3770500 }, { "epoch": 2.26, "learning_rate": 4.020999355610568e-05, "loss": 1.2131, "step": 3771000 }, { "epoch": 2.26, "learning_rate": 4.0207893590545116e-05, "loss": 1.2384, "step": 3771500 }, { "epoch": 2.26, "learning_rate": 4.020579362498455e-05, "loss": 1.2014, "step": 3772000 }, { "epoch": 2.26, "learning_rate": 4.020369365942398e-05, "loss": 1.232, "step": 3772500 }, { "epoch": 2.26, "learning_rate": 4.0201593693863416e-05, "loss": 1.2192, "step": 3773000 }, { "epoch": 2.26, "learning_rate": 4.0199493728302856e-05, "loss": 1.2221, "step": 3773500 }, { "epoch": 2.26, "learning_rate": 4.0197397962673417e-05, "loss": 1.2128, "step": 3774000 }, { "epoch": 2.26, "learning_rate": 4.019529799711285e-05, "loss": 1.2326, "step": 3774500 }, { "epoch": 2.26, "learning_rate": 4.019319803155228e-05, "loss": 1.2197, "step": 3775000 }, { "epoch": 2.26, "learning_rate": 4.019110226592284e-05, "loss": 1.2171, "step": 3775500 }, { "epoch": 2.26, "learning_rate": 4.018900230036228e-05, "loss": 1.1986, "step": 3776000 }, { "epoch": 2.26, "learning_rate": 4.018690233480171e-05, "loss": 1.2416, "step": 3776500 }, { "epoch": 2.26, "learning_rate": 4.0184802369241144e-05, "loss": 1.2002, "step": 3777000 }, { "epoch": 2.26, "learning_rate": 4.018270240368058e-05, "loss": 1.2206, "step": 3777500 }, { "epoch": 2.27, "learning_rate": 4.018060243812001e-05, "loss": 1.2283, "step": 3778000 }, { "epoch": 2.27, "learning_rate": 4.0178502472559445e-05, "loss": 1.2574, "step": 3778500 }, { "epoch": 2.27, "learning_rate": 4.0176406706930005e-05, "loss": 1.2058, "step": 3779000 }, { "epoch": 2.27, "learning_rate": 4.0174306741369445e-05, "loss": 1.2127, "step": 3779500 }, { "epoch": 2.27, "learning_rate": 4.017220677580887e-05, "loss": 1.2163, "step": 3780000 }, { "epoch": 2.27, "learning_rate": 4.017010681024831e-05, "loss": 1.2169, "step": 3780500 }, { "epoch": 2.27, "learning_rate": 4.0168006844687745e-05, "loss": 1.1947, "step": 3781000 }, { "epoch": 2.27, "learning_rate": 4.016590687912718e-05, "loss": 1.2692, "step": 3781500 }, { "epoch": 2.27, "learning_rate": 4.016380691356662e-05, "loss": 1.2323, "step": 3782000 }, { "epoch": 2.27, "learning_rate": 4.016170694800605e-05, "loss": 1.1993, "step": 3782500 }, { "epoch": 2.27, "learning_rate": 4.0159606982445486e-05, "loss": 1.2416, "step": 3783000 }, { "epoch": 2.27, "learning_rate": 4.015751121681604e-05, "loss": 1.2126, "step": 3783500 }, { "epoch": 2.27, "learning_rate": 4.015541125125548e-05, "loss": 1.2343, "step": 3784000 }, { "epoch": 2.27, "learning_rate": 4.015331128569491e-05, "loss": 1.2284, "step": 3784500 }, { "epoch": 2.27, "learning_rate": 4.015121132013435e-05, "loss": 1.2446, "step": 3785000 }, { "epoch": 2.27, "learning_rate": 4.014911135457379e-05, "loss": 1.2092, "step": 3785500 }, { "epoch": 2.27, "learning_rate": 4.014701558894434e-05, "loss": 1.2086, "step": 3786000 }, { "epoch": 2.27, "learning_rate": 4.0144915623383774e-05, "loss": 1.2135, "step": 3786500 }, { "epoch": 2.27, "learning_rate": 4.0142815657823214e-05, "loss": 1.2134, "step": 3787000 }, { "epoch": 2.27, "learning_rate": 4.014071569226265e-05, "loss": 1.2327, "step": 3787500 }, { "epoch": 2.27, "learning_rate": 4.013861572670208e-05, "loss": 1.2734, "step": 3788000 }, { "epoch": 2.27, "learning_rate": 4.0136519961072635e-05, "loss": 1.2331, "step": 3788500 }, { "epoch": 2.27, "learning_rate": 4.0134419995512075e-05, "loss": 1.2391, "step": 3789000 }, { "epoch": 2.27, "learning_rate": 4.013232002995151e-05, "loss": 1.2187, "step": 3789500 }, { "epoch": 2.27, "learning_rate": 4.013022006439094e-05, "loss": 1.2273, "step": 3790000 }, { "epoch": 2.27, "learning_rate": 4.012812009883038e-05, "loss": 1.2226, "step": 3790500 }, { "epoch": 2.27, "learning_rate": 4.0126020133269816e-05, "loss": 1.2266, "step": 3791000 }, { "epoch": 2.27, "learning_rate": 4.012392436764037e-05, "loss": 1.2393, "step": 3791500 }, { "epoch": 2.27, "learning_rate": 4.012182860201092e-05, "loss": 1.2179, "step": 3792000 }, { "epoch": 2.27, "learning_rate": 4.0119728636450356e-05, "loss": 1.2371, "step": 3792500 }, { "epoch": 2.27, "learning_rate": 4.0117628670889797e-05, "loss": 1.2009, "step": 3793000 }, { "epoch": 2.27, "learning_rate": 4.011552870532923e-05, "loss": 1.2375, "step": 3793500 }, { "epoch": 2.27, "learning_rate": 4.011342873976867e-05, "loss": 1.2117, "step": 3794000 }, { "epoch": 2.27, "learning_rate": 4.0111328774208104e-05, "loss": 1.2382, "step": 3794500 }, { "epoch": 2.28, "learning_rate": 4.010922880864754e-05, "loss": 1.225, "step": 3795000 }, { "epoch": 2.28, "learning_rate": 4.010712884308698e-05, "loss": 1.2032, "step": 3795500 }, { "epoch": 2.28, "learning_rate": 4.010502887752641e-05, "loss": 1.2264, "step": 3796000 }, { "epoch": 2.28, "learning_rate": 4.0102933111896964e-05, "loss": 1.2476, "step": 3796500 }, { "epoch": 2.28, "learning_rate": 4.010083734626752e-05, "loss": 1.2524, "step": 3797000 }, { "epoch": 2.28, "learning_rate": 4.009873738070695e-05, "loss": 1.2193, "step": 3797500 }, { "epoch": 2.28, "learning_rate": 4.009663741514639e-05, "loss": 1.2413, "step": 3798000 }, { "epoch": 2.28, "learning_rate": 4.0094537449585825e-05, "loss": 1.2038, "step": 3798500 }, { "epoch": 2.28, "learning_rate": 4.009243748402526e-05, "loss": 1.2231, "step": 3799000 }, { "epoch": 2.28, "learning_rate": 4.00903375184647e-05, "loss": 1.228, "step": 3799500 }, { "epoch": 2.28, "learning_rate": 4.008824175283525e-05, "loss": 1.2295, "step": 3800000 }, { "epoch": 2.28, "eval_loss": 1.1859376430511475, "eval_runtime": 1106.7769, "eval_samples_per_second": 475.904, "eval_steps_per_second": 79.318, "step": 3800000 }, { "epoch": 2.28, "learning_rate": 4.0086141787274686e-05, "loss": 1.2295, "step": 3800500 }, { "epoch": 2.28, "learning_rate": 4.0084041821714126e-05, "loss": 1.2621, "step": 3801000 }, { "epoch": 2.28, "learning_rate": 4.008194185615356e-05, "loss": 1.235, "step": 3801500 }, { "epoch": 2.28, "learning_rate": 4.007984189059299e-05, "loss": 1.2179, "step": 3802000 }, { "epoch": 2.28, "learning_rate": 4.007774192503243e-05, "loss": 1.2533, "step": 3802500 }, { "epoch": 2.28, "learning_rate": 4.007564195947187e-05, "loss": 1.2081, "step": 3803000 }, { "epoch": 2.28, "learning_rate": 4.00735419939113e-05, "loss": 1.2337, "step": 3803500 }, { "epoch": 2.28, "learning_rate": 4.007144202835074e-05, "loss": 1.2429, "step": 3804000 }, { "epoch": 2.28, "learning_rate": 4.0069346262721294e-05, "loss": 1.2493, "step": 3804500 }, { "epoch": 2.28, "learning_rate": 4.006724629716073e-05, "loss": 1.2188, "step": 3805000 }, { "epoch": 2.28, "learning_rate": 4.006514633160016e-05, "loss": 1.2603, "step": 3805500 }, { "epoch": 2.28, "learning_rate": 4.00630463660396e-05, "loss": 1.2455, "step": 3806000 }, { "epoch": 2.28, "learning_rate": 4.0060946400479035e-05, "loss": 1.2194, "step": 3806500 }, { "epoch": 2.28, "learning_rate": 4.005885063484959e-05, "loss": 1.2155, "step": 3807000 }, { "epoch": 2.28, "learning_rate": 4.005675066928902e-05, "loss": 1.2382, "step": 3807500 }, { "epoch": 2.28, "learning_rate": 4.005465490365958e-05, "loss": 1.2212, "step": 3808000 }, { "epoch": 2.28, "learning_rate": 4.0052554938099015e-05, "loss": 1.2376, "step": 3808500 }, { "epoch": 2.28, "learning_rate": 4.005045497253845e-05, "loss": 1.239, "step": 3809000 }, { "epoch": 2.28, "learning_rate": 4.004835500697789e-05, "loss": 1.2201, "step": 3809500 }, { "epoch": 2.28, "learning_rate": 4.004625504141732e-05, "loss": 1.2301, "step": 3810000 }, { "epoch": 2.28, "learning_rate": 4.0044155075856756e-05, "loss": 1.2337, "step": 3810500 }, { "epoch": 2.28, "learning_rate": 4.004205931022731e-05, "loss": 1.2127, "step": 3811000 }, { "epoch": 2.29, "learning_rate": 4.003995934466675e-05, "loss": 1.2537, "step": 3811500 }, { "epoch": 2.29, "learning_rate": 4.003785937910618e-05, "loss": 1.218, "step": 3812000 }, { "epoch": 2.29, "learning_rate": 4.003575941354562e-05, "loss": 1.2413, "step": 3812500 }, { "epoch": 2.29, "learning_rate": 4.003365944798506e-05, "loss": 1.2389, "step": 3813000 }, { "epoch": 2.29, "learning_rate": 4.003155948242449e-05, "loss": 1.2313, "step": 3813500 }, { "epoch": 2.29, "learning_rate": 4.002945951686392e-05, "loss": 1.2005, "step": 3814000 }, { "epoch": 2.29, "learning_rate": 4.002735955130336e-05, "loss": 1.2185, "step": 3814500 }, { "epoch": 2.29, "learning_rate": 4.002525958574279e-05, "loss": 1.1975, "step": 3815000 }, { "epoch": 2.29, "learning_rate": 4.002316382011335e-05, "loss": 1.2022, "step": 3815500 }, { "epoch": 2.29, "learning_rate": 4.0021063854552785e-05, "loss": 1.2372, "step": 3816000 }, { "epoch": 2.29, "learning_rate": 4.0018968088923345e-05, "loss": 1.2136, "step": 3816500 }, { "epoch": 2.29, "learning_rate": 4.001686812336278e-05, "loss": 1.2496, "step": 3817000 }, { "epoch": 2.29, "learning_rate": 4.001476815780221e-05, "loss": 1.2184, "step": 3817500 }, { "epoch": 2.29, "learning_rate": 4.001266819224165e-05, "loss": 1.2038, "step": 3818000 }, { "epoch": 2.29, "learning_rate": 4.001056822668108e-05, "loss": 1.2349, "step": 3818500 }, { "epoch": 2.29, "learning_rate": 4.000847246105164e-05, "loss": 1.2321, "step": 3819000 }, { "epoch": 2.29, "learning_rate": 4.000637249549107e-05, "loss": 1.2216, "step": 3819500 }, { "epoch": 2.29, "learning_rate": 4.000427252993051e-05, "loss": 1.2294, "step": 3820000 }, { "epoch": 2.29, "learning_rate": 4.0002176764301066e-05, "loss": 1.218, "step": 3820500 }, { "epoch": 2.29, "learning_rate": 4.00000767987405e-05, "loss": 1.2304, "step": 3821000 }, { "epoch": 2.29, "learning_rate": 3.999797683317993e-05, "loss": 1.229, "step": 3821500 }, { "epoch": 2.29, "learning_rate": 3.9995876867619374e-05, "loss": 1.2447, "step": 3822000 }, { "epoch": 2.29, "learning_rate": 3.999377690205881e-05, "loss": 1.2044, "step": 3822500 }, { "epoch": 2.29, "learning_rate": 3.999167693649824e-05, "loss": 1.2295, "step": 3823000 }, { "epoch": 2.29, "learning_rate": 3.9989576970937674e-05, "loss": 1.2095, "step": 3823500 }, { "epoch": 2.29, "learning_rate": 3.998747700537711e-05, "loss": 1.2296, "step": 3824000 }, { "epoch": 2.29, "learning_rate": 3.998537703981655e-05, "loss": 1.2479, "step": 3824500 }, { "epoch": 2.29, "learning_rate": 3.998327707425598e-05, "loss": 1.202, "step": 3825000 }, { "epoch": 2.29, "learning_rate": 3.9981177108695415e-05, "loss": 1.2597, "step": 3825500 }, { "epoch": 2.29, "learning_rate": 3.9979077143134855e-05, "loss": 1.2141, "step": 3826000 }, { "epoch": 2.29, "learning_rate": 3.997697717757429e-05, "loss": 1.2214, "step": 3826500 }, { "epoch": 2.29, "learning_rate": 3.997487721201372e-05, "loss": 1.2346, "step": 3827000 }, { "epoch": 2.29, "learning_rate": 3.9972781446384275e-05, "loss": 1.2272, "step": 3827500 }, { "epoch": 2.3, "learning_rate": 3.9970681480823716e-05, "loss": 1.2402, "step": 3828000 }, { "epoch": 2.3, "learning_rate": 3.996858151526315e-05, "loss": 1.2146, "step": 3828500 }, { "epoch": 2.3, "learning_rate": 3.996648154970258e-05, "loss": 1.2089, "step": 3829000 }, { "epoch": 2.3, "learning_rate": 3.996438158414202e-05, "loss": 1.2236, "step": 3829500 }, { "epoch": 2.3, "learning_rate": 3.9962281618581456e-05, "loss": 1.2246, "step": 3830000 }, { "epoch": 2.3, "learning_rate": 3.996018585295201e-05, "loss": 1.2155, "step": 3830500 }, { "epoch": 2.3, "learning_rate": 3.995808588739144e-05, "loss": 1.2294, "step": 3831000 }, { "epoch": 2.3, "learning_rate": 3.9955985921830883e-05, "loss": 1.244, "step": 3831500 }, { "epoch": 2.3, "learning_rate": 3.995388595627032e-05, "loss": 1.203, "step": 3832000 }, { "epoch": 2.3, "learning_rate": 3.995178599070975e-05, "loss": 1.2403, "step": 3832500 }, { "epoch": 2.3, "learning_rate": 3.994968602514919e-05, "loss": 1.2284, "step": 3833000 }, { "epoch": 2.3, "learning_rate": 3.9947590259519744e-05, "loss": 1.1975, "step": 3833500 }, { "epoch": 2.3, "learning_rate": 3.994549029395918e-05, "loss": 1.2493, "step": 3834000 }, { "epoch": 2.3, "learning_rate": 3.994339032839862e-05, "loss": 1.2343, "step": 3834500 }, { "epoch": 2.3, "learning_rate": 3.994129036283805e-05, "loss": 1.2659, "step": 3835000 }, { "epoch": 2.3, "learning_rate": 3.9939190397277485e-05, "loss": 1.2187, "step": 3835500 }, { "epoch": 2.3, "learning_rate": 3.993709463164804e-05, "loss": 1.1905, "step": 3836000 }, { "epoch": 2.3, "learning_rate": 3.993499466608748e-05, "loss": 1.2247, "step": 3836500 }, { "epoch": 2.3, "learning_rate": 3.993289470052691e-05, "loss": 1.217, "step": 3837000 }, { "epoch": 2.3, "learning_rate": 3.9930794734966346e-05, "loss": 1.2256, "step": 3837500 }, { "epoch": 2.3, "learning_rate": 3.9928694769405786e-05, "loss": 1.2343, "step": 3838000 }, { "epoch": 2.3, "learning_rate": 3.992659480384521e-05, "loss": 1.2218, "step": 3838500 }, { "epoch": 2.3, "learning_rate": 3.9924494838284646e-05, "loss": 1.2373, "step": 3839000 }, { "epoch": 2.3, "learning_rate": 3.992239907265521e-05, "loss": 1.2347, "step": 3839500 }, { "epoch": 2.3, "learning_rate": 3.9920299107094646e-05, "loss": 1.2353, "step": 3840000 }, { "epoch": 2.3, "learning_rate": 3.991819914153408e-05, "loss": 1.2096, "step": 3840500 }, { "epoch": 2.3, "learning_rate": 3.9916099175973513e-05, "loss": 1.2193, "step": 3841000 }, { "epoch": 2.3, "learning_rate": 3.991399921041295e-05, "loss": 1.2244, "step": 3841500 }, { "epoch": 2.3, "learning_rate": 3.991189924485238e-05, "loss": 1.2308, "step": 3842000 }, { "epoch": 2.3, "learning_rate": 3.990980347922294e-05, "loss": 1.2142, "step": 3842500 }, { "epoch": 2.3, "learning_rate": 3.990770351366238e-05, "loss": 1.2318, "step": 3843000 }, { "epoch": 2.3, "learning_rate": 3.990560354810181e-05, "loss": 1.2424, "step": 3843500 }, { "epoch": 2.3, "learning_rate": 3.990350358254124e-05, "loss": 1.2326, "step": 3844000 }, { "epoch": 2.3, "learning_rate": 3.990140361698068e-05, "loss": 1.2217, "step": 3844500 }, { "epoch": 2.31, "learning_rate": 3.9899312051282355e-05, "loss": 1.2055, "step": 3845000 }, { "epoch": 2.31, "learning_rate": 3.9897212085721795e-05, "loss": 1.1714, "step": 3845500 }, { "epoch": 2.31, "learning_rate": 3.989511212016123e-05, "loss": 1.2076, "step": 3846000 }, { "epoch": 2.31, "learning_rate": 3.989301215460067e-05, "loss": 1.2242, "step": 3846500 }, { "epoch": 2.31, "learning_rate": 3.98909121890401e-05, "loss": 1.2223, "step": 3847000 }, { "epoch": 2.31, "learning_rate": 3.9888812223479536e-05, "loss": 1.2323, "step": 3847500 }, { "epoch": 2.31, "learning_rate": 3.988671225791897e-05, "loss": 1.2468, "step": 3848000 }, { "epoch": 2.31, "learning_rate": 3.98846122923584e-05, "loss": 1.227, "step": 3848500 }, { "epoch": 2.31, "learning_rate": 3.9882512326797836e-05, "loss": 1.2182, "step": 3849000 }, { "epoch": 2.31, "learning_rate": 3.9880416561168397e-05, "loss": 1.244, "step": 3849500 }, { "epoch": 2.31, "learning_rate": 3.987831659560784e-05, "loss": 1.2073, "step": 3850000 }, { "epoch": 2.31, "learning_rate": 3.9876216630047263e-05, "loss": 1.2531, "step": 3850500 }, { "epoch": 2.31, "learning_rate": 3.98741166644867e-05, "loss": 1.2744, "step": 3851000 }, { "epoch": 2.31, "learning_rate": 3.987201669892614e-05, "loss": 1.2052, "step": 3851500 }, { "epoch": 2.31, "learning_rate": 3.98699209332967e-05, "loss": 1.2213, "step": 3852000 }, { "epoch": 2.31, "learning_rate": 3.986782096773613e-05, "loss": 1.2197, "step": 3852500 }, { "epoch": 2.31, "learning_rate": 3.9865725202106685e-05, "loss": 1.2312, "step": 3853000 }, { "epoch": 2.31, "learning_rate": 3.9863625236546125e-05, "loss": 1.2026, "step": 3853500 }, { "epoch": 2.31, "learning_rate": 3.986152527098556e-05, "loss": 1.2241, "step": 3854000 }, { "epoch": 2.31, "learning_rate": 3.985942530542499e-05, "loss": 1.221, "step": 3854500 }, { "epoch": 2.31, "learning_rate": 3.9857325339864425e-05, "loss": 1.2316, "step": 3855000 }, { "epoch": 2.31, "learning_rate": 3.985522537430386e-05, "loss": 1.2236, "step": 3855500 }, { "epoch": 2.31, "learning_rate": 3.985312540874329e-05, "loss": 1.2518, "step": 3856000 }, { "epoch": 2.31, "learning_rate": 3.985102544318273e-05, "loss": 1.2274, "step": 3856500 }, { "epoch": 2.31, "learning_rate": 3.984892967755329e-05, "loss": 1.2127, "step": 3857000 }, { "epoch": 2.31, "learning_rate": 3.9846833911923846e-05, "loss": 1.2219, "step": 3857500 }, { "epoch": 2.31, "learning_rate": 3.984473394636328e-05, "loss": 1.2206, "step": 3858000 }, { "epoch": 2.31, "learning_rate": 3.984263398080271e-05, "loss": 1.22, "step": 3858500 }, { "epoch": 2.31, "learning_rate": 3.9840534015242153e-05, "loss": 1.2221, "step": 3859000 }, { "epoch": 2.31, "learning_rate": 3.983843824961271e-05, "loss": 1.207, "step": 3859500 }, { "epoch": 2.31, "learning_rate": 3.983634248398326e-05, "loss": 1.2375, "step": 3860000 }, { "epoch": 2.31, "learning_rate": 3.9834242518422694e-05, "loss": 1.2418, "step": 3860500 }, { "epoch": 2.31, "learning_rate": 3.9832142552862134e-05, "loss": 1.2286, "step": 3861000 }, { "epoch": 2.32, "learning_rate": 3.983004258730157e-05, "loss": 1.2387, "step": 3861500 }, { "epoch": 2.32, "learning_rate": 3.9827942621741e-05, "loss": 1.2128, "step": 3862000 }, { "epoch": 2.32, "learning_rate": 3.982584265618044e-05, "loss": 1.2518, "step": 3862500 }, { "epoch": 2.32, "learning_rate": 3.9823742690619875e-05, "loss": 1.2373, "step": 3863000 }, { "epoch": 2.32, "learning_rate": 3.982164272505931e-05, "loss": 1.2124, "step": 3863500 }, { "epoch": 2.32, "learning_rate": 3.981954275949875e-05, "loss": 1.2217, "step": 3864000 }, { "epoch": 2.32, "learning_rate": 3.9817442793938175e-05, "loss": 1.2218, "step": 3864500 }, { "epoch": 2.32, "learning_rate": 3.981534282837761e-05, "loss": 1.1981, "step": 3865000 }, { "epoch": 2.32, "learning_rate": 3.981324706274817e-05, "loss": 1.2045, "step": 3865500 }, { "epoch": 2.32, "learning_rate": 3.981114709718761e-05, "loss": 1.2512, "step": 3866000 }, { "epoch": 2.32, "learning_rate": 3.980904713162704e-05, "loss": 1.2418, "step": 3866500 }, { "epoch": 2.32, "learning_rate": 3.9806947166066476e-05, "loss": 1.2223, "step": 3867000 }, { "epoch": 2.32, "learning_rate": 3.980484720050591e-05, "loss": 1.2658, "step": 3867500 }, { "epoch": 2.32, "learning_rate": 3.980274723494534e-05, "loss": 1.2289, "step": 3868000 }, { "epoch": 2.32, "learning_rate": 3.980064726938478e-05, "loss": 1.2233, "step": 3868500 }, { "epoch": 2.32, "learning_rate": 3.979854730382422e-05, "loss": 1.205, "step": 3869000 }, { "epoch": 2.32, "learning_rate": 3.979645153819477e-05, "loss": 1.2314, "step": 3869500 }, { "epoch": 2.32, "learning_rate": 3.9794351572634204e-05, "loss": 1.2227, "step": 3870000 }, { "epoch": 2.32, "learning_rate": 3.9792251607073644e-05, "loss": 1.232, "step": 3870500 }, { "epoch": 2.32, "learning_rate": 3.979015164151308e-05, "loss": 1.2337, "step": 3871000 }, { "epoch": 2.32, "learning_rate": 3.978805167595251e-05, "loss": 1.2419, "step": 3871500 }, { "epoch": 2.32, "learning_rate": 3.978595171039195e-05, "loss": 1.207, "step": 3872000 }, { "epoch": 2.32, "learning_rate": 3.9783851744831385e-05, "loss": 1.2061, "step": 3872500 }, { "epoch": 2.32, "learning_rate": 3.978175177927082e-05, "loss": 1.2766, "step": 3873000 }, { "epoch": 2.32, "learning_rate": 3.977965181371026e-05, "loss": 1.2395, "step": 3873500 }, { "epoch": 2.32, "learning_rate": 3.977755184814969e-05, "loss": 1.2211, "step": 3874000 }, { "epoch": 2.32, "learning_rate": 3.9775456082520245e-05, "loss": 1.2168, "step": 3874500 }, { "epoch": 2.32, "learning_rate": 3.977335611695968e-05, "loss": 1.2173, "step": 3875000 }, { "epoch": 2.32, "learning_rate": 3.977125615139912e-05, "loss": 1.2133, "step": 3875500 }, { "epoch": 2.32, "learning_rate": 3.976915618583855e-05, "loss": 1.1967, "step": 3876000 }, { "epoch": 2.32, "learning_rate": 3.9767056220277986e-05, "loss": 1.2088, "step": 3876500 }, { "epoch": 2.32, "learning_rate": 3.9764956254717426e-05, "loss": 1.2168, "step": 3877000 }, { "epoch": 2.32, "learning_rate": 3.976285628915685e-05, "loss": 1.2124, "step": 3877500 }, { "epoch": 2.33, "learning_rate": 3.976075632359629e-05, "loss": 1.2249, "step": 3878000 }, { "epoch": 2.33, "learning_rate": 3.9758660557966854e-05, "loss": 1.2312, "step": 3878500 }, { "epoch": 2.33, "learning_rate": 3.975656059240629e-05, "loss": 1.2233, "step": 3879000 }, { "epoch": 2.33, "learning_rate": 3.975446062684572e-05, "loss": 1.2363, "step": 3879500 }, { "epoch": 2.33, "learning_rate": 3.9752364861216274e-05, "loss": 1.2148, "step": 3880000 }, { "epoch": 2.33, "learning_rate": 3.9750264895655714e-05, "loss": 1.2229, "step": 3880500 }, { "epoch": 2.33, "learning_rate": 3.974816493009515e-05, "loss": 1.216, "step": 3881000 }, { "epoch": 2.33, "learning_rate": 3.974606496453458e-05, "loss": 1.2214, "step": 3881500 }, { "epoch": 2.33, "learning_rate": 3.9743964998974015e-05, "loss": 1.2161, "step": 3882000 }, { "epoch": 2.33, "learning_rate": 3.974186503341345e-05, "loss": 1.2188, "step": 3882500 }, { "epoch": 2.33, "learning_rate": 3.973976926778401e-05, "loss": 1.2454, "step": 3883000 }, { "epoch": 2.33, "learning_rate": 3.973766930222344e-05, "loss": 1.2071, "step": 3883500 }, { "epoch": 2.33, "learning_rate": 3.973556933666288e-05, "loss": 1.2426, "step": 3884000 }, { "epoch": 2.33, "learning_rate": 3.973346937110231e-05, "loss": 1.2363, "step": 3884500 }, { "epoch": 2.33, "learning_rate": 3.973136940554175e-05, "loss": 1.2436, "step": 3885000 }, { "epoch": 2.33, "learning_rate": 3.972926943998118e-05, "loss": 1.2437, "step": 3885500 }, { "epoch": 2.33, "learning_rate": 3.9727169474420616e-05, "loss": 1.2279, "step": 3886000 }, { "epoch": 2.33, "learning_rate": 3.9725069508860056e-05, "loss": 1.2067, "step": 3886500 }, { "epoch": 2.33, "learning_rate": 3.972297374323061e-05, "loss": 1.2206, "step": 3887000 }, { "epoch": 2.33, "learning_rate": 3.972087797760117e-05, "loss": 1.2149, "step": 3887500 }, { "epoch": 2.33, "learning_rate": 3.9718778012040604e-05, "loss": 1.2389, "step": 3888000 }, { "epoch": 2.33, "learning_rate": 3.971667804648004e-05, "loss": 1.2087, "step": 3888500 }, { "epoch": 2.33, "learning_rate": 3.971457808091948e-05, "loss": 1.2387, "step": 3889000 }, { "epoch": 2.33, "learning_rate": 3.9712478115358904e-05, "loss": 1.2262, "step": 3889500 }, { "epoch": 2.33, "learning_rate": 3.971037814979834e-05, "loss": 1.2241, "step": 3890000 }, { "epoch": 2.33, "learning_rate": 3.970827818423778e-05, "loss": 1.2458, "step": 3890500 }, { "epoch": 2.33, "learning_rate": 3.970617821867721e-05, "loss": 1.2198, "step": 3891000 }, { "epoch": 2.33, "learning_rate": 3.9704078253116645e-05, "loss": 1.2494, "step": 3891500 }, { "epoch": 2.33, "learning_rate": 3.9701982487487205e-05, "loss": 1.2222, "step": 3892000 }, { "epoch": 2.33, "learning_rate": 3.969988252192664e-05, "loss": 1.2218, "step": 3892500 }, { "epoch": 2.33, "learning_rate": 3.969778255636607e-05, "loss": 1.226, "step": 3893000 }, { "epoch": 2.33, "learning_rate": 3.969568259080551e-05, "loss": 1.2203, "step": 3893500 }, { "epoch": 2.33, "learning_rate": 3.9693582625244946e-05, "loss": 1.2118, "step": 3894000 }, { "epoch": 2.33, "learning_rate": 3.969148265968438e-05, "loss": 1.1966, "step": 3894500 }, { "epoch": 2.34, "learning_rate": 3.968938269412382e-05, "loss": 1.2097, "step": 3895000 }, { "epoch": 2.34, "learning_rate": 3.968728272856325e-05, "loss": 1.2611, "step": 3895500 }, { "epoch": 2.34, "learning_rate": 3.9685186962933806e-05, "loss": 1.2342, "step": 3896000 }, { "epoch": 2.34, "learning_rate": 3.968308699737324e-05, "loss": 1.2161, "step": 3896500 }, { "epoch": 2.34, "learning_rate": 3.968098703181268e-05, "loss": 1.2041, "step": 3897000 }, { "epoch": 2.34, "learning_rate": 3.9678887066252113e-05, "loss": 1.211, "step": 3897500 }, { "epoch": 2.34, "learning_rate": 3.967678710069155e-05, "loss": 1.2134, "step": 3898000 }, { "epoch": 2.34, "learning_rate": 3.967468713513099e-05, "loss": 1.2278, "step": 3898500 }, { "epoch": 2.34, "learning_rate": 3.967258716957042e-05, "loss": 1.1962, "step": 3899000 }, { "epoch": 2.34, "learning_rate": 3.9670487204009854e-05, "loss": 1.223, "step": 3899500 }, { "epoch": 2.34, "learning_rate": 3.9668391438380414e-05, "loss": 1.2325, "step": 3900000 }, { "epoch": 2.34, "eval_loss": 1.182997465133667, "eval_runtime": 1103.215, "eval_samples_per_second": 477.441, "eval_steps_per_second": 79.574, "step": 3900000 }, { "epoch": 2.34, "learning_rate": 3.966629147281985e-05, "loss": 1.2135, "step": 3900500 }, { "epoch": 2.34, "learning_rate": 3.96641957071904e-05, "loss": 1.2202, "step": 3901000 }, { "epoch": 2.34, "learning_rate": 3.9662095741629835e-05, "loss": 1.2039, "step": 3901500 }, { "epoch": 2.34, "learning_rate": 3.9659995776069275e-05, "loss": 1.2438, "step": 3902000 }, { "epoch": 2.34, "learning_rate": 3.965789581050871e-05, "loss": 1.2209, "step": 3902500 }, { "epoch": 2.34, "learning_rate": 3.965580004487926e-05, "loss": 1.2199, "step": 3903000 }, { "epoch": 2.34, "learning_rate": 3.9653700079318696e-05, "loss": 1.2234, "step": 3903500 }, { "epoch": 2.34, "learning_rate": 3.9651600113758136e-05, "loss": 1.2066, "step": 3904000 }, { "epoch": 2.34, "learning_rate": 3.964950014819757e-05, "loss": 1.2392, "step": 3904500 }, { "epoch": 2.34, "learning_rate": 3.964740438256812e-05, "loss": 1.1814, "step": 3905000 }, { "epoch": 2.34, "learning_rate": 3.964530441700756e-05, "loss": 1.2365, "step": 3905500 }, { "epoch": 2.34, "learning_rate": 3.9643204451446997e-05, "loss": 1.227, "step": 3906000 }, { "epoch": 2.34, "learning_rate": 3.964110448588643e-05, "loss": 1.2452, "step": 3906500 }, { "epoch": 2.34, "learning_rate": 3.963900452032587e-05, "loss": 1.2149, "step": 3907000 }, { "epoch": 2.34, "learning_rate": 3.9636904554765304e-05, "loss": 1.2287, "step": 3907500 }, { "epoch": 2.34, "learning_rate": 3.963480458920474e-05, "loss": 1.2221, "step": 3908000 }, { "epoch": 2.34, "learning_rate": 3.963270462364418e-05, "loss": 1.2155, "step": 3908500 }, { "epoch": 2.34, "learning_rate": 3.9630604658083604e-05, "loss": 1.2551, "step": 3909000 }, { "epoch": 2.34, "learning_rate": 3.962850469252304e-05, "loss": 1.2434, "step": 3909500 }, { "epoch": 2.34, "learning_rate": 3.96264089268936e-05, "loss": 1.2046, "step": 3910000 }, { "epoch": 2.34, "learning_rate": 3.962430896133304e-05, "loss": 1.2169, "step": 3910500 }, { "epoch": 2.34, "learning_rate": 3.962220899577247e-05, "loss": 1.2146, "step": 3911000 }, { "epoch": 2.35, "learning_rate": 3.9620113230143025e-05, "loss": 1.2568, "step": 3911500 }, { "epoch": 2.35, "learning_rate": 3.961801326458246e-05, "loss": 1.2267, "step": 3912000 }, { "epoch": 2.35, "learning_rate": 3.96159132990219e-05, "loss": 1.218, "step": 3912500 }, { "epoch": 2.35, "learning_rate": 3.961381333346133e-05, "loss": 1.2001, "step": 3913000 }, { "epoch": 2.35, "learning_rate": 3.9611713367900766e-05, "loss": 1.2187, "step": 3913500 }, { "epoch": 2.35, "learning_rate": 3.9609617602271326e-05, "loss": 1.2243, "step": 3914000 }, { "epoch": 2.35, "learning_rate": 3.960751763671076e-05, "loss": 1.2377, "step": 3914500 }, { "epoch": 2.35, "learning_rate": 3.960541767115019e-05, "loss": 1.214, "step": 3915000 }, { "epoch": 2.35, "learning_rate": 3.960331770558963e-05, "loss": 1.2322, "step": 3915500 }, { "epoch": 2.35, "learning_rate": 3.960121774002907e-05, "loss": 1.2044, "step": 3916000 }, { "epoch": 2.35, "learning_rate": 3.9599117774468493e-05, "loss": 1.228, "step": 3916500 }, { "epoch": 2.35, "learning_rate": 3.9597017808907934e-05, "loss": 1.2246, "step": 3917000 }, { "epoch": 2.35, "learning_rate": 3.959491784334737e-05, "loss": 1.2071, "step": 3917500 }, { "epoch": 2.35, "learning_rate": 3.95928178777868e-05, "loss": 1.2354, "step": 3918000 }, { "epoch": 2.35, "learning_rate": 3.9590722112157354e-05, "loss": 1.2243, "step": 3918500 }, { "epoch": 2.35, "learning_rate": 3.9588626346527915e-05, "loss": 1.2141, "step": 3919000 }, { "epoch": 2.35, "learning_rate": 3.9586526380967355e-05, "loss": 1.2364, "step": 3919500 }, { "epoch": 2.35, "learning_rate": 3.958442641540679e-05, "loss": 1.2201, "step": 3920000 }, { "epoch": 2.35, "learning_rate": 3.958232644984622e-05, "loss": 1.221, "step": 3920500 }, { "epoch": 2.35, "learning_rate": 3.9580226484285655e-05, "loss": 1.2547, "step": 3921000 }, { "epoch": 2.35, "learning_rate": 3.9578130718656215e-05, "loss": 1.2474, "step": 3921500 }, { "epoch": 2.35, "learning_rate": 3.957603075309565e-05, "loss": 1.2294, "step": 3922000 }, { "epoch": 2.35, "learning_rate": 3.957393078753509e-05, "loss": 1.263, "step": 3922500 }, { "epoch": 2.35, "learning_rate": 3.957183082197452e-05, "loss": 1.2571, "step": 3923000 }, { "epoch": 2.35, "learning_rate": 3.956973085641395e-05, "loss": 1.2371, "step": 3923500 }, { "epoch": 2.35, "learning_rate": 3.956763089085339e-05, "loss": 1.2215, "step": 3924000 }, { "epoch": 2.35, "learning_rate": 3.956553092529282e-05, "loss": 1.2212, "step": 3924500 }, { "epoch": 2.35, "learning_rate": 3.9563430959732256e-05, "loss": 1.2221, "step": 3925000 }, { "epoch": 2.35, "learning_rate": 3.956133519410282e-05, "loss": 1.2038, "step": 3925500 }, { "epoch": 2.35, "learning_rate": 3.955923522854225e-05, "loss": 1.2145, "step": 3926000 }, { "epoch": 2.35, "learning_rate": 3.9557135262981684e-05, "loss": 1.2251, "step": 3926500 }, { "epoch": 2.35, "learning_rate": 3.955503529742112e-05, "loss": 1.2476, "step": 3927000 }, { "epoch": 2.35, "learning_rate": 3.955293533186056e-05, "loss": 1.2225, "step": 3927500 }, { "epoch": 2.35, "learning_rate": 3.955083956623111e-05, "loss": 1.213, "step": 3928000 }, { "epoch": 2.36, "learning_rate": 3.9548739600670544e-05, "loss": 1.1864, "step": 3928500 }, { "epoch": 2.36, "learning_rate": 3.9546639635109985e-05, "loss": 1.2299, "step": 3929000 }, { "epoch": 2.36, "learning_rate": 3.954453966954942e-05, "loss": 1.2347, "step": 3929500 }, { "epoch": 2.36, "learning_rate": 3.954244390391998e-05, "loss": 1.2048, "step": 3930000 }, { "epoch": 2.36, "learning_rate": 3.9540343938359405e-05, "loss": 1.2036, "step": 3930500 }, { "epoch": 2.36, "learning_rate": 3.9538243972798845e-05, "loss": 1.2505, "step": 3931000 }, { "epoch": 2.36, "learning_rate": 3.953614400723828e-05, "loss": 1.2062, "step": 3931500 }, { "epoch": 2.36, "learning_rate": 3.953404404167771e-05, "loss": 1.2233, "step": 3932000 }, { "epoch": 2.36, "learning_rate": 3.953194407611715e-05, "loss": 1.2271, "step": 3932500 }, { "epoch": 2.36, "learning_rate": 3.9529844110556586e-05, "loss": 1.208, "step": 3933000 }, { "epoch": 2.36, "learning_rate": 3.952774414499602e-05, "loss": 1.1932, "step": 3933500 }, { "epoch": 2.36, "learning_rate": 3.952564417943546e-05, "loss": 1.2209, "step": 3934000 }, { "epoch": 2.36, "learning_rate": 3.952354841380601e-05, "loss": 1.2182, "step": 3934500 }, { "epoch": 2.36, "learning_rate": 3.952144844824545e-05, "loss": 1.2195, "step": 3935000 }, { "epoch": 2.36, "learning_rate": 3.951934848268488e-05, "loss": 1.2237, "step": 3935500 }, { "epoch": 2.36, "learning_rate": 3.951724851712432e-05, "loss": 1.2259, "step": 3936000 }, { "epoch": 2.36, "learning_rate": 3.9515148551563754e-05, "loss": 1.2238, "step": 3936500 }, { "epoch": 2.36, "learning_rate": 3.951305278593431e-05, "loss": 1.2428, "step": 3937000 }, { "epoch": 2.36, "learning_rate": 3.951095282037375e-05, "loss": 1.21, "step": 3937500 }, { "epoch": 2.36, "learning_rate": 3.950885285481318e-05, "loss": 1.2413, "step": 3938000 }, { "epoch": 2.36, "learning_rate": 3.9506752889252615e-05, "loss": 1.2355, "step": 3938500 }, { "epoch": 2.36, "learning_rate": 3.950465712362317e-05, "loss": 1.2127, "step": 3939000 }, { "epoch": 2.36, "learning_rate": 3.950255715806261e-05, "loss": 1.2229, "step": 3939500 }, { "epoch": 2.36, "learning_rate": 3.950045719250204e-05, "loss": 1.2357, "step": 3940000 }, { "epoch": 2.36, "learning_rate": 3.9498357226941475e-05, "loss": 1.2254, "step": 3940500 }, { "epoch": 2.36, "learning_rate": 3.9496257261380916e-05, "loss": 1.2408, "step": 3941000 }, { "epoch": 2.36, "learning_rate": 3.949416149575147e-05, "loss": 1.2336, "step": 3941500 }, { "epoch": 2.36, "learning_rate": 3.94920615301909e-05, "loss": 1.2255, "step": 3942000 }, { "epoch": 2.36, "learning_rate": 3.9489961564630336e-05, "loss": 1.2097, "step": 3942500 }, { "epoch": 2.36, "learning_rate": 3.9487861599069776e-05, "loss": 1.2258, "step": 3943000 }, { "epoch": 2.36, "learning_rate": 3.948576163350921e-05, "loss": 1.2371, "step": 3943500 }, { "epoch": 2.36, "learning_rate": 3.948366586787976e-05, "loss": 1.2487, "step": 3944000 }, { "epoch": 2.36, "learning_rate": 3.9481565902319204e-05, "loss": 1.2133, "step": 3944500 }, { "epoch": 2.37, "learning_rate": 3.947946593675864e-05, "loss": 1.2195, "step": 3945000 }, { "epoch": 2.37, "learning_rate": 3.947736597119807e-05, "loss": 1.2327, "step": 3945500 }, { "epoch": 2.37, "learning_rate": 3.947526600563751e-05, "loss": 1.2133, "step": 3946000 }, { "epoch": 2.37, "learning_rate": 3.9473166040076944e-05, "loss": 1.2001, "step": 3946500 }, { "epoch": 2.37, "learning_rate": 3.947106607451638e-05, "loss": 1.2304, "step": 3947000 }, { "epoch": 2.37, "learning_rate": 3.946896610895582e-05, "loss": 1.2162, "step": 3947500 }, { "epoch": 2.37, "learning_rate": 3.946687034332637e-05, "loss": 1.2176, "step": 3948000 }, { "epoch": 2.37, "learning_rate": 3.9464770377765805e-05, "loss": 1.1931, "step": 3948500 }, { "epoch": 2.37, "learning_rate": 3.946267041220524e-05, "loss": 1.2594, "step": 3949000 }, { "epoch": 2.37, "learning_rate": 3.946057044664468e-05, "loss": 1.2181, "step": 3949500 }, { "epoch": 2.37, "learning_rate": 3.945847048108411e-05, "loss": 1.2022, "step": 3950000 }, { "epoch": 2.37, "learning_rate": 3.945637051552354e-05, "loss": 1.2217, "step": 3950500 }, { "epoch": 2.37, "learning_rate": 3.94542747498941e-05, "loss": 1.2317, "step": 3951000 }, { "epoch": 2.37, "learning_rate": 3.945217478433354e-05, "loss": 1.22, "step": 3951500 }, { "epoch": 2.37, "learning_rate": 3.945007481877297e-05, "loss": 1.2306, "step": 3952000 }, { "epoch": 2.37, "learning_rate": 3.944797485321241e-05, "loss": 1.2177, "step": 3952500 }, { "epoch": 2.37, "learning_rate": 3.944587488765184e-05, "loss": 1.1972, "step": 3953000 }, { "epoch": 2.37, "learning_rate": 3.94437791220224e-05, "loss": 1.2126, "step": 3953500 }, { "epoch": 2.37, "learning_rate": 3.9441679156461834e-05, "loss": 1.227, "step": 3954000 }, { "epoch": 2.37, "learning_rate": 3.9439579190901274e-05, "loss": 1.2464, "step": 3954500 }, { "epoch": 2.37, "learning_rate": 3.943748342527183e-05, "loss": 1.2256, "step": 3955000 }, { "epoch": 2.37, "learning_rate": 3.943538345971126e-05, "loss": 1.2448, "step": 3955500 }, { "epoch": 2.37, "learning_rate": 3.9433287694081814e-05, "loss": 1.2113, "step": 3956000 }, { "epoch": 2.37, "learning_rate": 3.943118772852125e-05, "loss": 1.2208, "step": 3956500 }, { "epoch": 2.37, "learning_rate": 3.942908776296069e-05, "loss": 1.2053, "step": 3957000 }, { "epoch": 2.37, "learning_rate": 3.942698779740012e-05, "loss": 1.2084, "step": 3957500 }, { "epoch": 2.37, "learning_rate": 3.942488783183956e-05, "loss": 1.2354, "step": 3958000 }, { "epoch": 2.37, "learning_rate": 3.9422787866278995e-05, "loss": 1.235, "step": 3958500 }, { "epoch": 2.37, "learning_rate": 3.942068790071843e-05, "loss": 1.242, "step": 3959000 }, { "epoch": 2.37, "learning_rate": 3.941858793515787e-05, "loss": 1.2198, "step": 3959500 }, { "epoch": 2.37, "learning_rate": 3.9416487969597296e-05, "loss": 1.204, "step": 3960000 }, { "epoch": 2.37, "learning_rate": 3.941438800403673e-05, "loss": 1.1948, "step": 3960500 }, { "epoch": 2.37, "learning_rate": 3.941229223840729e-05, "loss": 1.2152, "step": 3961000 }, { "epoch": 2.38, "learning_rate": 3.941019227284673e-05, "loss": 1.2248, "step": 3961500 }, { "epoch": 2.38, "learning_rate": 3.940809230728616e-05, "loss": 1.2, "step": 3962000 }, { "epoch": 2.38, "learning_rate": 3.940599234172559e-05, "loss": 1.2139, "step": 3962500 }, { "epoch": 2.38, "learning_rate": 3.940389237616503e-05, "loss": 1.2378, "step": 3963000 }, { "epoch": 2.38, "learning_rate": 3.9401792410604464e-05, "loss": 1.2209, "step": 3963500 }, { "epoch": 2.38, "learning_rate": 3.93996924450439e-05, "loss": 1.2103, "step": 3964000 }, { "epoch": 2.38, "learning_rate": 3.939759667941446e-05, "loss": 1.2015, "step": 3964500 }, { "epoch": 2.38, "learning_rate": 3.939549671385389e-05, "loss": 1.2247, "step": 3965000 }, { "epoch": 2.38, "learning_rate": 3.9393396748293324e-05, "loss": 1.2298, "step": 3965500 }, { "epoch": 2.38, "learning_rate": 3.9391296782732764e-05, "loss": 1.2251, "step": 3966000 }, { "epoch": 2.38, "learning_rate": 3.93891968171722e-05, "loss": 1.2227, "step": 3966500 }, { "epoch": 2.38, "learning_rate": 3.938709685161163e-05, "loss": 1.2028, "step": 3967000 }, { "epoch": 2.38, "learning_rate": 3.938499688605107e-05, "loss": 1.2052, "step": 3967500 }, { "epoch": 2.38, "learning_rate": 3.9382896920490505e-05, "loss": 1.2249, "step": 3968000 }, { "epoch": 2.38, "learning_rate": 3.938080115486106e-05, "loss": 1.222, "step": 3968500 }, { "epoch": 2.38, "learning_rate": 3.937870118930049e-05, "loss": 1.2299, "step": 3969000 }, { "epoch": 2.38, "learning_rate": 3.937660122373993e-05, "loss": 1.2218, "step": 3969500 }, { "epoch": 2.38, "learning_rate": 3.9374501258179366e-05, "loss": 1.225, "step": 3970000 }, { "epoch": 2.38, "learning_rate": 3.937240549254992e-05, "loss": 1.2338, "step": 3970500 }, { "epoch": 2.38, "learning_rate": 3.937030972692048e-05, "loss": 1.1964, "step": 3971000 }, { "epoch": 2.38, "learning_rate": 3.936820976135991e-05, "loss": 1.1903, "step": 3971500 }, { "epoch": 2.38, "learning_rate": 3.936610979579935e-05, "loss": 1.236, "step": 3972000 }, { "epoch": 2.38, "learning_rate": 3.936400983023878e-05, "loss": 1.194, "step": 3972500 }, { "epoch": 2.38, "learning_rate": 3.936190986467822e-05, "loss": 1.2338, "step": 3973000 }, { "epoch": 2.38, "learning_rate": 3.9359809899117654e-05, "loss": 1.2481, "step": 3973500 }, { "epoch": 2.38, "learning_rate": 3.935770993355709e-05, "loss": 1.2367, "step": 3974000 }, { "epoch": 2.38, "learning_rate": 3.935560996799653e-05, "loss": 1.2048, "step": 3974500 }, { "epoch": 2.38, "learning_rate": 3.935351000243596e-05, "loss": 1.2289, "step": 3975000 }, { "epoch": 2.38, "learning_rate": 3.9351410036875394e-05, "loss": 1.2092, "step": 3975500 }, { "epoch": 2.38, "learning_rate": 3.934931427124595e-05, "loss": 1.2271, "step": 3976000 }, { "epoch": 2.38, "learning_rate": 3.934721430568539e-05, "loss": 1.23, "step": 3976500 }, { "epoch": 2.38, "learning_rate": 3.934511434012482e-05, "loss": 1.2616, "step": 3977000 }, { "epoch": 2.38, "learning_rate": 3.9343014374564255e-05, "loss": 1.2253, "step": 3977500 }, { "epoch": 2.38, "learning_rate": 3.9340914409003695e-05, "loss": 1.2187, "step": 3978000 }, { "epoch": 2.39, "learning_rate": 3.933881444344313e-05, "loss": 1.2422, "step": 3978500 }, { "epoch": 2.39, "learning_rate": 3.933671447788256e-05, "loss": 1.2065, "step": 3979000 }, { "epoch": 2.39, "learning_rate": 3.9334614512322e-05, "loss": 1.1699, "step": 3979500 }, { "epoch": 2.39, "learning_rate": 3.9332518746692556e-05, "loss": 1.1928, "step": 3980000 }, { "epoch": 2.39, "learning_rate": 3.933042298106311e-05, "loss": 1.2199, "step": 3980500 }, { "epoch": 2.39, "learning_rate": 3.932832301550254e-05, "loss": 1.2142, "step": 3981000 }, { "epoch": 2.39, "learning_rate": 3.932622304994198e-05, "loss": 1.2195, "step": 3981500 }, { "epoch": 2.39, "learning_rate": 3.932412308438142e-05, "loss": 1.2127, "step": 3982000 }, { "epoch": 2.39, "learning_rate": 3.932202731875197e-05, "loss": 1.2224, "step": 3982500 }, { "epoch": 2.39, "learning_rate": 3.9319927353191404e-05, "loss": 1.2491, "step": 3983000 }, { "epoch": 2.39, "learning_rate": 3.9317827387630844e-05, "loss": 1.2436, "step": 3983500 }, { "epoch": 2.39, "learning_rate": 3.931572742207028e-05, "loss": 1.2479, "step": 3984000 }, { "epoch": 2.39, "learning_rate": 3.931362745650971e-05, "loss": 1.1887, "step": 3984500 }, { "epoch": 2.39, "learning_rate": 3.931152749094915e-05, "loss": 1.2188, "step": 3985000 }, { "epoch": 2.39, "learning_rate": 3.9309427525388585e-05, "loss": 1.1963, "step": 3985500 }, { "epoch": 2.39, "learning_rate": 3.930732755982802e-05, "loss": 1.1988, "step": 3986000 }, { "epoch": 2.39, "learning_rate": 3.930523179419857e-05, "loss": 1.2259, "step": 3986500 }, { "epoch": 2.39, "learning_rate": 3.930313182863801e-05, "loss": 1.2126, "step": 3987000 }, { "epoch": 2.39, "learning_rate": 3.9301036063008566e-05, "loss": 1.1968, "step": 3987500 }, { "epoch": 2.39, "learning_rate": 3.9298936097448e-05, "loss": 1.2178, "step": 3988000 }, { "epoch": 2.39, "learning_rate": 3.929683613188744e-05, "loss": 1.204, "step": 3988500 }, { "epoch": 2.39, "learning_rate": 3.929473616632687e-05, "loss": 1.2282, "step": 3989000 }, { "epoch": 2.39, "learning_rate": 3.9292640400697426e-05, "loss": 1.1988, "step": 3989500 }, { "epoch": 2.39, "learning_rate": 3.929054043513686e-05, "loss": 1.2136, "step": 3990000 }, { "epoch": 2.39, "learning_rate": 3.92884404695763e-05, "loss": 1.2268, "step": 3990500 }, { "epoch": 2.39, "learning_rate": 3.9286340504015733e-05, "loss": 1.2081, "step": 3991000 }, { "epoch": 2.39, "learning_rate": 3.928424053845517e-05, "loss": 1.2573, "step": 3991500 }, { "epoch": 2.39, "learning_rate": 3.928214057289461e-05, "loss": 1.2153, "step": 3992000 }, { "epoch": 2.39, "learning_rate": 3.928004060733404e-05, "loss": 1.2559, "step": 3992500 }, { "epoch": 2.39, "learning_rate": 3.9277940641773474e-05, "loss": 1.2327, "step": 3993000 }, { "epoch": 2.39, "learning_rate": 3.9275840676212914e-05, "loss": 1.2099, "step": 3993500 }, { "epoch": 2.39, "learning_rate": 3.927374071065234e-05, "loss": 1.2278, "step": 3994000 }, { "epoch": 2.39, "learning_rate": 3.92716449450229e-05, "loss": 1.25, "step": 3994500 }, { "epoch": 2.4, "learning_rate": 3.9269544979462335e-05, "loss": 1.2103, "step": 3995000 }, { "epoch": 2.4, "learning_rate": 3.9267449213832895e-05, "loss": 1.2158, "step": 3995500 }, { "epoch": 2.4, "learning_rate": 3.926534924827233e-05, "loss": 1.214, "step": 3996000 }, { "epoch": 2.4, "learning_rate": 3.926324928271176e-05, "loss": 1.2043, "step": 3996500 }, { "epoch": 2.4, "learning_rate": 3.92611493171512e-05, "loss": 1.1996, "step": 3997000 }, { "epoch": 2.4, "learning_rate": 3.9259049351590636e-05, "loss": 1.1874, "step": 3997500 }, { "epoch": 2.4, "learning_rate": 3.925694938603007e-05, "loss": 1.2523, "step": 3998000 }, { "epoch": 2.4, "learning_rate": 3.925484942046951e-05, "loss": 1.2168, "step": 3998500 }, { "epoch": 2.4, "learning_rate": 3.9252749454908936e-05, "loss": 1.212, "step": 3999000 }, { "epoch": 2.4, "learning_rate": 3.9250653689279496e-05, "loss": 1.2271, "step": 3999500 }, { "epoch": 2.4, "learning_rate": 3.924855372371893e-05, "loss": 1.2031, "step": 4000000 }, { "epoch": 2.4, "eval_loss": 1.1803804636001587, "eval_runtime": 1110.222, "eval_samples_per_second": 474.428, "eval_steps_per_second": 79.072, "step": 4000000 }, { "epoch": 2.4, "learning_rate": 3.924645375815837e-05, "loss": 1.2029, "step": 4000500 }, { "epoch": 2.4, "learning_rate": 3.92443537925978e-05, "loss": 1.2338, "step": 4001000 }, { "epoch": 2.4, "learning_rate": 3.924225382703723e-05, "loss": 1.2061, "step": 4001500 }, { "epoch": 2.4, "learning_rate": 3.924015806140779e-05, "loss": 1.2055, "step": 4002000 }, { "epoch": 2.4, "learning_rate": 3.923805809584723e-05, "loss": 1.2464, "step": 4002500 }, { "epoch": 2.4, "learning_rate": 3.9235958130286664e-05, "loss": 1.2344, "step": 4003000 }, { "epoch": 2.4, "learning_rate": 3.92338581647261e-05, "loss": 1.2164, "step": 4003500 }, { "epoch": 2.4, "learning_rate": 3.923175819916553e-05, "loss": 1.2256, "step": 4004000 }, { "epoch": 2.4, "learning_rate": 3.9229658233604965e-05, "loss": 1.2295, "step": 4004500 }, { "epoch": 2.4, "learning_rate": 3.9227558268044405e-05, "loss": 1.2117, "step": 4005000 }, { "epoch": 2.4, "learning_rate": 3.922545830248384e-05, "loss": 1.2196, "step": 4005500 }, { "epoch": 2.4, "learning_rate": 3.922336253685439e-05, "loss": 1.2075, "step": 4006000 }, { "epoch": 2.4, "learning_rate": 3.9221262571293825e-05, "loss": 1.2173, "step": 4006500 }, { "epoch": 2.4, "learning_rate": 3.9219162605733266e-05, "loss": 1.2265, "step": 4007000 }, { "epoch": 2.4, "learning_rate": 3.92170626401727e-05, "loss": 1.2209, "step": 4007500 }, { "epoch": 2.4, "learning_rate": 3.921496687454326e-05, "loss": 1.2143, "step": 4008000 }, { "epoch": 2.4, "learning_rate": 3.9212866908982686e-05, "loss": 1.1951, "step": 4008500 }, { "epoch": 2.4, "learning_rate": 3.9210766943422126e-05, "loss": 1.203, "step": 4009000 }, { "epoch": 2.4, "learning_rate": 3.920866697786156e-05, "loss": 1.1978, "step": 4009500 }, { "epoch": 2.4, "learning_rate": 3.920656701230099e-05, "loss": 1.2072, "step": 4010000 }, { "epoch": 2.4, "learning_rate": 3.9204475446602674e-05, "loss": 1.2454, "step": 4010500 }, { "epoch": 2.4, "learning_rate": 3.9202375481042114e-05, "loss": 1.1936, "step": 4011000 }, { "epoch": 2.41, "learning_rate": 3.920027551548155e-05, "loss": 1.2084, "step": 4011500 }, { "epoch": 2.41, "learning_rate": 3.919817554992098e-05, "loss": 1.1898, "step": 4012000 }, { "epoch": 2.41, "learning_rate": 3.919607558436042e-05, "loss": 1.2102, "step": 4012500 }, { "epoch": 2.41, "learning_rate": 3.919397561879985e-05, "loss": 1.2079, "step": 4013000 }, { "epoch": 2.41, "learning_rate": 3.919187985317041e-05, "loss": 1.2116, "step": 4013500 }, { "epoch": 2.41, "learning_rate": 3.918977988760984e-05, "loss": 1.1974, "step": 4014000 }, { "epoch": 2.41, "learning_rate": 3.918767992204928e-05, "loss": 1.2262, "step": 4014500 }, { "epoch": 2.41, "learning_rate": 3.9185579956488715e-05, "loss": 1.2148, "step": 4015000 }, { "epoch": 2.41, "learning_rate": 3.918348419085927e-05, "loss": 1.2092, "step": 4015500 }, { "epoch": 2.41, "learning_rate": 3.91813842252987e-05, "loss": 1.2172, "step": 4016000 }, { "epoch": 2.41, "learning_rate": 3.917928425973814e-05, "loss": 1.1798, "step": 4016500 }, { "epoch": 2.41, "learning_rate": 3.9177184294177576e-05, "loss": 1.227, "step": 4017000 }, { "epoch": 2.41, "learning_rate": 3.9175084328617016e-05, "loss": 1.2369, "step": 4017500 }, { "epoch": 2.41, "learning_rate": 3.917298856298757e-05, "loss": 1.2196, "step": 4018000 }, { "epoch": 2.41, "learning_rate": 3.9170888597427e-05, "loss": 1.2308, "step": 4018500 }, { "epoch": 2.41, "learning_rate": 3.916878863186644e-05, "loss": 1.2334, "step": 4019000 }, { "epoch": 2.41, "learning_rate": 3.916668866630588e-05, "loss": 1.2158, "step": 4019500 }, { "epoch": 2.41, "learning_rate": 3.916458870074531e-05, "loss": 1.2299, "step": 4020000 }, { "epoch": 2.41, "learning_rate": 3.916248873518474e-05, "loss": 1.2024, "step": 4020500 }, { "epoch": 2.41, "learning_rate": 3.916038876962418e-05, "loss": 1.247, "step": 4021000 }, { "epoch": 2.41, "learning_rate": 3.915828880406361e-05, "loss": 1.1977, "step": 4021500 }, { "epoch": 2.41, "learning_rate": 3.915619303843417e-05, "loss": 1.2243, "step": 4022000 }, { "epoch": 2.41, "learning_rate": 3.91540930728736e-05, "loss": 1.214, "step": 4022500 }, { "epoch": 2.41, "learning_rate": 3.915199310731304e-05, "loss": 1.2222, "step": 4023000 }, { "epoch": 2.41, "learning_rate": 3.91498973416836e-05, "loss": 1.2033, "step": 4023500 }, { "epoch": 2.41, "learning_rate": 3.914779737612303e-05, "loss": 1.2374, "step": 4024000 }, { "epoch": 2.41, "learning_rate": 3.914569741056247e-05, "loss": 1.2157, "step": 4024500 }, { "epoch": 2.41, "learning_rate": 3.91435974450019e-05, "loss": 1.2258, "step": 4025000 }, { "epoch": 2.41, "learning_rate": 3.914149747944133e-05, "loss": 1.2522, "step": 4025500 }, { "epoch": 2.41, "learning_rate": 3.913939751388077e-05, "loss": 1.2239, "step": 4026000 }, { "epoch": 2.41, "learning_rate": 3.9137297548320206e-05, "loss": 1.2323, "step": 4026500 }, { "epoch": 2.41, "learning_rate": 3.9135201782690766e-05, "loss": 1.2101, "step": 4027000 }, { "epoch": 2.41, "learning_rate": 3.913310181713019e-05, "loss": 1.2292, "step": 4027500 }, { "epoch": 2.41, "learning_rate": 3.913100185156963e-05, "loss": 1.2309, "step": 4028000 }, { "epoch": 2.42, "learning_rate": 3.912890188600907e-05, "loss": 1.2354, "step": 4028500 }, { "epoch": 2.42, "learning_rate": 3.91268019204485e-05, "loss": 1.21, "step": 4029000 }, { "epoch": 2.42, "learning_rate": 3.912470615481906e-05, "loss": 1.2372, "step": 4029500 }, { "epoch": 2.42, "learning_rate": 3.9122606189258494e-05, "loss": 1.2124, "step": 4030000 }, { "epoch": 2.42, "learning_rate": 3.912050622369793e-05, "loss": 1.2474, "step": 4030500 }, { "epoch": 2.42, "learning_rate": 3.911840625813737e-05, "loss": 1.2053, "step": 4031000 }, { "epoch": 2.42, "learning_rate": 3.91163062925768e-05, "loss": 1.217, "step": 4031500 }, { "epoch": 2.42, "learning_rate": 3.911421472687848e-05, "loss": 1.2627, "step": 4032000 }, { "epoch": 2.42, "learning_rate": 3.9112114761317915e-05, "loss": 1.2186, "step": 4032500 }, { "epoch": 2.42, "learning_rate": 3.911001479575735e-05, "loss": 1.2202, "step": 4033000 }, { "epoch": 2.42, "learning_rate": 3.910791483019679e-05, "loss": 1.2264, "step": 4033500 }, { "epoch": 2.42, "learning_rate": 3.910581486463622e-05, "loss": 1.2483, "step": 4034000 }, { "epoch": 2.42, "learning_rate": 3.910371489907565e-05, "loss": 1.2219, "step": 4034500 }, { "epoch": 2.42, "learning_rate": 3.910161913344621e-05, "loss": 1.2246, "step": 4035000 }, { "epoch": 2.42, "learning_rate": 3.909951916788565e-05, "loss": 1.2301, "step": 4035500 }, { "epoch": 2.42, "learning_rate": 3.90974234022562e-05, "loss": 1.2103, "step": 4036000 }, { "epoch": 2.42, "learning_rate": 3.9095323436695637e-05, "loss": 1.2154, "step": 4036500 }, { "epoch": 2.42, "learning_rate": 3.909322347113508e-05, "loss": 1.2242, "step": 4037000 }, { "epoch": 2.42, "learning_rate": 3.909112350557451e-05, "loss": 1.2288, "step": 4037500 }, { "epoch": 2.42, "learning_rate": 3.9089023540013944e-05, "loss": 1.2116, "step": 4038000 }, { "epoch": 2.42, "learning_rate": 3.9086923574453384e-05, "loss": 1.2543, "step": 4038500 }, { "epoch": 2.42, "learning_rate": 3.908483200875505e-05, "loss": 1.2354, "step": 4039000 }, { "epoch": 2.42, "learning_rate": 3.908273204319449e-05, "loss": 1.2052, "step": 4039500 }, { "epoch": 2.42, "learning_rate": 3.9080632077633925e-05, "loss": 1.2095, "step": 4040000 }, { "epoch": 2.42, "learning_rate": 3.907853211207336e-05, "loss": 1.248, "step": 4040500 }, { "epoch": 2.42, "learning_rate": 3.90764321465128e-05, "loss": 1.2232, "step": 4041000 }, { "epoch": 2.42, "learning_rate": 3.907433218095223e-05, "loss": 1.2078, "step": 4041500 }, { "epoch": 2.42, "learning_rate": 3.9072232215391665e-05, "loss": 1.1842, "step": 4042000 }, { "epoch": 2.42, "learning_rate": 3.9070132249831105e-05, "loss": 1.2017, "step": 4042500 }, { "epoch": 2.42, "learning_rate": 3.906803228427054e-05, "loss": 1.2267, "step": 4043000 }, { "epoch": 2.42, "learning_rate": 3.906593231870997e-05, "loss": 1.2406, "step": 4043500 }, { "epoch": 2.42, "learning_rate": 3.9063832353149406e-05, "loss": 1.2154, "step": 4044000 }, { "epoch": 2.42, "learning_rate": 3.906173238758884e-05, "loss": 1.2191, "step": 4044500 }, { "epoch": 2.43, "learning_rate": 3.905963242202828e-05, "loss": 1.2114, "step": 4045000 }, { "epoch": 2.43, "learning_rate": 3.905753245646771e-05, "loss": 1.2223, "step": 4045500 }, { "epoch": 2.43, "learning_rate": 3.9055432490907146e-05, "loss": 1.25, "step": 4046000 }, { "epoch": 2.43, "learning_rate": 3.90533367252777e-05, "loss": 1.2221, "step": 4046500 }, { "epoch": 2.43, "learning_rate": 3.905123675971714e-05, "loss": 1.2487, "step": 4047000 }, { "epoch": 2.43, "learning_rate": 3.9049136794156574e-05, "loss": 1.2401, "step": 4047500 }, { "epoch": 2.43, "learning_rate": 3.904703682859601e-05, "loss": 1.2349, "step": 4048000 }, { "epoch": 2.43, "learning_rate": 3.904493686303545e-05, "loss": 1.2031, "step": 4048500 }, { "epoch": 2.43, "learning_rate": 3.904283689747488e-05, "loss": 1.2308, "step": 4049000 }, { "epoch": 2.43, "learning_rate": 3.9040736931914314e-05, "loss": 1.2282, "step": 4049500 }, { "epoch": 2.43, "learning_rate": 3.9038636966353755e-05, "loss": 1.2161, "step": 4050000 }, { "epoch": 2.43, "learning_rate": 3.903654120072431e-05, "loss": 1.1919, "step": 4050500 }, { "epoch": 2.43, "learning_rate": 3.903444123516374e-05, "loss": 1.2266, "step": 4051000 }, { "epoch": 2.43, "learning_rate": 3.9032341269603175e-05, "loss": 1.2017, "step": 4051500 }, { "epoch": 2.43, "learning_rate": 3.9030241304042615e-05, "loss": 1.2076, "step": 4052000 }, { "epoch": 2.43, "learning_rate": 3.902814133848205e-05, "loss": 1.1977, "step": 4052500 }, { "epoch": 2.43, "learning_rate": 3.902604137292148e-05, "loss": 1.2084, "step": 4053000 }, { "epoch": 2.43, "learning_rate": 3.902394140736092e-05, "loss": 1.2279, "step": 4053500 }, { "epoch": 2.43, "learning_rate": 3.9021841441800356e-05, "loss": 1.2395, "step": 4054000 }, { "epoch": 2.43, "learning_rate": 3.901974147623979e-05, "loss": 1.2089, "step": 4054500 }, { "epoch": 2.43, "learning_rate": 3.901764151067922e-05, "loss": 1.2309, "step": 4055000 }, { "epoch": 2.43, "learning_rate": 3.901554574504978e-05, "loss": 1.222, "step": 4055500 }, { "epoch": 2.43, "learning_rate": 3.9013445779489217e-05, "loss": 1.204, "step": 4056000 }, { "epoch": 2.43, "learning_rate": 3.901134581392866e-05, "loss": 1.2047, "step": 4056500 }, { "epoch": 2.43, "learning_rate": 3.9009245848368084e-05, "loss": 1.2174, "step": 4057000 }, { "epoch": 2.43, "learning_rate": 3.900714588280752e-05, "loss": 1.2433, "step": 4057500 }, { "epoch": 2.43, "learning_rate": 3.900505011717808e-05, "loss": 1.2193, "step": 4058000 }, { "epoch": 2.43, "learning_rate": 3.900295015161752e-05, "loss": 1.2104, "step": 4058500 }, { "epoch": 2.43, "learning_rate": 3.9000850186056944e-05, "loss": 1.2314, "step": 4059000 }, { "epoch": 2.43, "learning_rate": 3.899875022049638e-05, "loss": 1.2266, "step": 4059500 }, { "epoch": 2.43, "learning_rate": 3.899665445486694e-05, "loss": 1.2373, "step": 4060000 }, { "epoch": 2.43, "learning_rate": 3.899455448930638e-05, "loss": 1.2424, "step": 4060500 }, { "epoch": 2.43, "learning_rate": 3.899245872367693e-05, "loss": 1.2053, "step": 4061000 }, { "epoch": 2.44, "learning_rate": 3.8990358758116365e-05, "loss": 1.1964, "step": 4061500 }, { "epoch": 2.44, "learning_rate": 3.8988258792555806e-05, "loss": 1.2406, "step": 4062000 }, { "epoch": 2.44, "learning_rate": 3.898615882699524e-05, "loss": 1.208, "step": 4062500 }, { "epoch": 2.44, "learning_rate": 3.898405886143467e-05, "loss": 1.2105, "step": 4063000 }, { "epoch": 2.44, "learning_rate": 3.898195889587411e-05, "loss": 1.2196, "step": 4063500 }, { "epoch": 2.44, "learning_rate": 3.897985893031354e-05, "loss": 1.2089, "step": 4064000 }, { "epoch": 2.44, "learning_rate": 3.897775896475297e-05, "loss": 1.232, "step": 4064500 }, { "epoch": 2.44, "learning_rate": 3.897565899919241e-05, "loss": 1.2286, "step": 4065000 }, { "epoch": 2.44, "learning_rate": 3.8973559033631847e-05, "loss": 1.2042, "step": 4065500 }, { "epoch": 2.44, "learning_rate": 3.897146326800241e-05, "loss": 1.2156, "step": 4066000 }, { "epoch": 2.44, "learning_rate": 3.8969363302441834e-05, "loss": 1.2196, "step": 4066500 }, { "epoch": 2.44, "learning_rate": 3.8967263336881274e-05, "loss": 1.1966, "step": 4067000 }, { "epoch": 2.44, "learning_rate": 3.896516337132071e-05, "loss": 1.2194, "step": 4067500 }, { "epoch": 2.44, "learning_rate": 3.896306340576014e-05, "loss": 1.2137, "step": 4068000 }, { "epoch": 2.44, "learning_rate": 3.896096344019958e-05, "loss": 1.209, "step": 4068500 }, { "epoch": 2.44, "learning_rate": 3.8958863474639014e-05, "loss": 1.2157, "step": 4069000 }, { "epoch": 2.44, "learning_rate": 3.895676350907845e-05, "loss": 1.2316, "step": 4069500 }, { "epoch": 2.44, "learning_rate": 3.895466774344901e-05, "loss": 1.23, "step": 4070000 }, { "epoch": 2.44, "learning_rate": 3.895256777788844e-05, "loss": 1.2541, "step": 4070500 }, { "epoch": 2.44, "learning_rate": 3.8950467812327875e-05, "loss": 1.2285, "step": 4071000 }, { "epoch": 2.44, "learning_rate": 3.894837204669843e-05, "loss": 1.2466, "step": 4071500 }, { "epoch": 2.44, "learning_rate": 3.894627208113787e-05, "loss": 1.2337, "step": 4072000 }, { "epoch": 2.44, "learning_rate": 3.89441721155773e-05, "loss": 1.2221, "step": 4072500 }, { "epoch": 2.44, "learning_rate": 3.8942072150016736e-05, "loss": 1.2102, "step": 4073000 }, { "epoch": 2.44, "learning_rate": 3.8939972184456176e-05, "loss": 1.2122, "step": 4073500 }, { "epoch": 2.44, "learning_rate": 3.893787221889561e-05, "loss": 1.2219, "step": 4074000 }, { "epoch": 2.44, "learning_rate": 3.893577225333504e-05, "loss": 1.2086, "step": 4074500 }, { "epoch": 2.44, "learning_rate": 3.893367228777448e-05, "loss": 1.2127, "step": 4075000 }, { "epoch": 2.44, "learning_rate": 3.893157652214504e-05, "loss": 1.1915, "step": 4075500 }, { "epoch": 2.44, "learning_rate": 3.892947655658447e-05, "loss": 1.2305, "step": 4076000 }, { "epoch": 2.44, "learning_rate": 3.892737659102391e-05, "loss": 1.2378, "step": 4076500 }, { "epoch": 2.44, "learning_rate": 3.8925276625463344e-05, "loss": 1.2216, "step": 4077000 }, { "epoch": 2.44, "learning_rate": 3.89231808598339e-05, "loss": 1.1979, "step": 4077500 }, { "epoch": 2.44, "learning_rate": 3.892108089427333e-05, "loss": 1.2064, "step": 4078000 }, { "epoch": 2.45, "learning_rate": 3.891898092871277e-05, "loss": 1.2332, "step": 4078500 }, { "epoch": 2.45, "learning_rate": 3.8916885163083325e-05, "loss": 1.2236, "step": 4079000 }, { "epoch": 2.45, "learning_rate": 3.891478519752276e-05, "loss": 1.2301, "step": 4079500 }, { "epoch": 2.45, "learning_rate": 3.891268523196219e-05, "loss": 1.2126, "step": 4080000 }, { "epoch": 2.45, "learning_rate": 3.891058526640163e-05, "loss": 1.2311, "step": 4080500 }, { "epoch": 2.45, "learning_rate": 3.8908485300841065e-05, "loss": 1.2463, "step": 4081000 }, { "epoch": 2.45, "learning_rate": 3.89063853352805e-05, "loss": 1.2165, "step": 4081500 }, { "epoch": 2.45, "learning_rate": 3.890428536971994e-05, "loss": 1.2168, "step": 4082000 }, { "epoch": 2.45, "learning_rate": 3.890218540415937e-05, "loss": 1.2189, "step": 4082500 }, { "epoch": 2.45, "learning_rate": 3.8900089638529926e-05, "loss": 1.2384, "step": 4083000 }, { "epoch": 2.45, "learning_rate": 3.8897989672969366e-05, "loss": 1.2113, "step": 4083500 }, { "epoch": 2.45, "learning_rate": 3.88958897074088e-05, "loss": 1.2245, "step": 4084000 }, { "epoch": 2.45, "learning_rate": 3.889378974184823e-05, "loss": 1.1906, "step": 4084500 }, { "epoch": 2.45, "learning_rate": 3.8891689776287674e-05, "loss": 1.2169, "step": 4085000 }, { "epoch": 2.45, "learning_rate": 3.888958981072711e-05, "loss": 1.2146, "step": 4085500 }, { "epoch": 2.45, "learning_rate": 3.8887489845166534e-05, "loss": 1.2385, "step": 4086000 }, { "epoch": 2.45, "learning_rate": 3.8885389879605974e-05, "loss": 1.2286, "step": 4086500 }, { "epoch": 2.45, "learning_rate": 3.8883294113976534e-05, "loss": 1.2097, "step": 4087000 }, { "epoch": 2.45, "learning_rate": 3.888119834834709e-05, "loss": 1.2188, "step": 4087500 }, { "epoch": 2.45, "learning_rate": 3.887909838278652e-05, "loss": 1.2236, "step": 4088000 }, { "epoch": 2.45, "learning_rate": 3.8876998417225955e-05, "loss": 1.2404, "step": 4088500 }, { "epoch": 2.45, "learning_rate": 3.8874898451665395e-05, "loss": 1.1954, "step": 4089000 }, { "epoch": 2.45, "learning_rate": 3.887279848610483e-05, "loss": 1.2449, "step": 4089500 }, { "epoch": 2.45, "learning_rate": 3.887069852054426e-05, "loss": 1.2372, "step": 4090000 }, { "epoch": 2.45, "learning_rate": 3.88685985549837e-05, "loss": 1.2379, "step": 4090500 }, { "epoch": 2.45, "learning_rate": 3.8866502789354256e-05, "loss": 1.2335, "step": 4091000 }, { "epoch": 2.45, "learning_rate": 3.886440282379369e-05, "loss": 1.244, "step": 4091500 }, { "epoch": 2.45, "learning_rate": 3.886230285823313e-05, "loss": 1.2322, "step": 4092000 }, { "epoch": 2.45, "learning_rate": 3.886020289267256e-05, "loss": 1.2258, "step": 4092500 }, { "epoch": 2.45, "learning_rate": 3.8858102927111996e-05, "loss": 1.2326, "step": 4093000 }, { "epoch": 2.45, "learning_rate": 3.885600296155143e-05, "loss": 1.2122, "step": 4093500 }, { "epoch": 2.45, "learning_rate": 3.885390719592199e-05, "loss": 1.22, "step": 4094000 }, { "epoch": 2.45, "learning_rate": 3.8851807230361424e-05, "loss": 1.2125, "step": 4094500 }, { "epoch": 2.46, "learning_rate": 3.884970726480086e-05, "loss": 1.2113, "step": 4095000 }, { "epoch": 2.46, "learning_rate": 3.884760729924029e-05, "loss": 1.1873, "step": 4095500 }, { "epoch": 2.46, "learning_rate": 3.8845507333679724e-05, "loss": 1.2142, "step": 4096000 }, { "epoch": 2.46, "learning_rate": 3.8843411568050284e-05, "loss": 1.211, "step": 4096500 }, { "epoch": 2.46, "learning_rate": 3.884131160248972e-05, "loss": 1.2355, "step": 4097000 }, { "epoch": 2.46, "learning_rate": 3.883921163692916e-05, "loss": 1.2273, "step": 4097500 }, { "epoch": 2.46, "learning_rate": 3.8837111671368585e-05, "loss": 1.218, "step": 4098000 }, { "epoch": 2.46, "learning_rate": 3.8835015905739145e-05, "loss": 1.2227, "step": 4098500 }, { "epoch": 2.46, "learning_rate": 3.8832915940178585e-05, "loss": 1.1832, "step": 4099000 }, { "epoch": 2.46, "learning_rate": 3.883081597461802e-05, "loss": 1.2112, "step": 4099500 }, { "epoch": 2.46, "learning_rate": 3.882871600905745e-05, "loss": 1.2406, "step": 4100000 }, { "epoch": 2.46, "eval_loss": 1.1778119802474976, "eval_runtime": 1105.3868, "eval_samples_per_second": 476.503, "eval_steps_per_second": 79.417, "step": 4100000 }, { "epoch": 2.46, "learning_rate": 3.8826616043496886e-05, "loss": 1.2149, "step": 4100500 }, { "epoch": 2.46, "learning_rate": 3.8824520277867446e-05, "loss": 1.2216, "step": 4101000 }, { "epoch": 2.46, "learning_rate": 3.882242031230688e-05, "loss": 1.2, "step": 4101500 }, { "epoch": 2.46, "learning_rate": 3.882032034674631e-05, "loss": 1.2241, "step": 4102000 }, { "epoch": 2.46, "learning_rate": 3.881822038118575e-05, "loss": 1.2425, "step": 4102500 }, { "epoch": 2.46, "learning_rate": 3.881612041562518e-05, "loss": 1.2225, "step": 4103000 }, { "epoch": 2.46, "learning_rate": 3.881402464999574e-05, "loss": 1.1942, "step": 4103500 }, { "epoch": 2.46, "learning_rate": 3.8811924684435174e-05, "loss": 1.2254, "step": 4104000 }, { "epoch": 2.46, "learning_rate": 3.8809824718874614e-05, "loss": 1.2022, "step": 4104500 }, { "epoch": 2.46, "learning_rate": 3.880772475331404e-05, "loss": 1.2268, "step": 4105000 }, { "epoch": 2.46, "learning_rate": 3.880562478775348e-05, "loss": 1.2162, "step": 4105500 }, { "epoch": 2.46, "learning_rate": 3.8803524822192914e-05, "loss": 1.207, "step": 4106000 }, { "epoch": 2.46, "learning_rate": 3.8801429056563475e-05, "loss": 1.216, "step": 4106500 }, { "epoch": 2.46, "learning_rate": 3.879932909100291e-05, "loss": 1.2357, "step": 4107000 }, { "epoch": 2.46, "learning_rate": 3.879722912544234e-05, "loss": 1.1981, "step": 4107500 }, { "epoch": 2.46, "learning_rate": 3.8795129159881775e-05, "loss": 1.2308, "step": 4108000 }, { "epoch": 2.46, "learning_rate": 3.879302919432121e-05, "loss": 1.2108, "step": 4108500 }, { "epoch": 2.46, "learning_rate": 3.879092922876065e-05, "loss": 1.205, "step": 4109000 }, { "epoch": 2.46, "learning_rate": 3.878882926320008e-05, "loss": 1.2105, "step": 4109500 }, { "epoch": 2.46, "learning_rate": 3.8786729297639516e-05, "loss": 1.2176, "step": 4110000 }, { "epoch": 2.46, "learning_rate": 3.878463353201007e-05, "loss": 1.2359, "step": 4110500 }, { "epoch": 2.46, "learning_rate": 3.878253356644951e-05, "loss": 1.2142, "step": 4111000 }, { "epoch": 2.47, "learning_rate": 3.878043360088894e-05, "loss": 1.2104, "step": 4111500 }, { "epoch": 2.47, "learning_rate": 3.8778333635328376e-05, "loss": 1.2116, "step": 4112000 }, { "epoch": 2.47, "learning_rate": 3.877623786969894e-05, "loss": 1.2446, "step": 4112500 }, { "epoch": 2.47, "learning_rate": 3.877413790413837e-05, "loss": 1.2482, "step": 4113000 }, { "epoch": 2.47, "learning_rate": 3.8772037938577804e-05, "loss": 1.2035, "step": 4113500 }, { "epoch": 2.47, "learning_rate": 3.8769937973017244e-05, "loss": 1.195, "step": 4114000 }, { "epoch": 2.47, "learning_rate": 3.876783800745668e-05, "loss": 1.2472, "step": 4114500 }, { "epoch": 2.47, "learning_rate": 3.876574224182723e-05, "loss": 1.2413, "step": 4115000 }, { "epoch": 2.47, "learning_rate": 3.8763642276266664e-05, "loss": 1.2114, "step": 4115500 }, { "epoch": 2.47, "learning_rate": 3.8761542310706105e-05, "loss": 1.2233, "step": 4116000 }, { "epoch": 2.47, "learning_rate": 3.875944234514554e-05, "loss": 1.1913, "step": 4116500 }, { "epoch": 2.47, "learning_rate": 3.875734657951609e-05, "loss": 1.2259, "step": 4117000 }, { "epoch": 2.47, "learning_rate": 3.8755246613955525e-05, "loss": 1.2384, "step": 4117500 }, { "epoch": 2.47, "learning_rate": 3.8753146648394965e-05, "loss": 1.2053, "step": 4118000 }, { "epoch": 2.47, "learning_rate": 3.87510466828344e-05, "loss": 1.2397, "step": 4118500 }, { "epoch": 2.47, "learning_rate": 3.874895091720496e-05, "loss": 1.2195, "step": 4119000 }, { "epoch": 2.47, "learning_rate": 3.874685095164439e-05, "loss": 1.2145, "step": 4119500 }, { "epoch": 2.47, "learning_rate": 3.8744750986083826e-05, "loss": 1.2297, "step": 4120000 }, { "epoch": 2.47, "learning_rate": 3.8742655220454386e-05, "loss": 1.1953, "step": 4120500 }, { "epoch": 2.47, "learning_rate": 3.874055525489382e-05, "loss": 1.1861, "step": 4121000 }, { "epoch": 2.47, "learning_rate": 3.873845528933326e-05, "loss": 1.225, "step": 4121500 }, { "epoch": 2.47, "learning_rate": 3.873635532377269e-05, "loss": 1.2175, "step": 4122000 }, { "epoch": 2.47, "learning_rate": 3.873425955814325e-05, "loss": 1.2259, "step": 4122500 }, { "epoch": 2.47, "learning_rate": 3.873215959258268e-05, "loss": 1.2244, "step": 4123000 }, { "epoch": 2.47, "learning_rate": 3.873005962702212e-05, "loss": 1.1998, "step": 4123500 }, { "epoch": 2.47, "learning_rate": 3.8727959661461554e-05, "loss": 1.232, "step": 4124000 }, { "epoch": 2.47, "learning_rate": 3.872585969590098e-05, "loss": 1.2588, "step": 4124500 }, { "epoch": 2.47, "learning_rate": 3.872375973034042e-05, "loss": 1.2015, "step": 4125000 }, { "epoch": 2.47, "learning_rate": 3.8721659764779855e-05, "loss": 1.2123, "step": 4125500 }, { "epoch": 2.47, "learning_rate": 3.871955979921929e-05, "loss": 1.1864, "step": 4126000 }, { "epoch": 2.47, "learning_rate": 3.871745983365873e-05, "loss": 1.2309, "step": 4126500 }, { "epoch": 2.47, "learning_rate": 3.871535986809816e-05, "loss": 1.2245, "step": 4127000 }, { "epoch": 2.47, "learning_rate": 3.8713264102468715e-05, "loss": 1.2114, "step": 4127500 }, { "epoch": 2.47, "learning_rate": 3.8711164136908156e-05, "loss": 1.2328, "step": 4128000 }, { "epoch": 2.48, "learning_rate": 3.870906417134759e-05, "loss": 1.2302, "step": 4128500 }, { "epoch": 2.48, "learning_rate": 3.870696420578702e-05, "loss": 1.2362, "step": 4129000 }, { "epoch": 2.48, "learning_rate": 3.870486424022646e-05, "loss": 1.2266, "step": 4129500 }, { "epoch": 2.48, "learning_rate": 3.8702768474597016e-05, "loss": 1.2398, "step": 4130000 }, { "epoch": 2.48, "learning_rate": 3.870066850903645e-05, "loss": 1.2278, "step": 4130500 }, { "epoch": 2.48, "learning_rate": 3.869856854347588e-05, "loss": 1.202, "step": 4131000 }, { "epoch": 2.48, "learning_rate": 3.8696468577915324e-05, "loss": 1.1953, "step": 4131500 }, { "epoch": 2.48, "learning_rate": 3.869436861235476e-05, "loss": 1.2555, "step": 4132000 }, { "epoch": 2.48, "learning_rate": 3.869226864679419e-05, "loss": 1.2192, "step": 4132500 }, { "epoch": 2.48, "learning_rate": 3.869016868123363e-05, "loss": 1.1829, "step": 4133000 }, { "epoch": 2.48, "learning_rate": 3.8688072915604184e-05, "loss": 1.2021, "step": 4133500 }, { "epoch": 2.48, "learning_rate": 3.868597295004362e-05, "loss": 1.1873, "step": 4134000 }, { "epoch": 2.48, "learning_rate": 3.868387298448305e-05, "loss": 1.2125, "step": 4134500 }, { "epoch": 2.48, "learning_rate": 3.868177301892249e-05, "loss": 1.2164, "step": 4135000 }, { "epoch": 2.48, "learning_rate": 3.8679673053361925e-05, "loss": 1.2382, "step": 4135500 }, { "epoch": 2.48, "learning_rate": 3.867757728773248e-05, "loss": 1.2176, "step": 4136000 }, { "epoch": 2.48, "learning_rate": 3.867547732217192e-05, "loss": 1.24, "step": 4136500 }, { "epoch": 2.48, "learning_rate": 3.867337735661135e-05, "loss": 1.2383, "step": 4137000 }, { "epoch": 2.48, "learning_rate": 3.8671277391050786e-05, "loss": 1.2185, "step": 4137500 }, { "epoch": 2.48, "learning_rate": 3.8669177425490226e-05, "loss": 1.2171, "step": 4138000 }, { "epoch": 2.48, "learning_rate": 3.866708165986078e-05, "loss": 1.2397, "step": 4138500 }, { "epoch": 2.48, "learning_rate": 3.866498169430021e-05, "loss": 1.1875, "step": 4139000 }, { "epoch": 2.48, "learning_rate": 3.8662881728739646e-05, "loss": 1.2151, "step": 4139500 }, { "epoch": 2.48, "learning_rate": 3.8660781763179087e-05, "loss": 1.2395, "step": 4140000 }, { "epoch": 2.48, "learning_rate": 3.865868179761852e-05, "loss": 1.2268, "step": 4140500 }, { "epoch": 2.48, "learning_rate": 3.8656586031989074e-05, "loss": 1.2022, "step": 4141000 }, { "epoch": 2.48, "learning_rate": 3.8654486066428514e-05, "loss": 1.2351, "step": 4141500 }, { "epoch": 2.48, "learning_rate": 3.865238610086795e-05, "loss": 1.266, "step": 4142000 }, { "epoch": 2.48, "learning_rate": 3.865028613530738e-05, "loss": 1.2249, "step": 4142500 }, { "epoch": 2.48, "learning_rate": 3.864818616974682e-05, "loss": 1.252, "step": 4143000 }, { "epoch": 2.48, "learning_rate": 3.8646090404117375e-05, "loss": 1.2412, "step": 4143500 }, { "epoch": 2.48, "learning_rate": 3.864399043855681e-05, "loss": 1.2271, "step": 4144000 }, { "epoch": 2.48, "learning_rate": 3.864189047299624e-05, "loss": 1.2278, "step": 4144500 }, { "epoch": 2.49, "learning_rate": 3.863979050743568e-05, "loss": 1.2204, "step": 4145000 }, { "epoch": 2.49, "learning_rate": 3.8637694741806235e-05, "loss": 1.2233, "step": 4145500 }, { "epoch": 2.49, "learning_rate": 3.863559477624567e-05, "loss": 1.2159, "step": 4146000 }, { "epoch": 2.49, "learning_rate": 3.86334948106851e-05, "loss": 1.2006, "step": 4146500 }, { "epoch": 2.49, "learning_rate": 3.863139484512454e-05, "loss": 1.2104, "step": 4147000 }, { "epoch": 2.49, "learning_rate": 3.8629294879563976e-05, "loss": 1.2443, "step": 4147500 }, { "epoch": 2.49, "learning_rate": 3.862719491400341e-05, "loss": 1.1978, "step": 4148000 }, { "epoch": 2.49, "learning_rate": 3.862509914837397e-05, "loss": 1.217, "step": 4148500 }, { "epoch": 2.49, "learning_rate": 3.86229991828134e-05, "loss": 1.2021, "step": 4149000 }, { "epoch": 2.49, "learning_rate": 3.862089921725284e-05, "loss": 1.1947, "step": 4149500 }, { "epoch": 2.49, "learning_rate": 3.861879925169228e-05, "loss": 1.2162, "step": 4150000 }, { "epoch": 2.49, "learning_rate": 3.861669928613171e-05, "loss": 1.2673, "step": 4150500 }, { "epoch": 2.49, "learning_rate": 3.8614603520502264e-05, "loss": 1.2133, "step": 4151000 }, { "epoch": 2.49, "learning_rate": 3.86125035549417e-05, "loss": 1.2416, "step": 4151500 }, { "epoch": 2.49, "learning_rate": 3.861040358938114e-05, "loss": 1.2186, "step": 4152000 }, { "epoch": 2.49, "learning_rate": 3.860830362382057e-05, "loss": 1.2434, "step": 4152500 }, { "epoch": 2.49, "learning_rate": 3.8606207858191125e-05, "loss": 1.2165, "step": 4153000 }, { "epoch": 2.49, "learning_rate": 3.860410789263056e-05, "loss": 1.2181, "step": 4153500 }, { "epoch": 2.49, "learning_rate": 3.860201212700111e-05, "loss": 1.2138, "step": 4154000 }, { "epoch": 2.49, "learning_rate": 3.859991216144055e-05, "loss": 1.2212, "step": 4154500 }, { "epoch": 2.49, "learning_rate": 3.8597812195879985e-05, "loss": 1.1922, "step": 4155000 }, { "epoch": 2.49, "learning_rate": 3.8595712230319426e-05, "loss": 1.2299, "step": 4155500 }, { "epoch": 2.49, "learning_rate": 3.859361226475886e-05, "loss": 1.2106, "step": 4156000 }, { "epoch": 2.49, "learning_rate": 3.859151229919829e-05, "loss": 1.2559, "step": 4156500 }, { "epoch": 2.49, "learning_rate": 3.858941233363773e-05, "loss": 1.2088, "step": 4157000 }, { "epoch": 2.49, "learning_rate": 3.8587312368077166e-05, "loss": 1.221, "step": 4157500 }, { "epoch": 2.49, "learning_rate": 3.85852124025166e-05, "loss": 1.2206, "step": 4158000 }, { "epoch": 2.49, "learning_rate": 3.858311243695603e-05, "loss": 1.2181, "step": 4158500 }, { "epoch": 2.49, "learning_rate": 3.8581012471395467e-05, "loss": 1.2389, "step": 4159000 }, { "epoch": 2.49, "learning_rate": 3.85789125058349e-05, "loss": 1.2372, "step": 4159500 }, { "epoch": 2.49, "learning_rate": 3.857681674020546e-05, "loss": 1.2197, "step": 4160000 }, { "epoch": 2.49, "learning_rate": 3.85747167746449e-05, "loss": 1.2104, "step": 4160500 }, { "epoch": 2.49, "learning_rate": 3.857261680908433e-05, "loss": 1.2269, "step": 4161000 }, { "epoch": 2.49, "learning_rate": 3.857052104345489e-05, "loss": 1.2266, "step": 4161500 }, { "epoch": 2.5, "learning_rate": 3.856842107789432e-05, "loss": 1.2125, "step": 4162000 }, { "epoch": 2.5, "learning_rate": 3.856632111233376e-05, "loss": 1.2356, "step": 4162500 }, { "epoch": 2.5, "learning_rate": 3.856422114677319e-05, "loss": 1.2055, "step": 4163000 }, { "epoch": 2.5, "learning_rate": 3.856212118121263e-05, "loss": 1.2046, "step": 4163500 }, { "epoch": 2.5, "learning_rate": 3.856002121565206e-05, "loss": 1.2099, "step": 4164000 }, { "epoch": 2.5, "learning_rate": 3.855792545002262e-05, "loss": 1.2275, "step": 4164500 }, { "epoch": 2.5, "learning_rate": 3.8555825484462056e-05, "loss": 1.212, "step": 4165000 }, { "epoch": 2.5, "learning_rate": 3.855372551890149e-05, "loss": 1.2234, "step": 4165500 }, { "epoch": 2.5, "learning_rate": 3.855162555334092e-05, "loss": 1.2288, "step": 4166000 }, { "epoch": 2.5, "learning_rate": 3.8549525587780356e-05, "loss": 1.2324, "step": 4166500 }, { "epoch": 2.5, "learning_rate": 3.8547429822150916e-05, "loss": 1.2201, "step": 4167000 }, { "epoch": 2.5, "learning_rate": 3.8545329856590357e-05, "loss": 1.2302, "step": 4167500 }, { "epoch": 2.5, "learning_rate": 3.854322989102978e-05, "loss": 1.2271, "step": 4168000 }, { "epoch": 2.5, "learning_rate": 3.854112992546922e-05, "loss": 1.2321, "step": 4168500 }, { "epoch": 2.5, "learning_rate": 3.853902995990866e-05, "loss": 1.2301, "step": 4169000 }, { "epoch": 2.5, "learning_rate": 3.853693419427922e-05, "loss": 1.2289, "step": 4169500 }, { "epoch": 2.5, "learning_rate": 3.853483842864977e-05, "loss": 1.211, "step": 4170000 }, { "epoch": 2.5, "learning_rate": 3.8532738463089204e-05, "loss": 1.2278, "step": 4170500 }, { "epoch": 2.5, "learning_rate": 3.8530638497528645e-05, "loss": 1.2107, "step": 4171000 }, { "epoch": 2.5, "learning_rate": 3.852853853196808e-05, "loss": 1.229, "step": 4171500 }, { "epoch": 2.5, "learning_rate": 3.852643856640751e-05, "loss": 1.2183, "step": 4172000 }, { "epoch": 2.5, "learning_rate": 3.8524338600846945e-05, "loss": 1.1871, "step": 4172500 }, { "epoch": 2.5, "learning_rate": 3.852223863528638e-05, "loss": 1.2202, "step": 4173000 }, { "epoch": 2.5, "learning_rate": 3.852013866972581e-05, "loss": 1.2457, "step": 4173500 }, { "epoch": 2.5, "learning_rate": 3.851803870416525e-05, "loss": 1.2156, "step": 4174000 }, { "epoch": 2.5, "learning_rate": 3.851594293853581e-05, "loss": 1.1957, "step": 4174500 }, { "epoch": 2.5, "learning_rate": 3.851384297297524e-05, "loss": 1.2108, "step": 4175000 }, { "epoch": 2.5, "learning_rate": 3.851174300741467e-05, "loss": 1.206, "step": 4175500 }, { "epoch": 2.5, "learning_rate": 3.850964304185411e-05, "loss": 1.1935, "step": 4176000 }, { "epoch": 2.5, "learning_rate": 3.8507543076293546e-05, "loss": 1.2108, "step": 4176500 }, { "epoch": 2.5, "learning_rate": 3.850544311073298e-05, "loss": 1.2164, "step": 4177000 }, { "epoch": 2.5, "learning_rate": 3.850334734510354e-05, "loss": 1.2322, "step": 4177500 }, { "epoch": 2.5, "learning_rate": 3.8501247379542973e-05, "loss": 1.2391, "step": 4178000 }, { "epoch": 2.51, "learning_rate": 3.849914741398241e-05, "loss": 1.2102, "step": 4178500 }, { "epoch": 2.51, "learning_rate": 3.849704744842185e-05, "loss": 1.2259, "step": 4179000 }, { "epoch": 2.51, "learning_rate": 3.849494748286128e-05, "loss": 1.2195, "step": 4179500 }, { "epoch": 2.51, "learning_rate": 3.8492847517300714e-05, "loss": 1.2088, "step": 4180000 }, { "epoch": 2.51, "learning_rate": 3.8490747551740154e-05, "loss": 1.2214, "step": 4180500 }, { "epoch": 2.51, "learning_rate": 3.848864758617959e-05, "loss": 1.2155, "step": 4181000 }, { "epoch": 2.51, "learning_rate": 3.848655182055014e-05, "loss": 1.2015, "step": 4181500 }, { "epoch": 2.51, "learning_rate": 3.8484451854989575e-05, "loss": 1.1976, "step": 4182000 }, { "epoch": 2.51, "learning_rate": 3.8482351889429015e-05, "loss": 1.2003, "step": 4182500 }, { "epoch": 2.51, "learning_rate": 3.848025192386845e-05, "loss": 1.2114, "step": 4183000 }, { "epoch": 2.51, "learning_rate": 3.8478156158239e-05, "loss": 1.2435, "step": 4183500 }, { "epoch": 2.51, "learning_rate": 3.8476056192678436e-05, "loss": 1.1696, "step": 4184000 }, { "epoch": 2.51, "learning_rate": 3.8473956227117876e-05, "loss": 1.2459, "step": 4184500 }, { "epoch": 2.51, "learning_rate": 3.847185626155731e-05, "loss": 1.2216, "step": 4185000 }, { "epoch": 2.51, "learning_rate": 3.846976049592786e-05, "loss": 1.2045, "step": 4185500 }, { "epoch": 2.51, "learning_rate": 3.84676605303673e-05, "loss": 1.2306, "step": 4186000 }, { "epoch": 2.51, "learning_rate": 3.8465560564806737e-05, "loss": 1.2552, "step": 4186500 }, { "epoch": 2.51, "learning_rate": 3.846346479917729e-05, "loss": 1.2404, "step": 4187000 }, { "epoch": 2.51, "learning_rate": 3.8461364833616724e-05, "loss": 1.2195, "step": 4187500 }, { "epoch": 2.51, "learning_rate": 3.8459264868056164e-05, "loss": 1.2228, "step": 4188000 }, { "epoch": 2.51, "learning_rate": 3.84571649024956e-05, "loss": 1.2271, "step": 4188500 }, { "epoch": 2.51, "learning_rate": 3.845506493693503e-05, "loss": 1.2224, "step": 4189000 }, { "epoch": 2.51, "learning_rate": 3.845296497137447e-05, "loss": 1.2429, "step": 4189500 }, { "epoch": 2.51, "learning_rate": 3.8450865005813904e-05, "loss": 1.2272, "step": 4190000 }, { "epoch": 2.51, "learning_rate": 3.844876504025334e-05, "loss": 1.2059, "step": 4190500 }, { "epoch": 2.51, "learning_rate": 3.844666927462389e-05, "loss": 1.2168, "step": 4191000 }, { "epoch": 2.51, "learning_rate": 3.844456930906333e-05, "loss": 1.2083, "step": 4191500 }, { "epoch": 2.51, "learning_rate": 3.8442469343502765e-05, "loss": 1.1967, "step": 4192000 }, { "epoch": 2.51, "learning_rate": 3.84403693779422e-05, "loss": 1.2133, "step": 4192500 }, { "epoch": 2.51, "learning_rate": 3.843827781224388e-05, "loss": 1.2022, "step": 4193000 }, { "epoch": 2.51, "learning_rate": 3.843617784668332e-05, "loss": 1.2183, "step": 4193500 }, { "epoch": 2.51, "learning_rate": 3.8434077881122746e-05, "loss": 1.2195, "step": 4194000 }, { "epoch": 2.51, "learning_rate": 3.843197791556218e-05, "loss": 1.2081, "step": 4194500 }, { "epoch": 2.52, "learning_rate": 3.842987795000162e-05, "loss": 1.2311, "step": 4195000 }, { "epoch": 2.52, "learning_rate": 3.842777798444105e-05, "loss": 1.1919, "step": 4195500 }, { "epoch": 2.52, "learning_rate": 3.8425678018880487e-05, "loss": 1.2323, "step": 4196000 }, { "epoch": 2.52, "learning_rate": 3.842357805331993e-05, "loss": 1.184, "step": 4196500 }, { "epoch": 2.52, "learning_rate": 3.842147808775936e-05, "loss": 1.196, "step": 4197000 }, { "epoch": 2.52, "learning_rate": 3.8419378122198794e-05, "loss": 1.2, "step": 4197500 }, { "epoch": 2.52, "learning_rate": 3.8417278156638234e-05, "loss": 1.2245, "step": 4198000 }, { "epoch": 2.52, "learning_rate": 3.841517819107767e-05, "loss": 1.2268, "step": 4198500 }, { "epoch": 2.52, "learning_rate": 3.84130782255171e-05, "loss": 1.1826, "step": 4199000 }, { "epoch": 2.52, "learning_rate": 3.8410982459887654e-05, "loss": 1.218, "step": 4199500 }, { "epoch": 2.52, "learning_rate": 3.8408882494327095e-05, "loss": 1.1937, "step": 4200000 }, { "epoch": 2.52, "eval_loss": 1.1766777038574219, "eval_runtime": 1098.718, "eval_samples_per_second": 479.395, "eval_steps_per_second": 79.899, "step": 4200000 }, { "epoch": 2.52, "learning_rate": 3.840678252876653e-05, "loss": 1.2039, "step": 4200500 }, { "epoch": 2.52, "learning_rate": 3.840468256320597e-05, "loss": 1.2128, "step": 4201000 }, { "epoch": 2.52, "learning_rate": 3.84025825976454e-05, "loss": 1.1999, "step": 4201500 }, { "epoch": 2.52, "learning_rate": 3.8400486832015955e-05, "loss": 1.1987, "step": 4202000 }, { "epoch": 2.52, "learning_rate": 3.839838686645539e-05, "loss": 1.2, "step": 4202500 }, { "epoch": 2.52, "learning_rate": 3.839628690089483e-05, "loss": 1.22, "step": 4203000 }, { "epoch": 2.52, "learning_rate": 3.839418693533426e-05, "loss": 1.2402, "step": 4203500 }, { "epoch": 2.52, "learning_rate": 3.8392086969773696e-05, "loss": 1.2128, "step": 4204000 }, { "epoch": 2.52, "learning_rate": 3.838999120414425e-05, "loss": 1.1967, "step": 4204500 }, { "epoch": 2.52, "learning_rate": 3.838789123858369e-05, "loss": 1.2259, "step": 4205000 }, { "epoch": 2.52, "learning_rate": 3.838579127302312e-05, "loss": 1.205, "step": 4205500 }, { "epoch": 2.52, "learning_rate": 3.838369130746256e-05, "loss": 1.2285, "step": 4206000 }, { "epoch": 2.52, "learning_rate": 3.8381591341902e-05, "loss": 1.1938, "step": 4206500 }, { "epoch": 2.52, "learning_rate": 3.8379491376341424e-05, "loss": 1.2086, "step": 4207000 }, { "epoch": 2.52, "learning_rate": 3.8377395610711984e-05, "loss": 1.2319, "step": 4207500 }, { "epoch": 2.52, "learning_rate": 3.8375295645151424e-05, "loss": 1.2402, "step": 4208000 }, { "epoch": 2.52, "learning_rate": 3.837319567959086e-05, "loss": 1.2184, "step": 4208500 }, { "epoch": 2.52, "learning_rate": 3.8371095714030284e-05, "loss": 1.2045, "step": 4209000 }, { "epoch": 2.52, "learning_rate": 3.8368995748469725e-05, "loss": 1.2128, "step": 4209500 }, { "epoch": 2.52, "learning_rate": 3.836689578290916e-05, "loss": 1.2232, "step": 4210000 }, { "epoch": 2.52, "learning_rate": 3.836480001727972e-05, "loss": 1.2138, "step": 4210500 }, { "epoch": 2.52, "learning_rate": 3.836270005171915e-05, "loss": 1.2317, "step": 4211000 }, { "epoch": 2.52, "learning_rate": 3.8360604286089705e-05, "loss": 1.1962, "step": 4211500 }, { "epoch": 2.53, "learning_rate": 3.8358504320529146e-05, "loss": 1.2018, "step": 4212000 }, { "epoch": 2.53, "learning_rate": 3.835640435496858e-05, "loss": 1.23, "step": 4212500 }, { "epoch": 2.53, "learning_rate": 3.835430438940801e-05, "loss": 1.2023, "step": 4213000 }, { "epoch": 2.53, "learning_rate": 3.835220862377857e-05, "loss": 1.196, "step": 4213500 }, { "epoch": 2.53, "learning_rate": 3.8350108658218006e-05, "loss": 1.2055, "step": 4214000 }, { "epoch": 2.53, "learning_rate": 3.834800869265744e-05, "loss": 1.2194, "step": 4214500 }, { "epoch": 2.53, "learning_rate": 3.834590872709688e-05, "loss": 1.2162, "step": 4215000 }, { "epoch": 2.53, "learning_rate": 3.8343808761536314e-05, "loss": 1.2344, "step": 4215500 }, { "epoch": 2.53, "learning_rate": 3.834170879597575e-05, "loss": 1.229, "step": 4216000 }, { "epoch": 2.53, "learning_rate": 3.833960883041518e-05, "loss": 1.2174, "step": 4216500 }, { "epoch": 2.53, "learning_rate": 3.8337508864854614e-05, "loss": 1.225, "step": 4217000 }, { "epoch": 2.53, "learning_rate": 3.833540889929405e-05, "loss": 1.1931, "step": 4217500 }, { "epoch": 2.53, "learning_rate": 3.833331313366461e-05, "loss": 1.2053, "step": 4218000 }, { "epoch": 2.53, "learning_rate": 3.833121316810404e-05, "loss": 1.2225, "step": 4218500 }, { "epoch": 2.53, "learning_rate": 3.8329113202543475e-05, "loss": 1.2201, "step": 4219000 }, { "epoch": 2.53, "learning_rate": 3.832701323698291e-05, "loss": 1.2359, "step": 4219500 }, { "epoch": 2.53, "learning_rate": 3.832491327142235e-05, "loss": 1.1967, "step": 4220000 }, { "epoch": 2.53, "learning_rate": 3.832281330586178e-05, "loss": 1.1962, "step": 4220500 }, { "epoch": 2.53, "learning_rate": 3.8320717540232335e-05, "loss": 1.1961, "step": 4221000 }, { "epoch": 2.53, "learning_rate": 3.8318617574671776e-05, "loss": 1.2292, "step": 4221500 }, { "epoch": 2.53, "learning_rate": 3.831651760911121e-05, "loss": 1.2014, "step": 4222000 }, { "epoch": 2.53, "learning_rate": 3.831441764355064e-05, "loss": 1.1928, "step": 4222500 }, { "epoch": 2.53, "learning_rate": 3.831231767799008e-05, "loss": 1.2392, "step": 4223000 }, { "epoch": 2.53, "learning_rate": 3.8310217712429516e-05, "loss": 1.201, "step": 4223500 }, { "epoch": 2.53, "learning_rate": 3.830811774686895e-05, "loss": 1.2009, "step": 4224000 }, { "epoch": 2.53, "learning_rate": 3.83060219812395e-05, "loss": 1.2182, "step": 4224500 }, { "epoch": 2.53, "learning_rate": 3.8303922015678944e-05, "loss": 1.231, "step": 4225000 }, { "epoch": 2.53, "learning_rate": 3.830182205011838e-05, "loss": 1.2026, "step": 4225500 }, { "epoch": 2.53, "learning_rate": 3.829972208455781e-05, "loss": 1.2129, "step": 4226000 }, { "epoch": 2.53, "learning_rate": 3.829762211899725e-05, "loss": 1.2392, "step": 4226500 }, { "epoch": 2.53, "learning_rate": 3.8295526353367804e-05, "loss": 1.2036, "step": 4227000 }, { "epoch": 2.53, "learning_rate": 3.829342638780724e-05, "loss": 1.1836, "step": 4227500 }, { "epoch": 2.53, "learning_rate": 3.829132642224667e-05, "loss": 1.2339, "step": 4228000 }, { "epoch": 2.54, "learning_rate": 3.828922645668611e-05, "loss": 1.1989, "step": 4228500 }, { "epoch": 2.54, "learning_rate": 3.8287126491125545e-05, "loss": 1.2226, "step": 4229000 }, { "epoch": 2.54, "learning_rate": 3.828502652556498e-05, "loss": 1.2049, "step": 4229500 }, { "epoch": 2.54, "learning_rate": 3.828292656000442e-05, "loss": 1.2163, "step": 4230000 }, { "epoch": 2.54, "learning_rate": 3.828083079437497e-05, "loss": 1.2093, "step": 4230500 }, { "epoch": 2.54, "learning_rate": 3.8278730828814406e-05, "loss": 1.2233, "step": 4231000 }, { "epoch": 2.54, "learning_rate": 3.8276630863253846e-05, "loss": 1.1856, "step": 4231500 }, { "epoch": 2.54, "learning_rate": 3.827453089769328e-05, "loss": 1.222, "step": 4232000 }, { "epoch": 2.54, "learning_rate": 3.827243093213271e-05, "loss": 1.2037, "step": 4232500 }, { "epoch": 2.54, "learning_rate": 3.827033096657215e-05, "loss": 1.2209, "step": 4233000 }, { "epoch": 2.54, "learning_rate": 3.8268235200942707e-05, "loss": 1.2315, "step": 4233500 }, { "epoch": 2.54, "learning_rate": 3.826613523538214e-05, "loss": 1.2248, "step": 4234000 }, { "epoch": 2.54, "learning_rate": 3.8264035269821574e-05, "loss": 1.2221, "step": 4234500 }, { "epoch": 2.54, "learning_rate": 3.8261935304261014e-05, "loss": 1.1987, "step": 4235000 }, { "epoch": 2.54, "learning_rate": 3.825983533870045e-05, "loss": 1.1886, "step": 4235500 }, { "epoch": 2.54, "learning_rate": 3.8257735373139874e-05, "loss": 1.2181, "step": 4236000 }, { "epoch": 2.54, "learning_rate": 3.8255635407579314e-05, "loss": 1.2278, "step": 4236500 }, { "epoch": 2.54, "learning_rate": 3.825353544201875e-05, "loss": 1.2162, "step": 4237000 }, { "epoch": 2.54, "learning_rate": 3.825143967638931e-05, "loss": 1.1972, "step": 4237500 }, { "epoch": 2.54, "learning_rate": 3.824933971082874e-05, "loss": 1.2334, "step": 4238000 }, { "epoch": 2.54, "learning_rate": 3.8247239745268175e-05, "loss": 1.1983, "step": 4238500 }, { "epoch": 2.54, "learning_rate": 3.824513977970761e-05, "loss": 1.1974, "step": 4239000 }, { "epoch": 2.54, "learning_rate": 3.824304401407817e-05, "loss": 1.2372, "step": 4239500 }, { "epoch": 2.54, "learning_rate": 3.824094824844872e-05, "loss": 1.1997, "step": 4240000 }, { "epoch": 2.54, "learning_rate": 3.823884828288816e-05, "loss": 1.2289, "step": 4240500 }, { "epoch": 2.54, "learning_rate": 3.8236748317327596e-05, "loss": 1.2322, "step": 4241000 }, { "epoch": 2.54, "learning_rate": 3.823464835176703e-05, "loss": 1.2088, "step": 4241500 }, { "epoch": 2.54, "learning_rate": 3.823254838620647e-05, "loss": 1.1991, "step": 4242000 }, { "epoch": 2.54, "learning_rate": 3.82304484206459e-05, "loss": 1.2232, "step": 4242500 }, { "epoch": 2.54, "learning_rate": 3.8228348455085337e-05, "loss": 1.23, "step": 4243000 }, { "epoch": 2.54, "learning_rate": 3.822625268945589e-05, "loss": 1.2124, "step": 4243500 }, { "epoch": 2.54, "learning_rate": 3.822415272389533e-05, "loss": 1.2083, "step": 4244000 }, { "epoch": 2.54, "learning_rate": 3.8222052758334764e-05, "loss": 1.2382, "step": 4244500 }, { "epoch": 2.55, "learning_rate": 3.82199527927742e-05, "loss": 1.2191, "step": 4245000 }, { "epoch": 2.55, "learning_rate": 3.821785282721363e-05, "loss": 1.2124, "step": 4245500 }, { "epoch": 2.55, "learning_rate": 3.8215752861653064e-05, "loss": 1.2305, "step": 4246000 }, { "epoch": 2.55, "learning_rate": 3.8213657096023625e-05, "loss": 1.209, "step": 4246500 }, { "epoch": 2.55, "learning_rate": 3.8211557130463065e-05, "loss": 1.1887, "step": 4247000 }, { "epoch": 2.55, "learning_rate": 3.82094571649025e-05, "loss": 1.22, "step": 4247500 }, { "epoch": 2.55, "learning_rate": 3.820736139927305e-05, "loss": 1.2098, "step": 4248000 }, { "epoch": 2.55, "learning_rate": 3.8205261433712485e-05, "loss": 1.2315, "step": 4248500 }, { "epoch": 2.55, "learning_rate": 3.8203161468151926e-05, "loss": 1.199, "step": 4249000 }, { "epoch": 2.55, "learning_rate": 3.820106150259136e-05, "loss": 1.2178, "step": 4249500 }, { "epoch": 2.55, "learning_rate": 3.819896153703079e-05, "loss": 1.1936, "step": 4250000 }, { "epoch": 2.55, "learning_rate": 3.8196861571470226e-05, "loss": 1.1868, "step": 4250500 }, { "epoch": 2.55, "learning_rate": 3.819476160590966e-05, "loss": 1.2008, "step": 4251000 }, { "epoch": 2.55, "learning_rate": 3.819266164034909e-05, "loss": 1.2402, "step": 4251500 }, { "epoch": 2.55, "learning_rate": 3.819056167478853e-05, "loss": 1.2207, "step": 4252000 }, { "epoch": 2.55, "learning_rate": 3.8188461709227966e-05, "loss": 1.1873, "step": 4252500 }, { "epoch": 2.55, "learning_rate": 3.81863617436674e-05, "loss": 1.2003, "step": 4253000 }, { "epoch": 2.55, "learning_rate": 3.818426177810684e-05, "loss": 1.2167, "step": 4253500 }, { "epoch": 2.55, "learning_rate": 3.8182166012477394e-05, "loss": 1.1948, "step": 4254000 }, { "epoch": 2.55, "learning_rate": 3.818006604691683e-05, "loss": 1.2249, "step": 4254500 }, { "epoch": 2.55, "learning_rate": 3.817796608135627e-05, "loss": 1.2331, "step": 4255000 }, { "epoch": 2.55, "learning_rate": 3.81758661157957e-05, "loss": 1.2065, "step": 4255500 }, { "epoch": 2.55, "learning_rate": 3.8173766150235134e-05, "loss": 1.2026, "step": 4256000 }, { "epoch": 2.55, "learning_rate": 3.817167038460569e-05, "loss": 1.1534, "step": 4256500 }, { "epoch": 2.55, "learning_rate": 3.816957041904513e-05, "loss": 1.2397, "step": 4257000 }, { "epoch": 2.55, "learning_rate": 3.816747045348456e-05, "loss": 1.1747, "step": 4257500 }, { "epoch": 2.55, "learning_rate": 3.8165370487923995e-05, "loss": 1.2296, "step": 4258000 }, { "epoch": 2.55, "learning_rate": 3.8163270522363435e-05, "loss": 1.2031, "step": 4258500 }, { "epoch": 2.55, "learning_rate": 3.816117475673399e-05, "loss": 1.2304, "step": 4259000 }, { "epoch": 2.55, "learning_rate": 3.815907479117342e-05, "loss": 1.2106, "step": 4259500 }, { "epoch": 2.55, "learning_rate": 3.815697482561286e-05, "loss": 1.25, "step": 4260000 }, { "epoch": 2.55, "learning_rate": 3.8154874860052296e-05, "loss": 1.1865, "step": 4260500 }, { "epoch": 2.55, "learning_rate": 3.815277909442285e-05, "loss": 1.2227, "step": 4261000 }, { "epoch": 2.55, "learning_rate": 3.815067912886228e-05, "loss": 1.2178, "step": 4261500 }, { "epoch": 2.56, "learning_rate": 3.814857916330172e-05, "loss": 1.1893, "step": 4262000 }, { "epoch": 2.56, "learning_rate": 3.814647919774116e-05, "loss": 1.211, "step": 4262500 }, { "epoch": 2.56, "learning_rate": 3.814437923218059e-05, "loss": 1.2369, "step": 4263000 }, { "epoch": 2.56, "learning_rate": 3.814227926662003e-05, "loss": 1.2116, "step": 4263500 }, { "epoch": 2.56, "learning_rate": 3.8140183500990584e-05, "loss": 1.1834, "step": 4264000 }, { "epoch": 2.56, "learning_rate": 3.813808353543002e-05, "loss": 1.2044, "step": 4264500 }, { "epoch": 2.56, "learning_rate": 3.813598356986945e-05, "loss": 1.2249, "step": 4265000 }, { "epoch": 2.56, "learning_rate": 3.8133887804240005e-05, "loss": 1.2106, "step": 4265500 }, { "epoch": 2.56, "learning_rate": 3.8131787838679445e-05, "loss": 1.2204, "step": 4266000 }, { "epoch": 2.56, "learning_rate": 3.812968787311888e-05, "loss": 1.2171, "step": 4266500 }, { "epoch": 2.56, "learning_rate": 3.812758790755832e-05, "loss": 1.2397, "step": 4267000 }, { "epoch": 2.56, "learning_rate": 3.812548794199775e-05, "loss": 1.2259, "step": 4267500 }, { "epoch": 2.56, "learning_rate": 3.8123387976437185e-05, "loss": 1.1981, "step": 4268000 }, { "epoch": 2.56, "learning_rate": 3.8121288010876626e-05, "loss": 1.2315, "step": 4268500 }, { "epoch": 2.56, "learning_rate": 3.811919224524718e-05, "loss": 1.2061, "step": 4269000 }, { "epoch": 2.56, "learning_rate": 3.811709227968661e-05, "loss": 1.1928, "step": 4269500 }, { "epoch": 2.56, "learning_rate": 3.8114992314126046e-05, "loss": 1.2096, "step": 4270000 }, { "epoch": 2.56, "learning_rate": 3.8112892348565486e-05, "loss": 1.219, "step": 4270500 }, { "epoch": 2.56, "learning_rate": 3.811079238300492e-05, "loss": 1.2362, "step": 4271000 }, { "epoch": 2.56, "learning_rate": 3.810869241744435e-05, "loss": 1.2177, "step": 4271500 }, { "epoch": 2.56, "learning_rate": 3.8106592451883794e-05, "loss": 1.2053, "step": 4272000 }, { "epoch": 2.56, "learning_rate": 3.810449248632322e-05, "loss": 1.2097, "step": 4272500 }, { "epoch": 2.56, "learning_rate": 3.8102392520762654e-05, "loss": 1.2168, "step": 4273000 }, { "epoch": 2.56, "learning_rate": 3.8100296755133214e-05, "loss": 1.2175, "step": 4273500 }, { "epoch": 2.56, "learning_rate": 3.8098196789572654e-05, "loss": 1.2465, "step": 4274000 }, { "epoch": 2.56, "learning_rate": 3.809609682401209e-05, "loss": 1.1992, "step": 4274500 }, { "epoch": 2.56, "learning_rate": 3.809399685845152e-05, "loss": 1.2254, "step": 4275000 }, { "epoch": 2.56, "learning_rate": 3.809190109282208e-05, "loss": 1.2336, "step": 4275500 }, { "epoch": 2.56, "learning_rate": 3.8089801127261515e-05, "loss": 1.2255, "step": 4276000 }, { "epoch": 2.56, "learning_rate": 3.808770116170095e-05, "loss": 1.2097, "step": 4276500 }, { "epoch": 2.56, "learning_rate": 3.808560119614039e-05, "loss": 1.1769, "step": 4277000 }, { "epoch": 2.56, "learning_rate": 3.8083501230579815e-05, "loss": 1.1974, "step": 4277500 }, { "epoch": 2.56, "learning_rate": 3.8081405464950376e-05, "loss": 1.2469, "step": 4278000 }, { "epoch": 2.57, "learning_rate": 3.807930969932093e-05, "loss": 1.2071, "step": 4278500 }, { "epoch": 2.57, "learning_rate": 3.807720973376036e-05, "loss": 1.2128, "step": 4279000 }, { "epoch": 2.57, "learning_rate": 3.80751097681998e-05, "loss": 1.2164, "step": 4279500 }, { "epoch": 2.57, "learning_rate": 3.8073009802639236e-05, "loss": 1.1953, "step": 4280000 }, { "epoch": 2.57, "learning_rate": 3.807090983707867e-05, "loss": 1.2057, "step": 4280500 }, { "epoch": 2.57, "learning_rate": 3.806880987151811e-05, "loss": 1.2418, "step": 4281000 }, { "epoch": 2.57, "learning_rate": 3.8066709905957544e-05, "loss": 1.1931, "step": 4281500 }, { "epoch": 2.57, "learning_rate": 3.806460994039698e-05, "loss": 1.2296, "step": 4282000 }, { "epoch": 2.57, "learning_rate": 3.806250997483641e-05, "loss": 1.2235, "step": 4282500 }, { "epoch": 2.57, "learning_rate": 3.8060410009275844e-05, "loss": 1.2435, "step": 4283000 }, { "epoch": 2.57, "learning_rate": 3.8058314243646404e-05, "loss": 1.1818, "step": 4283500 }, { "epoch": 2.57, "learning_rate": 3.8056214278085845e-05, "loss": 1.2283, "step": 4284000 }, { "epoch": 2.57, "learning_rate": 3.805411431252527e-05, "loss": 1.1878, "step": 4284500 }, { "epoch": 2.57, "learning_rate": 3.8052014346964705e-05, "loss": 1.2149, "step": 4285000 }, { "epoch": 2.57, "learning_rate": 3.8049918581335265e-05, "loss": 1.2161, "step": 4285500 }, { "epoch": 2.57, "learning_rate": 3.804782701563694e-05, "loss": 1.2047, "step": 4286000 }, { "epoch": 2.57, "learning_rate": 3.804572705007638e-05, "loss": 1.248, "step": 4286500 }, { "epoch": 2.57, "learning_rate": 3.804362708451581e-05, "loss": 1.2102, "step": 4287000 }, { "epoch": 2.57, "learning_rate": 3.8041527118955246e-05, "loss": 1.2157, "step": 4287500 }, { "epoch": 2.57, "learning_rate": 3.8039427153394686e-05, "loss": 1.1973, "step": 4288000 }, { "epoch": 2.57, "learning_rate": 3.803732718783412e-05, "loss": 1.2231, "step": 4288500 }, { "epoch": 2.57, "learning_rate": 3.803522722227355e-05, "loss": 1.2103, "step": 4289000 }, { "epoch": 2.57, "learning_rate": 3.803312725671299e-05, "loss": 1.2212, "step": 4289500 }, { "epoch": 2.57, "learning_rate": 3.803102729115243e-05, "loss": 1.2224, "step": 4290000 }, { "epoch": 2.57, "learning_rate": 3.802892732559186e-05, "loss": 1.2023, "step": 4290500 }, { "epoch": 2.57, "learning_rate": 3.80268273600313e-05, "loss": 1.2175, "step": 4291000 }, { "epoch": 2.57, "learning_rate": 3.802472739447073e-05, "loss": 1.1829, "step": 4291500 }, { "epoch": 2.57, "learning_rate": 3.802262742891016e-05, "loss": 1.2137, "step": 4292000 }, { "epoch": 2.57, "learning_rate": 3.802053166328072e-05, "loss": 1.2476, "step": 4292500 }, { "epoch": 2.57, "learning_rate": 3.801843169772016e-05, "loss": 1.2172, "step": 4293000 }, { "epoch": 2.57, "learning_rate": 3.8016331732159595e-05, "loss": 1.2304, "step": 4293500 }, { "epoch": 2.57, "learning_rate": 3.801423596653015e-05, "loss": 1.209, "step": 4294000 }, { "epoch": 2.57, "learning_rate": 3.801213600096958e-05, "loss": 1.2139, "step": 4294500 }, { "epoch": 2.58, "learning_rate": 3.801003603540902e-05, "loss": 1.2214, "step": 4295000 }, { "epoch": 2.58, "learning_rate": 3.8007936069848455e-05, "loss": 1.191, "step": 4295500 }, { "epoch": 2.58, "learning_rate": 3.800583610428789e-05, "loss": 1.197, "step": 4296000 }, { "epoch": 2.58, "learning_rate": 3.800373613872732e-05, "loss": 1.2281, "step": 4296500 }, { "epoch": 2.58, "learning_rate": 3.8001636173166756e-05, "loss": 1.212, "step": 4297000 }, { "epoch": 2.58, "learning_rate": 3.7999536207606196e-05, "loss": 1.2066, "step": 4297500 }, { "epoch": 2.58, "learning_rate": 3.799743624204563e-05, "loss": 1.232, "step": 4298000 }, { "epoch": 2.58, "learning_rate": 3.799534047641619e-05, "loss": 1.2079, "step": 4298500 }, { "epoch": 2.58, "learning_rate": 3.7993240510855616e-05, "loss": 1.1922, "step": 4299000 }, { "epoch": 2.58, "learning_rate": 3.799114054529506e-05, "loss": 1.2116, "step": 4299500 }, { "epoch": 2.58, "learning_rate": 3.798904057973449e-05, "loss": 1.1956, "step": 4300000 }, { "epoch": 2.58, "eval_loss": 1.1711117029190063, "eval_runtime": 1097.9922, "eval_samples_per_second": 479.712, "eval_steps_per_second": 79.952, "step": 4300000 }, { "epoch": 2.58, "learning_rate": 3.7986940614173924e-05, "loss": 1.2074, "step": 4300500 }, { "epoch": 2.58, "learning_rate": 3.7984844848544484e-05, "loss": 1.239, "step": 4301000 }, { "epoch": 2.58, "learning_rate": 3.798274488298392e-05, "loss": 1.1959, "step": 4301500 }, { "epoch": 2.58, "learning_rate": 3.798064491742335e-05, "loss": 1.2282, "step": 4302000 }, { "epoch": 2.58, "learning_rate": 3.7978544951862784e-05, "loss": 1.215, "step": 4302500 }, { "epoch": 2.58, "learning_rate": 3.7976444986302225e-05, "loss": 1.2057, "step": 4303000 }, { "epoch": 2.58, "learning_rate": 3.797434502074166e-05, "loss": 1.217, "step": 4303500 }, { "epoch": 2.58, "learning_rate": 3.797224505518109e-05, "loss": 1.2254, "step": 4304000 }, { "epoch": 2.58, "learning_rate": 3.797014508962053e-05, "loss": 1.2297, "step": 4304500 }, { "epoch": 2.58, "learning_rate": 3.7968049323991085e-05, "loss": 1.1851, "step": 4305000 }, { "epoch": 2.58, "learning_rate": 3.796594935843052e-05, "loss": 1.2299, "step": 4305500 }, { "epoch": 2.58, "learning_rate": 3.796384939286996e-05, "loss": 1.2062, "step": 4306000 }, { "epoch": 2.58, "learning_rate": 3.796174942730939e-05, "loss": 1.2471, "step": 4306500 }, { "epoch": 2.58, "learning_rate": 3.7959653661679946e-05, "loss": 1.2162, "step": 4307000 }, { "epoch": 2.58, "learning_rate": 3.7957557896050506e-05, "loss": 1.2207, "step": 4307500 }, { "epoch": 2.58, "learning_rate": 3.795545793048994e-05, "loss": 1.2126, "step": 4308000 }, { "epoch": 2.58, "learning_rate": 3.795335796492937e-05, "loss": 1.1924, "step": 4308500 }, { "epoch": 2.58, "learning_rate": 3.7951262199299934e-05, "loss": 1.2228, "step": 4309000 }, { "epoch": 2.58, "learning_rate": 3.794916223373937e-05, "loss": 1.201, "step": 4309500 }, { "epoch": 2.58, "learning_rate": 3.79470622681788e-05, "loss": 1.209, "step": 4310000 }, { "epoch": 2.58, "learning_rate": 3.794496230261824e-05, "loss": 1.2004, "step": 4310500 }, { "epoch": 2.58, "learning_rate": 3.794286233705767e-05, "loss": 1.2303, "step": 4311000 }, { "epoch": 2.58, "learning_rate": 3.794076237149711e-05, "loss": 1.2122, "step": 4311500 }, { "epoch": 2.59, "learning_rate": 3.793866240593654e-05, "loss": 1.1962, "step": 4312000 }, { "epoch": 2.59, "learning_rate": 3.7936562440375975e-05, "loss": 1.2311, "step": 4312500 }, { "epoch": 2.59, "learning_rate": 3.7934462474815415e-05, "loss": 1.2239, "step": 4313000 }, { "epoch": 2.59, "learning_rate": 3.793236670918597e-05, "loss": 1.223, "step": 4313500 }, { "epoch": 2.59, "learning_rate": 3.79302667436254e-05, "loss": 1.1937, "step": 4314000 }, { "epoch": 2.59, "learning_rate": 3.7928166778064835e-05, "loss": 1.2077, "step": 4314500 }, { "epoch": 2.59, "learning_rate": 3.7926066812504276e-05, "loss": 1.2371, "step": 4315000 }, { "epoch": 2.59, "learning_rate": 3.792397104687483e-05, "loss": 1.248, "step": 4315500 }, { "epoch": 2.59, "learning_rate": 3.792187108131426e-05, "loss": 1.2262, "step": 4316000 }, { "epoch": 2.59, "learning_rate": 3.7919771115753696e-05, "loss": 1.2082, "step": 4316500 }, { "epoch": 2.59, "learning_rate": 3.7917671150193136e-05, "loss": 1.2298, "step": 4317000 }, { "epoch": 2.59, "learning_rate": 3.791557118463257e-05, "loss": 1.1988, "step": 4317500 }, { "epoch": 2.59, "learning_rate": 3.791347541900312e-05, "loss": 1.2064, "step": 4318000 }, { "epoch": 2.59, "learning_rate": 3.7911375453442564e-05, "loss": 1.2163, "step": 4318500 }, { "epoch": 2.59, "learning_rate": 3.7909275487882e-05, "loss": 1.1809, "step": 4319000 }, { "epoch": 2.59, "learning_rate": 3.790717552232143e-05, "loss": 1.2064, "step": 4319500 }, { "epoch": 2.59, "learning_rate": 3.790507555676087e-05, "loss": 1.2026, "step": 4320000 }, { "epoch": 2.59, "learning_rate": 3.7902975591200304e-05, "loss": 1.2201, "step": 4320500 }, { "epoch": 2.59, "learning_rate": 3.790087982557086e-05, "loss": 1.2326, "step": 4321000 }, { "epoch": 2.59, "learning_rate": 3.789877986001029e-05, "loss": 1.2159, "step": 4321500 }, { "epoch": 2.59, "learning_rate": 3.789667989444973e-05, "loss": 1.2023, "step": 4322000 }, { "epoch": 2.59, "learning_rate": 3.7894579928889165e-05, "loss": 1.2163, "step": 4322500 }, { "epoch": 2.59, "learning_rate": 3.789248416325972e-05, "loss": 1.2117, "step": 4323000 }, { "epoch": 2.59, "learning_rate": 3.789038419769915e-05, "loss": 1.2017, "step": 4323500 }, { "epoch": 2.59, "learning_rate": 3.788828423213859e-05, "loss": 1.2275, "step": 4324000 }, { "epoch": 2.59, "learning_rate": 3.7886184266578026e-05, "loss": 1.217, "step": 4324500 }, { "epoch": 2.59, "learning_rate": 3.788408430101746e-05, "loss": 1.1756, "step": 4325000 }, { "epoch": 2.59, "learning_rate": 3.788198853538802e-05, "loss": 1.1987, "step": 4325500 }, { "epoch": 2.59, "learning_rate": 3.787988856982745e-05, "loss": 1.2193, "step": 4326000 }, { "epoch": 2.59, "learning_rate": 3.7877788604266886e-05, "loss": 1.2398, "step": 4326500 }, { "epoch": 2.59, "learning_rate": 3.787568863870633e-05, "loss": 1.2238, "step": 4327000 }, { "epoch": 2.59, "learning_rate": 3.787359287307688e-05, "loss": 1.2035, "step": 4327500 }, { "epoch": 2.59, "learning_rate": 3.7871492907516314e-05, "loss": 1.2041, "step": 4328000 }, { "epoch": 2.6, "learning_rate": 3.786939294195575e-05, "loss": 1.2587, "step": 4328500 }, { "epoch": 2.6, "learning_rate": 3.786729297639519e-05, "loss": 1.217, "step": 4329000 }, { "epoch": 2.6, "learning_rate": 3.786519301083462e-05, "loss": 1.2283, "step": 4329500 }, { "epoch": 2.6, "learning_rate": 3.7863093045274054e-05, "loss": 1.2012, "step": 4330000 }, { "epoch": 2.6, "learning_rate": 3.786099727964461e-05, "loss": 1.1985, "step": 4330500 }, { "epoch": 2.6, "learning_rate": 3.785889731408405e-05, "loss": 1.2035, "step": 4331000 }, { "epoch": 2.6, "learning_rate": 3.785679734852348e-05, "loss": 1.2381, "step": 4331500 }, { "epoch": 2.6, "learning_rate": 3.785469738296292e-05, "loss": 1.2051, "step": 4332000 }, { "epoch": 2.6, "learning_rate": 3.7852597417402355e-05, "loss": 1.2232, "step": 4332500 }, { "epoch": 2.6, "learning_rate": 3.785049745184179e-05, "loss": 1.238, "step": 4333000 }, { "epoch": 2.6, "learning_rate": 3.784839748628123e-05, "loss": 1.223, "step": 4333500 }, { "epoch": 2.6, "learning_rate": 3.784629752072066e-05, "loss": 1.1851, "step": 4334000 }, { "epoch": 2.6, "learning_rate": 3.7844201755091216e-05, "loss": 1.2077, "step": 4334500 }, { "epoch": 2.6, "learning_rate": 3.784210598946177e-05, "loss": 1.2062, "step": 4335000 }, { "epoch": 2.6, "learning_rate": 3.784001022383233e-05, "loss": 1.2089, "step": 4335500 }, { "epoch": 2.6, "learning_rate": 3.783791025827176e-05, "loss": 1.2074, "step": 4336000 }, { "epoch": 2.6, "learning_rate": 3.7835810292711204e-05, "loss": 1.2054, "step": 4336500 }, { "epoch": 2.6, "learning_rate": 3.783371032715063e-05, "loss": 1.2181, "step": 4337000 }, { "epoch": 2.6, "learning_rate": 3.7831610361590064e-05, "loss": 1.2379, "step": 4337500 }, { "epoch": 2.6, "learning_rate": 3.7829510396029504e-05, "loss": 1.2081, "step": 4338000 }, { "epoch": 2.6, "learning_rate": 3.782741043046894e-05, "loss": 1.1883, "step": 4338500 }, { "epoch": 2.6, "learning_rate": 3.782531046490838e-05, "loss": 1.1868, "step": 4339000 }, { "epoch": 2.6, "learning_rate": 3.782321049934781e-05, "loss": 1.2058, "step": 4339500 }, { "epoch": 2.6, "learning_rate": 3.7821110533787245e-05, "loss": 1.1934, "step": 4340000 }, { "epoch": 2.6, "learning_rate": 3.78190147681578e-05, "loss": 1.2248, "step": 4340500 }, { "epoch": 2.6, "learning_rate": 3.781691480259724e-05, "loss": 1.2268, "step": 4341000 }, { "epoch": 2.6, "learning_rate": 3.781481483703667e-05, "loss": 1.2115, "step": 4341500 }, { "epoch": 2.6, "learning_rate": 3.7812714871476105e-05, "loss": 1.2071, "step": 4342000 }, { "epoch": 2.6, "learning_rate": 3.7810614905915546e-05, "loss": 1.1998, "step": 4342500 }, { "epoch": 2.6, "learning_rate": 3.78085191402861e-05, "loss": 1.232, "step": 4343000 }, { "epoch": 2.6, "learning_rate": 3.780641917472553e-05, "loss": 1.2164, "step": 4343500 }, { "epoch": 2.6, "learning_rate": 3.7804319209164966e-05, "loss": 1.2002, "step": 4344000 }, { "epoch": 2.6, "learning_rate": 3.7802219243604406e-05, "loss": 1.2062, "step": 4344500 }, { "epoch": 2.61, "learning_rate": 3.780012347797496e-05, "loss": 1.2022, "step": 4345000 }, { "epoch": 2.61, "learning_rate": 3.779802351241439e-05, "loss": 1.2143, "step": 4345500 }, { "epoch": 2.61, "learning_rate": 3.7795923546853834e-05, "loss": 1.2271, "step": 4346000 }, { "epoch": 2.61, "learning_rate": 3.779382358129327e-05, "loss": 1.2418, "step": 4346500 }, { "epoch": 2.61, "learning_rate": 3.77917236157327e-05, "loss": 1.2214, "step": 4347000 }, { "epoch": 2.61, "learning_rate": 3.7789627850103254e-05, "loss": 1.2291, "step": 4347500 }, { "epoch": 2.61, "learning_rate": 3.7787532084473814e-05, "loss": 1.2229, "step": 4348000 }, { "epoch": 2.61, "learning_rate": 3.7785432118913255e-05, "loss": 1.2125, "step": 4348500 }, { "epoch": 2.61, "learning_rate": 3.778333215335268e-05, "loss": 1.202, "step": 4349000 }, { "epoch": 2.61, "learning_rate": 3.7781232187792115e-05, "loss": 1.2179, "step": 4349500 }, { "epoch": 2.61, "learning_rate": 3.7779132222231555e-05, "loss": 1.1836, "step": 4350000 }, { "epoch": 2.61, "learning_rate": 3.777703225667099e-05, "loss": 1.1948, "step": 4350500 }, { "epoch": 2.61, "learning_rate": 3.777493229111042e-05, "loss": 1.2334, "step": 4351000 }, { "epoch": 2.61, "learning_rate": 3.777283232554986e-05, "loss": 1.2038, "step": 4351500 }, { "epoch": 2.61, "learning_rate": 3.7770732359989296e-05, "loss": 1.1914, "step": 4352000 }, { "epoch": 2.61, "learning_rate": 3.776863659435985e-05, "loss": 1.2288, "step": 4352500 }, { "epoch": 2.61, "learning_rate": 3.776653662879929e-05, "loss": 1.2249, "step": 4353000 }, { "epoch": 2.61, "learning_rate": 3.776443666323872e-05, "loss": 1.2435, "step": 4353500 }, { "epoch": 2.61, "learning_rate": 3.7762340897609276e-05, "loss": 1.1967, "step": 4354000 }, { "epoch": 2.61, "learning_rate": 3.776024093204871e-05, "loss": 1.2522, "step": 4354500 }, { "epoch": 2.61, "learning_rate": 3.775814096648815e-05, "loss": 1.2142, "step": 4355000 }, { "epoch": 2.61, "learning_rate": 3.7756041000927584e-05, "loss": 1.2276, "step": 4355500 }, { "epoch": 2.61, "learning_rate": 3.775394103536702e-05, "loss": 1.2209, "step": 4356000 }, { "epoch": 2.61, "learning_rate": 3.775184106980646e-05, "loss": 1.2056, "step": 4356500 }, { "epoch": 2.61, "learning_rate": 3.774974110424589e-05, "loss": 1.231, "step": 4357000 }, { "epoch": 2.61, "learning_rate": 3.7747641138685324e-05, "loss": 1.2285, "step": 4357500 }, { "epoch": 2.61, "learning_rate": 3.7745541173124764e-05, "loss": 1.2244, "step": 4358000 }, { "epoch": 2.61, "learning_rate": 3.77434412075642e-05, "loss": 1.2046, "step": 4358500 }, { "epoch": 2.61, "learning_rate": 3.7741341242003625e-05, "loss": 1.203, "step": 4359000 }, { "epoch": 2.61, "learning_rate": 3.7739241276443065e-05, "loss": 1.1867, "step": 4359500 }, { "epoch": 2.61, "learning_rate": 3.7737145510813625e-05, "loss": 1.2079, "step": 4360000 }, { "epoch": 2.61, "learning_rate": 3.773504974518418e-05, "loss": 1.2033, "step": 4360500 }, { "epoch": 2.61, "learning_rate": 3.773294977962361e-05, "loss": 1.1957, "step": 4361000 }, { "epoch": 2.61, "learning_rate": 3.773084981406305e-05, "loss": 1.2122, "step": 4361500 }, { "epoch": 2.62, "learning_rate": 3.7728749848502486e-05, "loss": 1.1986, "step": 4362000 }, { "epoch": 2.62, "learning_rate": 3.772664988294192e-05, "loss": 1.1876, "step": 4362500 }, { "epoch": 2.62, "learning_rate": 3.772455411731247e-05, "loss": 1.1838, "step": 4363000 }, { "epoch": 2.62, "learning_rate": 3.772245415175191e-05, "loss": 1.2147, "step": 4363500 }, { "epoch": 2.62, "learning_rate": 3.772035418619135e-05, "loss": 1.2008, "step": 4364000 }, { "epoch": 2.62, "learning_rate": 3.771825422063078e-05, "loss": 1.2216, "step": 4364500 }, { "epoch": 2.62, "learning_rate": 3.771615425507022e-05, "loss": 1.1932, "step": 4365000 }, { "epoch": 2.62, "learning_rate": 3.7714054289509654e-05, "loss": 1.2096, "step": 4365500 }, { "epoch": 2.62, "learning_rate": 3.771195432394909e-05, "loss": 1.2179, "step": 4366000 }, { "epoch": 2.62, "learning_rate": 3.770985435838852e-05, "loss": 1.2099, "step": 4366500 }, { "epoch": 2.62, "learning_rate": 3.770775859275908e-05, "loss": 1.2148, "step": 4367000 }, { "epoch": 2.62, "learning_rate": 3.7705662827129635e-05, "loss": 1.2221, "step": 4367500 }, { "epoch": 2.62, "learning_rate": 3.770356286156907e-05, "loss": 1.2291, "step": 4368000 }, { "epoch": 2.62, "learning_rate": 3.770146289600851e-05, "loss": 1.239, "step": 4368500 }, { "epoch": 2.62, "learning_rate": 3.769936293044794e-05, "loss": 1.1933, "step": 4369000 }, { "epoch": 2.62, "learning_rate": 3.7697262964887375e-05, "loss": 1.2128, "step": 4369500 }, { "epoch": 2.62, "learning_rate": 3.7695162999326815e-05, "loss": 1.1933, "step": 4370000 }, { "epoch": 2.62, "learning_rate": 3.769306303376625e-05, "loss": 1.2263, "step": 4370500 }, { "epoch": 2.62, "learning_rate": 3.76909672681368e-05, "loss": 1.2228, "step": 4371000 }, { "epoch": 2.62, "learning_rate": 3.7688867302576236e-05, "loss": 1.2077, "step": 4371500 }, { "epoch": 2.62, "learning_rate": 3.7686767337015676e-05, "loss": 1.2177, "step": 4372000 }, { "epoch": 2.62, "learning_rate": 3.768466737145511e-05, "loss": 1.2111, "step": 4372500 }, { "epoch": 2.62, "learning_rate": 3.768256740589454e-05, "loss": 1.2148, "step": 4373000 }, { "epoch": 2.62, "learning_rate": 3.76804716402651e-05, "loss": 1.2335, "step": 4373500 }, { "epoch": 2.62, "learning_rate": 3.767837167470454e-05, "loss": 1.2015, "step": 4374000 }, { "epoch": 2.62, "learning_rate": 3.767627170914397e-05, "loss": 1.2122, "step": 4374500 }, { "epoch": 2.62, "learning_rate": 3.7674171743583404e-05, "loss": 1.2042, "step": 4375000 }, { "epoch": 2.62, "learning_rate": 3.7672071778022844e-05, "loss": 1.2212, "step": 4375500 }, { "epoch": 2.62, "learning_rate": 3.76699760123934e-05, "loss": 1.1926, "step": 4376000 }, { "epoch": 2.62, "learning_rate": 3.766787604683283e-05, "loss": 1.2014, "step": 4376500 }, { "epoch": 2.62, "learning_rate": 3.766577608127227e-05, "loss": 1.2006, "step": 4377000 }, { "epoch": 2.62, "learning_rate": 3.7663676115711705e-05, "loss": 1.2074, "step": 4377500 }, { "epoch": 2.62, "learning_rate": 3.766158035008226e-05, "loss": 1.1944, "step": 4378000 }, { "epoch": 2.63, "learning_rate": 3.765948038452169e-05, "loss": 1.205, "step": 4378500 }, { "epoch": 2.63, "learning_rate": 3.765738041896113e-05, "loss": 1.2292, "step": 4379000 }, { "epoch": 2.63, "learning_rate": 3.7655280453400566e-05, "loss": 1.245, "step": 4379500 }, { "epoch": 2.63, "learning_rate": 3.765318048784e-05, "loss": 1.2431, "step": 4380000 }, { "epoch": 2.63, "learning_rate": 3.765108472221055e-05, "loss": 1.2105, "step": 4380500 }, { "epoch": 2.63, "learning_rate": 3.764898475664999e-05, "loss": 1.215, "step": 4381000 }, { "epoch": 2.63, "learning_rate": 3.7646884791089426e-05, "loss": 1.2113, "step": 4381500 }, { "epoch": 2.63, "learning_rate": 3.764478482552886e-05, "loss": 1.2354, "step": 4382000 }, { "epoch": 2.63, "learning_rate": 3.76426848599683e-05, "loss": 1.1899, "step": 4382500 }, { "epoch": 2.63, "learning_rate": 3.764058489440773e-05, "loss": 1.2096, "step": 4383000 }, { "epoch": 2.63, "learning_rate": 3.763848492884717e-05, "loss": 1.2048, "step": 4383500 }, { "epoch": 2.63, "learning_rate": 3.76363849632866e-05, "loss": 1.2087, "step": 4384000 }, { "epoch": 2.63, "learning_rate": 3.763428919765716e-05, "loss": 1.1912, "step": 4384500 }, { "epoch": 2.63, "learning_rate": 3.7632189232096594e-05, "loss": 1.2125, "step": 4385000 }, { "epoch": 2.63, "learning_rate": 3.763008926653603e-05, "loss": 1.2182, "step": 4385500 }, { "epoch": 2.63, "learning_rate": 3.762798930097546e-05, "loss": 1.2146, "step": 4386000 }, { "epoch": 2.63, "learning_rate": 3.762589353534602e-05, "loss": 1.2512, "step": 4386500 }, { "epoch": 2.63, "learning_rate": 3.7623793569785455e-05, "loss": 1.1869, "step": 4387000 }, { "epoch": 2.63, "learning_rate": 3.7621693604224895e-05, "loss": 1.1998, "step": 4387500 }, { "epoch": 2.63, "learning_rate": 3.761959363866432e-05, "loss": 1.2329, "step": 4388000 }, { "epoch": 2.63, "learning_rate": 3.7617502072966e-05, "loss": 1.2008, "step": 4388500 }, { "epoch": 2.63, "learning_rate": 3.7615402107405436e-05, "loss": 1.2257, "step": 4389000 }, { "epoch": 2.63, "learning_rate": 3.7613302141844876e-05, "loss": 1.2047, "step": 4389500 }, { "epoch": 2.63, "learning_rate": 3.761120217628431e-05, "loss": 1.24, "step": 4390000 }, { "epoch": 2.63, "learning_rate": 3.760910221072374e-05, "loss": 1.2129, "step": 4390500 }, { "epoch": 2.63, "learning_rate": 3.7607006445094296e-05, "loss": 1.2061, "step": 4391000 }, { "epoch": 2.63, "learning_rate": 3.760490647953374e-05, "loss": 1.1795, "step": 4391500 }, { "epoch": 2.63, "learning_rate": 3.760280651397317e-05, "loss": 1.2074, "step": 4392000 }, { "epoch": 2.63, "learning_rate": 3.7600706548412604e-05, "loss": 1.2085, "step": 4392500 }, { "epoch": 2.63, "learning_rate": 3.759861078278316e-05, "loss": 1.1778, "step": 4393000 }, { "epoch": 2.63, "learning_rate": 3.759651501715372e-05, "loss": 1.2119, "step": 4393500 }, { "epoch": 2.63, "learning_rate": 3.759441505159315e-05, "loss": 1.1892, "step": 4394000 }, { "epoch": 2.63, "learning_rate": 3.7592315086032584e-05, "loss": 1.2175, "step": 4394500 }, { "epoch": 2.63, "learning_rate": 3.7590215120472025e-05, "loss": 1.2312, "step": 4395000 }, { "epoch": 2.64, "learning_rate": 3.758811515491146e-05, "loss": 1.213, "step": 4395500 }, { "epoch": 2.64, "learning_rate": 3.758601938928202e-05, "loss": 1.1981, "step": 4396000 }, { "epoch": 2.64, "learning_rate": 3.7583919423721445e-05, "loss": 1.2325, "step": 4396500 }, { "epoch": 2.64, "learning_rate": 3.7581819458160885e-05, "loss": 1.1897, "step": 4397000 }, { "epoch": 2.64, "learning_rate": 3.757971949260032e-05, "loss": 1.2175, "step": 4397500 }, { "epoch": 2.64, "learning_rate": 3.757761952703975e-05, "loss": 1.1963, "step": 4398000 }, { "epoch": 2.64, "learning_rate": 3.757551956147919e-05, "loss": 1.233, "step": 4398500 }, { "epoch": 2.64, "learning_rate": 3.7573419595918626e-05, "loss": 1.1935, "step": 4399000 }, { "epoch": 2.64, "learning_rate": 3.757131963035806e-05, "loss": 1.2238, "step": 4399500 }, { "epoch": 2.64, "learning_rate": 3.75692196647975e-05, "loss": 1.2251, "step": 4400000 }, { "epoch": 2.64, "eval_loss": 1.1657322645187378, "eval_runtime": 1099.1385, "eval_samples_per_second": 479.212, "eval_steps_per_second": 79.869, "step": 4400000 }, { "epoch": 2.64, "learning_rate": 3.756711969923693e-05, "loss": 1.2357, "step": 4400500 }, { "epoch": 2.64, "learning_rate": 3.756501973367637e-05, "loss": 1.2106, "step": 4401000 }, { "epoch": 2.64, "learning_rate": 3.756291976811581e-05, "loss": 1.2053, "step": 4401500 }, { "epoch": 2.64, "learning_rate": 3.756082820241748e-05, "loss": 1.2313, "step": 4402000 }, { "epoch": 2.64, "learning_rate": 3.7558728236856914e-05, "loss": 1.1984, "step": 4402500 }, { "epoch": 2.64, "learning_rate": 3.755662827129635e-05, "loss": 1.1957, "step": 4403000 }, { "epoch": 2.64, "learning_rate": 3.755452830573579e-05, "loss": 1.2084, "step": 4403500 }, { "epoch": 2.64, "learning_rate": 3.755242834017522e-05, "loss": 1.1872, "step": 4404000 }, { "epoch": 2.64, "learning_rate": 3.7550328374614655e-05, "loss": 1.2052, "step": 4404500 }, { "epoch": 2.64, "learning_rate": 3.7548228409054095e-05, "loss": 1.2295, "step": 4405000 }, { "epoch": 2.64, "learning_rate": 3.754612844349353e-05, "loss": 1.2171, "step": 4405500 }, { "epoch": 2.64, "learning_rate": 3.754403267786408e-05, "loss": 1.2185, "step": 4406000 }, { "epoch": 2.64, "learning_rate": 3.7541932712303515e-05, "loss": 1.2056, "step": 4406500 }, { "epoch": 2.64, "learning_rate": 3.7539832746742956e-05, "loss": 1.1981, "step": 4407000 }, { "epoch": 2.64, "learning_rate": 3.753773278118239e-05, "loss": 1.245, "step": 4407500 }, { "epoch": 2.64, "learning_rate": 3.753563701555294e-05, "loss": 1.1932, "step": 4408000 }, { "epoch": 2.64, "learning_rate": 3.7533541249923496e-05, "loss": 1.1845, "step": 4408500 }, { "epoch": 2.64, "learning_rate": 3.7531441284362936e-05, "loss": 1.1873, "step": 4409000 }, { "epoch": 2.64, "learning_rate": 3.752934131880237e-05, "loss": 1.2121, "step": 4409500 }, { "epoch": 2.64, "learning_rate": 3.75272413532418e-05, "loss": 1.2368, "step": 4410000 }, { "epoch": 2.64, "learning_rate": 3.7525141387681244e-05, "loss": 1.2022, "step": 4410500 }, { "epoch": 2.64, "learning_rate": 3.752304982198292e-05, "loss": 1.2097, "step": 4411000 }, { "epoch": 2.64, "learning_rate": 3.752094985642236e-05, "loss": 1.2103, "step": 4411500 }, { "epoch": 2.65, "learning_rate": 3.751884989086179e-05, "loss": 1.2152, "step": 4412000 }, { "epoch": 2.65, "learning_rate": 3.7516749925301224e-05, "loss": 1.2081, "step": 4412500 }, { "epoch": 2.65, "learning_rate": 3.7514649959740665e-05, "loss": 1.192, "step": 4413000 }, { "epoch": 2.65, "learning_rate": 3.751254999418009e-05, "loss": 1.2016, "step": 4413500 }, { "epoch": 2.65, "learning_rate": 3.7510450028619525e-05, "loss": 1.2182, "step": 4414000 }, { "epoch": 2.65, "learning_rate": 3.7508350063058965e-05, "loss": 1.1954, "step": 4414500 }, { "epoch": 2.65, "learning_rate": 3.75062500974984e-05, "loss": 1.2268, "step": 4415000 }, { "epoch": 2.65, "learning_rate": 3.750415433186895e-05, "loss": 1.2096, "step": 4415500 }, { "epoch": 2.65, "learning_rate": 3.750205436630839e-05, "loss": 1.1921, "step": 4416000 }, { "epoch": 2.65, "learning_rate": 3.7499954400747826e-05, "loss": 1.2256, "step": 4416500 }, { "epoch": 2.65, "learning_rate": 3.749785443518726e-05, "loss": 1.1651, "step": 4417000 }, { "epoch": 2.65, "learning_rate": 3.749575866955782e-05, "loss": 1.2143, "step": 4417500 }, { "epoch": 2.65, "learning_rate": 3.749365870399725e-05, "loss": 1.1977, "step": 4418000 }, { "epoch": 2.65, "learning_rate": 3.7491558738436686e-05, "loss": 1.2257, "step": 4418500 }, { "epoch": 2.65, "learning_rate": 3.748945877287612e-05, "loss": 1.2149, "step": 4419000 }, { "epoch": 2.65, "learning_rate": 3.748736300724668e-05, "loss": 1.2131, "step": 4419500 }, { "epoch": 2.65, "learning_rate": 3.748526304168612e-05, "loss": 1.214, "step": 4420000 }, { "epoch": 2.65, "learning_rate": 3.748316307612555e-05, "loss": 1.2003, "step": 4420500 }, { "epoch": 2.65, "learning_rate": 3.748106311056498e-05, "loss": 1.199, "step": 4421000 }, { "epoch": 2.65, "learning_rate": 3.747896314500442e-05, "loss": 1.2201, "step": 4421500 }, { "epoch": 2.65, "learning_rate": 3.7476863179443854e-05, "loss": 1.1687, "step": 4422000 }, { "epoch": 2.65, "learning_rate": 3.7474767413814415e-05, "loss": 1.2092, "step": 4422500 }, { "epoch": 2.65, "learning_rate": 3.747266744825385e-05, "loss": 1.1789, "step": 4423000 }, { "epoch": 2.65, "learning_rate": 3.747056748269328e-05, "loss": 1.2124, "step": 4423500 }, { "epoch": 2.65, "learning_rate": 3.7468467517132715e-05, "loss": 1.2179, "step": 4424000 }, { "epoch": 2.65, "learning_rate": 3.7466367551572155e-05, "loss": 1.2112, "step": 4424500 }, { "epoch": 2.65, "learning_rate": 3.746426758601159e-05, "loss": 1.224, "step": 4425000 }, { "epoch": 2.65, "learning_rate": 3.746216762045102e-05, "loss": 1.1806, "step": 4425500 }, { "epoch": 2.65, "learning_rate": 3.7460071854821576e-05, "loss": 1.1955, "step": 4426000 }, { "epoch": 2.65, "learning_rate": 3.7457971889261016e-05, "loss": 1.1677, "step": 4426500 }, { "epoch": 2.65, "learning_rate": 3.745587192370045e-05, "loss": 1.1853, "step": 4427000 }, { "epoch": 2.65, "learning_rate": 3.745377195813988e-05, "loss": 1.2243, "step": 4427500 }, { "epoch": 2.65, "learning_rate": 3.745167199257932e-05, "loss": 1.2365, "step": 4428000 }, { "epoch": 2.66, "learning_rate": 3.744957202701876e-05, "loss": 1.1986, "step": 4428500 }, { "epoch": 2.66, "learning_rate": 3.744747206145819e-05, "loss": 1.2228, "step": 4429000 }, { "epoch": 2.66, "learning_rate": 3.744537209589763e-05, "loss": 1.1789, "step": 4429500 }, { "epoch": 2.66, "learning_rate": 3.7443272130337064e-05, "loss": 1.2196, "step": 4430000 }, { "epoch": 2.66, "learning_rate": 3.74411721647765e-05, "loss": 1.1968, "step": 4430500 }, { "epoch": 2.66, "learning_rate": 3.743907219921593e-05, "loss": 1.2057, "step": 4431000 }, { "epoch": 2.66, "learning_rate": 3.7436972233655364e-05, "loss": 1.2077, "step": 4431500 }, { "epoch": 2.66, "learning_rate": 3.7434876468025925e-05, "loss": 1.1919, "step": 4432000 }, { "epoch": 2.66, "learning_rate": 3.7432776502465365e-05, "loss": 1.2149, "step": 4432500 }, { "epoch": 2.66, "learning_rate": 3.743068073683592e-05, "loss": 1.2114, "step": 4433000 }, { "epoch": 2.66, "learning_rate": 3.742858077127535e-05, "loss": 1.2244, "step": 4433500 }, { "epoch": 2.66, "learning_rate": 3.7426480805714785e-05, "loss": 1.1967, "step": 4434000 }, { "epoch": 2.66, "learning_rate": 3.7424380840154226e-05, "loss": 1.2102, "step": 4434500 }, { "epoch": 2.66, "learning_rate": 3.742228087459366e-05, "loss": 1.2152, "step": 4435000 }, { "epoch": 2.66, "learning_rate": 3.7420180909033086e-05, "loss": 1.2235, "step": 4435500 }, { "epoch": 2.66, "learning_rate": 3.7418080943472526e-05, "loss": 1.2207, "step": 4436000 }, { "epoch": 2.66, "learning_rate": 3.741598097791196e-05, "loss": 1.2132, "step": 4436500 }, { "epoch": 2.66, "learning_rate": 3.741388521228252e-05, "loss": 1.221, "step": 4437000 }, { "epoch": 2.66, "learning_rate": 3.741178524672195e-05, "loss": 1.2138, "step": 4437500 }, { "epoch": 2.66, "learning_rate": 3.740968528116139e-05, "loss": 1.221, "step": 4438000 }, { "epoch": 2.66, "learning_rate": 3.740758951553195e-05, "loss": 1.2102, "step": 4438500 }, { "epoch": 2.66, "learning_rate": 3.740548954997138e-05, "loss": 1.2038, "step": 4439000 }, { "epoch": 2.66, "learning_rate": 3.740338958441082e-05, "loss": 1.2192, "step": 4439500 }, { "epoch": 2.66, "learning_rate": 3.7401289618850254e-05, "loss": 1.2068, "step": 4440000 }, { "epoch": 2.66, "learning_rate": 3.739918965328968e-05, "loss": 1.2141, "step": 4440500 }, { "epoch": 2.66, "learning_rate": 3.739708968772912e-05, "loss": 1.1971, "step": 4441000 }, { "epoch": 2.66, "learning_rate": 3.7394989722168555e-05, "loss": 1.2089, "step": 4441500 }, { "epoch": 2.66, "learning_rate": 3.739288975660799e-05, "loss": 1.1627, "step": 4442000 }, { "epoch": 2.66, "learning_rate": 3.739079399097854e-05, "loss": 1.2189, "step": 4442500 }, { "epoch": 2.66, "learning_rate": 3.738869402541798e-05, "loss": 1.1798, "step": 4443000 }, { "epoch": 2.66, "learning_rate": 3.7386594059857415e-05, "loss": 1.2182, "step": 4443500 }, { "epoch": 2.66, "learning_rate": 3.738449409429685e-05, "loss": 1.2382, "step": 4444000 }, { "epoch": 2.66, "learning_rate": 3.738240252859853e-05, "loss": 1.2126, "step": 4444500 }, { "epoch": 2.66, "learning_rate": 3.738030256303797e-05, "loss": 1.1834, "step": 4445000 }, { "epoch": 2.67, "learning_rate": 3.73782025974774e-05, "loss": 1.1854, "step": 4445500 }, { "epoch": 2.67, "learning_rate": 3.7376102631916836e-05, "loss": 1.192, "step": 4446000 }, { "epoch": 2.67, "learning_rate": 3.7374002666356277e-05, "loss": 1.2063, "step": 4446500 }, { "epoch": 2.67, "learning_rate": 3.737190690072683e-05, "loss": 1.2056, "step": 4447000 }, { "epoch": 2.67, "learning_rate": 3.7369806935166264e-05, "loss": 1.1957, "step": 4447500 }, { "epoch": 2.67, "learning_rate": 3.73677069696057e-05, "loss": 1.2347, "step": 4448000 }, { "epoch": 2.67, "learning_rate": 3.736560700404514e-05, "loss": 1.2208, "step": 4448500 }, { "epoch": 2.67, "learning_rate": 3.736350703848457e-05, "loss": 1.2232, "step": 4449000 }, { "epoch": 2.67, "learning_rate": 3.7361407072924004e-05, "loss": 1.1973, "step": 4449500 }, { "epoch": 2.67, "learning_rate": 3.735930710736344e-05, "loss": 1.1996, "step": 4450000 }, { "epoch": 2.67, "learning_rate": 3.735720714180287e-05, "loss": 1.2181, "step": 4450500 }, { "epoch": 2.67, "learning_rate": 3.735511137617343e-05, "loss": 1.2032, "step": 4451000 }, { "epoch": 2.67, "learning_rate": 3.7353011410612865e-05, "loss": 1.2197, "step": 4451500 }, { "epoch": 2.67, "learning_rate": 3.73509114450523e-05, "loss": 1.2307, "step": 4452000 }, { "epoch": 2.67, "learning_rate": 3.734881147949173e-05, "loss": 1.2136, "step": 4452500 }, { "epoch": 2.67, "learning_rate": 3.734671571386229e-05, "loss": 1.1857, "step": 4453000 }, { "epoch": 2.67, "learning_rate": 3.7344619948232846e-05, "loss": 1.2111, "step": 4453500 }, { "epoch": 2.67, "learning_rate": 3.7342519982672286e-05, "loss": 1.2055, "step": 4454000 }, { "epoch": 2.67, "learning_rate": 3.734042001711172e-05, "loss": 1.1874, "step": 4454500 }, { "epoch": 2.67, "learning_rate": 3.733832005155115e-05, "loss": 1.2045, "step": 4455000 }, { "epoch": 2.67, "learning_rate": 3.733622008599059e-05, "loss": 1.2073, "step": 4455500 }, { "epoch": 2.67, "learning_rate": 3.733412012043003e-05, "loss": 1.2253, "step": 4456000 }, { "epoch": 2.67, "learning_rate": 3.733202435480058e-05, "loss": 1.1832, "step": 4456500 }, { "epoch": 2.67, "learning_rate": 3.7329924389240014e-05, "loss": 1.2181, "step": 4457000 }, { "epoch": 2.67, "learning_rate": 3.7327824423679454e-05, "loss": 1.181, "step": 4457500 }, { "epoch": 2.67, "learning_rate": 3.732572445811889e-05, "loss": 1.2172, "step": 4458000 }, { "epoch": 2.67, "learning_rate": 3.732362449255832e-05, "loss": 1.2273, "step": 4458500 }, { "epoch": 2.67, "learning_rate": 3.732152872692888e-05, "loss": 1.2111, "step": 4459000 }, { "epoch": 2.67, "learning_rate": 3.7319428761368315e-05, "loss": 1.2224, "step": 4459500 }, { "epoch": 2.67, "learning_rate": 3.731732879580775e-05, "loss": 1.1904, "step": 4460000 }, { "epoch": 2.67, "learning_rate": 3.731522883024719e-05, "loss": 1.2094, "step": 4460500 }, { "epoch": 2.67, "learning_rate": 3.731312886468662e-05, "loss": 1.1945, "step": 4461000 }, { "epoch": 2.67, "learning_rate": 3.731102889912605e-05, "loss": 1.2206, "step": 4461500 }, { "epoch": 2.68, "learning_rate": 3.730893313349661e-05, "loss": 1.1879, "step": 4462000 }, { "epoch": 2.68, "learning_rate": 3.730683316793605e-05, "loss": 1.2016, "step": 4462500 }, { "epoch": 2.68, "learning_rate": 3.730473320237548e-05, "loss": 1.2392, "step": 4463000 }, { "epoch": 2.68, "learning_rate": 3.7302637436746036e-05, "loss": 1.1793, "step": 4463500 }, { "epoch": 2.68, "learning_rate": 3.730053747118547e-05, "loss": 1.1998, "step": 4464000 }, { "epoch": 2.68, "learning_rate": 3.729843750562491e-05, "loss": 1.2018, "step": 4464500 }, { "epoch": 2.68, "learning_rate": 3.729633754006434e-05, "loss": 1.2131, "step": 4465000 }, { "epoch": 2.68, "learning_rate": 3.729423757450378e-05, "loss": 1.1859, "step": 4465500 }, { "epoch": 2.68, "learning_rate": 3.729213760894322e-05, "loss": 1.2183, "step": 4466000 }, { "epoch": 2.68, "learning_rate": 3.7290037643382644e-05, "loss": 1.213, "step": 4466500 }, { "epoch": 2.68, "learning_rate": 3.7287937677822084e-05, "loss": 1.2292, "step": 4467000 }, { "epoch": 2.68, "learning_rate": 3.728583771226152e-05, "loss": 1.19, "step": 4467500 }, { "epoch": 2.68, "learning_rate": 3.728373774670095e-05, "loss": 1.2123, "step": 4468000 }, { "epoch": 2.68, "learning_rate": 3.728164198107151e-05, "loss": 1.2185, "step": 4468500 }, { "epoch": 2.68, "learning_rate": 3.7279546215442065e-05, "loss": 1.2111, "step": 4469000 }, { "epoch": 2.68, "learning_rate": 3.7277446249881505e-05, "loss": 1.2133, "step": 4469500 }, { "epoch": 2.68, "learning_rate": 3.727534628432094e-05, "loss": 1.2369, "step": 4470000 }, { "epoch": 2.68, "learning_rate": 3.727324631876037e-05, "loss": 1.2076, "step": 4470500 }, { "epoch": 2.68, "learning_rate": 3.727114635319981e-05, "loss": 1.1963, "step": 4471000 }, { "epoch": 2.68, "learning_rate": 3.726904638763924e-05, "loss": 1.2016, "step": 4471500 }, { "epoch": 2.68, "learning_rate": 3.726694642207867e-05, "loss": 1.2082, "step": 4472000 }, { "epoch": 2.68, "learning_rate": 3.726484645651811e-05, "loss": 1.2014, "step": 4472500 }, { "epoch": 2.68, "learning_rate": 3.726275069088867e-05, "loss": 1.1942, "step": 4473000 }, { "epoch": 2.68, "learning_rate": 3.72606507253281e-05, "loss": 1.1928, "step": 4473500 }, { "epoch": 2.68, "learning_rate": 3.725855075976754e-05, "loss": 1.2291, "step": 4474000 }, { "epoch": 2.68, "learning_rate": 3.725645079420697e-05, "loss": 1.1859, "step": 4474500 }, { "epoch": 2.68, "learning_rate": 3.725435082864641e-05, "loss": 1.2203, "step": 4475000 }, { "epoch": 2.68, "learning_rate": 3.725225086308585e-05, "loss": 1.2171, "step": 4475500 }, { "epoch": 2.68, "learning_rate": 3.72501550974564e-05, "loss": 1.1794, "step": 4476000 }, { "epoch": 2.68, "learning_rate": 3.7248055131895834e-05, "loss": 1.2169, "step": 4476500 }, { "epoch": 2.68, "learning_rate": 3.724595516633527e-05, "loss": 1.2109, "step": 4477000 }, { "epoch": 2.68, "learning_rate": 3.724385520077471e-05, "loss": 1.197, "step": 4477500 }, { "epoch": 2.68, "learning_rate": 3.724175523521414e-05, "loss": 1.2231, "step": 4478000 }, { "epoch": 2.69, "learning_rate": 3.7239659469584695e-05, "loss": 1.2241, "step": 4478500 }, { "epoch": 2.69, "learning_rate": 3.723755950402413e-05, "loss": 1.2119, "step": 4479000 }, { "epoch": 2.69, "learning_rate": 3.723545953846357e-05, "loss": 1.2237, "step": 4479500 }, { "epoch": 2.69, "learning_rate": 3.7233359572903e-05, "loss": 1.1747, "step": 4480000 }, { "epoch": 2.69, "learning_rate": 3.7231259607342435e-05, "loss": 1.2032, "step": 4480500 }, { "epoch": 2.69, "learning_rate": 3.7229159641781875e-05, "loss": 1.1848, "step": 4481000 }, { "epoch": 2.69, "learning_rate": 3.722705967622131e-05, "loss": 1.1924, "step": 4481500 }, { "epoch": 2.69, "learning_rate": 3.722495971066075e-05, "loss": 1.2005, "step": 4482000 }, { "epoch": 2.69, "learning_rate": 3.72228639450313e-05, "loss": 1.2102, "step": 4482500 }, { "epoch": 2.69, "learning_rate": 3.7220763979470736e-05, "loss": 1.1917, "step": 4483000 }, { "epoch": 2.69, "learning_rate": 3.721866401391017e-05, "loss": 1.1979, "step": 4483500 }, { "epoch": 2.69, "learning_rate": 3.721656824828072e-05, "loss": 1.2182, "step": 4484000 }, { "epoch": 2.69, "learning_rate": 3.7214472482651284e-05, "loss": 1.2239, "step": 4484500 }, { "epoch": 2.69, "learning_rate": 3.7212372517090724e-05, "loss": 1.2424, "step": 4485000 }, { "epoch": 2.69, "learning_rate": 3.721027255153015e-05, "loss": 1.1784, "step": 4485500 }, { "epoch": 2.69, "learning_rate": 3.7208172585969584e-05, "loss": 1.2004, "step": 4486000 }, { "epoch": 2.69, "learning_rate": 3.7206072620409024e-05, "loss": 1.2478, "step": 4486500 }, { "epoch": 2.69, "learning_rate": 3.720397265484846e-05, "loss": 1.234, "step": 4487000 }, { "epoch": 2.69, "learning_rate": 3.720187268928789e-05, "loss": 1.1881, "step": 4487500 }, { "epoch": 2.69, "learning_rate": 3.719977272372733e-05, "loss": 1.1966, "step": 4488000 }, { "epoch": 2.69, "learning_rate": 3.7197672758166765e-05, "loss": 1.1886, "step": 4488500 }, { "epoch": 2.69, "learning_rate": 3.7195572792606205e-05, "loss": 1.2231, "step": 4489000 }, { "epoch": 2.69, "learning_rate": 3.719347282704564e-05, "loss": 1.2004, "step": 4489500 }, { "epoch": 2.69, "learning_rate": 3.719137706141619e-05, "loss": 1.2025, "step": 4490000 }, { "epoch": 2.69, "learning_rate": 3.7189277095855626e-05, "loss": 1.1911, "step": 4490500 }, { "epoch": 2.69, "learning_rate": 3.7187177130295066e-05, "loss": 1.2208, "step": 4491000 }, { "epoch": 2.69, "learning_rate": 3.71850771647345e-05, "loss": 1.1989, "step": 4491500 }, { "epoch": 2.69, "learning_rate": 3.718297719917393e-05, "loss": 1.2078, "step": 4492000 }, { "epoch": 2.69, "learning_rate": 3.7180881433544486e-05, "loss": 1.2043, "step": 4492500 }, { "epoch": 2.69, "learning_rate": 3.7178781467983927e-05, "loss": 1.1965, "step": 4493000 }, { "epoch": 2.69, "learning_rate": 3.717668150242336e-05, "loss": 1.189, "step": 4493500 }, { "epoch": 2.69, "learning_rate": 3.7174581536862793e-05, "loss": 1.2165, "step": 4494000 }, { "epoch": 2.69, "learning_rate": 3.7172481571302234e-05, "loss": 1.2124, "step": 4494500 }, { "epoch": 2.69, "learning_rate": 3.717038580567279e-05, "loss": 1.2135, "step": 4495000 }, { "epoch": 2.7, "learning_rate": 3.716828584011222e-05, "loss": 1.2205, "step": 4495500 }, { "epoch": 2.7, "learning_rate": 3.716618587455166e-05, "loss": 1.2408, "step": 4496000 }, { "epoch": 2.7, "learning_rate": 3.7164085908991094e-05, "loss": 1.2011, "step": 4496500 }, { "epoch": 2.7, "learning_rate": 3.716198594343053e-05, "loss": 1.2201, "step": 4497000 }, { "epoch": 2.7, "learning_rate": 3.715988597786997e-05, "loss": 1.1845, "step": 4497500 }, { "epoch": 2.7, "learning_rate": 3.715779021224052e-05, "loss": 1.2008, "step": 4498000 }, { "epoch": 2.7, "learning_rate": 3.7155690246679955e-05, "loss": 1.204, "step": 4498500 }, { "epoch": 2.7, "learning_rate": 3.715359448105051e-05, "loss": 1.2202, "step": 4499000 }, { "epoch": 2.7, "learning_rate": 3.715149451548994e-05, "loss": 1.2081, "step": 4499500 }, { "epoch": 2.7, "learning_rate": 3.714939454992938e-05, "loss": 1.2204, "step": 4500000 }, { "epoch": 2.7, "eval_loss": 1.1660878658294678, "eval_runtime": 1104.5668, "eval_samples_per_second": 476.857, "eval_steps_per_second": 79.476, "step": 4500000 }, { "epoch": 2.7, "learning_rate": 3.7147294584368816e-05, "loss": 1.2168, "step": 4500500 }, { "epoch": 2.7, "learning_rate": 3.714519461880825e-05, "loss": 1.1995, "step": 4501000 }, { "epoch": 2.7, "learning_rate": 3.714309465324769e-05, "loss": 1.1965, "step": 4501500 }, { "epoch": 2.7, "learning_rate": 3.714099468768712e-05, "loss": 1.198, "step": 4502000 }, { "epoch": 2.7, "learning_rate": 3.7138894722126556e-05, "loss": 1.2243, "step": 4502500 }, { "epoch": 2.7, "learning_rate": 3.713679895649712e-05, "loss": 1.2141, "step": 4503000 }, { "epoch": 2.7, "learning_rate": 3.713470319086767e-05, "loss": 1.2205, "step": 4503500 }, { "epoch": 2.7, "learning_rate": 3.713260742523823e-05, "loss": 1.2158, "step": 4504000 }, { "epoch": 2.7, "learning_rate": 3.713050745967766e-05, "loss": 1.2192, "step": 4504500 }, { "epoch": 2.7, "learning_rate": 3.712840749411709e-05, "loss": 1.2056, "step": 4505000 }, { "epoch": 2.7, "learning_rate": 3.712630752855653e-05, "loss": 1.1917, "step": 4505500 }, { "epoch": 2.7, "learning_rate": 3.7124207562995965e-05, "loss": 1.2154, "step": 4506000 }, { "epoch": 2.7, "learning_rate": 3.71221075974354e-05, "loss": 1.2152, "step": 4506500 }, { "epoch": 2.7, "learning_rate": 3.712000763187484e-05, "loss": 1.215, "step": 4507000 }, { "epoch": 2.7, "learning_rate": 3.711790766631427e-05, "loss": 1.2267, "step": 4507500 }, { "epoch": 2.7, "learning_rate": 3.7115807700753705e-05, "loss": 1.1986, "step": 4508000 }, { "epoch": 2.7, "learning_rate": 3.7113707735193145e-05, "loss": 1.1956, "step": 4508500 }, { "epoch": 2.7, "learning_rate": 3.711160776963258e-05, "loss": 1.1994, "step": 4509000 }, { "epoch": 2.7, "learning_rate": 3.710950780407201e-05, "loss": 1.2137, "step": 4509500 }, { "epoch": 2.7, "learning_rate": 3.7107407838511446e-05, "loss": 1.2, "step": 4510000 }, { "epoch": 2.7, "learning_rate": 3.7105312072882006e-05, "loss": 1.2109, "step": 4510500 }, { "epoch": 2.7, "learning_rate": 3.710321210732144e-05, "loss": 1.2096, "step": 4511000 }, { "epoch": 2.7, "learning_rate": 3.710111214176088e-05, "loss": 1.2084, "step": 4511500 }, { "epoch": 2.71, "learning_rate": 3.709901217620031e-05, "loss": 1.2131, "step": 4512000 }, { "epoch": 2.71, "learning_rate": 3.709691221063974e-05, "loss": 1.2246, "step": 4512500 }, { "epoch": 2.71, "learning_rate": 3.709481224507918e-05, "loss": 1.1942, "step": 4513000 }, { "epoch": 2.71, "learning_rate": 3.7092712279518614e-05, "loss": 1.2029, "step": 4513500 }, { "epoch": 2.71, "learning_rate": 3.709061231395805e-05, "loss": 1.2152, "step": 4514000 }, { "epoch": 2.71, "learning_rate": 3.708851654832861e-05, "loss": 1.2066, "step": 4514500 }, { "epoch": 2.71, "learning_rate": 3.708641658276804e-05, "loss": 1.1868, "step": 4515000 }, { "epoch": 2.71, "learning_rate": 3.7084316617207474e-05, "loss": 1.2129, "step": 4515500 }, { "epoch": 2.71, "learning_rate": 3.708221665164691e-05, "loss": 1.1843, "step": 4516000 }, { "epoch": 2.71, "learning_rate": 3.708011668608635e-05, "loss": 1.2288, "step": 4516500 }, { "epoch": 2.71, "learning_rate": 3.707801672052578e-05, "loss": 1.1749, "step": 4517000 }, { "epoch": 2.71, "learning_rate": 3.7075916754965215e-05, "loss": 1.202, "step": 4517500 }, { "epoch": 2.71, "learning_rate": 3.7073820989335775e-05, "loss": 1.2309, "step": 4518000 }, { "epoch": 2.71, "learning_rate": 3.707172102377521e-05, "loss": 1.1955, "step": 4518500 }, { "epoch": 2.71, "learning_rate": 3.706962105821464e-05, "loss": 1.2199, "step": 4519000 }, { "epoch": 2.71, "learning_rate": 3.7067525292585196e-05, "loss": 1.218, "step": 4519500 }, { "epoch": 2.71, "learning_rate": 3.7065425327024636e-05, "loss": 1.2075, "step": 4520000 }, { "epoch": 2.71, "learning_rate": 3.706332536146407e-05, "loss": 1.2114, "step": 4520500 }, { "epoch": 2.71, "learning_rate": 3.70612253959035e-05, "loss": 1.1896, "step": 4521000 }, { "epoch": 2.71, "learning_rate": 3.705912543034294e-05, "loss": 1.2396, "step": 4521500 }, { "epoch": 2.71, "learning_rate": 3.705702546478238e-05, "loss": 1.2188, "step": 4522000 }, { "epoch": 2.71, "learning_rate": 3.705492549922181e-05, "loss": 1.1916, "step": 4522500 }, { "epoch": 2.71, "learning_rate": 3.705282553366125e-05, "loss": 1.2313, "step": 4523000 }, { "epoch": 2.71, "learning_rate": 3.7050725568100684e-05, "loss": 1.2217, "step": 4523500 }, { "epoch": 2.71, "learning_rate": 3.704862560254012e-05, "loss": 1.2234, "step": 4524000 }, { "epoch": 2.71, "learning_rate": 3.704652563697956e-05, "loss": 1.2273, "step": 4524500 }, { "epoch": 2.71, "learning_rate": 3.7044425671418984e-05, "loss": 1.2013, "step": 4525000 }, { "epoch": 2.71, "learning_rate": 3.7042329905789545e-05, "loss": 1.228, "step": 4525500 }, { "epoch": 2.71, "learning_rate": 3.704022994022898e-05, "loss": 1.2217, "step": 4526000 }, { "epoch": 2.71, "learning_rate": 3.703812997466842e-05, "loss": 1.2027, "step": 4526500 }, { "epoch": 2.71, "learning_rate": 3.703603000910785e-05, "loss": 1.1982, "step": 4527000 }, { "epoch": 2.71, "learning_rate": 3.7033930043547285e-05, "loss": 1.1679, "step": 4527500 }, { "epoch": 2.71, "learning_rate": 3.703183847784896e-05, "loss": 1.2017, "step": 4528000 }, { "epoch": 2.72, "learning_rate": 3.702974271221952e-05, "loss": 1.2312, "step": 4528500 }, { "epoch": 2.72, "learning_rate": 3.702764274665895e-05, "loss": 1.2109, "step": 4529000 }, { "epoch": 2.72, "learning_rate": 3.7025542781098386e-05, "loss": 1.2252, "step": 4529500 }, { "epoch": 2.72, "learning_rate": 3.702344281553782e-05, "loss": 1.2068, "step": 4530000 }, { "epoch": 2.72, "learning_rate": 3.702134284997726e-05, "loss": 1.1873, "step": 4530500 }, { "epoch": 2.72, "learning_rate": 3.701924288441669e-05, "loss": 1.2113, "step": 4531000 }, { "epoch": 2.72, "learning_rate": 3.701714291885613e-05, "loss": 1.2069, "step": 4531500 }, { "epoch": 2.72, "learning_rate": 3.701504295329557e-05, "loss": 1.1964, "step": 4532000 }, { "epoch": 2.72, "learning_rate": 3.7012942987735e-05, "loss": 1.2276, "step": 4532500 }, { "epoch": 2.72, "learning_rate": 3.7010843022174434e-05, "loss": 1.2111, "step": 4533000 }, { "epoch": 2.72, "learning_rate": 3.7008743056613874e-05, "loss": 1.1891, "step": 4533500 }, { "epoch": 2.72, "learning_rate": 3.700664309105331e-05, "loss": 1.221, "step": 4534000 }, { "epoch": 2.72, "learning_rate": 3.700454732542386e-05, "loss": 1.2013, "step": 4534500 }, { "epoch": 2.72, "learning_rate": 3.7002451559794415e-05, "loss": 1.2063, "step": 4535000 }, { "epoch": 2.72, "learning_rate": 3.7000351594233855e-05, "loss": 1.214, "step": 4535500 }, { "epoch": 2.72, "learning_rate": 3.699825162867329e-05, "loss": 1.191, "step": 4536000 }, { "epoch": 2.72, "learning_rate": 3.699615166311272e-05, "loss": 1.2092, "step": 4536500 }, { "epoch": 2.72, "learning_rate": 3.699405169755216e-05, "loss": 1.2009, "step": 4537000 }, { "epoch": 2.72, "learning_rate": 3.6991955931922716e-05, "loss": 1.1854, "step": 4537500 }, { "epoch": 2.72, "learning_rate": 3.698985596636215e-05, "loss": 1.2093, "step": 4538000 }, { "epoch": 2.72, "learning_rate": 3.698775600080158e-05, "loss": 1.2152, "step": 4538500 }, { "epoch": 2.72, "learning_rate": 3.698566023517214e-05, "loss": 1.1986, "step": 4539000 }, { "epoch": 2.72, "learning_rate": 3.6983560269611576e-05, "loss": 1.1942, "step": 4539500 }, { "epoch": 2.72, "learning_rate": 3.698146030405101e-05, "loss": 1.2155, "step": 4540000 }, { "epoch": 2.72, "learning_rate": 3.697936033849045e-05, "loss": 1.2098, "step": 4540500 }, { "epoch": 2.72, "learning_rate": 3.6977260372929884e-05, "loss": 1.1877, "step": 4541000 }, { "epoch": 2.72, "learning_rate": 3.697516040736932e-05, "loss": 1.1985, "step": 4541500 }, { "epoch": 2.72, "learning_rate": 3.697306044180876e-05, "loss": 1.2029, "step": 4542000 }, { "epoch": 2.72, "learning_rate": 3.697096047624819e-05, "loss": 1.2008, "step": 4542500 }, { "epoch": 2.72, "learning_rate": 3.6968860510687624e-05, "loss": 1.193, "step": 4543000 }, { "epoch": 2.72, "learning_rate": 3.6966760545127064e-05, "loss": 1.2343, "step": 4543500 }, { "epoch": 2.72, "learning_rate": 3.69646605795665e-05, "loss": 1.2487, "step": 4544000 }, { "epoch": 2.72, "learning_rate": 3.696256481393705e-05, "loss": 1.193, "step": 4544500 }, { "epoch": 2.72, "learning_rate": 3.6960464848376485e-05, "loss": 1.2261, "step": 4545000 }, { "epoch": 2.73, "learning_rate": 3.6958364882815925e-05, "loss": 1.1965, "step": 4545500 }, { "epoch": 2.73, "learning_rate": 3.695626491725536e-05, "loss": 1.2251, "step": 4546000 }, { "epoch": 2.73, "learning_rate": 3.6954164951694785e-05, "loss": 1.2047, "step": 4546500 }, { "epoch": 2.73, "learning_rate": 3.6952064986134226e-05, "loss": 1.2077, "step": 4547000 }, { "epoch": 2.73, "learning_rate": 3.694996502057366e-05, "loss": 1.2017, "step": 4547500 }, { "epoch": 2.73, "learning_rate": 3.694786925494422e-05, "loss": 1.1888, "step": 4548000 }, { "epoch": 2.73, "learning_rate": 3.694576928938366e-05, "loss": 1.2027, "step": 4548500 }, { "epoch": 2.73, "learning_rate": 3.6943669323823086e-05, "loss": 1.1987, "step": 4549000 }, { "epoch": 2.73, "learning_rate": 3.694156935826252e-05, "loss": 1.2177, "step": 4549500 }, { "epoch": 2.73, "learning_rate": 3.693947359263308e-05, "loss": 1.1962, "step": 4550000 }, { "epoch": 2.73, "learning_rate": 3.693737362707252e-05, "loss": 1.17, "step": 4550500 }, { "epoch": 2.73, "learning_rate": 3.6935273661511954e-05, "loss": 1.1927, "step": 4551000 }, { "epoch": 2.73, "learning_rate": 3.693317369595138e-05, "loss": 1.2085, "step": 4551500 }, { "epoch": 2.73, "learning_rate": 3.693107373039082e-05, "loss": 1.2002, "step": 4552000 }, { "epoch": 2.73, "learning_rate": 3.692897796476138e-05, "loss": 1.2013, "step": 4552500 }, { "epoch": 2.73, "learning_rate": 3.6926877999200815e-05, "loss": 1.203, "step": 4553000 }, { "epoch": 2.73, "learning_rate": 3.692477803364025e-05, "loss": 1.223, "step": 4553500 }, { "epoch": 2.73, "learning_rate": 3.692267806807968e-05, "loss": 1.1881, "step": 4554000 }, { "epoch": 2.73, "learning_rate": 3.6920578102519115e-05, "loss": 1.2013, "step": 4554500 }, { "epoch": 2.73, "learning_rate": 3.6918482336889675e-05, "loss": 1.2175, "step": 4555000 }, { "epoch": 2.73, "learning_rate": 3.6916382371329115e-05, "loss": 1.1975, "step": 4555500 }, { "epoch": 2.73, "learning_rate": 3.691428240576854e-05, "loss": 1.2009, "step": 4556000 }, { "epoch": 2.73, "learning_rate": 3.6912182440207976e-05, "loss": 1.2237, "step": 4556500 }, { "epoch": 2.73, "learning_rate": 3.6910082474647416e-05, "loss": 1.2099, "step": 4557000 }, { "epoch": 2.73, "learning_rate": 3.690798250908685e-05, "loss": 1.1489, "step": 4557500 }, { "epoch": 2.73, "learning_rate": 3.690588254352628e-05, "loss": 1.2198, "step": 4558000 }, { "epoch": 2.73, "learning_rate": 3.690378257796572e-05, "loss": 1.1984, "step": 4558500 }, { "epoch": 2.73, "learning_rate": 3.6901686812336277e-05, "loss": 1.2234, "step": 4559000 }, { "epoch": 2.73, "learning_rate": 3.689958684677571e-05, "loss": 1.1995, "step": 4559500 }, { "epoch": 2.73, "learning_rate": 3.689749108114627e-05, "loss": 1.2181, "step": 4560000 }, { "epoch": 2.73, "learning_rate": 3.6895391115585704e-05, "loss": 1.209, "step": 4560500 }, { "epoch": 2.73, "learning_rate": 3.689329115002514e-05, "loss": 1.208, "step": 4561000 }, { "epoch": 2.73, "learning_rate": 3.689119118446457e-05, "loss": 1.1806, "step": 4561500 }, { "epoch": 2.74, "learning_rate": 3.688909541883513e-05, "loss": 1.2011, "step": 4562000 }, { "epoch": 2.74, "learning_rate": 3.688699545327457e-05, "loss": 1.2142, "step": 4562500 }, { "epoch": 2.74, "learning_rate": 3.6884895487714005e-05, "loss": 1.1844, "step": 4563000 }, { "epoch": 2.74, "learning_rate": 3.688279552215343e-05, "loss": 1.2255, "step": 4563500 }, { "epoch": 2.74, "learning_rate": 3.688069975652399e-05, "loss": 1.2144, "step": 4564000 }, { "epoch": 2.74, "learning_rate": 3.687859979096343e-05, "loss": 1.2006, "step": 4564500 }, { "epoch": 2.74, "learning_rate": 3.6876499825402866e-05, "loss": 1.1816, "step": 4565000 }, { "epoch": 2.74, "learning_rate": 3.687439985984229e-05, "loss": 1.2074, "step": 4565500 }, { "epoch": 2.74, "learning_rate": 3.687229989428173e-05, "loss": 1.217, "step": 4566000 }, { "epoch": 2.74, "learning_rate": 3.687020412865229e-05, "loss": 1.174, "step": 4566500 }, { "epoch": 2.74, "learning_rate": 3.6868104163091726e-05, "loss": 1.2064, "step": 4567000 }, { "epoch": 2.74, "learning_rate": 3.686600419753116e-05, "loss": 1.2314, "step": 4567500 }, { "epoch": 2.74, "learning_rate": 3.686390423197059e-05, "loss": 1.1926, "step": 4568000 }, { "epoch": 2.74, "learning_rate": 3.686180426641003e-05, "loss": 1.2192, "step": 4568500 }, { "epoch": 2.74, "learning_rate": 3.685970850078059e-05, "loss": 1.2161, "step": 4569000 }, { "epoch": 2.74, "learning_rate": 3.685760853522003e-05, "loss": 1.1912, "step": 4569500 }, { "epoch": 2.74, "learning_rate": 3.685550856965946e-05, "loss": 1.2363, "step": 4570000 }, { "epoch": 2.74, "learning_rate": 3.685340860409889e-05, "loss": 1.2171, "step": 4570500 }, { "epoch": 2.74, "learning_rate": 3.685131283846945e-05, "loss": 1.1836, "step": 4571000 }, { "epoch": 2.74, "learning_rate": 3.684921707284e-05, "loss": 1.2071, "step": 4571500 }, { "epoch": 2.74, "learning_rate": 3.684711710727944e-05, "loss": 1.1999, "step": 4572000 }, { "epoch": 2.74, "learning_rate": 3.6845017141718875e-05, "loss": 1.2004, "step": 4572500 }, { "epoch": 2.74, "learning_rate": 3.684291717615831e-05, "loss": 1.2007, "step": 4573000 }, { "epoch": 2.74, "learning_rate": 3.684081721059775e-05, "loss": 1.1962, "step": 4573500 }, { "epoch": 2.74, "learning_rate": 3.683871724503718e-05, "loss": 1.2038, "step": 4574000 }, { "epoch": 2.74, "learning_rate": 3.6836617279476616e-05, "loss": 1.2125, "step": 4574500 }, { "epoch": 2.74, "learning_rate": 3.6834521513847176e-05, "loss": 1.1972, "step": 4575000 }, { "epoch": 2.74, "learning_rate": 3.683242154828661e-05, "loss": 1.2274, "step": 4575500 }, { "epoch": 2.74, "learning_rate": 3.683032158272604e-05, "loss": 1.2047, "step": 4576000 }, { "epoch": 2.74, "learning_rate": 3.682822161716548e-05, "loss": 1.2195, "step": 4576500 }, { "epoch": 2.74, "learning_rate": 3.6826121651604917e-05, "loss": 1.2151, "step": 4577000 }, { "epoch": 2.74, "learning_rate": 3.682402168604434e-05, "loss": 1.206, "step": 4577500 }, { "epoch": 2.74, "learning_rate": 3.6821925920414904e-05, "loss": 1.2157, "step": 4578000 }, { "epoch": 2.74, "learning_rate": 3.6819825954854344e-05, "loss": 1.1932, "step": 4578500 }, { "epoch": 2.75, "learning_rate": 3.681772598929378e-05, "loss": 1.202, "step": 4579000 }, { "epoch": 2.75, "learning_rate": 3.681562602373321e-05, "loss": 1.2344, "step": 4579500 }, { "epoch": 2.75, "learning_rate": 3.6813526058172644e-05, "loss": 1.2045, "step": 4580000 }, { "epoch": 2.75, "learning_rate": 3.681142609261208e-05, "loss": 1.2112, "step": 4580500 }, { "epoch": 2.75, "learning_rate": 3.680932612705151e-05, "loss": 1.2218, "step": 4581000 }, { "epoch": 2.75, "learning_rate": 3.680722616149095e-05, "loss": 1.2245, "step": 4581500 }, { "epoch": 2.75, "learning_rate": 3.6805126195930385e-05, "loss": 1.1939, "step": 4582000 }, { "epoch": 2.75, "learning_rate": 3.680302623036982e-05, "loss": 1.1896, "step": 4582500 }, { "epoch": 2.75, "learning_rate": 3.680092626480926e-05, "loss": 1.1903, "step": 4583000 }, { "epoch": 2.75, "learning_rate": 3.679882629924869e-05, "loss": 1.1853, "step": 4583500 }, { "epoch": 2.75, "learning_rate": 3.6796730533619246e-05, "loss": 1.208, "step": 4584000 }, { "epoch": 2.75, "learning_rate": 3.6794634767989806e-05, "loss": 1.2015, "step": 4584500 }, { "epoch": 2.75, "learning_rate": 3.679253480242924e-05, "loss": 1.215, "step": 4585000 }, { "epoch": 2.75, "learning_rate": 3.679043483686867e-05, "loss": 1.2204, "step": 4585500 }, { "epoch": 2.75, "learning_rate": 3.6788334871308106e-05, "loss": 1.2246, "step": 4586000 }, { "epoch": 2.75, "learning_rate": 3.6786234905747547e-05, "loss": 1.1694, "step": 4586500 }, { "epoch": 2.75, "learning_rate": 3.67841391401181e-05, "loss": 1.1844, "step": 4587000 }, { "epoch": 2.75, "learning_rate": 3.6782039174557534e-05, "loss": 1.203, "step": 4587500 }, { "epoch": 2.75, "learning_rate": 3.677993920899697e-05, "loss": 1.1947, "step": 4588000 }, { "epoch": 2.75, "learning_rate": 3.677783924343641e-05, "loss": 1.1909, "step": 4588500 }, { "epoch": 2.75, "learning_rate": 3.677573927787584e-05, "loss": 1.1864, "step": 4589000 }, { "epoch": 2.75, "learning_rate": 3.6773643512246394e-05, "loss": 1.1993, "step": 4589500 }, { "epoch": 2.75, "learning_rate": 3.6771547746616955e-05, "loss": 1.2095, "step": 4590000 }, { "epoch": 2.75, "learning_rate": 3.6769447781056395e-05, "loss": 1.194, "step": 4590500 }, { "epoch": 2.75, "learning_rate": 3.676734781549583e-05, "loss": 1.1733, "step": 4591000 }, { "epoch": 2.75, "learning_rate": 3.676524784993526e-05, "loss": 1.2103, "step": 4591500 }, { "epoch": 2.75, "learning_rate": 3.6763147884374695e-05, "loss": 1.1967, "step": 4592000 }, { "epoch": 2.75, "learning_rate": 3.676104791881413e-05, "loss": 1.1865, "step": 4592500 }, { "epoch": 2.75, "learning_rate": 3.675894795325356e-05, "loss": 1.2106, "step": 4593000 }, { "epoch": 2.75, "learning_rate": 3.6756847987693e-05, "loss": 1.1816, "step": 4593500 }, { "epoch": 2.75, "learning_rate": 3.6754748022132436e-05, "loss": 1.2173, "step": 4594000 }, { "epoch": 2.75, "learning_rate": 3.675265225650299e-05, "loss": 1.2146, "step": 4594500 }, { "epoch": 2.75, "learning_rate": 3.675055229094242e-05, "loss": 1.2085, "step": 4595000 }, { "epoch": 2.76, "learning_rate": 3.674845232538186e-05, "loss": 1.2103, "step": 4595500 }, { "epoch": 2.76, "learning_rate": 3.6746352359821297e-05, "loss": 1.2127, "step": 4596000 }, { "epoch": 2.76, "learning_rate": 3.674425239426073e-05, "loss": 1.2178, "step": 4596500 }, { "epoch": 2.76, "learning_rate": 3.674215242870017e-05, "loss": 1.1744, "step": 4597000 }, { "epoch": 2.76, "learning_rate": 3.6740056663070724e-05, "loss": 1.2118, "step": 4597500 }, { "epoch": 2.76, "learning_rate": 3.6737960897441284e-05, "loss": 1.193, "step": 4598000 }, { "epoch": 2.76, "learning_rate": 3.673586093188072e-05, "loss": 1.1942, "step": 4598500 }, { "epoch": 2.76, "learning_rate": 3.673376096632015e-05, "loss": 1.2093, "step": 4599000 }, { "epoch": 2.76, "learning_rate": 3.6731661000759585e-05, "loss": 1.1747, "step": 4599500 }, { "epoch": 2.76, "learning_rate": 3.672956103519902e-05, "loss": 1.2163, "step": 4600000 }, { "epoch": 2.76, "eval_loss": 1.163139820098877, "eval_runtime": 1103.2733, "eval_samples_per_second": 477.416, "eval_steps_per_second": 79.57, "step": 4600000 }, { "epoch": 2.76, "learning_rate": 3.672746106963846e-05, "loss": 1.2211, "step": 4600500 }, { "epoch": 2.76, "learning_rate": 3.672536110407789e-05, "loss": 1.204, "step": 4601000 }, { "epoch": 2.76, "learning_rate": 3.6723261138517325e-05, "loss": 1.2014, "step": 4601500 }, { "epoch": 2.76, "learning_rate": 3.6721161172956765e-05, "loss": 1.2161, "step": 4602000 }, { "epoch": 2.76, "learning_rate": 3.671906540732732e-05, "loss": 1.2148, "step": 4602500 }, { "epoch": 2.76, "learning_rate": 3.671696544176675e-05, "loss": 1.1951, "step": 4603000 }, { "epoch": 2.76, "learning_rate": 3.6714865476206186e-05, "loss": 1.2089, "step": 4603500 }, { "epoch": 2.76, "learning_rate": 3.6712765510645626e-05, "loss": 1.2359, "step": 4604000 }, { "epoch": 2.76, "learning_rate": 3.671066554508506e-05, "loss": 1.1791, "step": 4604500 }, { "epoch": 2.76, "learning_rate": 3.670856557952449e-05, "loss": 1.2236, "step": 4605000 }, { "epoch": 2.76, "learning_rate": 3.670646561396393e-05, "loss": 1.1952, "step": 4605500 }, { "epoch": 2.76, "learning_rate": 3.670436564840337e-05, "loss": 1.1927, "step": 4606000 }, { "epoch": 2.76, "learning_rate": 3.670226988277392e-05, "loss": 1.1981, "step": 4606500 }, { "epoch": 2.76, "learning_rate": 3.670016991721336e-05, "loss": 1.1933, "step": 4607000 }, { "epoch": 2.76, "learning_rate": 3.6698069951652794e-05, "loss": 1.215, "step": 4607500 }, { "epoch": 2.76, "learning_rate": 3.669596998609223e-05, "loss": 1.2234, "step": 4608000 }, { "epoch": 2.76, "learning_rate": 3.66938784203939e-05, "loss": 1.1973, "step": 4608500 }, { "epoch": 2.76, "learning_rate": 3.6691778454833335e-05, "loss": 1.2188, "step": 4609000 }, { "epoch": 2.76, "learning_rate": 3.6689678489272775e-05, "loss": 1.1961, "step": 4609500 }, { "epoch": 2.76, "learning_rate": 3.668757852371221e-05, "loss": 1.2124, "step": 4610000 }, { "epoch": 2.76, "learning_rate": 3.668547855815164e-05, "loss": 1.1954, "step": 4610500 }, { "epoch": 2.76, "learning_rate": 3.66833827925222e-05, "loss": 1.217, "step": 4611000 }, { "epoch": 2.76, "learning_rate": 3.6681282826961636e-05, "loss": 1.2008, "step": 4611500 }, { "epoch": 2.77, "learning_rate": 3.667918286140107e-05, "loss": 1.199, "step": 4612000 }, { "epoch": 2.77, "learning_rate": 3.667708289584051e-05, "loss": 1.1947, "step": 4612500 }, { "epoch": 2.77, "learning_rate": 3.667498293027994e-05, "loss": 1.1845, "step": 4613000 }, { "epoch": 2.77, "learning_rate": 3.6672882964719376e-05, "loss": 1.2102, "step": 4613500 }, { "epoch": 2.77, "learning_rate": 3.667078719908993e-05, "loss": 1.2148, "step": 4614000 }, { "epoch": 2.77, "learning_rate": 3.666868723352937e-05, "loss": 1.1992, "step": 4614500 }, { "epoch": 2.77, "learning_rate": 3.6666587267968804e-05, "loss": 1.1759, "step": 4615000 }, { "epoch": 2.77, "learning_rate": 3.666448730240824e-05, "loss": 1.1837, "step": 4615500 }, { "epoch": 2.77, "learning_rate": 3.666238733684768e-05, "loss": 1.202, "step": 4616000 }, { "epoch": 2.77, "learning_rate": 3.666028737128711e-05, "loss": 1.1811, "step": 4616500 }, { "epoch": 2.77, "learning_rate": 3.6658187405726544e-05, "loss": 1.1783, "step": 4617000 }, { "epoch": 2.77, "learning_rate": 3.6656087440165984e-05, "loss": 1.1965, "step": 4617500 }, { "epoch": 2.77, "learning_rate": 3.665399167453654e-05, "loss": 1.2006, "step": 4618000 }, { "epoch": 2.77, "learning_rate": 3.665189170897597e-05, "loss": 1.2456, "step": 4618500 }, { "epoch": 2.77, "learning_rate": 3.664979174341541e-05, "loss": 1.1703, "step": 4619000 }, { "epoch": 2.77, "learning_rate": 3.6647691777854845e-05, "loss": 1.1583, "step": 4619500 }, { "epoch": 2.77, "learning_rate": 3.66455960122254e-05, "loss": 1.2215, "step": 4620000 }, { "epoch": 2.77, "learning_rate": 3.664349604666483e-05, "loss": 1.2196, "step": 4620500 }, { "epoch": 2.77, "learning_rate": 3.6641400281035386e-05, "loss": 1.1872, "step": 4621000 }, { "epoch": 2.77, "learning_rate": 3.6639300315474826e-05, "loss": 1.2154, "step": 4621500 }, { "epoch": 2.77, "learning_rate": 3.663720034991426e-05, "loss": 1.2243, "step": 4622000 }, { "epoch": 2.77, "learning_rate": 3.663510038435369e-05, "loss": 1.2142, "step": 4622500 }, { "epoch": 2.77, "learning_rate": 3.6633004618724246e-05, "loss": 1.193, "step": 4623000 }, { "epoch": 2.77, "learning_rate": 3.663090465316369e-05, "loss": 1.2032, "step": 4623500 }, { "epoch": 2.77, "learning_rate": 3.662880468760312e-05, "loss": 1.2135, "step": 4624000 }, { "epoch": 2.77, "learning_rate": 3.6626704722042554e-05, "loss": 1.2345, "step": 4624500 }, { "epoch": 2.77, "learning_rate": 3.6624604756481994e-05, "loss": 1.2111, "step": 4625000 }, { "epoch": 2.77, "learning_rate": 3.662250899085255e-05, "loss": 1.2073, "step": 4625500 }, { "epoch": 2.77, "learning_rate": 3.662040902529198e-05, "loss": 1.2208, "step": 4626000 }, { "epoch": 2.77, "learning_rate": 3.661830905973142e-05, "loss": 1.1991, "step": 4626500 }, { "epoch": 2.77, "learning_rate": 3.6616209094170855e-05, "loss": 1.1907, "step": 4627000 }, { "epoch": 2.77, "learning_rate": 3.661410912861029e-05, "loss": 1.209, "step": 4627500 }, { "epoch": 2.77, "learning_rate": 3.661200916304973e-05, "loss": 1.1826, "step": 4628000 }, { "epoch": 2.77, "learning_rate": 3.660990919748916e-05, "loss": 1.2014, "step": 4628500 }, { "epoch": 2.78, "learning_rate": 3.6607809231928595e-05, "loss": 1.2212, "step": 4629000 }, { "epoch": 2.78, "learning_rate": 3.6605717666230276e-05, "loss": 1.1959, "step": 4629500 }, { "epoch": 2.78, "learning_rate": 3.66036177006697e-05, "loss": 1.2147, "step": 4630000 }, { "epoch": 2.78, "learning_rate": 3.660151773510914e-05, "loss": 1.2333, "step": 4630500 }, { "epoch": 2.78, "learning_rate": 3.6599417769548576e-05, "loss": 1.2151, "step": 4631000 }, { "epoch": 2.78, "learning_rate": 3.6597322003919136e-05, "loss": 1.2335, "step": 4631500 }, { "epoch": 2.78, "learning_rate": 3.6595222038358577e-05, "loss": 1.2113, "step": 4632000 }, { "epoch": 2.78, "learning_rate": 3.6593122072798e-05, "loss": 1.2189, "step": 4632500 }, { "epoch": 2.78, "learning_rate": 3.659102210723744e-05, "loss": 1.2152, "step": 4633000 }, { "epoch": 2.78, "learning_rate": 3.658892214167688e-05, "loss": 1.1874, "step": 4633500 }, { "epoch": 2.78, "learning_rate": 3.658682217611631e-05, "loss": 1.1624, "step": 4634000 }, { "epoch": 2.78, "learning_rate": 3.6584722210555744e-05, "loss": 1.1837, "step": 4634500 }, { "epoch": 2.78, "learning_rate": 3.6582622244995184e-05, "loss": 1.2007, "step": 4635000 }, { "epoch": 2.78, "learning_rate": 3.658052227943462e-05, "loss": 1.1904, "step": 4635500 }, { "epoch": 2.78, "learning_rate": 3.657842231387405e-05, "loss": 1.1769, "step": 4636000 }, { "epoch": 2.78, "learning_rate": 3.657633074817573e-05, "loss": 1.2046, "step": 4636500 }, { "epoch": 2.78, "learning_rate": 3.657423078261516e-05, "loss": 1.1649, "step": 4637000 }, { "epoch": 2.78, "learning_rate": 3.65721308170546e-05, "loss": 1.1793, "step": 4637500 }, { "epoch": 2.78, "learning_rate": 3.657003085149403e-05, "loss": 1.2177, "step": 4638000 }, { "epoch": 2.78, "learning_rate": 3.656793088593347e-05, "loss": 1.161, "step": 4638500 }, { "epoch": 2.78, "learning_rate": 3.6565830920372906e-05, "loss": 1.192, "step": 4639000 }, { "epoch": 2.78, "learning_rate": 3.656373095481234e-05, "loss": 1.205, "step": 4639500 }, { "epoch": 2.78, "learning_rate": 3.656163098925178e-05, "loss": 1.181, "step": 4640000 }, { "epoch": 2.78, "learning_rate": 3.655953102369121e-05, "loss": 1.1877, "step": 4640500 }, { "epoch": 2.78, "learning_rate": 3.6557435258061766e-05, "loss": 1.2009, "step": 4641000 }, { "epoch": 2.78, "learning_rate": 3.65553352925012e-05, "loss": 1.2092, "step": 4641500 }, { "epoch": 2.78, "learning_rate": 3.655323532694064e-05, "loss": 1.1991, "step": 4642000 }, { "epoch": 2.78, "learning_rate": 3.6551135361380073e-05, "loss": 1.1978, "step": 4642500 }, { "epoch": 2.78, "learning_rate": 3.654903539581951e-05, "loss": 1.1945, "step": 4643000 }, { "epoch": 2.78, "learning_rate": 3.654693543025895e-05, "loss": 1.186, "step": 4643500 }, { "epoch": 2.78, "learning_rate": 3.654483546469838e-05, "loss": 1.1879, "step": 4644000 }, { "epoch": 2.78, "learning_rate": 3.6542735499137814e-05, "loss": 1.1983, "step": 4644500 }, { "epoch": 2.78, "learning_rate": 3.654063973350837e-05, "loss": 1.2077, "step": 4645000 }, { "epoch": 2.79, "learning_rate": 3.653854396787893e-05, "loss": 1.2137, "step": 4645500 }, { "epoch": 2.79, "learning_rate": 3.653644400231836e-05, "loss": 1.2147, "step": 4646000 }, { "epoch": 2.79, "learning_rate": 3.6534344036757795e-05, "loss": 1.2019, "step": 4646500 }, { "epoch": 2.79, "learning_rate": 3.6532244071197235e-05, "loss": 1.2145, "step": 4647000 }, { "epoch": 2.79, "learning_rate": 3.653014410563667e-05, "loss": 1.2013, "step": 4647500 }, { "epoch": 2.79, "learning_rate": 3.65280441400761e-05, "loss": 1.1908, "step": 4648000 }, { "epoch": 2.79, "learning_rate": 3.6525948374446656e-05, "loss": 1.2172, "step": 4648500 }, { "epoch": 2.79, "learning_rate": 3.6523848408886096e-05, "loss": 1.207, "step": 4649000 }, { "epoch": 2.79, "learning_rate": 3.652174844332553e-05, "loss": 1.2157, "step": 4649500 }, { "epoch": 2.79, "learning_rate": 3.651965267769608e-05, "loss": 1.2491, "step": 4650000 }, { "epoch": 2.79, "learning_rate": 3.6517552712135516e-05, "loss": 1.2084, "step": 4650500 }, { "epoch": 2.79, "learning_rate": 3.6515452746574957e-05, "loss": 1.1814, "step": 4651000 }, { "epoch": 2.79, "learning_rate": 3.651335278101439e-05, "loss": 1.1819, "step": 4651500 }, { "epoch": 2.79, "learning_rate": 3.6511252815453824e-05, "loss": 1.2158, "step": 4652000 }, { "epoch": 2.79, "learning_rate": 3.6509152849893264e-05, "loss": 1.1715, "step": 4652500 }, { "epoch": 2.79, "learning_rate": 3.65070528843327e-05, "loss": 1.1916, "step": 4653000 }, { "epoch": 2.79, "learning_rate": 3.650495291877213e-05, "loss": 1.251, "step": 4653500 }, { "epoch": 2.79, "learning_rate": 3.650285295321157e-05, "loss": 1.1896, "step": 4654000 }, { "epoch": 2.79, "learning_rate": 3.6500761387513245e-05, "loss": 1.1931, "step": 4654500 }, { "epoch": 2.79, "learning_rate": 3.649866142195268e-05, "loss": 1.199, "step": 4655000 }, { "epoch": 2.79, "learning_rate": 3.649656145639211e-05, "loss": 1.2088, "step": 4655500 }, { "epoch": 2.79, "learning_rate": 3.649446149083155e-05, "loss": 1.1944, "step": 4656000 }, { "epoch": 2.79, "learning_rate": 3.6492361525270985e-05, "loss": 1.2331, "step": 4656500 }, { "epoch": 2.79, "learning_rate": 3.649026155971042e-05, "loss": 1.1861, "step": 4657000 }, { "epoch": 2.79, "learning_rate": 3.648816159414986e-05, "loss": 1.1927, "step": 4657500 }, { "epoch": 2.79, "learning_rate": 3.648606582852041e-05, "loss": 1.248, "step": 4658000 }, { "epoch": 2.79, "learning_rate": 3.6483965862959846e-05, "loss": 1.2187, "step": 4658500 }, { "epoch": 2.79, "learning_rate": 3.648186589739928e-05, "loss": 1.1919, "step": 4659000 }, { "epoch": 2.79, "learning_rate": 3.647976593183872e-05, "loss": 1.1589, "step": 4659500 }, { "epoch": 2.79, "learning_rate": 3.647766596627815e-05, "loss": 1.2003, "step": 4660000 }, { "epoch": 2.79, "learning_rate": 3.647557020064871e-05, "loss": 1.1919, "step": 4660500 }, { "epoch": 2.79, "learning_rate": 3.647347023508815e-05, "loss": 1.2178, "step": 4661000 }, { "epoch": 2.79, "learning_rate": 3.647137026952758e-05, "loss": 1.2156, "step": 4661500 }, { "epoch": 2.8, "learning_rate": 3.6469270303967014e-05, "loss": 1.2119, "step": 4662000 }, { "epoch": 2.8, "learning_rate": 3.6467170338406454e-05, "loss": 1.1729, "step": 4662500 }, { "epoch": 2.8, "learning_rate": 3.646507037284589e-05, "loss": 1.1975, "step": 4663000 }, { "epoch": 2.8, "learning_rate": 3.646297040728532e-05, "loss": 1.2193, "step": 4663500 }, { "epoch": 2.8, "learning_rate": 3.6460870441724754e-05, "loss": 1.1868, "step": 4664000 }, { "epoch": 2.8, "learning_rate": 3.645877047616419e-05, "loss": 1.2178, "step": 4664500 }, { "epoch": 2.8, "learning_rate": 3.645667051060362e-05, "loss": 1.1859, "step": 4665000 }, { "epoch": 2.8, "learning_rate": 3.645457054504306e-05, "loss": 1.2217, "step": 4665500 }, { "epoch": 2.8, "learning_rate": 3.645247477941362e-05, "loss": 1.1866, "step": 4666000 }, { "epoch": 2.8, "learning_rate": 3.645037481385305e-05, "loss": 1.2174, "step": 4666500 }, { "epoch": 2.8, "learning_rate": 3.644827484829248e-05, "loss": 1.1947, "step": 4667000 }, { "epoch": 2.8, "learning_rate": 3.644617488273192e-05, "loss": 1.1833, "step": 4667500 }, { "epoch": 2.8, "learning_rate": 3.6444074917171356e-05, "loss": 1.182, "step": 4668000 }, { "epoch": 2.8, "learning_rate": 3.6441979151541916e-05, "loss": 1.2013, "step": 4668500 }, { "epoch": 2.8, "learning_rate": 3.643987918598135e-05, "loss": 1.1957, "step": 4669000 }, { "epoch": 2.8, "learning_rate": 3.643778342035191e-05, "loss": 1.2004, "step": 4669500 }, { "epoch": 2.8, "learning_rate": 3.643568345479134e-05, "loss": 1.2074, "step": 4670000 }, { "epoch": 2.8, "learning_rate": 3.643358348923078e-05, "loss": 1.1961, "step": 4670500 }, { "epoch": 2.8, "learning_rate": 3.643148352367022e-05, "loss": 1.1886, "step": 4671000 }, { "epoch": 2.8, "learning_rate": 3.6429383558109644e-05, "loss": 1.1983, "step": 4671500 }, { "epoch": 2.8, "learning_rate": 3.6427287792480204e-05, "loss": 1.2227, "step": 4672000 }, { "epoch": 2.8, "learning_rate": 3.642518782691964e-05, "loss": 1.2107, "step": 4672500 }, { "epoch": 2.8, "learning_rate": 3.642308786135908e-05, "loss": 1.2124, "step": 4673000 }, { "epoch": 2.8, "learning_rate": 3.6420987895798504e-05, "loss": 1.2263, "step": 4673500 }, { "epoch": 2.8, "learning_rate": 3.6418892130169065e-05, "loss": 1.2343, "step": 4674000 }, { "epoch": 2.8, "learning_rate": 3.64167921646085e-05, "loss": 1.189, "step": 4674500 }, { "epoch": 2.8, "learning_rate": 3.641469219904794e-05, "loss": 1.22, "step": 4675000 }, { "epoch": 2.8, "learning_rate": 3.641259223348737e-05, "loss": 1.2115, "step": 4675500 }, { "epoch": 2.8, "learning_rate": 3.6410492267926805e-05, "loss": 1.2095, "step": 4676000 }, { "epoch": 2.8, "learning_rate": 3.640839230236624e-05, "loss": 1.2128, "step": 4676500 }, { "epoch": 2.8, "learning_rate": 3.640629233680567e-05, "loss": 1.2167, "step": 4677000 }, { "epoch": 2.8, "learning_rate": 3.640419237124511e-05, "loss": 1.2097, "step": 4677500 }, { "epoch": 2.8, "learning_rate": 3.6402092405684546e-05, "loss": 1.1971, "step": 4678000 }, { "epoch": 2.8, "learning_rate": 3.639999244012398e-05, "loss": 1.1878, "step": 4678500 }, { "epoch": 2.81, "learning_rate": 3.639789247456342e-05, "loss": 1.2122, "step": 4679000 }, { "epoch": 2.81, "learning_rate": 3.639579250900285e-05, "loss": 1.1921, "step": 4679500 }, { "epoch": 2.81, "learning_rate": 3.639369254344229e-05, "loss": 1.2129, "step": 4680000 }, { "epoch": 2.81, "learning_rate": 3.639159257788173e-05, "loss": 1.1916, "step": 4680500 }, { "epoch": 2.81, "learning_rate": 3.638949261232116e-05, "loss": 1.1941, "step": 4681000 }, { "epoch": 2.81, "learning_rate": 3.638739264676059e-05, "loss": 1.202, "step": 4681500 }, { "epoch": 2.81, "learning_rate": 3.638529688113115e-05, "loss": 1.2154, "step": 4682000 }, { "epoch": 2.81, "learning_rate": 3.638319691557059e-05, "loss": 1.2032, "step": 4682500 }, { "epoch": 2.81, "learning_rate": 3.638109695001002e-05, "loss": 1.1855, "step": 4683000 }, { "epoch": 2.81, "learning_rate": 3.6378996984449455e-05, "loss": 1.1944, "step": 4683500 }, { "epoch": 2.81, "learning_rate": 3.637690121882001e-05, "loss": 1.2142, "step": 4684000 }, { "epoch": 2.81, "learning_rate": 3.637480125325945e-05, "loss": 1.204, "step": 4684500 }, { "epoch": 2.81, "learning_rate": 3.637270128769888e-05, "loss": 1.1987, "step": 4685000 }, { "epoch": 2.81, "learning_rate": 3.637060132213832e-05, "loss": 1.2046, "step": 4685500 }, { "epoch": 2.81, "learning_rate": 3.6368501356577756e-05, "loss": 1.202, "step": 4686000 }, { "epoch": 2.81, "learning_rate": 3.636640559094831e-05, "loss": 1.2092, "step": 4686500 }, { "epoch": 2.81, "learning_rate": 3.636430562538774e-05, "loss": 1.1874, "step": 4687000 }, { "epoch": 2.81, "learning_rate": 3.636220565982718e-05, "loss": 1.2251, "step": 4687500 }, { "epoch": 2.81, "learning_rate": 3.6360105694266616e-05, "loss": 1.1958, "step": 4688000 }, { "epoch": 2.81, "learning_rate": 3.635800992863717e-05, "loss": 1.2091, "step": 4688500 }, { "epoch": 2.81, "learning_rate": 3.63559099630766e-05, "loss": 1.1761, "step": 4689000 }, { "epoch": 2.81, "learning_rate": 3.6353809997516044e-05, "loss": 1.1953, "step": 4689500 }, { "epoch": 2.81, "learning_rate": 3.635171003195548e-05, "loss": 1.2074, "step": 4690000 }, { "epoch": 2.81, "learning_rate": 3.634961006639491e-05, "loss": 1.2042, "step": 4690500 }, { "epoch": 2.81, "learning_rate": 3.6347510100834344e-05, "loss": 1.2107, "step": 4691000 }, { "epoch": 2.81, "learning_rate": 3.634541013527378e-05, "loss": 1.196, "step": 4691500 }, { "epoch": 2.81, "learning_rate": 3.634331016971322e-05, "loss": 1.1701, "step": 4692000 }, { "epoch": 2.81, "learning_rate": 3.634121020415265e-05, "loss": 1.2213, "step": 4692500 }, { "epoch": 2.81, "learning_rate": 3.6339110238592084e-05, "loss": 1.2067, "step": 4693000 }, { "epoch": 2.81, "learning_rate": 3.6337010273031525e-05, "loss": 1.2223, "step": 4693500 }, { "epoch": 2.81, "learning_rate": 3.633491450740208e-05, "loss": 1.2028, "step": 4694000 }, { "epoch": 2.81, "learning_rate": 3.633281454184151e-05, "loss": 1.2097, "step": 4694500 }, { "epoch": 2.81, "learning_rate": 3.6330714576280945e-05, "loss": 1.1858, "step": 4695000 }, { "epoch": 2.82, "learning_rate": 3.6328614610720385e-05, "loss": 1.2022, "step": 4695500 }, { "epoch": 2.82, "learning_rate": 3.632651464515982e-05, "loss": 1.218, "step": 4696000 }, { "epoch": 2.82, "learning_rate": 3.632441467959925e-05, "loss": 1.1727, "step": 4696500 }, { "epoch": 2.82, "learning_rate": 3.632231471403869e-05, "loss": 1.1869, "step": 4697000 }, { "epoch": 2.82, "learning_rate": 3.6320214748478126e-05, "loss": 1.191, "step": 4697500 }, { "epoch": 2.82, "learning_rate": 3.631811898284868e-05, "loss": 1.2257, "step": 4698000 }, { "epoch": 2.82, "learning_rate": 3.631601901728811e-05, "loss": 1.2039, "step": 4698500 }, { "epoch": 2.82, "learning_rate": 3.631391905172755e-05, "loss": 1.1765, "step": 4699000 }, { "epoch": 2.82, "learning_rate": 3.631181908616699e-05, "loss": 1.2188, "step": 4699500 }, { "epoch": 2.82, "learning_rate": 3.630971912060642e-05, "loss": 1.1825, "step": 4700000 }, { "epoch": 2.82, "eval_loss": 1.1633455753326416, "eval_runtime": 1101.2926, "eval_samples_per_second": 478.274, "eval_steps_per_second": 79.713, "step": 4700000 }, { "epoch": 2.82, "learning_rate": 3.630761915504586e-05, "loss": 1.2054, "step": 4700500 }, { "epoch": 2.82, "learning_rate": 3.6305519189485294e-05, "loss": 1.188, "step": 4701000 }, { "epoch": 2.82, "learning_rate": 3.630341922392473e-05, "loss": 1.1919, "step": 4701500 }, { "epoch": 2.82, "learning_rate": 3.630132345829529e-05, "loss": 1.2154, "step": 4702000 }, { "epoch": 2.82, "learning_rate": 3.629922349273472e-05, "loss": 1.2176, "step": 4702500 }, { "epoch": 2.82, "learning_rate": 3.6297123527174155e-05, "loss": 1.1711, "step": 4703000 }, { "epoch": 2.82, "learning_rate": 3.6295023561613595e-05, "loss": 1.1888, "step": 4703500 }, { "epoch": 2.82, "learning_rate": 3.629292779598415e-05, "loss": 1.2028, "step": 4704000 }, { "epoch": 2.82, "learning_rate": 3.62908320303547e-05, "loss": 1.1784, "step": 4704500 }, { "epoch": 2.82, "learning_rate": 3.6288732064794136e-05, "loss": 1.2093, "step": 4705000 }, { "epoch": 2.82, "learning_rate": 3.628663209923357e-05, "loss": 1.2052, "step": 4705500 }, { "epoch": 2.82, "learning_rate": 3.628453213367301e-05, "loss": 1.2177, "step": 4706000 }, { "epoch": 2.82, "learning_rate": 3.628243216811244e-05, "loss": 1.1742, "step": 4706500 }, { "epoch": 2.82, "learning_rate": 3.6280332202551876e-05, "loss": 1.2079, "step": 4707000 }, { "epoch": 2.82, "learning_rate": 3.6278232236991316e-05, "loss": 1.2041, "step": 4707500 }, { "epoch": 2.82, "learning_rate": 3.627613227143075e-05, "loss": 1.2185, "step": 4708000 }, { "epoch": 2.82, "learning_rate": 3.6274036505801303e-05, "loss": 1.2061, "step": 4708500 }, { "epoch": 2.82, "learning_rate": 3.6271936540240744e-05, "loss": 1.1878, "step": 4709000 }, { "epoch": 2.82, "learning_rate": 3.626983657468018e-05, "loss": 1.2126, "step": 4709500 }, { "epoch": 2.82, "learning_rate": 3.626773660911961e-05, "loss": 1.196, "step": 4710000 }, { "epoch": 2.82, "learning_rate": 3.6265640843490164e-05, "loss": 1.1912, "step": 4710500 }, { "epoch": 2.82, "learning_rate": 3.6263540877929604e-05, "loss": 1.2203, "step": 4711000 }, { "epoch": 2.82, "learning_rate": 3.626144091236904e-05, "loss": 1.1876, "step": 4711500 }, { "epoch": 2.83, "learning_rate": 3.625934094680847e-05, "loss": 1.2069, "step": 4712000 }, { "epoch": 2.83, "learning_rate": 3.625724098124791e-05, "loss": 1.2107, "step": 4712500 }, { "epoch": 2.83, "learning_rate": 3.6255141015687345e-05, "loss": 1.1891, "step": 4713000 }, { "epoch": 2.83, "learning_rate": 3.625304105012677e-05, "loss": 1.1799, "step": 4713500 }, { "epoch": 2.83, "learning_rate": 3.625094108456621e-05, "loss": 1.2006, "step": 4714000 }, { "epoch": 2.83, "learning_rate": 3.624884531893677e-05, "loss": 1.1825, "step": 4714500 }, { "epoch": 2.83, "learning_rate": 3.6246745353376206e-05, "loss": 1.2411, "step": 4715000 }, { "epoch": 2.83, "learning_rate": 3.624464538781564e-05, "loss": 1.1987, "step": 4715500 }, { "epoch": 2.83, "learning_rate": 3.624254542225507e-05, "loss": 1.2261, "step": 4716000 }, { "epoch": 2.83, "learning_rate": 3.6240445456694506e-05, "loss": 1.1952, "step": 4716500 }, { "epoch": 2.83, "learning_rate": 3.6238345491133946e-05, "loss": 1.195, "step": 4717000 }, { "epoch": 2.83, "learning_rate": 3.623624552557338e-05, "loss": 1.2201, "step": 4717500 }, { "epoch": 2.83, "learning_rate": 3.623414556001281e-05, "loss": 1.2103, "step": 4718000 }, { "epoch": 2.83, "learning_rate": 3.6232053994314494e-05, "loss": 1.2044, "step": 4718500 }, { "epoch": 2.83, "learning_rate": 3.622995402875393e-05, "loss": 1.2071, "step": 4719000 }, { "epoch": 2.83, "learning_rate": 3.622785406319337e-05, "loss": 1.1848, "step": 4719500 }, { "epoch": 2.83, "learning_rate": 3.62257540976328e-05, "loss": 1.2019, "step": 4720000 }, { "epoch": 2.83, "learning_rate": 3.622365413207223e-05, "loss": 1.1901, "step": 4720500 }, { "epoch": 2.83, "learning_rate": 3.622155416651167e-05, "loss": 1.1973, "step": 4721000 }, { "epoch": 2.83, "learning_rate": 3.62194542009511e-05, "loss": 1.1691, "step": 4721500 }, { "epoch": 2.83, "learning_rate": 3.6217354235390535e-05, "loss": 1.2154, "step": 4722000 }, { "epoch": 2.83, "learning_rate": 3.6215258469761095e-05, "loss": 1.2039, "step": 4722500 }, { "epoch": 2.83, "learning_rate": 3.6213162704131655e-05, "loss": 1.2102, "step": 4723000 }, { "epoch": 2.83, "learning_rate": 3.621106273857109e-05, "loss": 1.1888, "step": 4723500 }, { "epoch": 2.83, "learning_rate": 3.620896277301052e-05, "loss": 1.1894, "step": 4724000 }, { "epoch": 2.83, "learning_rate": 3.620686280744996e-05, "loss": 1.1901, "step": 4724500 }, { "epoch": 2.83, "learning_rate": 3.6204762841889396e-05, "loss": 1.2517, "step": 4725000 }, { "epoch": 2.83, "learning_rate": 3.620266707625995e-05, "loss": 1.1845, "step": 4725500 }, { "epoch": 2.83, "learning_rate": 3.62005713106305e-05, "loss": 1.1993, "step": 4726000 }, { "epoch": 2.83, "learning_rate": 3.619847134506994e-05, "loss": 1.1984, "step": 4726500 }, { "epoch": 2.83, "learning_rate": 3.619637137950938e-05, "loss": 1.194, "step": 4727000 }, { "epoch": 2.83, "learning_rate": 3.619427141394881e-05, "loss": 1.2029, "step": 4727500 }, { "epoch": 2.83, "learning_rate": 3.6192171448388244e-05, "loss": 1.2284, "step": 4728000 }, { "epoch": 2.83, "learning_rate": 3.6190071482827684e-05, "loss": 1.2056, "step": 4728500 }, { "epoch": 2.84, "learning_rate": 3.618797151726712e-05, "loss": 1.1984, "step": 4729000 }, { "epoch": 2.84, "learning_rate": 3.618587155170655e-05, "loss": 1.1771, "step": 4729500 }, { "epoch": 2.84, "learning_rate": 3.6183771586145984e-05, "loss": 1.1913, "step": 4730000 }, { "epoch": 2.84, "learning_rate": 3.6181675820516545e-05, "loss": 1.2038, "step": 4730500 }, { "epoch": 2.84, "learning_rate": 3.617957585495598e-05, "loss": 1.2057, "step": 4731000 }, { "epoch": 2.84, "learning_rate": 3.617747588939542e-05, "loss": 1.2085, "step": 4731500 }, { "epoch": 2.84, "learning_rate": 3.617537592383485e-05, "loss": 1.2051, "step": 4732000 }, { "epoch": 2.84, "learning_rate": 3.6173280158205405e-05, "loss": 1.2227, "step": 4732500 }, { "epoch": 2.84, "learning_rate": 3.617118439257596e-05, "loss": 1.203, "step": 4733000 }, { "epoch": 2.84, "learning_rate": 3.616908442701539e-05, "loss": 1.2091, "step": 4733500 }, { "epoch": 2.84, "learning_rate": 3.616698446145483e-05, "loss": 1.2071, "step": 4734000 }, { "epoch": 2.84, "learning_rate": 3.6164884495894266e-05, "loss": 1.2114, "step": 4734500 }, { "epoch": 2.84, "learning_rate": 3.61627845303337e-05, "loss": 1.1817, "step": 4735000 }, { "epoch": 2.84, "learning_rate": 3.616068456477314e-05, "loss": 1.2095, "step": 4735500 }, { "epoch": 2.84, "learning_rate": 3.615858459921257e-05, "loss": 1.1924, "step": 4736000 }, { "epoch": 2.84, "learning_rate": 3.615648463365201e-05, "loss": 1.199, "step": 4736500 }, { "epoch": 2.84, "learning_rate": 3.615438886802257e-05, "loss": 1.1851, "step": 4737000 }, { "epoch": 2.84, "learning_rate": 3.6152288902462e-05, "loss": 1.2288, "step": 4737500 }, { "epoch": 2.84, "learning_rate": 3.6150188936901434e-05, "loss": 1.1873, "step": 4738000 }, { "epoch": 2.84, "learning_rate": 3.6148088971340874e-05, "loss": 1.2014, "step": 4738500 }, { "epoch": 2.84, "learning_rate": 3.614598900578031e-05, "loss": 1.2026, "step": 4739000 }, { "epoch": 2.84, "learning_rate": 3.614389324015086e-05, "loss": 1.1921, "step": 4739500 }, { "epoch": 2.84, "learning_rate": 3.6141793274590295e-05, "loss": 1.1905, "step": 4740000 }, { "epoch": 2.84, "learning_rate": 3.6139693309029735e-05, "loss": 1.2072, "step": 4740500 }, { "epoch": 2.84, "learning_rate": 3.613759334346917e-05, "loss": 1.2167, "step": 4741000 }, { "epoch": 2.84, "learning_rate": 3.613549757783972e-05, "loss": 1.207, "step": 4741500 }, { "epoch": 2.84, "learning_rate": 3.6133397612279156e-05, "loss": 1.2005, "step": 4742000 }, { "epoch": 2.84, "learning_rate": 3.6131297646718596e-05, "loss": 1.2083, "step": 4742500 }, { "epoch": 2.84, "learning_rate": 3.612919768115803e-05, "loss": 1.2005, "step": 4743000 }, { "epoch": 2.84, "learning_rate": 3.612709771559747e-05, "loss": 1.2197, "step": 4743500 }, { "epoch": 2.84, "learning_rate": 3.612500194996802e-05, "loss": 1.2311, "step": 4744000 }, { "epoch": 2.84, "learning_rate": 3.6122901984407456e-05, "loss": 1.1843, "step": 4744500 }, { "epoch": 2.84, "learning_rate": 3.612080201884689e-05, "loss": 1.1958, "step": 4745000 }, { "epoch": 2.85, "learning_rate": 3.611870205328633e-05, "loss": 1.219, "step": 4745500 }, { "epoch": 2.85, "learning_rate": 3.6116602087725764e-05, "loss": 1.2087, "step": 4746000 }, { "epoch": 2.85, "learning_rate": 3.611450632209632e-05, "loss": 1.1806, "step": 4746500 }, { "epoch": 2.85, "learning_rate": 3.611240635653575e-05, "loss": 1.2002, "step": 4747000 }, { "epoch": 2.85, "learning_rate": 3.611030639097519e-05, "loss": 1.2056, "step": 4747500 }, { "epoch": 2.85, "learning_rate": 3.6108206425414624e-05, "loss": 1.199, "step": 4748000 }, { "epoch": 2.85, "learning_rate": 3.610610645985406e-05, "loss": 1.1788, "step": 4748500 }, { "epoch": 2.85, "learning_rate": 3.610400649429349e-05, "loss": 1.2023, "step": 4749000 }, { "epoch": 2.85, "learning_rate": 3.6101906528732925e-05, "loss": 1.2092, "step": 4749500 }, { "epoch": 2.85, "learning_rate": 3.609980656317236e-05, "loss": 1.2067, "step": 4750000 }, { "epoch": 2.85, "learning_rate": 3.60977065976118e-05, "loss": 1.1919, "step": 4750500 }, { "epoch": 2.85, "learning_rate": 3.609561083198236e-05, "loss": 1.218, "step": 4751000 }, { "epoch": 2.85, "learning_rate": 3.6093510866421785e-05, "loss": 1.197, "step": 4751500 }, { "epoch": 2.85, "learning_rate": 3.6091410900861226e-05, "loss": 1.2201, "step": 4752000 }, { "epoch": 2.85, "learning_rate": 3.608931093530066e-05, "loss": 1.1732, "step": 4752500 }, { "epoch": 2.85, "learning_rate": 3.608721096974009e-05, "loss": 1.1765, "step": 4753000 }, { "epoch": 2.85, "learning_rate": 3.608511100417953e-05, "loss": 1.1958, "step": 4753500 }, { "epoch": 2.85, "learning_rate": 3.6083015238550086e-05, "loss": 1.2067, "step": 4754000 }, { "epoch": 2.85, "learning_rate": 3.608091527298952e-05, "loss": 1.1921, "step": 4754500 }, { "epoch": 2.85, "learning_rate": 3.607881950736008e-05, "loss": 1.1672, "step": 4755000 }, { "epoch": 2.85, "learning_rate": 3.6076719541799514e-05, "loss": 1.1818, "step": 4755500 }, { "epoch": 2.85, "learning_rate": 3.6074619576238954e-05, "loss": 1.2246, "step": 4756000 }, { "epoch": 2.85, "learning_rate": 3.607251961067838e-05, "loss": 1.1816, "step": 4756500 }, { "epoch": 2.85, "learning_rate": 3.607041964511782e-05, "loss": 1.2222, "step": 4757000 }, { "epoch": 2.85, "learning_rate": 3.606832387948838e-05, "loss": 1.2449, "step": 4757500 }, { "epoch": 2.85, "learning_rate": 3.6066223913927815e-05, "loss": 1.1944, "step": 4758000 }, { "epoch": 2.85, "learning_rate": 3.606412394836724e-05, "loss": 1.2011, "step": 4758500 }, { "epoch": 2.85, "learning_rate": 3.606202398280668e-05, "loss": 1.1835, "step": 4759000 }, { "epoch": 2.85, "learning_rate": 3.6059924017246115e-05, "loss": 1.1727, "step": 4759500 }, { "epoch": 2.85, "learning_rate": 3.605782405168555e-05, "loss": 1.2028, "step": 4760000 }, { "epoch": 2.85, "learning_rate": 3.605572828605611e-05, "loss": 1.2084, "step": 4760500 }, { "epoch": 2.85, "learning_rate": 3.605362832049554e-05, "loss": 1.2158, "step": 4761000 }, { "epoch": 2.85, "learning_rate": 3.6051528354934976e-05, "loss": 1.1973, "step": 4761500 }, { "epoch": 2.86, "learning_rate": 3.604942838937441e-05, "loss": 1.1967, "step": 4762000 }, { "epoch": 2.86, "learning_rate": 3.604732842381385e-05, "loss": 1.1897, "step": 4762500 }, { "epoch": 2.86, "learning_rate": 3.604522845825328e-05, "loss": 1.2448, "step": 4763000 }, { "epoch": 2.86, "learning_rate": 3.6043132692623837e-05, "loss": 1.2157, "step": 4763500 }, { "epoch": 2.86, "learning_rate": 3.604103272706328e-05, "loss": 1.1874, "step": 4764000 }, { "epoch": 2.86, "learning_rate": 3.603893276150271e-05, "loss": 1.2273, "step": 4764500 }, { "epoch": 2.86, "learning_rate": 3.6036832795942144e-05, "loss": 1.228, "step": 4765000 }, { "epoch": 2.86, "learning_rate": 3.6034732830381584e-05, "loss": 1.1743, "step": 4765500 }, { "epoch": 2.86, "learning_rate": 3.603263706475214e-05, "loss": 1.175, "step": 4766000 }, { "epoch": 2.86, "learning_rate": 3.603053709919157e-05, "loss": 1.1941, "step": 4766500 }, { "epoch": 2.86, "learning_rate": 3.6028437133631004e-05, "loss": 1.1667, "step": 4767000 }, { "epoch": 2.86, "learning_rate": 3.6026337168070445e-05, "loss": 1.2261, "step": 4767500 }, { "epoch": 2.86, "learning_rate": 3.602423720250988e-05, "loss": 1.2095, "step": 4768000 }, { "epoch": 2.86, "learning_rate": 3.602213723694931e-05, "loss": 1.2033, "step": 4768500 }, { "epoch": 2.86, "learning_rate": 3.6020041471319865e-05, "loss": 1.2201, "step": 4769000 }, { "epoch": 2.86, "learning_rate": 3.6017941505759305e-05, "loss": 1.1929, "step": 4769500 }, { "epoch": 2.86, "learning_rate": 3.601584154019874e-05, "loss": 1.189, "step": 4770000 }, { "epoch": 2.86, "learning_rate": 3.601374157463817e-05, "loss": 1.204, "step": 4770500 }, { "epoch": 2.86, "learning_rate": 3.601164160907761e-05, "loss": 1.2056, "step": 4771000 }, { "epoch": 2.86, "learning_rate": 3.6009541643517046e-05, "loss": 1.2071, "step": 4771500 }, { "epoch": 2.86, "learning_rate": 3.600744167795648e-05, "loss": 1.2204, "step": 4772000 }, { "epoch": 2.86, "learning_rate": 3.600534171239592e-05, "loss": 1.2162, "step": 4772500 }, { "epoch": 2.86, "learning_rate": 3.600324174683535e-05, "loss": 1.1889, "step": 4773000 }, { "epoch": 2.86, "learning_rate": 3.6001141781274787e-05, "loss": 1.1886, "step": 4773500 }, { "epoch": 2.86, "learning_rate": 3.599904181571422e-05, "loss": 1.2125, "step": 4774000 }, { "epoch": 2.86, "learning_rate": 3.5996941850153653e-05, "loss": 1.1845, "step": 4774500 }, { "epoch": 2.86, "learning_rate": 3.5994846084524214e-05, "loss": 1.1981, "step": 4775000 }, { "epoch": 2.86, "learning_rate": 3.5992746118963654e-05, "loss": 1.2056, "step": 4775500 }, { "epoch": 2.86, "learning_rate": 3.599065035333421e-05, "loss": 1.1964, "step": 4776000 }, { "epoch": 2.86, "learning_rate": 3.598855038777364e-05, "loss": 1.1839, "step": 4776500 }, { "epoch": 2.86, "learning_rate": 3.5986450422213075e-05, "loss": 1.2048, "step": 4777000 }, { "epoch": 2.86, "learning_rate": 3.5984350456652515e-05, "loss": 1.202, "step": 4777500 }, { "epoch": 2.86, "learning_rate": 3.598225049109195e-05, "loss": 1.2022, "step": 4778000 }, { "epoch": 2.86, "learning_rate": 3.59801547254625e-05, "loss": 1.1963, "step": 4778500 }, { "epoch": 2.87, "learning_rate": 3.5978054759901935e-05, "loss": 1.2098, "step": 4779000 }, { "epoch": 2.87, "learning_rate": 3.5975954794341376e-05, "loss": 1.1974, "step": 4779500 }, { "epoch": 2.87, "learning_rate": 3.597385482878081e-05, "loss": 1.2121, "step": 4780000 }, { "epoch": 2.87, "learning_rate": 3.597175486322024e-05, "loss": 1.2044, "step": 4780500 }, { "epoch": 2.87, "learning_rate": 3.5969654897659676e-05, "loss": 1.2116, "step": 4781000 }, { "epoch": 2.87, "learning_rate": 3.5967559132030236e-05, "loss": 1.2026, "step": 4781500 }, { "epoch": 2.87, "learning_rate": 3.596545916646967e-05, "loss": 1.2008, "step": 4782000 }, { "epoch": 2.87, "learning_rate": 3.596335920090911e-05, "loss": 1.2317, "step": 4782500 }, { "epoch": 2.87, "learning_rate": 3.596125923534854e-05, "loss": 1.1985, "step": 4783000 }, { "epoch": 2.87, "learning_rate": 3.59591634697191e-05, "loss": 1.1721, "step": 4783500 }, { "epoch": 2.87, "learning_rate": 3.595706350415853e-05, "loss": 1.1958, "step": 4784000 }, { "epoch": 2.87, "learning_rate": 3.595496353859797e-05, "loss": 1.2187, "step": 4784500 }, { "epoch": 2.87, "learning_rate": 3.5952863573037404e-05, "loss": 1.1704, "step": 4785000 }, { "epoch": 2.87, "learning_rate": 3.595076780740796e-05, "loss": 1.2295, "step": 4785500 }, { "epoch": 2.87, "learning_rate": 3.594866784184739e-05, "loss": 1.1823, "step": 4786000 }, { "epoch": 2.87, "learning_rate": 3.594656787628683e-05, "loss": 1.2088, "step": 4786500 }, { "epoch": 2.87, "learning_rate": 3.5944467910726265e-05, "loss": 1.1953, "step": 4787000 }, { "epoch": 2.87, "learning_rate": 3.594237214509682e-05, "loss": 1.2237, "step": 4787500 }, { "epoch": 2.87, "learning_rate": 3.594027217953626e-05, "loss": 1.2132, "step": 4788000 }, { "epoch": 2.87, "learning_rate": 3.593817221397569e-05, "loss": 1.1921, "step": 4788500 }, { "epoch": 2.87, "learning_rate": 3.5936072248415126e-05, "loss": 1.236, "step": 4789000 }, { "epoch": 2.87, "learning_rate": 3.5933972282854566e-05, "loss": 1.2194, "step": 4789500 }, { "epoch": 2.87, "learning_rate": 3.5931872317294e-05, "loss": 1.2034, "step": 4790000 }, { "epoch": 2.87, "learning_rate": 3.5929772351733426e-05, "loss": 1.2056, "step": 4790500 }, { "epoch": 2.87, "learning_rate": 3.5927672386172866e-05, "loss": 1.1931, "step": 4791000 }, { "epoch": 2.87, "learning_rate": 3.59255724206123e-05, "loss": 1.1875, "step": 4791500 }, { "epoch": 2.87, "learning_rate": 3.592347245505173e-05, "loss": 1.2071, "step": 4792000 }, { "epoch": 2.87, "learning_rate": 3.592137248949117e-05, "loss": 1.2045, "step": 4792500 }, { "epoch": 2.87, "learning_rate": 3.591927252393061e-05, "loss": 1.1994, "step": 4793000 }, { "epoch": 2.87, "learning_rate": 3.591717255837004e-05, "loss": 1.1904, "step": 4793500 }, { "epoch": 2.87, "learning_rate": 3.591507259280948e-05, "loss": 1.1726, "step": 4794000 }, { "epoch": 2.87, "learning_rate": 3.5912976827180034e-05, "loss": 1.2033, "step": 4794500 }, { "epoch": 2.87, "learning_rate": 3.591087686161947e-05, "loss": 1.212, "step": 4795000 }, { "epoch": 2.88, "learning_rate": 3.59087768960589e-05, "loss": 1.2119, "step": 4795500 }, { "epoch": 2.88, "learning_rate": 3.590667693049834e-05, "loss": 1.216, "step": 4796000 }, { "epoch": 2.88, "learning_rate": 3.5904576964937775e-05, "loss": 1.1873, "step": 4796500 }, { "epoch": 2.88, "learning_rate": 3.5902476999377215e-05, "loss": 1.1738, "step": 4797000 }, { "epoch": 2.88, "learning_rate": 3.590037703381665e-05, "loss": 1.1922, "step": 4797500 }, { "epoch": 2.88, "learning_rate": 3.58982812681872e-05, "loss": 1.216, "step": 4798000 }, { "epoch": 2.88, "learning_rate": 3.5896181302626635e-05, "loss": 1.2084, "step": 4798500 }, { "epoch": 2.88, "learning_rate": 3.5894081337066076e-05, "loss": 1.1966, "step": 4799000 }, { "epoch": 2.88, "learning_rate": 3.589198137150551e-05, "loss": 1.2035, "step": 4799500 }, { "epoch": 2.88, "learning_rate": 3.588988560587606e-05, "loss": 1.1797, "step": 4800000 }, { "epoch": 2.88, "eval_loss": 1.1569877862930298, "eval_runtime": 1104.5998, "eval_samples_per_second": 476.842, "eval_steps_per_second": 79.474, "step": 4800000 }, { "epoch": 2.88, "learning_rate": 3.5887785640315496e-05, "loss": 1.2247, "step": 4800500 }, { "epoch": 2.88, "learning_rate": 3.5885685674754936e-05, "loss": 1.205, "step": 4801000 }, { "epoch": 2.88, "learning_rate": 3.588358570919437e-05, "loss": 1.1895, "step": 4801500 }, { "epoch": 2.88, "learning_rate": 3.58814857436338e-05, "loss": 1.1901, "step": 4802000 }, { "epoch": 2.88, "learning_rate": 3.587938997800436e-05, "loss": 1.2086, "step": 4802500 }, { "epoch": 2.88, "learning_rate": 3.58772900124438e-05, "loss": 1.1996, "step": 4803000 }, { "epoch": 2.88, "learning_rate": 3.587519424681435e-05, "loss": 1.1872, "step": 4803500 }, { "epoch": 2.88, "learning_rate": 3.5873094281253784e-05, "loss": 1.2036, "step": 4804000 }, { "epoch": 2.88, "learning_rate": 3.5870994315693224e-05, "loss": 1.203, "step": 4804500 }, { "epoch": 2.88, "learning_rate": 3.586889855006378e-05, "loss": 1.1776, "step": 4805000 }, { "epoch": 2.88, "learning_rate": 3.586679858450321e-05, "loss": 1.218, "step": 4805500 }, { "epoch": 2.88, "learning_rate": 3.5864698618942645e-05, "loss": 1.1744, "step": 4806000 }, { "epoch": 2.88, "learning_rate": 3.5862598653382085e-05, "loss": 1.2032, "step": 4806500 }, { "epoch": 2.88, "learning_rate": 3.586049868782152e-05, "loss": 1.2093, "step": 4807000 }, { "epoch": 2.88, "learning_rate": 3.585839872226095e-05, "loss": 1.1724, "step": 4807500 }, { "epoch": 2.88, "learning_rate": 3.585629875670039e-05, "loss": 1.2006, "step": 4808000 }, { "epoch": 2.88, "learning_rate": 3.5854198791139826e-05, "loss": 1.1943, "step": 4808500 }, { "epoch": 2.88, "learning_rate": 3.585209882557926e-05, "loss": 1.2294, "step": 4809000 }, { "epoch": 2.88, "learning_rate": 3.58499988600187e-05, "loss": 1.1754, "step": 4809500 }, { "epoch": 2.88, "learning_rate": 3.5847898894458126e-05, "loss": 1.2126, "step": 4810000 }, { "epoch": 2.88, "learning_rate": 3.5845798928897566e-05, "loss": 1.2136, "step": 4810500 }, { "epoch": 2.88, "learning_rate": 3.5843698963337e-05, "loss": 1.1973, "step": 4811000 }, { "epoch": 2.88, "learning_rate": 3.584159899777643e-05, "loss": 1.2096, "step": 4811500 }, { "epoch": 2.88, "learning_rate": 3.5839499032215874e-05, "loss": 1.1865, "step": 4812000 }, { "epoch": 2.89, "learning_rate": 3.583739906665531e-05, "loss": 1.1923, "step": 4812500 }, { "epoch": 2.89, "learning_rate": 3.583530330102586e-05, "loss": 1.1776, "step": 4813000 }, { "epoch": 2.89, "learning_rate": 3.5833203335465294e-05, "loss": 1.2029, "step": 4813500 }, { "epoch": 2.89, "learning_rate": 3.5831103369904734e-05, "loss": 1.1947, "step": 4814000 }, { "epoch": 2.89, "learning_rate": 3.582900340434417e-05, "loss": 1.189, "step": 4814500 }, { "epoch": 2.89, "learning_rate": 3.582690763871472e-05, "loss": 1.2078, "step": 4815000 }, { "epoch": 2.89, "learning_rate": 3.5824807673154155e-05, "loss": 1.1927, "step": 4815500 }, { "epoch": 2.89, "learning_rate": 3.5822707707593595e-05, "loss": 1.2251, "step": 4816000 }, { "epoch": 2.89, "learning_rate": 3.582060774203303e-05, "loss": 1.2161, "step": 4816500 }, { "epoch": 2.89, "learning_rate": 3.581850777647246e-05, "loss": 1.19, "step": 4817000 }, { "epoch": 2.89, "learning_rate": 3.581641201084302e-05, "loss": 1.2019, "step": 4817500 }, { "epoch": 2.89, "learning_rate": 3.5814312045282456e-05, "loss": 1.1848, "step": 4818000 }, { "epoch": 2.89, "learning_rate": 3.581221207972189e-05, "loss": 1.1792, "step": 4818500 }, { "epoch": 2.89, "learning_rate": 3.581011631409245e-05, "loss": 1.1943, "step": 4819000 }, { "epoch": 2.89, "learning_rate": 3.580801634853188e-05, "loss": 1.2053, "step": 4819500 }, { "epoch": 2.89, "learning_rate": 3.5805916382971316e-05, "loss": 1.1727, "step": 4820000 }, { "epoch": 2.89, "learning_rate": 3.580381641741075e-05, "loss": 1.1908, "step": 4820500 }, { "epoch": 2.89, "learning_rate": 3.580171645185019e-05, "loss": 1.1763, "step": 4821000 }, { "epoch": 2.89, "learning_rate": 3.5799616486289624e-05, "loss": 1.1968, "step": 4821500 }, { "epoch": 2.89, "learning_rate": 3.579751652072906e-05, "loss": 1.2089, "step": 4822000 }, { "epoch": 2.89, "learning_rate": 3.57954165551685e-05, "loss": 1.1851, "step": 4822500 }, { "epoch": 2.89, "learning_rate": 3.579331658960793e-05, "loss": 1.2242, "step": 4823000 }, { "epoch": 2.89, "learning_rate": 3.5791216624047364e-05, "loss": 1.1793, "step": 4823500 }, { "epoch": 2.89, "learning_rate": 3.5789116658486804e-05, "loss": 1.1819, "step": 4824000 }, { "epoch": 2.89, "learning_rate": 3.578701669292624e-05, "loss": 1.174, "step": 4824500 }, { "epoch": 2.89, "learning_rate": 3.578492092729679e-05, "loss": 1.1942, "step": 4825000 }, { "epoch": 2.89, "learning_rate": 3.5782820961736225e-05, "loss": 1.2153, "step": 4825500 }, { "epoch": 2.89, "learning_rate": 3.5780720996175665e-05, "loss": 1.195, "step": 4826000 }, { "epoch": 2.89, "learning_rate": 3.57786210306151e-05, "loss": 1.2042, "step": 4826500 }, { "epoch": 2.89, "learning_rate": 3.577652526498565e-05, "loss": 1.2042, "step": 4827000 }, { "epoch": 2.89, "learning_rate": 3.577442529942509e-05, "loss": 1.1934, "step": 4827500 }, { "epoch": 2.89, "learning_rate": 3.5772325333864526e-05, "loss": 1.1855, "step": 4828000 }, { "epoch": 2.89, "learning_rate": 3.577022536830396e-05, "loss": 1.2214, "step": 4828500 }, { "epoch": 2.9, "learning_rate": 3.576812960267451e-05, "loss": 1.175, "step": 4829000 }, { "epoch": 2.9, "learning_rate": 3.5766033837045066e-05, "loss": 1.1773, "step": 4829500 }, { "epoch": 2.9, "learning_rate": 3.576393387148451e-05, "loss": 1.2022, "step": 4830000 }, { "epoch": 2.9, "learning_rate": 3.576183390592394e-05, "loss": 1.1988, "step": 4830500 }, { "epoch": 2.9, "learning_rate": 3.5759733940363374e-05, "loss": 1.2069, "step": 4831000 }, { "epoch": 2.9, "learning_rate": 3.5757633974802814e-05, "loss": 1.1969, "step": 4831500 }, { "epoch": 2.9, "learning_rate": 3.575553820917337e-05, "loss": 1.2188, "step": 4832000 }, { "epoch": 2.9, "learning_rate": 3.57534382436128e-05, "loss": 1.2256, "step": 4832500 }, { "epoch": 2.9, "learning_rate": 3.575133827805224e-05, "loss": 1.2124, "step": 4833000 }, { "epoch": 2.9, "learning_rate": 3.5749238312491675e-05, "loss": 1.1965, "step": 4833500 }, { "epoch": 2.9, "learning_rate": 3.574713834693111e-05, "loss": 1.2095, "step": 4834000 }, { "epoch": 2.9, "learning_rate": 3.574503838137055e-05, "loss": 1.1893, "step": 4834500 }, { "epoch": 2.9, "learning_rate": 3.574293841580998e-05, "loss": 1.1957, "step": 4835000 }, { "epoch": 2.9, "learning_rate": 3.5740838450249415e-05, "loss": 1.206, "step": 4835500 }, { "epoch": 2.9, "learning_rate": 3.573874268461997e-05, "loss": 1.185, "step": 4836000 }, { "epoch": 2.9, "learning_rate": 3.573664271905941e-05, "loss": 1.1827, "step": 4836500 }, { "epoch": 2.9, "learning_rate": 3.573454275349884e-05, "loss": 1.1985, "step": 4837000 }, { "epoch": 2.9, "learning_rate": 3.5732442787938276e-05, "loss": 1.1878, "step": 4837500 }, { "epoch": 2.9, "learning_rate": 3.5730342822377716e-05, "loss": 1.2003, "step": 4838000 }, { "epoch": 2.9, "learning_rate": 3.572824285681715e-05, "loss": 1.1847, "step": 4838500 }, { "epoch": 2.9, "learning_rate": 3.572614289125658e-05, "loss": 1.1819, "step": 4839000 }, { "epoch": 2.9, "learning_rate": 3.572404712562714e-05, "loss": 1.194, "step": 4839500 }, { "epoch": 2.9, "learning_rate": 3.572194716006658e-05, "loss": 1.1941, "step": 4840000 }, { "epoch": 2.9, "learning_rate": 3.571984719450601e-05, "loss": 1.2302, "step": 4840500 }, { "epoch": 2.9, "learning_rate": 3.5717747228945444e-05, "loss": 1.1904, "step": 4841000 }, { "epoch": 2.9, "learning_rate": 3.5715647263384884e-05, "loss": 1.2105, "step": 4841500 }, { "epoch": 2.9, "learning_rate": 3.571354729782431e-05, "loss": 1.2244, "step": 4842000 }, { "epoch": 2.9, "learning_rate": 3.571145573212599e-05, "loss": 1.212, "step": 4842500 }, { "epoch": 2.9, "learning_rate": 3.5709355766565425e-05, "loss": 1.2059, "step": 4843000 }, { "epoch": 2.9, "learning_rate": 3.5707255801004865e-05, "loss": 1.1899, "step": 4843500 }, { "epoch": 2.9, "learning_rate": 3.57051558354443e-05, "loss": 1.1849, "step": 4844000 }, { "epoch": 2.9, "learning_rate": 3.570305586988373e-05, "loss": 1.1731, "step": 4844500 }, { "epoch": 2.9, "learning_rate": 3.570095590432317e-05, "loss": 1.2052, "step": 4845000 }, { "epoch": 2.91, "learning_rate": 3.5698855938762606e-05, "loss": 1.2027, "step": 4845500 }, { "epoch": 2.91, "learning_rate": 3.569675597320204e-05, "loss": 1.2156, "step": 4846000 }, { "epoch": 2.91, "learning_rate": 3.569466020757259e-05, "loss": 1.1996, "step": 4846500 }, { "epoch": 2.91, "learning_rate": 3.569256024201203e-05, "loss": 1.1731, "step": 4847000 }, { "epoch": 2.91, "learning_rate": 3.5690460276451466e-05, "loss": 1.1668, "step": 4847500 }, { "epoch": 2.91, "learning_rate": 3.56883603108909e-05, "loss": 1.1802, "step": 4848000 }, { "epoch": 2.91, "learning_rate": 3.568626034533034e-05, "loss": 1.2018, "step": 4848500 }, { "epoch": 2.91, "learning_rate": 3.5684160379769767e-05, "loss": 1.1728, "step": 4849000 }, { "epoch": 2.91, "learning_rate": 3.568206041420921e-05, "loss": 1.1795, "step": 4849500 }, { "epoch": 2.91, "learning_rate": 3.567996044864864e-05, "loss": 1.2172, "step": 4850000 }, { "epoch": 2.91, "learning_rate": 3.56778646830192e-05, "loss": 1.2268, "step": 4850500 }, { "epoch": 2.91, "learning_rate": 3.5675764717458634e-05, "loss": 1.2204, "step": 4851000 }, { "epoch": 2.91, "learning_rate": 3.567366475189807e-05, "loss": 1.1781, "step": 4851500 }, { "epoch": 2.91, "learning_rate": 3.56715647863375e-05, "loss": 1.1988, "step": 4852000 }, { "epoch": 2.91, "learning_rate": 3.5669464820776934e-05, "loss": 1.2125, "step": 4852500 }, { "epoch": 2.91, "learning_rate": 3.5667364855216375e-05, "loss": 1.2356, "step": 4853000 }, { "epoch": 2.91, "learning_rate": 3.566526488965581e-05, "loss": 1.1736, "step": 4853500 }, { "epoch": 2.91, "learning_rate": 3.566316492409524e-05, "loss": 1.1886, "step": 4854000 }, { "epoch": 2.91, "learning_rate": 3.5661069158465795e-05, "loss": 1.2477, "step": 4854500 }, { "epoch": 2.91, "learning_rate": 3.5658969192905235e-05, "loss": 1.165, "step": 4855000 }, { "epoch": 2.91, "learning_rate": 3.5656873427275796e-05, "loss": 1.1912, "step": 4855500 }, { "epoch": 2.91, "learning_rate": 3.565477346171522e-05, "loss": 1.2064, "step": 4856000 }, { "epoch": 2.91, "learning_rate": 3.565267349615466e-05, "loss": 1.17, "step": 4856500 }, { "epoch": 2.91, "learning_rate": 3.5650573530594096e-05, "loss": 1.2205, "step": 4857000 }, { "epoch": 2.91, "learning_rate": 3.564847356503353e-05, "loss": 1.2147, "step": 4857500 }, { "epoch": 2.91, "learning_rate": 3.564637359947297e-05, "loss": 1.1853, "step": 4858000 }, { "epoch": 2.91, "learning_rate": 3.56442736339124e-05, "loss": 1.2118, "step": 4858500 }, { "epoch": 2.91, "learning_rate": 3.564217786828296e-05, "loss": 1.2127, "step": 4859000 }, { "epoch": 2.91, "learning_rate": 3.564007790272239e-05, "loss": 1.2163, "step": 4859500 }, { "epoch": 2.91, "learning_rate": 3.563797793716183e-05, "loss": 1.2155, "step": 4860000 }, { "epoch": 2.91, "learning_rate": 3.5635877971601264e-05, "loss": 1.2094, "step": 4860500 }, { "epoch": 2.91, "learning_rate": 3.56337780060407e-05, "loss": 1.192, "step": 4861000 }, { "epoch": 2.91, "learning_rate": 3.563167804048014e-05, "loss": 1.2084, "step": 4861500 }, { "epoch": 2.91, "learning_rate": 3.562957807491957e-05, "loss": 1.1808, "step": 4862000 }, { "epoch": 2.92, "learning_rate": 3.5627478109359005e-05, "loss": 1.1785, "step": 4862500 }, { "epoch": 2.92, "learning_rate": 3.5625382343729565e-05, "loss": 1.1689, "step": 4863000 }, { "epoch": 2.92, "learning_rate": 3.5623282378169e-05, "loss": 1.198, "step": 4863500 }, { "epoch": 2.92, "learning_rate": 3.562118661253955e-05, "loss": 1.2205, "step": 4864000 }, { "epoch": 2.92, "learning_rate": 3.5619086646978986e-05, "loss": 1.1966, "step": 4864500 }, { "epoch": 2.92, "learning_rate": 3.5616986681418426e-05, "loss": 1.21, "step": 4865000 }, { "epoch": 2.92, "learning_rate": 3.561488671585786e-05, "loss": 1.2158, "step": 4865500 }, { "epoch": 2.92, "learning_rate": 3.561278675029729e-05, "loss": 1.2127, "step": 4866000 }, { "epoch": 2.92, "learning_rate": 3.5610690984667846e-05, "loss": 1.2204, "step": 4866500 }, { "epoch": 2.92, "learning_rate": 3.5608591019107286e-05, "loss": 1.1964, "step": 4867000 }, { "epoch": 2.92, "learning_rate": 3.560649105354672e-05, "loss": 1.1677, "step": 4867500 }, { "epoch": 2.92, "learning_rate": 3.5604391087986153e-05, "loss": 1.1937, "step": 4868000 }, { "epoch": 2.92, "learning_rate": 3.5602291122425594e-05, "loss": 1.2094, "step": 4868500 }, { "epoch": 2.92, "learning_rate": 3.560019115686503e-05, "loss": 1.2223, "step": 4869000 }, { "epoch": 2.92, "learning_rate": 3.559809539123558e-05, "loss": 1.2166, "step": 4869500 }, { "epoch": 2.92, "learning_rate": 3.559599542567502e-05, "loss": 1.2035, "step": 4870000 }, { "epoch": 2.92, "learning_rate": 3.5593895460114454e-05, "loss": 1.2044, "step": 4870500 }, { "epoch": 2.92, "learning_rate": 3.559179549455389e-05, "loss": 1.1768, "step": 4871000 }, { "epoch": 2.92, "learning_rate": 3.558969552899333e-05, "loss": 1.2329, "step": 4871500 }, { "epoch": 2.92, "learning_rate": 3.558759556343276e-05, "loss": 1.1919, "step": 4872000 }, { "epoch": 2.92, "learning_rate": 3.5585495597872195e-05, "loss": 1.2393, "step": 4872500 }, { "epoch": 2.92, "learning_rate": 3.5583395632311635e-05, "loss": 1.1783, "step": 4873000 }, { "epoch": 2.92, "learning_rate": 3.558129986668219e-05, "loss": 1.2029, "step": 4873500 }, { "epoch": 2.92, "learning_rate": 3.557920410105274e-05, "loss": 1.197, "step": 4874000 }, { "epoch": 2.92, "learning_rate": 3.5577104135492176e-05, "loss": 1.1909, "step": 4874500 }, { "epoch": 2.92, "learning_rate": 3.557500416993161e-05, "loss": 1.2012, "step": 4875000 }, { "epoch": 2.92, "learning_rate": 3.557290420437105e-05, "loss": 1.2581, "step": 4875500 }, { "epoch": 2.92, "learning_rate": 3.557080423881048e-05, "loss": 1.1802, "step": 4876000 }, { "epoch": 2.92, "learning_rate": 3.5568704273249916e-05, "loss": 1.2095, "step": 4876500 }, { "epoch": 2.92, "learning_rate": 3.556660850762048e-05, "loss": 1.1637, "step": 4877000 }, { "epoch": 2.92, "learning_rate": 3.556450854205991e-05, "loss": 1.1899, "step": 4877500 }, { "epoch": 2.92, "learning_rate": 3.5562408576499344e-05, "loss": 1.1925, "step": 4878000 }, { "epoch": 2.92, "learning_rate": 3.5560308610938784e-05, "loss": 1.2125, "step": 4878500 }, { "epoch": 2.93, "learning_rate": 3.555820864537822e-05, "loss": 1.1718, "step": 4879000 }, { "epoch": 2.93, "learning_rate": 3.555610867981765e-05, "loss": 1.2255, "step": 4879500 }, { "epoch": 2.93, "learning_rate": 3.555400871425709e-05, "loss": 1.1551, "step": 4880000 }, { "epoch": 2.93, "learning_rate": 3.5551908748696525e-05, "loss": 1.2055, "step": 4880500 }, { "epoch": 2.93, "learning_rate": 3.554981298306708e-05, "loss": 1.1939, "step": 4881000 }, { "epoch": 2.93, "learning_rate": 3.554771301750651e-05, "loss": 1.188, "step": 4881500 }, { "epoch": 2.93, "learning_rate": 3.554561305194595e-05, "loss": 1.2225, "step": 4882000 }, { "epoch": 2.93, "learning_rate": 3.5543517286316505e-05, "loss": 1.169, "step": 4882500 }, { "epoch": 2.93, "learning_rate": 3.554141732075594e-05, "loss": 1.1823, "step": 4883000 }, { "epoch": 2.93, "learning_rate": 3.553931735519537e-05, "loss": 1.1724, "step": 4883500 }, { "epoch": 2.93, "learning_rate": 3.553721738963481e-05, "loss": 1.1869, "step": 4884000 }, { "epoch": 2.93, "learning_rate": 3.5535117424074246e-05, "loss": 1.2344, "step": 4884500 }, { "epoch": 2.93, "learning_rate": 3.55330216584448e-05, "loss": 1.1936, "step": 4885000 }, { "epoch": 2.93, "learning_rate": 3.553092169288424e-05, "loss": 1.1978, "step": 4885500 }, { "epoch": 2.93, "learning_rate": 3.552882172732367e-05, "loss": 1.1808, "step": 4886000 }, { "epoch": 2.93, "learning_rate": 3.552672176176311e-05, "loss": 1.2096, "step": 4886500 }, { "epoch": 2.93, "learning_rate": 3.552462179620255e-05, "loss": 1.1803, "step": 4887000 }, { "epoch": 2.93, "learning_rate": 3.552252183064198e-05, "loss": 1.1873, "step": 4887500 }, { "epoch": 2.93, "learning_rate": 3.552042186508141e-05, "loss": 1.1726, "step": 4888000 }, { "epoch": 2.93, "learning_rate": 3.551832609945197e-05, "loss": 1.1765, "step": 4888500 }, { "epoch": 2.93, "learning_rate": 3.551622613389141e-05, "loss": 1.157, "step": 4889000 }, { "epoch": 2.93, "learning_rate": 3.551412616833084e-05, "loss": 1.1899, "step": 4889500 }, { "epoch": 2.93, "learning_rate": 3.5512026202770275e-05, "loss": 1.2315, "step": 4890000 }, { "epoch": 2.93, "learning_rate": 3.550992623720971e-05, "loss": 1.201, "step": 4890500 }, { "epoch": 2.93, "learning_rate": 3.550782627164914e-05, "loss": 1.2075, "step": 4891000 }, { "epoch": 2.93, "learning_rate": 3.55057305060197e-05, "loss": 1.1848, "step": 4891500 }, { "epoch": 2.93, "learning_rate": 3.5503634740390255e-05, "loss": 1.1803, "step": 4892000 }, { "epoch": 2.93, "learning_rate": 3.550153897476081e-05, "loss": 1.177, "step": 4892500 }, { "epoch": 2.93, "learning_rate": 3.549943900920025e-05, "loss": 1.2364, "step": 4893000 }, { "epoch": 2.93, "learning_rate": 3.549733904363968e-05, "loss": 1.2193, "step": 4893500 }, { "epoch": 2.93, "learning_rate": 3.5495239078079116e-05, "loss": 1.1924, "step": 4894000 }, { "epoch": 2.93, "learning_rate": 3.5493139112518556e-05, "loss": 1.1939, "step": 4894500 }, { "epoch": 2.93, "learning_rate": 3.549103914695799e-05, "loss": 1.1952, "step": 4895000 }, { "epoch": 2.94, "learning_rate": 3.548893918139742e-05, "loss": 1.2072, "step": 4895500 }, { "epoch": 2.94, "learning_rate": 3.5486839215836864e-05, "loss": 1.1993, "step": 4896000 }, { "epoch": 2.94, "learning_rate": 3.548474345020742e-05, "loss": 1.2296, "step": 4896500 }, { "epoch": 2.94, "learning_rate": 3.548264348464685e-05, "loss": 1.2044, "step": 4897000 }, { "epoch": 2.94, "learning_rate": 3.5480543519086284e-05, "loss": 1.2146, "step": 4897500 }, { "epoch": 2.94, "learning_rate": 3.5478443553525724e-05, "loss": 1.213, "step": 4898000 }, { "epoch": 2.94, "learning_rate": 3.547634358796516e-05, "loss": 1.1903, "step": 4898500 }, { "epoch": 2.94, "learning_rate": 3.547424362240459e-05, "loss": 1.1772, "step": 4899000 }, { "epoch": 2.94, "learning_rate": 3.547214365684403e-05, "loss": 1.2092, "step": 4899500 }, { "epoch": 2.94, "learning_rate": 3.547004369128346e-05, "loss": 1.1931, "step": 4900000 }, { "epoch": 2.94, "eval_loss": 1.150070071220398, "eval_runtime": 1102.0417, "eval_samples_per_second": 477.949, "eval_steps_per_second": 79.659, "step": 4900000 }, { "epoch": 2.94, "learning_rate": 3.546794792565402e-05, "loss": 1.1686, "step": 4900500 }, { "epoch": 2.94, "learning_rate": 3.546584796009346e-05, "loss": 1.1786, "step": 4901000 }, { "epoch": 2.94, "learning_rate": 3.546374799453289e-05, "loss": 1.1834, "step": 4901500 }, { "epoch": 2.94, "learning_rate": 3.5461648028972326e-05, "loss": 1.1955, "step": 4902000 }, { "epoch": 2.94, "learning_rate": 3.545954806341176e-05, "loss": 1.2104, "step": 4902500 }, { "epoch": 2.94, "learning_rate": 3.545744809785119e-05, "loss": 1.1799, "step": 4903000 }, { "epoch": 2.94, "learning_rate": 3.5455348132290626e-05, "loss": 1.1846, "step": 4903500 }, { "epoch": 2.94, "learning_rate": 3.5453248166730066e-05, "loss": 1.1922, "step": 4904000 }, { "epoch": 2.94, "learning_rate": 3.545115240110062e-05, "loss": 1.1906, "step": 4904500 }, { "epoch": 2.94, "learning_rate": 3.544905243554005e-05, "loss": 1.1742, "step": 4905000 }, { "epoch": 2.94, "learning_rate": 3.544695246997949e-05, "loss": 1.19, "step": 4905500 }, { "epoch": 2.94, "learning_rate": 3.544485250441893e-05, "loss": 1.1771, "step": 4906000 }, { "epoch": 2.94, "learning_rate": 3.544275253885836e-05, "loss": 1.2039, "step": 4906500 }, { "epoch": 2.94, "learning_rate": 3.5440656773228914e-05, "loss": 1.2081, "step": 4907000 }, { "epoch": 2.94, "learning_rate": 3.5438556807668354e-05, "loss": 1.1953, "step": 4907500 }, { "epoch": 2.94, "learning_rate": 3.543645684210779e-05, "loss": 1.1965, "step": 4908000 }, { "epoch": 2.94, "learning_rate": 3.543436107647835e-05, "loss": 1.1947, "step": 4908500 }, { "epoch": 2.94, "learning_rate": 3.543226111091778e-05, "loss": 1.176, "step": 4909000 }, { "epoch": 2.94, "learning_rate": 3.5430161145357215e-05, "loss": 1.1858, "step": 4909500 }, { "epoch": 2.94, "learning_rate": 3.542806117979665e-05, "loss": 1.2069, "step": 4910000 }, { "epoch": 2.94, "learning_rate": 3.542596121423608e-05, "loss": 1.1935, "step": 4910500 }, { "epoch": 2.94, "learning_rate": 3.542386124867552e-05, "loss": 1.2113, "step": 4911000 }, { "epoch": 2.94, "learning_rate": 3.542176548304608e-05, "loss": 1.1929, "step": 4911500 }, { "epoch": 2.94, "learning_rate": 3.541966551748551e-05, "loss": 1.1827, "step": 4912000 }, { "epoch": 2.95, "learning_rate": 3.541756555192494e-05, "loss": 1.204, "step": 4912500 }, { "epoch": 2.95, "learning_rate": 3.541546558636438e-05, "loss": 1.1612, "step": 4913000 }, { "epoch": 2.95, "learning_rate": 3.5413365620803816e-05, "loss": 1.1853, "step": 4913500 }, { "epoch": 2.95, "learning_rate": 3.541126565524325e-05, "loss": 1.1911, "step": 4914000 }, { "epoch": 2.95, "learning_rate": 3.540916568968269e-05, "loss": 1.2116, "step": 4914500 }, { "epoch": 2.95, "learning_rate": 3.5407065724122123e-05, "loss": 1.1766, "step": 4915000 }, { "epoch": 2.95, "learning_rate": 3.5404965758561564e-05, "loss": 1.1856, "step": 4915500 }, { "epoch": 2.95, "learning_rate": 3.5402865793001e-05, "loss": 1.1894, "step": 4916000 }, { "epoch": 2.95, "learning_rate": 3.540076582744043e-05, "loss": 1.1785, "step": 4916500 }, { "epoch": 2.95, "learning_rate": 3.5398670061810984e-05, "loss": 1.1615, "step": 4917000 }, { "epoch": 2.95, "learning_rate": 3.5396570096250424e-05, "loss": 1.1646, "step": 4917500 }, { "epoch": 2.95, "learning_rate": 3.539447013068986e-05, "loss": 1.1889, "step": 4918000 }, { "epoch": 2.95, "learning_rate": 3.539237016512929e-05, "loss": 1.1881, "step": 4918500 }, { "epoch": 2.95, "learning_rate": 3.5390274399499845e-05, "loss": 1.1956, "step": 4919000 }, { "epoch": 2.95, "learning_rate": 3.5388174433939285e-05, "loss": 1.1853, "step": 4919500 }, { "epoch": 2.95, "learning_rate": 3.538607446837872e-05, "loss": 1.2042, "step": 4920000 }, { "epoch": 2.95, "learning_rate": 3.538397450281815e-05, "loss": 1.1773, "step": 4920500 }, { "epoch": 2.95, "learning_rate": 3.538187453725759e-05, "loss": 1.2138, "step": 4921000 }, { "epoch": 2.95, "learning_rate": 3.5379774571697026e-05, "loss": 1.1997, "step": 4921500 }, { "epoch": 2.95, "learning_rate": 3.537767880606758e-05, "loss": 1.2098, "step": 4922000 }, { "epoch": 2.95, "learning_rate": 3.537557884050702e-05, "loss": 1.1951, "step": 4922500 }, { "epoch": 2.95, "learning_rate": 3.537347887494645e-05, "loss": 1.1966, "step": 4923000 }, { "epoch": 2.95, "learning_rate": 3.5371378909385887e-05, "loss": 1.2012, "step": 4923500 }, { "epoch": 2.95, "learning_rate": 3.536927894382533e-05, "loss": 1.1754, "step": 4924000 }, { "epoch": 2.95, "learning_rate": 3.5367178978264753e-05, "loss": 1.1853, "step": 4924500 }, { "epoch": 2.95, "learning_rate": 3.536507901270419e-05, "loss": 1.1625, "step": 4925000 }, { "epoch": 2.95, "learning_rate": 3.536297904714363e-05, "loss": 1.2095, "step": 4925500 }, { "epoch": 2.95, "learning_rate": 3.536087908158306e-05, "loss": 1.1875, "step": 4926000 }, { "epoch": 2.95, "learning_rate": 3.5358779116022494e-05, "loss": 1.1759, "step": 4926500 }, { "epoch": 2.95, "learning_rate": 3.5356679150461934e-05, "loss": 1.1836, "step": 4927000 }, { "epoch": 2.95, "learning_rate": 3.535457918490137e-05, "loss": 1.1699, "step": 4927500 }, { "epoch": 2.95, "learning_rate": 3.535248341927192e-05, "loss": 1.1825, "step": 4928000 }, { "epoch": 2.95, "learning_rate": 3.5350383453711355e-05, "loss": 1.1832, "step": 4928500 }, { "epoch": 2.96, "learning_rate": 3.5348283488150795e-05, "loss": 1.1735, "step": 4929000 }, { "epoch": 2.96, "learning_rate": 3.534618352259023e-05, "loss": 1.2161, "step": 4929500 }, { "epoch": 2.96, "learning_rate": 3.534408775696078e-05, "loss": 1.1926, "step": 4930000 }, { "epoch": 2.96, "learning_rate": 3.534198779140022e-05, "loss": 1.1985, "step": 4930500 }, { "epoch": 2.96, "learning_rate": 3.5339887825839656e-05, "loss": 1.1791, "step": 4931000 }, { "epoch": 2.96, "learning_rate": 3.533779206021021e-05, "loss": 1.2029, "step": 4931500 }, { "epoch": 2.96, "learning_rate": 3.533569209464964e-05, "loss": 1.2021, "step": 4932000 }, { "epoch": 2.96, "learning_rate": 3.533359212908908e-05, "loss": 1.1804, "step": 4932500 }, { "epoch": 2.96, "learning_rate": 3.5331492163528516e-05, "loss": 1.2075, "step": 4933000 }, { "epoch": 2.96, "learning_rate": 3.532939639789908e-05, "loss": 1.2154, "step": 4933500 }, { "epoch": 2.96, "learning_rate": 3.5327296432338503e-05, "loss": 1.1871, "step": 4934000 }, { "epoch": 2.96, "learning_rate": 3.5325196466777944e-05, "loss": 1.1961, "step": 4934500 }, { "epoch": 2.96, "learning_rate": 3.532309650121738e-05, "loss": 1.1789, "step": 4935000 }, { "epoch": 2.96, "learning_rate": 3.532099653565681e-05, "loss": 1.1838, "step": 4935500 }, { "epoch": 2.96, "learning_rate": 3.531889657009625e-05, "loss": 1.1975, "step": 4936000 }, { "epoch": 2.96, "learning_rate": 3.5316796604535684e-05, "loss": 1.1985, "step": 4936500 }, { "epoch": 2.96, "learning_rate": 3.531469663897512e-05, "loss": 1.1751, "step": 4937000 }, { "epoch": 2.96, "learning_rate": 3.531259667341456e-05, "loss": 1.1921, "step": 4937500 }, { "epoch": 2.96, "learning_rate": 3.531049670785399e-05, "loss": 1.1814, "step": 4938000 }, { "epoch": 2.96, "learning_rate": 3.5308400942224545e-05, "loss": 1.1779, "step": 4938500 }, { "epoch": 2.96, "learning_rate": 3.5306300976663985e-05, "loss": 1.2068, "step": 4939000 }, { "epoch": 2.96, "learning_rate": 3.530420101110342e-05, "loss": 1.1964, "step": 4939500 }, { "epoch": 2.96, "learning_rate": 3.530210524547397e-05, "loss": 1.1794, "step": 4940000 }, { "epoch": 2.96, "learning_rate": 3.5300005279913406e-05, "loss": 1.2067, "step": 4940500 }, { "epoch": 2.96, "learning_rate": 3.5297905314352846e-05, "loss": 1.203, "step": 4941000 }, { "epoch": 2.96, "learning_rate": 3.529580534879228e-05, "loss": 1.1975, "step": 4941500 }, { "epoch": 2.96, "learning_rate": 3.529370538323171e-05, "loss": 1.1813, "step": 4942000 }, { "epoch": 2.96, "learning_rate": 3.529160541767115e-05, "loss": 1.1987, "step": 4942500 }, { "epoch": 2.96, "learning_rate": 3.528950965204171e-05, "loss": 1.1871, "step": 4943000 }, { "epoch": 2.96, "learning_rate": 3.528740968648114e-05, "loss": 1.1964, "step": 4943500 }, { "epoch": 2.96, "learning_rate": 3.5285309720920574e-05, "loss": 1.1983, "step": 4944000 }, { "epoch": 2.96, "learning_rate": 3.5283209755360014e-05, "loss": 1.1786, "step": 4944500 }, { "epoch": 2.96, "learning_rate": 3.528110978979945e-05, "loss": 1.1886, "step": 4945000 }, { "epoch": 2.97, "learning_rate": 3.527900982423888e-05, "loss": 1.1916, "step": 4945500 }, { "epoch": 2.97, "learning_rate": 3.527690985867832e-05, "loss": 1.2012, "step": 4946000 }, { "epoch": 2.97, "learning_rate": 3.527480989311775e-05, "loss": 1.1637, "step": 4946500 }, { "epoch": 2.97, "learning_rate": 3.527270992755719e-05, "loss": 1.2104, "step": 4947000 }, { "epoch": 2.97, "learning_rate": 3.527061416192775e-05, "loss": 1.1895, "step": 4947500 }, { "epoch": 2.97, "learning_rate": 3.526851419636718e-05, "loss": 1.2104, "step": 4948000 }, { "epoch": 2.97, "learning_rate": 3.5266414230806615e-05, "loss": 1.1835, "step": 4948500 }, { "epoch": 2.97, "learning_rate": 3.526431426524605e-05, "loss": 1.1696, "step": 4949000 }, { "epoch": 2.97, "learning_rate": 3.526221429968548e-05, "loss": 1.1902, "step": 4949500 }, { "epoch": 2.97, "learning_rate": 3.5260114334124916e-05, "loss": 1.1867, "step": 4950000 }, { "epoch": 2.97, "learning_rate": 3.5258014368564356e-05, "loss": 1.1919, "step": 4950500 }, { "epoch": 2.97, "learning_rate": 3.5255918602934916e-05, "loss": 1.1933, "step": 4951000 }, { "epoch": 2.97, "learning_rate": 3.525381863737434e-05, "loss": 1.1714, "step": 4951500 }, { "epoch": 2.97, "learning_rate": 3.5251718671813776e-05, "loss": 1.2231, "step": 4952000 }, { "epoch": 2.97, "learning_rate": 3.5249618706253217e-05, "loss": 1.1904, "step": 4952500 }, { "epoch": 2.97, "learning_rate": 3.524751874069265e-05, "loss": 1.2286, "step": 4953000 }, { "epoch": 2.97, "learning_rate": 3.5245418775132084e-05, "loss": 1.2117, "step": 4953500 }, { "epoch": 2.97, "learning_rate": 3.5243318809571524e-05, "loss": 1.1688, "step": 4954000 }, { "epoch": 2.97, "learning_rate": 3.524122304394208e-05, "loss": 1.1621, "step": 4954500 }, { "epoch": 2.97, "learning_rate": 3.523912307838151e-05, "loss": 1.2109, "step": 4955000 }, { "epoch": 2.97, "learning_rate": 3.523702311282095e-05, "loss": 1.2456, "step": 4955500 }, { "epoch": 2.97, "learning_rate": 3.5234923147260384e-05, "loss": 1.1945, "step": 4956000 }, { "epoch": 2.97, "learning_rate": 3.523282738163094e-05, "loss": 1.1965, "step": 4956500 }, { "epoch": 2.97, "learning_rate": 3.523072741607037e-05, "loss": 1.1922, "step": 4957000 }, { "epoch": 2.97, "learning_rate": 3.522862745050981e-05, "loss": 1.1867, "step": 4957500 }, { "epoch": 2.97, "learning_rate": 3.5226527484949245e-05, "loss": 1.2077, "step": 4958000 }, { "epoch": 2.97, "learning_rate": 3.522442751938868e-05, "loss": 1.2082, "step": 4958500 }, { "epoch": 2.97, "learning_rate": 3.522232755382812e-05, "loss": 1.1866, "step": 4959000 }, { "epoch": 2.97, "learning_rate": 3.522022758826755e-05, "loss": 1.1989, "step": 4959500 }, { "epoch": 2.97, "learning_rate": 3.5218131822638106e-05, "loss": 1.1908, "step": 4960000 }, { "epoch": 2.97, "learning_rate": 3.521603185707754e-05, "loss": 1.2024, "step": 4960500 }, { "epoch": 2.97, "learning_rate": 3.521393189151698e-05, "loss": 1.2216, "step": 4961000 }, { "epoch": 2.97, "learning_rate": 3.521183192595641e-05, "loss": 1.1952, "step": 4961500 }, { "epoch": 2.97, "learning_rate": 3.520973196039585e-05, "loss": 1.1668, "step": 4962000 }, { "epoch": 2.98, "learning_rate": 3.520763619476641e-05, "loss": 1.2139, "step": 4962500 }, { "epoch": 2.98, "learning_rate": 3.520553622920584e-05, "loss": 1.2309, "step": 4963000 }, { "epoch": 2.98, "learning_rate": 3.5203436263645274e-05, "loss": 1.1917, "step": 4963500 }, { "epoch": 2.98, "learning_rate": 3.5201336298084714e-05, "loss": 1.1939, "step": 4964000 }, { "epoch": 2.98, "learning_rate": 3.519923633252415e-05, "loss": 1.1804, "step": 4964500 }, { "epoch": 2.98, "learning_rate": 3.519713636696358e-05, "loss": 1.1879, "step": 4965000 }, { "epoch": 2.98, "learning_rate": 3.519503640140302e-05, "loss": 1.1878, "step": 4965500 }, { "epoch": 2.98, "learning_rate": 3.5192940635773575e-05, "loss": 1.1847, "step": 4966000 }, { "epoch": 2.98, "learning_rate": 3.519084067021301e-05, "loss": 1.227, "step": 4966500 }, { "epoch": 2.98, "learning_rate": 3.518874070465244e-05, "loss": 1.1889, "step": 4967000 }, { "epoch": 2.98, "learning_rate": 3.518664073909188e-05, "loss": 1.1746, "step": 4967500 }, { "epoch": 2.98, "learning_rate": 3.5184540773531315e-05, "loss": 1.1893, "step": 4968000 }, { "epoch": 2.98, "learning_rate": 3.518244500790187e-05, "loss": 1.1907, "step": 4968500 }, { "epoch": 2.98, "learning_rate": 3.518034504234131e-05, "loss": 1.1976, "step": 4969000 }, { "epoch": 2.98, "learning_rate": 3.517824507678074e-05, "loss": 1.1862, "step": 4969500 }, { "epoch": 2.98, "learning_rate": 3.5176145111220176e-05, "loss": 1.2249, "step": 4970000 }, { "epoch": 2.98, "learning_rate": 3.5174045145659616e-05, "loss": 1.1959, "step": 4970500 }, { "epoch": 2.98, "learning_rate": 3.517194518009905e-05, "loss": 1.1911, "step": 4971000 }, { "epoch": 2.98, "learning_rate": 3.5169845214538476e-05, "loss": 1.1862, "step": 4971500 }, { "epoch": 2.98, "learning_rate": 3.516774524897792e-05, "loss": 1.1583, "step": 4972000 }, { "epoch": 2.98, "learning_rate": 3.516564948334848e-05, "loss": 1.1852, "step": 4972500 }, { "epoch": 2.98, "learning_rate": 3.516354951778791e-05, "loss": 1.165, "step": 4973000 }, { "epoch": 2.98, "learning_rate": 3.516144955222734e-05, "loss": 1.1931, "step": 4973500 }, { "epoch": 2.98, "learning_rate": 3.515934958666678e-05, "loss": 1.1914, "step": 4974000 }, { "epoch": 2.98, "learning_rate": 3.515725802096846e-05, "loss": 1.1932, "step": 4974500 }, { "epoch": 2.98, "learning_rate": 3.515515805540789e-05, "loss": 1.2081, "step": 4975000 }, { "epoch": 2.98, "learning_rate": 3.5153058089847325e-05, "loss": 1.172, "step": 4975500 }, { "epoch": 2.98, "learning_rate": 3.5150958124286765e-05, "loss": 1.1788, "step": 4976000 }, { "epoch": 2.98, "learning_rate": 3.51488581587262e-05, "loss": 1.213, "step": 4976500 }, { "epoch": 2.98, "learning_rate": 3.514675819316563e-05, "loss": 1.1947, "step": 4977000 }, { "epoch": 2.98, "learning_rate": 3.514465822760507e-05, "loss": 1.2347, "step": 4977500 }, { "epoch": 2.98, "learning_rate": 3.5142558262044506e-05, "loss": 1.2143, "step": 4978000 }, { "epoch": 2.98, "learning_rate": 3.514046249641506e-05, "loss": 1.1915, "step": 4978500 }, { "epoch": 2.99, "learning_rate": 3.513836253085449e-05, "loss": 1.195, "step": 4979000 }, { "epoch": 2.99, "learning_rate": 3.513626256529393e-05, "loss": 1.1789, "step": 4979500 }, { "epoch": 2.99, "learning_rate": 3.5134162599733366e-05, "loss": 1.2034, "step": 4980000 }, { "epoch": 2.99, "learning_rate": 3.51320626341728e-05, "loss": 1.1813, "step": 4980500 }, { "epoch": 2.99, "learning_rate": 3.5129966868543353e-05, "loss": 1.2187, "step": 4981000 }, { "epoch": 2.99, "learning_rate": 3.5127866902982794e-05, "loss": 1.1764, "step": 4981500 }, { "epoch": 2.99, "learning_rate": 3.512577113735335e-05, "loss": 1.1802, "step": 4982000 }, { "epoch": 2.99, "learning_rate": 3.512367117179278e-05, "loss": 1.1749, "step": 4982500 }, { "epoch": 2.99, "learning_rate": 3.512157120623222e-05, "loss": 1.1856, "step": 4983000 }, { "epoch": 2.99, "learning_rate": 3.5119471240671654e-05, "loss": 1.173, "step": 4983500 }, { "epoch": 2.99, "learning_rate": 3.511737127511109e-05, "loss": 1.1714, "step": 4984000 }, { "epoch": 2.99, "learning_rate": 3.511527130955053e-05, "loss": 1.1896, "step": 4984500 }, { "epoch": 2.99, "learning_rate": 3.511317134398996e-05, "loss": 1.2135, "step": 4985000 }, { "epoch": 2.99, "learning_rate": 3.5111075578360515e-05, "loss": 1.1937, "step": 4985500 }, { "epoch": 2.99, "learning_rate": 3.510897561279995e-05, "loss": 1.2188, "step": 4986000 }, { "epoch": 2.99, "learning_rate": 3.510687564723939e-05, "loss": 1.235, "step": 4986500 }, { "epoch": 2.99, "learning_rate": 3.510477568167882e-05, "loss": 1.2255, "step": 4987000 }, { "epoch": 2.99, "learning_rate": 3.5102675716118256e-05, "loss": 1.2107, "step": 4987500 }, { "epoch": 2.99, "learning_rate": 3.510057575055769e-05, "loss": 1.2078, "step": 4988000 }, { "epoch": 2.99, "learning_rate": 3.509847578499712e-05, "loss": 1.2072, "step": 4988500 }, { "epoch": 2.99, "learning_rate": 3.509638001936768e-05, "loss": 1.1822, "step": 4989000 }, { "epoch": 2.99, "learning_rate": 3.5094280053807116e-05, "loss": 1.1902, "step": 4989500 }, { "epoch": 2.99, "learning_rate": 3.509218008824656e-05, "loss": 1.2092, "step": 4990000 }, { "epoch": 2.99, "learning_rate": 3.5090080122685983e-05, "loss": 1.1944, "step": 4990500 }, { "epoch": 2.99, "learning_rate": 3.5087980157125424e-05, "loss": 1.2213, "step": 4991000 }, { "epoch": 2.99, "learning_rate": 3.508588019156486e-05, "loss": 1.2173, "step": 4991500 }, { "epoch": 2.99, "learning_rate": 3.508378022600429e-05, "loss": 1.1726, "step": 4992000 }, { "epoch": 2.99, "learning_rate": 3.508168026044373e-05, "loss": 1.2283, "step": 4992500 }, { "epoch": 2.99, "learning_rate": 3.5079580294883164e-05, "loss": 1.2095, "step": 4993000 }, { "epoch": 2.99, "learning_rate": 3.507748452925372e-05, "loss": 1.2043, "step": 4993500 }, { "epoch": 2.99, "learning_rate": 3.507538456369315e-05, "loss": 1.1754, "step": 4994000 }, { "epoch": 2.99, "learning_rate": 3.507328459813259e-05, "loss": 1.2093, "step": 4994500 }, { "epoch": 2.99, "learning_rate": 3.5071184632572025e-05, "loss": 1.2137, "step": 4995000 }, { "epoch": 3.0, "learning_rate": 3.506908886694258e-05, "loss": 1.234, "step": 4995500 }, { "epoch": 3.0, "learning_rate": 3.506699310131314e-05, "loss": 1.1872, "step": 4996000 }, { "epoch": 3.0, "learning_rate": 3.506489313575257e-05, "loss": 1.1619, "step": 4996500 }, { "epoch": 3.0, "learning_rate": 3.506279317019201e-05, "loss": 1.2051, "step": 4997000 }, { "epoch": 3.0, "learning_rate": 3.506069320463144e-05, "loss": 1.2021, "step": 4997500 }, { "epoch": 3.0, "learning_rate": 3.5058597439002e-05, "loss": 1.1838, "step": 4998000 }, { "epoch": 3.0, "learning_rate": 3.505649747344144e-05, "loss": 1.1752, "step": 4998500 }, { "epoch": 3.0, "learning_rate": 3.505439750788087e-05, "loss": 1.2044, "step": 4999000 }, { "epoch": 3.0, "learning_rate": 3.505229754232031e-05, "loss": 1.1751, "step": 4999500 }, { "epoch": 3.0, "learning_rate": 3.505019757675974e-05, "loss": 1.2131, "step": 5000000 }, { "epoch": 3.0, "eval_loss": 1.148377776145935, "eval_runtime": 1098.3919, "eval_samples_per_second": 479.537, "eval_steps_per_second": 79.923, "step": 5000000 }, { "epoch": 3.0, "learning_rate": 3.5048097611199174e-05, "loss": 1.1856, "step": 5000500 }, { "epoch": 3.0, "learning_rate": 3.504599764563861e-05, "loss": 1.2012, "step": 5001000 }, { "epoch": 3.0, "learning_rate": 3.504389768007805e-05, "loss": 1.2196, "step": 5001500 }, { "epoch": 3.0, "learning_rate": 3.504180191444861e-05, "loss": 1.194, "step": 5002000 }, { "epoch": 3.0, "learning_rate": 3.5039701948888034e-05, "loss": 1.1778, "step": 5002500 }, { "epoch": 3.0, "learning_rate": 3.503760198332747e-05, "loss": 1.1772, "step": 5003000 }, { "epoch": 3.0, "learning_rate": 3.503550201776691e-05, "loss": 1.185, "step": 5003500 }, { "epoch": 3.0, "learning_rate": 3.503340205220634e-05, "loss": 1.2015, "step": 5004000 }, { "epoch": 3.0, "learning_rate": 3.5031302086645775e-05, "loss": 1.1376, "step": 5004500 }, { "epoch": 3.0, "learning_rate": 3.5029202121085215e-05, "loss": 1.1188, "step": 5005000 }, { "epoch": 3.0, "learning_rate": 3.502710215552465e-05, "loss": 1.139, "step": 5005500 }, { "epoch": 3.0, "learning_rate": 3.502500218996408e-05, "loss": 1.1583, "step": 5006000 }, { "epoch": 3.0, "learning_rate": 3.502290222440352e-05, "loss": 1.1542, "step": 5006500 }, { "epoch": 3.0, "learning_rate": 3.5020802258842956e-05, "loss": 1.1757, "step": 5007000 }, { "epoch": 3.0, "learning_rate": 3.5018702293282396e-05, "loss": 1.1799, "step": 5007500 }, { "epoch": 3.0, "learning_rate": 3.501660232772182e-05, "loss": 1.1484, "step": 5008000 }, { "epoch": 3.0, "learning_rate": 3.5014502362161256e-05, "loss": 1.1543, "step": 5008500 }, { "epoch": 3.0, "learning_rate": 3.5012402396600696e-05, "loss": 1.125, "step": 5009000 }, { "epoch": 3.0, "learning_rate": 3.501030243104013e-05, "loss": 1.147, "step": 5009500 }, { "epoch": 3.0, "learning_rate": 3.5008206665410684e-05, "loss": 1.1666, "step": 5010000 }, { "epoch": 3.0, "learning_rate": 3.500610669985012e-05, "loss": 1.1843, "step": 5010500 }, { "epoch": 3.0, "learning_rate": 3.500400673428956e-05, "loss": 1.1633, "step": 5011000 }, { "epoch": 3.0, "learning_rate": 3.500190676872899e-05, "loss": 1.1559, "step": 5011500 }, { "epoch": 3.0, "learning_rate": 3.499981100309955e-05, "loss": 1.1695, "step": 5012000 }, { "epoch": 3.01, "learning_rate": 3.499771103753898e-05, "loss": 1.1516, "step": 5012500 }, { "epoch": 3.01, "learning_rate": 3.499561107197842e-05, "loss": 1.1696, "step": 5013000 }, { "epoch": 3.01, "learning_rate": 3.499351110641785e-05, "loss": 1.1501, "step": 5013500 }, { "epoch": 3.01, "learning_rate": 3.499141534078841e-05, "loss": 1.152, "step": 5014000 }, { "epoch": 3.01, "learning_rate": 3.498931537522785e-05, "loss": 1.1424, "step": 5014500 }, { "epoch": 3.01, "learning_rate": 3.498721540966728e-05, "loss": 1.1464, "step": 5015000 }, { "epoch": 3.01, "learning_rate": 3.498511544410671e-05, "loss": 1.1582, "step": 5015500 }, { "epoch": 3.01, "learning_rate": 3.498301547854615e-05, "loss": 1.1566, "step": 5016000 }, { "epoch": 3.01, "learning_rate": 3.498091971291671e-05, "loss": 1.1515, "step": 5016500 }, { "epoch": 3.01, "learning_rate": 3.4978819747356146e-05, "loss": 1.1634, "step": 5017000 }, { "epoch": 3.01, "learning_rate": 3.497671978179557e-05, "loss": 1.1652, "step": 5017500 }, { "epoch": 3.01, "learning_rate": 3.497462401616613e-05, "loss": 1.1473, "step": 5018000 }, { "epoch": 3.01, "learning_rate": 3.4972524050605573e-05, "loss": 1.1453, "step": 5018500 }, { "epoch": 3.01, "learning_rate": 3.497042408504501e-05, "loss": 1.161, "step": 5019000 }, { "epoch": 3.01, "learning_rate": 3.4968324119484434e-05, "loss": 1.18, "step": 5019500 }, { "epoch": 3.01, "learning_rate": 3.4966224153923874e-05, "loss": 1.1652, "step": 5020000 }, { "epoch": 3.01, "learning_rate": 3.496412418836331e-05, "loss": 1.1181, "step": 5020500 }, { "epoch": 3.01, "learning_rate": 3.496202422280275e-05, "loss": 1.143, "step": 5021000 }, { "epoch": 3.01, "learning_rate": 3.495992425724218e-05, "loss": 1.1664, "step": 5021500 }, { "epoch": 3.01, "learning_rate": 3.4957824291681614e-05, "loss": 1.1568, "step": 5022000 }, { "epoch": 3.01, "learning_rate": 3.495572852605217e-05, "loss": 1.1728, "step": 5022500 }, { "epoch": 3.01, "learning_rate": 3.495362856049161e-05, "loss": 1.134, "step": 5023000 }, { "epoch": 3.01, "learning_rate": 3.495152859493104e-05, "loss": 1.1872, "step": 5023500 }, { "epoch": 3.01, "learning_rate": 3.4949428629370475e-05, "loss": 1.1649, "step": 5024000 }, { "epoch": 3.01, "learning_rate": 3.4947337063672156e-05, "loss": 1.1377, "step": 5024500 }, { "epoch": 3.01, "learning_rate": 3.494523709811159e-05, "loss": 1.1376, "step": 5025000 }, { "epoch": 3.01, "learning_rate": 3.494313713255103e-05, "loss": 1.1705, "step": 5025500 }, { "epoch": 3.01, "learning_rate": 3.494103716699046e-05, "loss": 1.1507, "step": 5026000 }, { "epoch": 3.01, "learning_rate": 3.4938937201429896e-05, "loss": 1.1481, "step": 5026500 }, { "epoch": 3.01, "learning_rate": 3.4936841435800457e-05, "loss": 1.1766, "step": 5027000 }, { "epoch": 3.01, "learning_rate": 3.493474147023989e-05, "loss": 1.1501, "step": 5027500 }, { "epoch": 3.01, "learning_rate": 3.4932641504679324e-05, "loss": 1.1493, "step": 5028000 }, { "epoch": 3.01, "learning_rate": 3.4930541539118764e-05, "loss": 1.1341, "step": 5028500 }, { "epoch": 3.02, "learning_rate": 3.492844157355819e-05, "loss": 1.1606, "step": 5029000 }, { "epoch": 3.02, "learning_rate": 3.492634580792875e-05, "loss": 1.1669, "step": 5029500 }, { "epoch": 3.02, "learning_rate": 3.4924245842368184e-05, "loss": 1.1753, "step": 5030000 }, { "epoch": 3.02, "learning_rate": 3.4922145876807624e-05, "loss": 1.17, "step": 5030500 }, { "epoch": 3.02, "learning_rate": 3.492004591124706e-05, "loss": 1.1479, "step": 5031000 }, { "epoch": 3.02, "learning_rate": 3.4917945945686485e-05, "loss": 1.1393, "step": 5031500 }, { "epoch": 3.02, "learning_rate": 3.4915845980125925e-05, "loss": 1.1538, "step": 5032000 }, { "epoch": 3.02, "learning_rate": 3.491374601456536e-05, "loss": 1.149, "step": 5032500 }, { "epoch": 3.02, "learning_rate": 3.491164604900479e-05, "loss": 1.1827, "step": 5033000 }, { "epoch": 3.02, "learning_rate": 3.490955028337535e-05, "loss": 1.1527, "step": 5033500 }, { "epoch": 3.02, "learning_rate": 3.4907450317814786e-05, "loss": 1.1285, "step": 5034000 }, { "epoch": 3.02, "learning_rate": 3.490535035225422e-05, "loss": 1.1637, "step": 5034500 }, { "epoch": 3.02, "learning_rate": 3.490325038669366e-05, "loss": 1.1503, "step": 5035000 }, { "epoch": 3.02, "learning_rate": 3.490115042113309e-05, "loss": 1.1693, "step": 5035500 }, { "epoch": 3.02, "learning_rate": 3.489905465550365e-05, "loss": 1.1443, "step": 5036000 }, { "epoch": 3.02, "learning_rate": 3.489695468994308e-05, "loss": 1.188, "step": 5036500 }, { "epoch": 3.02, "learning_rate": 3.489485472438252e-05, "loss": 1.1658, "step": 5037000 }, { "epoch": 3.02, "learning_rate": 3.4892754758821953e-05, "loss": 1.1719, "step": 5037500 }, { "epoch": 3.02, "learning_rate": 3.489065479326139e-05, "loss": 1.1546, "step": 5038000 }, { "epoch": 3.02, "learning_rate": 3.488855482770083e-05, "loss": 1.158, "step": 5038500 }, { "epoch": 3.02, "learning_rate": 3.488645486214026e-05, "loss": 1.1637, "step": 5039000 }, { "epoch": 3.02, "learning_rate": 3.4884354896579694e-05, "loss": 1.1619, "step": 5039500 }, { "epoch": 3.02, "learning_rate": 3.488225913095025e-05, "loss": 1.1549, "step": 5040000 }, { "epoch": 3.02, "learning_rate": 3.488015916538969e-05, "loss": 1.1661, "step": 5040500 }, { "epoch": 3.02, "learning_rate": 3.487806339976024e-05, "loss": 1.1758, "step": 5041000 }, { "epoch": 3.02, "learning_rate": 3.4875963434199675e-05, "loss": 1.1814, "step": 5041500 }, { "epoch": 3.02, "learning_rate": 3.4873863468639115e-05, "loss": 1.1791, "step": 5042000 }, { "epoch": 3.02, "learning_rate": 3.487176350307855e-05, "loss": 1.1868, "step": 5042500 }, { "epoch": 3.02, "learning_rate": 3.486966353751798e-05, "loss": 1.1497, "step": 5043000 }, { "epoch": 3.02, "learning_rate": 3.4867567771888536e-05, "loss": 1.1821, "step": 5043500 }, { "epoch": 3.02, "learning_rate": 3.4865467806327976e-05, "loss": 1.1656, "step": 5044000 }, { "epoch": 3.02, "learning_rate": 3.486336784076741e-05, "loss": 1.1687, "step": 5044500 }, { "epoch": 3.02, "learning_rate": 3.486126787520684e-05, "loss": 1.1724, "step": 5045000 }, { "epoch": 3.02, "learning_rate": 3.485916790964628e-05, "loss": 1.1566, "step": 5045500 }, { "epoch": 3.03, "learning_rate": 3.4857067944085716e-05, "loss": 1.1816, "step": 5046000 }, { "epoch": 3.03, "learning_rate": 3.485496797852515e-05, "loss": 1.1953, "step": 5046500 }, { "epoch": 3.03, "learning_rate": 3.4852872212895704e-05, "loss": 1.1385, "step": 5047000 }, { "epoch": 3.03, "learning_rate": 3.4850772247335144e-05, "loss": 1.1679, "step": 5047500 }, { "epoch": 3.03, "learning_rate": 3.484867228177458e-05, "loss": 1.1657, "step": 5048000 }, { "epoch": 3.03, "learning_rate": 3.484657231621401e-05, "loss": 1.143, "step": 5048500 }, { "epoch": 3.03, "learning_rate": 3.484447235065345e-05, "loss": 1.1685, "step": 5049000 }, { "epoch": 3.03, "learning_rate": 3.4842372385092884e-05, "loss": 1.1395, "step": 5049500 }, { "epoch": 3.03, "learning_rate": 3.484027241953232e-05, "loss": 1.1605, "step": 5050000 }, { "epoch": 3.03, "learning_rate": 3.483817245397176e-05, "loss": 1.1602, "step": 5050500 }, { "epoch": 3.03, "learning_rate": 3.483607668834231e-05, "loss": 1.1565, "step": 5051000 }, { "epoch": 3.03, "learning_rate": 3.4833976722781745e-05, "loss": 1.1485, "step": 5051500 }, { "epoch": 3.03, "learning_rate": 3.4831876757221185e-05, "loss": 1.1394, "step": 5052000 }, { "epoch": 3.03, "learning_rate": 3.482977679166062e-05, "loss": 1.1427, "step": 5052500 }, { "epoch": 3.03, "learning_rate": 3.482768102603117e-05, "loss": 1.1716, "step": 5053000 }, { "epoch": 3.03, "learning_rate": 3.4825581060470606e-05, "loss": 1.1655, "step": 5053500 }, { "epoch": 3.03, "learning_rate": 3.4823481094910046e-05, "loss": 1.1793, "step": 5054000 }, { "epoch": 3.03, "learning_rate": 3.482138112934948e-05, "loss": 1.1777, "step": 5054500 }, { "epoch": 3.03, "learning_rate": 3.481928536372003e-05, "loss": 1.1851, "step": 5055000 }, { "epoch": 3.03, "learning_rate": 3.4817185398159467e-05, "loss": 1.1471, "step": 5055500 }, { "epoch": 3.03, "learning_rate": 3.481508963253003e-05, "loss": 1.1726, "step": 5056000 }, { "epoch": 3.03, "learning_rate": 3.481298966696946e-05, "loss": 1.1588, "step": 5056500 }, { "epoch": 3.03, "learning_rate": 3.4810889701408894e-05, "loss": 1.1643, "step": 5057000 }, { "epoch": 3.03, "learning_rate": 3.4808789735848334e-05, "loss": 1.1906, "step": 5057500 }, { "epoch": 3.03, "learning_rate": 3.480668977028777e-05, "loss": 1.1519, "step": 5058000 }, { "epoch": 3.03, "learning_rate": 3.48045898047272e-05, "loss": 1.1751, "step": 5058500 }, { "epoch": 3.03, "learning_rate": 3.480248983916664e-05, "loss": 1.1854, "step": 5059000 }, { "epoch": 3.03, "learning_rate": 3.4800389873606075e-05, "loss": 1.1599, "step": 5059500 }, { "epoch": 3.03, "learning_rate": 3.479829410797663e-05, "loss": 1.1621, "step": 5060000 }, { "epoch": 3.03, "learning_rate": 3.479619834234718e-05, "loss": 1.1936, "step": 5060500 }, { "epoch": 3.03, "learning_rate": 3.4794098376786615e-05, "loss": 1.1335, "step": 5061000 }, { "epoch": 3.03, "learning_rate": 3.4791998411226056e-05, "loss": 1.1487, "step": 5061500 }, { "epoch": 3.03, "learning_rate": 3.478989844566549e-05, "loss": 1.1465, "step": 5062000 }, { "epoch": 3.04, "learning_rate": 3.478779848010492e-05, "loss": 1.1591, "step": 5062500 }, { "epoch": 3.04, "learning_rate": 3.478569851454436e-05, "loss": 1.1636, "step": 5063000 }, { "epoch": 3.04, "learning_rate": 3.4783598548983796e-05, "loss": 1.145, "step": 5063500 }, { "epoch": 3.04, "learning_rate": 3.478149858342323e-05, "loss": 1.1814, "step": 5064000 }, { "epoch": 3.04, "learning_rate": 3.477940281779379e-05, "loss": 1.1652, "step": 5064500 }, { "epoch": 3.04, "learning_rate": 3.4777307052164344e-05, "loss": 1.1713, "step": 5065000 }, { "epoch": 3.04, "learning_rate": 3.477520708660378e-05, "loss": 1.1264, "step": 5065500 }, { "epoch": 3.04, "learning_rate": 3.477310712104321e-05, "loss": 1.1617, "step": 5066000 }, { "epoch": 3.04, "learning_rate": 3.477100715548265e-05, "loss": 1.1768, "step": 5066500 }, { "epoch": 3.04, "learning_rate": 3.4768907189922084e-05, "loss": 1.1809, "step": 5067000 }, { "epoch": 3.04, "learning_rate": 3.476680722436152e-05, "loss": 1.1635, "step": 5067500 }, { "epoch": 3.04, "learning_rate": 3.476470725880096e-05, "loss": 1.1727, "step": 5068000 }, { "epoch": 3.04, "learning_rate": 3.476261149317151e-05, "loss": 1.185, "step": 5068500 }, { "epoch": 3.04, "learning_rate": 3.4760511527610945e-05, "loss": 1.1826, "step": 5069000 }, { "epoch": 3.04, "learning_rate": 3.475841156205038e-05, "loss": 1.1794, "step": 5069500 }, { "epoch": 3.04, "learning_rate": 3.475631579642094e-05, "loss": 1.1416, "step": 5070000 }, { "epoch": 3.04, "learning_rate": 3.475421583086037e-05, "loss": 1.1545, "step": 5070500 }, { "epoch": 3.04, "learning_rate": 3.4752115865299806e-05, "loss": 1.1677, "step": 5071000 }, { "epoch": 3.04, "learning_rate": 3.4750015899739246e-05, "loss": 1.1722, "step": 5071500 }, { "epoch": 3.04, "learning_rate": 3.474791593417868e-05, "loss": 1.1362, "step": 5072000 }, { "epoch": 3.04, "learning_rate": 3.474581596861811e-05, "loss": 1.1738, "step": 5072500 }, { "epoch": 3.04, "learning_rate": 3.474371600305755e-05, "loss": 1.1662, "step": 5073000 }, { "epoch": 3.04, "learning_rate": 3.4741616037496986e-05, "loss": 1.1414, "step": 5073500 }, { "epoch": 3.04, "learning_rate": 3.473951607193642e-05, "loss": 1.1692, "step": 5074000 }, { "epoch": 3.04, "learning_rate": 3.473741610637586e-05, "loss": 1.163, "step": 5074500 }, { "epoch": 3.04, "learning_rate": 3.4735316140815294e-05, "loss": 1.1498, "step": 5075000 }, { "epoch": 3.04, "learning_rate": 3.473321617525472e-05, "loss": 1.1321, "step": 5075500 }, { "epoch": 3.04, "learning_rate": 3.473112040962528e-05, "loss": 1.1773, "step": 5076000 }, { "epoch": 3.04, "learning_rate": 3.4729024643995834e-05, "loss": 1.1429, "step": 5076500 }, { "epoch": 3.04, "learning_rate": 3.4726928878366395e-05, "loss": 1.1393, "step": 5077000 }, { "epoch": 3.04, "learning_rate": 3.472482891280583e-05, "loss": 1.1425, "step": 5077500 }, { "epoch": 3.04, "learning_rate": 3.472272894724526e-05, "loss": 1.159, "step": 5078000 }, { "epoch": 3.04, "learning_rate": 3.47206289816847e-05, "loss": 1.1996, "step": 5078500 }, { "epoch": 3.05, "learning_rate": 3.4718529016124135e-05, "loss": 1.1572, "step": 5079000 }, { "epoch": 3.05, "learning_rate": 3.471642905056357e-05, "loss": 1.1635, "step": 5079500 }, { "epoch": 3.05, "learning_rate": 3.471432908500301e-05, "loss": 1.1472, "step": 5080000 }, { "epoch": 3.05, "learning_rate": 3.471222911944244e-05, "loss": 1.1643, "step": 5080500 }, { "epoch": 3.05, "learning_rate": 3.4710129153881876e-05, "loss": 1.1504, "step": 5081000 }, { "epoch": 3.05, "learning_rate": 3.4708029188321316e-05, "loss": 1.1789, "step": 5081500 }, { "epoch": 3.05, "learning_rate": 3.470593342269187e-05, "loss": 1.1777, "step": 5082000 }, { "epoch": 3.05, "learning_rate": 3.47038334571313e-05, "loss": 1.1615, "step": 5082500 }, { "epoch": 3.05, "learning_rate": 3.4701733491570736e-05, "loss": 1.178, "step": 5083000 }, { "epoch": 3.05, "learning_rate": 3.469963352601018e-05, "loss": 1.1391, "step": 5083500 }, { "epoch": 3.05, "learning_rate": 3.469753356044961e-05, "loss": 1.178, "step": 5084000 }, { "epoch": 3.05, "learning_rate": 3.4695433594889044e-05, "loss": 1.1655, "step": 5084500 }, { "epoch": 3.05, "learning_rate": 3.469333362932848e-05, "loss": 1.1338, "step": 5085000 }, { "epoch": 3.05, "learning_rate": 3.469123366376791e-05, "loss": 1.1757, "step": 5085500 }, { "epoch": 3.05, "learning_rate": 3.468913789813847e-05, "loss": 1.1656, "step": 5086000 }, { "epoch": 3.05, "learning_rate": 3.468703793257791e-05, "loss": 1.1735, "step": 5086500 }, { "epoch": 3.05, "learning_rate": 3.468493796701734e-05, "loss": 1.1662, "step": 5087000 }, { "epoch": 3.05, "learning_rate": 3.468283800145677e-05, "loss": 1.1587, "step": 5087500 }, { "epoch": 3.05, "learning_rate": 3.468074223582733e-05, "loss": 1.1637, "step": 5088000 }, { "epoch": 3.05, "learning_rate": 3.4678646470197885e-05, "loss": 1.156, "step": 5088500 }, { "epoch": 3.05, "learning_rate": 3.4676546504637325e-05, "loss": 1.1744, "step": 5089000 }, { "epoch": 3.05, "learning_rate": 3.467444653907676e-05, "loss": 1.1842, "step": 5089500 }, { "epoch": 3.05, "learning_rate": 3.467234657351619e-05, "loss": 1.1882, "step": 5090000 }, { "epoch": 3.05, "learning_rate": 3.467024660795563e-05, "loss": 1.1722, "step": 5090500 }, { "epoch": 3.05, "learning_rate": 3.4668146642395066e-05, "loss": 1.1785, "step": 5091000 }, { "epoch": 3.05, "learning_rate": 3.466605087676562e-05, "loss": 1.1747, "step": 5091500 }, { "epoch": 3.05, "learning_rate": 3.466395091120505e-05, "loss": 1.1917, "step": 5092000 }, { "epoch": 3.05, "learning_rate": 3.466185094564449e-05, "loss": 1.1643, "step": 5092500 }, { "epoch": 3.05, "learning_rate": 3.465975098008393e-05, "loss": 1.1629, "step": 5093000 }, { "epoch": 3.05, "learning_rate": 3.465765101452337e-05, "loss": 1.1315, "step": 5093500 }, { "epoch": 3.05, "learning_rate": 3.46555510489628e-05, "loss": 1.1755, "step": 5094000 }, { "epoch": 3.05, "learning_rate": 3.4653455283333354e-05, "loss": 1.1576, "step": 5094500 }, { "epoch": 3.05, "learning_rate": 3.465135531777279e-05, "loss": 1.1908, "step": 5095000 }, { "epoch": 3.05, "learning_rate": 3.464925535221223e-05, "loss": 1.1754, "step": 5095500 }, { "epoch": 3.06, "learning_rate": 3.464715538665166e-05, "loss": 1.1544, "step": 5096000 }, { "epoch": 3.06, "learning_rate": 3.4645059621022215e-05, "loss": 1.1655, "step": 5096500 }, { "epoch": 3.06, "learning_rate": 3.464295965546165e-05, "loss": 1.1512, "step": 5097000 }, { "epoch": 3.06, "learning_rate": 3.464085968990109e-05, "loss": 1.1611, "step": 5097500 }, { "epoch": 3.06, "learning_rate": 3.463875972434052e-05, "loss": 1.1394, "step": 5098000 }, { "epoch": 3.06, "learning_rate": 3.4636659758779955e-05, "loss": 1.159, "step": 5098500 }, { "epoch": 3.06, "learning_rate": 3.463455979321939e-05, "loss": 1.1599, "step": 5099000 }, { "epoch": 3.06, "learning_rate": 3.463246402758995e-05, "loss": 1.1619, "step": 5099500 }, { "epoch": 3.06, "learning_rate": 3.463036406202938e-05, "loss": 1.1291, "step": 5100000 }, { "epoch": 3.06, "eval_loss": 1.1477725505828857, "eval_runtime": 1108.108, "eval_samples_per_second": 475.333, "eval_steps_per_second": 79.222, "step": 5100000 }, { "epoch": 3.06, "learning_rate": 3.462826409646882e-05, "loss": 1.1777, "step": 5100500 }, { "epoch": 3.06, "learning_rate": 3.4626164130908256e-05, "loss": 1.151, "step": 5101000 }, { "epoch": 3.06, "learning_rate": 3.462406416534768e-05, "loss": 1.1536, "step": 5101500 }, { "epoch": 3.06, "learning_rate": 3.462196419978712e-05, "loss": 1.1529, "step": 5102000 }, { "epoch": 3.06, "learning_rate": 3.461986423422656e-05, "loss": 1.1782, "step": 5102500 }, { "epoch": 3.06, "learning_rate": 3.461776426866599e-05, "loss": 1.1513, "step": 5103000 }, { "epoch": 3.06, "learning_rate": 3.461566850303655e-05, "loss": 1.1653, "step": 5103500 }, { "epoch": 3.06, "learning_rate": 3.4613568537475984e-05, "loss": 1.1866, "step": 5104000 }, { "epoch": 3.06, "learning_rate": 3.4611472771846544e-05, "loss": 1.1638, "step": 5104500 }, { "epoch": 3.06, "learning_rate": 3.460937280628598e-05, "loss": 1.1641, "step": 5105000 }, { "epoch": 3.06, "learning_rate": 3.460727284072541e-05, "loss": 1.1569, "step": 5105500 }, { "epoch": 3.06, "learning_rate": 3.4605172875164845e-05, "loss": 1.1469, "step": 5106000 }, { "epoch": 3.06, "learning_rate": 3.460307290960428e-05, "loss": 1.1823, "step": 5106500 }, { "epoch": 3.06, "learning_rate": 3.460097294404372e-05, "loss": 1.1686, "step": 5107000 }, { "epoch": 3.06, "learning_rate": 3.459887297848315e-05, "loss": 1.158, "step": 5107500 }, { "epoch": 3.06, "learning_rate": 3.459677721285371e-05, "loss": 1.1699, "step": 5108000 }, { "epoch": 3.06, "learning_rate": 3.459467724729314e-05, "loss": 1.1607, "step": 5108500 }, { "epoch": 3.06, "learning_rate": 3.459257728173258e-05, "loss": 1.1454, "step": 5109000 }, { "epoch": 3.06, "learning_rate": 3.459047731617201e-05, "loss": 1.1684, "step": 5109500 }, { "epoch": 3.06, "learning_rate": 3.4588377350611446e-05, "loss": 1.162, "step": 5110000 }, { "epoch": 3.06, "learning_rate": 3.4586277385050886e-05, "loss": 1.1588, "step": 5110500 }, { "epoch": 3.06, "learning_rate": 3.458417741949032e-05, "loss": 1.1503, "step": 5111000 }, { "epoch": 3.06, "learning_rate": 3.458207745392975e-05, "loss": 1.1632, "step": 5111500 }, { "epoch": 3.06, "learning_rate": 3.4579985888231434e-05, "loss": 1.1492, "step": 5112000 }, { "epoch": 3.07, "learning_rate": 3.457788592267087e-05, "loss": 1.1504, "step": 5112500 }, { "epoch": 3.07, "learning_rate": 3.457578595711031e-05, "loss": 1.1795, "step": 5113000 }, { "epoch": 3.07, "learning_rate": 3.4573685991549734e-05, "loss": 1.1777, "step": 5113500 }, { "epoch": 3.07, "learning_rate": 3.4571586025989174e-05, "loss": 1.1716, "step": 5114000 }, { "epoch": 3.07, "learning_rate": 3.4569490260359735e-05, "loss": 1.1592, "step": 5114500 }, { "epoch": 3.07, "learning_rate": 3.456739029479917e-05, "loss": 1.1643, "step": 5115000 }, { "epoch": 3.07, "learning_rate": 3.45652903292386e-05, "loss": 1.1944, "step": 5115500 }, { "epoch": 3.07, "learning_rate": 3.4563190363678035e-05, "loss": 1.1755, "step": 5116000 }, { "epoch": 3.07, "learning_rate": 3.456109039811747e-05, "loss": 1.1651, "step": 5116500 }, { "epoch": 3.07, "learning_rate": 3.45589904325569e-05, "loss": 1.1571, "step": 5117000 }, { "epoch": 3.07, "learning_rate": 3.455689046699634e-05, "loss": 1.1635, "step": 5117500 }, { "epoch": 3.07, "learning_rate": 3.4554794701366896e-05, "loss": 1.1843, "step": 5118000 }, { "epoch": 3.07, "learning_rate": 3.455269473580633e-05, "loss": 1.1745, "step": 5118500 }, { "epoch": 3.07, "learning_rate": 3.455059477024576e-05, "loss": 1.1586, "step": 5119000 }, { "epoch": 3.07, "learning_rate": 3.45484948046852e-05, "loss": 1.1568, "step": 5119500 }, { "epoch": 3.07, "learning_rate": 3.4546394839124636e-05, "loss": 1.1885, "step": 5120000 }, { "epoch": 3.07, "learning_rate": 3.454429487356407e-05, "loss": 1.1882, "step": 5120500 }, { "epoch": 3.07, "learning_rate": 3.454219490800351e-05, "loss": 1.1538, "step": 5121000 }, { "epoch": 3.07, "learning_rate": 3.4540094942442944e-05, "loss": 1.1836, "step": 5121500 }, { "epoch": 3.07, "learning_rate": 3.45379991768135e-05, "loss": 1.1663, "step": 5122000 }, { "epoch": 3.07, "learning_rate": 3.453590341118406e-05, "loss": 1.118, "step": 5122500 }, { "epoch": 3.07, "learning_rate": 3.453380344562349e-05, "loss": 1.1574, "step": 5123000 }, { "epoch": 3.07, "learning_rate": 3.4531703480062924e-05, "loss": 1.1688, "step": 5123500 }, { "epoch": 3.07, "learning_rate": 3.452960351450236e-05, "loss": 1.1644, "step": 5124000 }, { "epoch": 3.07, "learning_rate": 3.45275035489418e-05, "loss": 1.1635, "step": 5124500 }, { "epoch": 3.07, "learning_rate": 3.452540358338123e-05, "loss": 1.149, "step": 5125000 }, { "epoch": 3.07, "learning_rate": 3.4523307817751785e-05, "loss": 1.1669, "step": 5125500 }, { "epoch": 3.07, "learning_rate": 3.452120785219122e-05, "loss": 1.1293, "step": 5126000 }, { "epoch": 3.07, "learning_rate": 3.451910788663066e-05, "loss": 1.1639, "step": 5126500 }, { "epoch": 3.07, "learning_rate": 3.451700792107009e-05, "loss": 1.1415, "step": 5127000 }, { "epoch": 3.07, "learning_rate": 3.4514907955509526e-05, "loss": 1.1787, "step": 5127500 }, { "epoch": 3.07, "learning_rate": 3.4512807989948966e-05, "loss": 1.1856, "step": 5128000 }, { "epoch": 3.07, "learning_rate": 3.45107080243884e-05, "loss": 1.1218, "step": 5128500 }, { "epoch": 3.08, "learning_rate": 3.450860805882783e-05, "loss": 1.1869, "step": 5129000 }, { "epoch": 3.08, "learning_rate": 3.450651229319839e-05, "loss": 1.1676, "step": 5129500 }, { "epoch": 3.08, "learning_rate": 3.450441652756895e-05, "loss": 1.1703, "step": 5130000 }, { "epoch": 3.08, "learning_rate": 3.450231656200838e-05, "loss": 1.1621, "step": 5130500 }, { "epoch": 3.08, "learning_rate": 3.4500216596447814e-05, "loss": 1.1605, "step": 5131000 }, { "epoch": 3.08, "learning_rate": 3.4498116630887254e-05, "loss": 1.1544, "step": 5131500 }, { "epoch": 3.08, "learning_rate": 3.4496020865257814e-05, "loss": 1.1684, "step": 5132000 }, { "epoch": 3.08, "learning_rate": 3.449392089969724e-05, "loss": 1.1565, "step": 5132500 }, { "epoch": 3.08, "learning_rate": 3.4491820934136674e-05, "loss": 1.1775, "step": 5133000 }, { "epoch": 3.08, "learning_rate": 3.4489720968576115e-05, "loss": 1.1334, "step": 5133500 }, { "epoch": 3.08, "learning_rate": 3.448762100301555e-05, "loss": 1.1533, "step": 5134000 }, { "epoch": 3.08, "learning_rate": 3.448552103745498e-05, "loss": 1.1779, "step": 5134500 }, { "epoch": 3.08, "learning_rate": 3.448342107189442e-05, "loss": 1.1855, "step": 5135000 }, { "epoch": 3.08, "learning_rate": 3.4481321106333855e-05, "loss": 1.1363, "step": 5135500 }, { "epoch": 3.08, "learning_rate": 3.447922534070441e-05, "loss": 1.1588, "step": 5136000 }, { "epoch": 3.08, "learning_rate": 3.447712957507497e-05, "loss": 1.1472, "step": 5136500 }, { "epoch": 3.08, "learning_rate": 3.44750296095144e-05, "loss": 1.1904, "step": 5137000 }, { "epoch": 3.08, "learning_rate": 3.4472929643953836e-05, "loss": 1.1813, "step": 5137500 }, { "epoch": 3.08, "learning_rate": 3.447082967839327e-05, "loss": 1.1553, "step": 5138000 }, { "epoch": 3.08, "learning_rate": 3.446872971283271e-05, "loss": 1.1512, "step": 5138500 }, { "epoch": 3.08, "learning_rate": 3.446662974727214e-05, "loss": 1.186, "step": 5139000 }, { "epoch": 3.08, "learning_rate": 3.446452978171158e-05, "loss": 1.1542, "step": 5139500 }, { "epoch": 3.08, "learning_rate": 3.446242981615102e-05, "loss": 1.1547, "step": 5140000 }, { "epoch": 3.08, "learning_rate": 3.446033405052157e-05, "loss": 1.164, "step": 5140500 }, { "epoch": 3.08, "learning_rate": 3.4458234084961004e-05, "loss": 1.1528, "step": 5141000 }, { "epoch": 3.08, "learning_rate": 3.4456138319331564e-05, "loss": 1.172, "step": 5141500 }, { "epoch": 3.08, "learning_rate": 3.4454038353771e-05, "loss": 1.1634, "step": 5142000 }, { "epoch": 3.08, "learning_rate": 3.445193838821043e-05, "loss": 1.1785, "step": 5142500 }, { "epoch": 3.08, "learning_rate": 3.4449838422649865e-05, "loss": 1.1635, "step": 5143000 }, { "epoch": 3.08, "learning_rate": 3.4447738457089305e-05, "loss": 1.1579, "step": 5143500 }, { "epoch": 3.08, "learning_rate": 3.444563849152874e-05, "loss": 1.1812, "step": 5144000 }, { "epoch": 3.08, "learning_rate": 3.444353852596817e-05, "loss": 1.2038, "step": 5144500 }, { "epoch": 3.08, "learning_rate": 3.444143856040761e-05, "loss": 1.1697, "step": 5145000 }, { "epoch": 3.08, "learning_rate": 3.4439342794778166e-05, "loss": 1.1301, "step": 5145500 }, { "epoch": 3.09, "learning_rate": 3.44372428292176e-05, "loss": 1.1288, "step": 5146000 }, { "epoch": 3.09, "learning_rate": 3.443514286365703e-05, "loss": 1.1832, "step": 5146500 }, { "epoch": 3.09, "learning_rate": 3.443304289809647e-05, "loss": 1.1595, "step": 5147000 }, { "epoch": 3.09, "learning_rate": 3.4430947132467026e-05, "loss": 1.178, "step": 5147500 }, { "epoch": 3.09, "learning_rate": 3.442884716690646e-05, "loss": 1.1671, "step": 5148000 }, { "epoch": 3.09, "learning_rate": 3.442674720134589e-05, "loss": 1.1416, "step": 5148500 }, { "epoch": 3.09, "learning_rate": 3.4424647235785334e-05, "loss": 1.163, "step": 5149000 }, { "epoch": 3.09, "learning_rate": 3.442255147015589e-05, "loss": 1.1784, "step": 5149500 }, { "epoch": 3.09, "learning_rate": 3.442045150459532e-05, "loss": 1.1777, "step": 5150000 }, { "epoch": 3.09, "learning_rate": 3.441835153903476e-05, "loss": 1.1762, "step": 5150500 }, { "epoch": 3.09, "learning_rate": 3.441625577340532e-05, "loss": 1.1757, "step": 5151000 }, { "epoch": 3.09, "learning_rate": 3.441415580784475e-05, "loss": 1.1626, "step": 5151500 }, { "epoch": 3.09, "learning_rate": 3.441205584228418e-05, "loss": 1.1287, "step": 5152000 }, { "epoch": 3.09, "learning_rate": 3.440995587672362e-05, "loss": 1.1468, "step": 5152500 }, { "epoch": 3.09, "learning_rate": 3.4407855911163055e-05, "loss": 1.1755, "step": 5153000 }, { "epoch": 3.09, "learning_rate": 3.440575594560249e-05, "loss": 1.1621, "step": 5153500 }, { "epoch": 3.09, "learning_rate": 3.440365598004193e-05, "loss": 1.1269, "step": 5154000 }, { "epoch": 3.09, "learning_rate": 3.440156021441248e-05, "loss": 1.1528, "step": 5154500 }, { "epoch": 3.09, "learning_rate": 3.4399460248851916e-05, "loss": 1.1714, "step": 5155000 }, { "epoch": 3.09, "learning_rate": 3.439736028329135e-05, "loss": 1.2, "step": 5155500 }, { "epoch": 3.09, "learning_rate": 3.439526031773079e-05, "loss": 1.1606, "step": 5156000 }, { "epoch": 3.09, "learning_rate": 3.439316035217022e-05, "loss": 1.1981, "step": 5156500 }, { "epoch": 3.09, "learning_rate": 3.4391060386609656e-05, "loss": 1.1692, "step": 5157000 }, { "epoch": 3.09, "learning_rate": 3.43889604210491e-05, "loss": 1.1747, "step": 5157500 }, { "epoch": 3.09, "learning_rate": 3.438686045548853e-05, "loss": 1.1719, "step": 5158000 }, { "epoch": 3.09, "learning_rate": 3.4384768889790204e-05, "loss": 1.1672, "step": 5158500 }, { "epoch": 3.09, "learning_rate": 3.438266892422964e-05, "loss": 1.1738, "step": 5159000 }, { "epoch": 3.09, "learning_rate": 3.438056895866908e-05, "loss": 1.1766, "step": 5159500 }, { "epoch": 3.09, "learning_rate": 3.437846899310851e-05, "loss": 1.1598, "step": 5160000 }, { "epoch": 3.09, "learning_rate": 3.437637322747907e-05, "loss": 1.1275, "step": 5160500 }, { "epoch": 3.09, "learning_rate": 3.43742732619185e-05, "loss": 1.1568, "step": 5161000 }, { "epoch": 3.09, "learning_rate": 3.437217329635794e-05, "loss": 1.1494, "step": 5161500 }, { "epoch": 3.09, "learning_rate": 3.437007333079737e-05, "loss": 1.1419, "step": 5162000 }, { "epoch": 3.1, "learning_rate": 3.4367973365236805e-05, "loss": 1.1695, "step": 5162500 }, { "epoch": 3.1, "learning_rate": 3.4365873399676245e-05, "loss": 1.1782, "step": 5163000 }, { "epoch": 3.1, "learning_rate": 3.436377343411568e-05, "loss": 1.1773, "step": 5163500 }, { "epoch": 3.1, "learning_rate": 3.436167346855512e-05, "loss": 1.1414, "step": 5164000 }, { "epoch": 3.1, "learning_rate": 3.435957350299455e-05, "loss": 1.1794, "step": 5164500 }, { "epoch": 3.1, "learning_rate": 3.4357473537433986e-05, "loss": 1.1836, "step": 5165000 }, { "epoch": 3.1, "learning_rate": 3.435537777180454e-05, "loss": 1.1359, "step": 5165500 }, { "epoch": 3.1, "learning_rate": 3.435327780624398e-05, "loss": 1.1512, "step": 5166000 }, { "epoch": 3.1, "learning_rate": 3.435117784068341e-05, "loss": 1.1679, "step": 5166500 }, { "epoch": 3.1, "learning_rate": 3.434907787512285e-05, "loss": 1.1509, "step": 5167000 }, { "epoch": 3.1, "learning_rate": 3.434697790956229e-05, "loss": 1.1544, "step": 5167500 }, { "epoch": 3.1, "learning_rate": 3.434488214393284e-05, "loss": 1.1652, "step": 5168000 }, { "epoch": 3.1, "learning_rate": 3.4342782178372274e-05, "loss": 1.1348, "step": 5168500 }, { "epoch": 3.1, "learning_rate": 3.434068221281171e-05, "loss": 1.1794, "step": 5169000 }, { "epoch": 3.1, "learning_rate": 3.433858224725115e-05, "loss": 1.1466, "step": 5169500 }, { "epoch": 3.1, "learning_rate": 3.433648228169058e-05, "loss": 1.1726, "step": 5170000 }, { "epoch": 3.1, "learning_rate": 3.4334386516061135e-05, "loss": 1.165, "step": 5170500 }, { "epoch": 3.1, "learning_rate": 3.4332286550500575e-05, "loss": 1.1513, "step": 5171000 }, { "epoch": 3.1, "learning_rate": 3.433018658494001e-05, "loss": 1.1536, "step": 5171500 }, { "epoch": 3.1, "learning_rate": 3.432808661937944e-05, "loss": 1.1542, "step": 5172000 }, { "epoch": 3.1, "learning_rate": 3.432598665381888e-05, "loss": 1.1494, "step": 5172500 }, { "epoch": 3.1, "learning_rate": 3.432389508812055e-05, "loss": 1.1518, "step": 5173000 }, { "epoch": 3.1, "learning_rate": 3.432179512255999e-05, "loss": 1.1486, "step": 5173500 }, { "epoch": 3.1, "learning_rate": 3.431969515699942e-05, "loss": 1.1897, "step": 5174000 }, { "epoch": 3.1, "learning_rate": 3.4317595191438856e-05, "loss": 1.1523, "step": 5174500 }, { "epoch": 3.1, "learning_rate": 3.4315495225878296e-05, "loss": 1.1794, "step": 5175000 }, { "epoch": 3.1, "learning_rate": 3.431339526031773e-05, "loss": 1.1795, "step": 5175500 }, { "epoch": 3.1, "learning_rate": 3.431129529475716e-05, "loss": 1.151, "step": 5176000 }, { "epoch": 3.1, "learning_rate": 3.4309195329196604e-05, "loss": 1.1613, "step": 5176500 }, { "epoch": 3.1, "learning_rate": 3.430709536363604e-05, "loss": 1.1612, "step": 5177000 }, { "epoch": 3.1, "learning_rate": 3.430499539807547e-05, "loss": 1.1539, "step": 5177500 }, { "epoch": 3.1, "learning_rate": 3.430289543251491e-05, "loss": 1.1579, "step": 5178000 }, { "epoch": 3.1, "learning_rate": 3.430079546695434e-05, "loss": 1.1498, "step": 5178500 }, { "epoch": 3.11, "learning_rate": 3.42986997013249e-05, "loss": 1.136, "step": 5179000 }, { "epoch": 3.11, "learning_rate": 3.429659973576434e-05, "loss": 1.169, "step": 5179500 }, { "epoch": 3.11, "learning_rate": 3.429449977020377e-05, "loss": 1.1862, "step": 5180000 }, { "epoch": 3.11, "learning_rate": 3.4292399804643205e-05, "loss": 1.1693, "step": 5180500 }, { "epoch": 3.11, "learning_rate": 3.429030403901376e-05, "loss": 1.1988, "step": 5181000 }, { "epoch": 3.11, "learning_rate": 3.42882040734532e-05, "loss": 1.1741, "step": 5181500 }, { "epoch": 3.11, "learning_rate": 3.428610830782375e-05, "loss": 1.1554, "step": 5182000 }, { "epoch": 3.11, "learning_rate": 3.4284008342263186e-05, "loss": 1.1465, "step": 5182500 }, { "epoch": 3.11, "learning_rate": 3.428190837670262e-05, "loss": 1.168, "step": 5183000 }, { "epoch": 3.11, "learning_rate": 3.427980841114206e-05, "loss": 1.1677, "step": 5183500 }, { "epoch": 3.11, "learning_rate": 3.427770844558149e-05, "loss": 1.1748, "step": 5184000 }, { "epoch": 3.11, "learning_rate": 3.4275608480020926e-05, "loss": 1.1878, "step": 5184500 }, { "epoch": 3.11, "learning_rate": 3.4273508514460367e-05, "loss": 1.1437, "step": 5185000 }, { "epoch": 3.11, "learning_rate": 3.427141274883092e-05, "loss": 1.1731, "step": 5185500 }, { "epoch": 3.11, "learning_rate": 3.4269312783270354e-05, "loss": 1.1823, "step": 5186000 }, { "epoch": 3.11, "learning_rate": 3.4267212817709794e-05, "loss": 1.1693, "step": 5186500 }, { "epoch": 3.11, "learning_rate": 3.426511285214923e-05, "loss": 1.1903, "step": 5187000 }, { "epoch": 3.11, "learning_rate": 3.426301288658866e-05, "loss": 1.1629, "step": 5187500 }, { "epoch": 3.11, "learning_rate": 3.4260912921028094e-05, "loss": 1.1494, "step": 5188000 }, { "epoch": 3.11, "learning_rate": 3.425881295546753e-05, "loss": 1.155, "step": 5188500 }, { "epoch": 3.11, "learning_rate": 3.425671298990696e-05, "loss": 1.1768, "step": 5189000 }, { "epoch": 3.11, "learning_rate": 3.425461722427752e-05, "loss": 1.1818, "step": 5189500 }, { "epoch": 3.11, "learning_rate": 3.4252521458648075e-05, "loss": 1.157, "step": 5190000 }, { "epoch": 3.11, "learning_rate": 3.4250421493087515e-05, "loss": 1.1508, "step": 5190500 }, { "epoch": 3.11, "learning_rate": 3.424832572745807e-05, "loss": 1.1913, "step": 5191000 }, { "epoch": 3.11, "learning_rate": 3.42462257618975e-05, "loss": 1.1816, "step": 5191500 }, { "epoch": 3.11, "learning_rate": 3.424412579633694e-05, "loss": 1.1617, "step": 5192000 }, { "epoch": 3.11, "learning_rate": 3.4242025830776376e-05, "loss": 1.154, "step": 5192500 }, { "epoch": 3.11, "learning_rate": 3.423993006514693e-05, "loss": 1.1697, "step": 5193000 }, { "epoch": 3.11, "learning_rate": 3.423783009958636e-05, "loss": 1.1489, "step": 5193500 }, { "epoch": 3.11, "learning_rate": 3.42357301340258e-05, "loss": 1.1716, "step": 5194000 }, { "epoch": 3.11, "learning_rate": 3.423363436839636e-05, "loss": 1.1734, "step": 5194500 }, { "epoch": 3.11, "learning_rate": 3.423153440283579e-05, "loss": 1.1538, "step": 5195000 }, { "epoch": 3.11, "learning_rate": 3.4229434437275224e-05, "loss": 1.1546, "step": 5195500 }, { "epoch": 3.12, "learning_rate": 3.4227334471714664e-05, "loss": 1.1936, "step": 5196000 }, { "epoch": 3.12, "learning_rate": 3.42252345061541e-05, "loss": 1.1827, "step": 5196500 }, { "epoch": 3.12, "learning_rate": 3.422313454059353e-05, "loss": 1.1716, "step": 5197000 }, { "epoch": 3.12, "learning_rate": 3.422103457503297e-05, "loss": 1.1482, "step": 5197500 }, { "epoch": 3.12, "learning_rate": 3.4218934609472405e-05, "loss": 1.1445, "step": 5198000 }, { "epoch": 3.12, "learning_rate": 3.421683464391184e-05, "loss": 1.1744, "step": 5198500 }, { "epoch": 3.12, "learning_rate": 3.421473467835128e-05, "loss": 1.1511, "step": 5199000 }, { "epoch": 3.12, "learning_rate": 3.421263471279071e-05, "loss": 1.1544, "step": 5199500 }, { "epoch": 3.12, "learning_rate": 3.4210534747230145e-05, "loss": 1.1808, "step": 5200000 }, { "epoch": 3.12, "eval_loss": 1.1460847854614258, "eval_runtime": 1102.6522, "eval_samples_per_second": 477.685, "eval_steps_per_second": 79.614, "step": 5200000 }, { "epoch": 3.12, "learning_rate": 3.420843478166958e-05, "loss": 1.1751, "step": 5200500 }, { "epoch": 3.12, "learning_rate": 3.420633481610901e-05, "loss": 1.1673, "step": 5201000 }, { "epoch": 3.12, "learning_rate": 3.420423485054845e-05, "loss": 1.1478, "step": 5201500 }, { "epoch": 3.12, "learning_rate": 3.420213908491901e-05, "loss": 1.1684, "step": 5202000 }, { "epoch": 3.12, "learning_rate": 3.420003911935844e-05, "loss": 1.1746, "step": 5202500 }, { "epoch": 3.12, "learning_rate": 3.419793915379787e-05, "loss": 1.1453, "step": 5203000 }, { "epoch": 3.12, "learning_rate": 3.419583918823731e-05, "loss": 1.1699, "step": 5203500 }, { "epoch": 3.12, "learning_rate": 3.4193739222676747e-05, "loss": 1.1739, "step": 5204000 }, { "epoch": 3.12, "learning_rate": 3.41916434570473e-05, "loss": 1.1912, "step": 5204500 }, { "epoch": 3.12, "learning_rate": 3.4189543491486734e-05, "loss": 1.1381, "step": 5205000 }, { "epoch": 3.12, "learning_rate": 3.4187443525926174e-05, "loss": 1.1931, "step": 5205500 }, { "epoch": 3.12, "learning_rate": 3.418534356036561e-05, "loss": 1.1767, "step": 5206000 }, { "epoch": 3.12, "learning_rate": 3.418324359480504e-05, "loss": 1.169, "step": 5206500 }, { "epoch": 3.12, "learning_rate": 3.418114362924448e-05, "loss": 1.1641, "step": 5207000 }, { "epoch": 3.12, "learning_rate": 3.4179043663683914e-05, "loss": 1.1464, "step": 5207500 }, { "epoch": 3.12, "learning_rate": 3.417694369812335e-05, "loss": 1.1557, "step": 5208000 }, { "epoch": 3.12, "learning_rate": 3.417484373256279e-05, "loss": 1.1831, "step": 5208500 }, { "epoch": 3.12, "learning_rate": 3.417274796693334e-05, "loss": 1.1928, "step": 5209000 }, { "epoch": 3.12, "learning_rate": 3.4170652201303895e-05, "loss": 1.1676, "step": 5209500 }, { "epoch": 3.12, "learning_rate": 3.416855223574333e-05, "loss": 1.1631, "step": 5210000 }, { "epoch": 3.12, "learning_rate": 3.416645227018277e-05, "loss": 1.1481, "step": 5210500 }, { "epoch": 3.12, "learning_rate": 3.41643523046222e-05, "loss": 1.1631, "step": 5211000 }, { "epoch": 3.12, "learning_rate": 3.4162252339061636e-05, "loss": 1.1601, "step": 5211500 }, { "epoch": 3.12, "learning_rate": 3.4160152373501076e-05, "loss": 1.1651, "step": 5212000 }, { "epoch": 3.13, "learning_rate": 3.415805240794051e-05, "loss": 1.1779, "step": 5212500 }, { "epoch": 3.13, "learning_rate": 3.415595664231106e-05, "loss": 1.1369, "step": 5213000 }, { "epoch": 3.13, "learning_rate": 3.41538566767505e-05, "loss": 1.1697, "step": 5213500 }, { "epoch": 3.13, "learning_rate": 3.415175671118994e-05, "loss": 1.1403, "step": 5214000 }, { "epoch": 3.13, "learning_rate": 3.414966094556049e-05, "loss": 1.1545, "step": 5214500 }, { "epoch": 3.13, "learning_rate": 3.4147560979999924e-05, "loss": 1.1466, "step": 5215000 }, { "epoch": 3.13, "learning_rate": 3.4145461014439364e-05, "loss": 1.1723, "step": 5215500 }, { "epoch": 3.13, "learning_rate": 3.41433610488788e-05, "loss": 1.1806, "step": 5216000 }, { "epoch": 3.13, "learning_rate": 3.414126108331823e-05, "loss": 1.1873, "step": 5216500 }, { "epoch": 3.13, "learning_rate": 3.413916111775767e-05, "loss": 1.1938, "step": 5217000 }, { "epoch": 3.13, "learning_rate": 3.4137065352128225e-05, "loss": 1.2079, "step": 5217500 }, { "epoch": 3.13, "learning_rate": 3.413496538656766e-05, "loss": 1.1327, "step": 5218000 }, { "epoch": 3.13, "learning_rate": 3.413286542100709e-05, "loss": 1.2024, "step": 5218500 }, { "epoch": 3.13, "learning_rate": 3.413076545544653e-05, "loss": 1.1843, "step": 5219000 }, { "epoch": 3.13, "learning_rate": 3.4128665489885965e-05, "loss": 1.146, "step": 5219500 }, { "epoch": 3.13, "learning_rate": 3.41265655243254e-05, "loss": 1.1853, "step": 5220000 }, { "epoch": 3.13, "learning_rate": 3.412446555876484e-05, "loss": 1.1709, "step": 5220500 }, { "epoch": 3.13, "learning_rate": 3.412236559320427e-05, "loss": 1.1425, "step": 5221000 }, { "epoch": 3.13, "learning_rate": 3.4120265627643706e-05, "loss": 1.1484, "step": 5221500 }, { "epoch": 3.13, "learning_rate": 3.411816986201426e-05, "loss": 1.1402, "step": 5222000 }, { "epoch": 3.13, "learning_rate": 3.41160698964537e-05, "loss": 1.1471, "step": 5222500 }, { "epoch": 3.13, "learning_rate": 3.411396993089313e-05, "loss": 1.1571, "step": 5223000 }, { "epoch": 3.13, "learning_rate": 3.4111869965332574e-05, "loss": 1.1678, "step": 5223500 }, { "epoch": 3.13, "learning_rate": 3.410976999977201e-05, "loss": 1.16, "step": 5224000 }, { "epoch": 3.13, "learning_rate": 3.4107670034211434e-05, "loss": 1.1952, "step": 5224500 }, { "epoch": 3.13, "learning_rate": 3.4105570068650874e-05, "loss": 1.1712, "step": 5225000 }, { "epoch": 3.13, "learning_rate": 3.410347010309031e-05, "loss": 1.1833, "step": 5225500 }, { "epoch": 3.13, "learning_rate": 3.410137433746087e-05, "loss": 1.1686, "step": 5226000 }, { "epoch": 3.13, "learning_rate": 3.40992743719003e-05, "loss": 1.1937, "step": 5226500 }, { "epoch": 3.13, "learning_rate": 3.4097178606270855e-05, "loss": 1.183, "step": 5227000 }, { "epoch": 3.13, "learning_rate": 3.4095078640710295e-05, "loss": 1.181, "step": 5227500 }, { "epoch": 3.13, "learning_rate": 3.409297867514973e-05, "loss": 1.1785, "step": 5228000 }, { "epoch": 3.13, "learning_rate": 3.409087870958916e-05, "loss": 1.148, "step": 5228500 }, { "epoch": 3.13, "learning_rate": 3.40887787440286e-05, "loss": 1.2019, "step": 5229000 }, { "epoch": 3.14, "learning_rate": 3.408667877846803e-05, "loss": 1.1529, "step": 5229500 }, { "epoch": 3.14, "learning_rate": 3.408457881290747e-05, "loss": 1.1653, "step": 5230000 }, { "epoch": 3.14, "learning_rate": 3.40824788473469e-05, "loss": 1.1553, "step": 5230500 }, { "epoch": 3.14, "learning_rate": 3.408038308171746e-05, "loss": 1.1844, "step": 5231000 }, { "epoch": 3.14, "learning_rate": 3.407828311615689e-05, "loss": 1.1875, "step": 5231500 }, { "epoch": 3.14, "learning_rate": 3.407618735052745e-05, "loss": 1.1693, "step": 5232000 }, { "epoch": 3.14, "learning_rate": 3.407408738496689e-05, "loss": 1.1507, "step": 5232500 }, { "epoch": 3.14, "learning_rate": 3.4071987419406324e-05, "loss": 1.1529, "step": 5233000 }, { "epoch": 3.14, "learning_rate": 3.406988745384576e-05, "loss": 1.1726, "step": 5233500 }, { "epoch": 3.14, "learning_rate": 3.406778748828519e-05, "loss": 1.1601, "step": 5234000 }, { "epoch": 3.14, "learning_rate": 3.4065687522724624e-05, "loss": 1.1722, "step": 5234500 }, { "epoch": 3.14, "learning_rate": 3.406358755716406e-05, "loss": 1.14, "step": 5235000 }, { "epoch": 3.14, "learning_rate": 3.40614875916035e-05, "loss": 1.1924, "step": 5235500 }, { "epoch": 3.14, "learning_rate": 3.405939182597406e-05, "loss": 1.1706, "step": 5236000 }, { "epoch": 3.14, "learning_rate": 3.4057291860413485e-05, "loss": 1.1802, "step": 5236500 }, { "epoch": 3.14, "learning_rate": 3.4055191894852925e-05, "loss": 1.1847, "step": 5237000 }, { "epoch": 3.14, "learning_rate": 3.405309192929236e-05, "loss": 1.1825, "step": 5237500 }, { "epoch": 3.14, "learning_rate": 3.405099616366292e-05, "loss": 1.1495, "step": 5238000 }, { "epoch": 3.14, "learning_rate": 3.404890039803347e-05, "loss": 1.1652, "step": 5238500 }, { "epoch": 3.14, "learning_rate": 3.4046800432472906e-05, "loss": 1.1546, "step": 5239000 }, { "epoch": 3.14, "learning_rate": 3.4044700466912346e-05, "loss": 1.1809, "step": 5239500 }, { "epoch": 3.14, "learning_rate": 3.404260050135178e-05, "loss": 1.1423, "step": 5240000 }, { "epoch": 3.14, "learning_rate": 3.404050053579121e-05, "loss": 1.1882, "step": 5240500 }, { "epoch": 3.14, "learning_rate": 3.4038404770161767e-05, "loss": 1.1722, "step": 5241000 }, { "epoch": 3.14, "learning_rate": 3.403630480460121e-05, "loss": 1.1693, "step": 5241500 }, { "epoch": 3.14, "learning_rate": 3.403420483904064e-05, "loss": 1.1449, "step": 5242000 }, { "epoch": 3.14, "learning_rate": 3.4032104873480074e-05, "loss": 1.2002, "step": 5242500 }, { "epoch": 3.14, "learning_rate": 3.4030009107850634e-05, "loss": 1.1715, "step": 5243000 }, { "epoch": 3.14, "learning_rate": 3.402790914229007e-05, "loss": 1.1691, "step": 5243500 }, { "epoch": 3.14, "learning_rate": 3.40258091767295e-05, "loss": 1.1815, "step": 5244000 }, { "epoch": 3.14, "learning_rate": 3.402370921116894e-05, "loss": 1.1443, "step": 5244500 }, { "epoch": 3.14, "learning_rate": 3.4021609245608375e-05, "loss": 1.1581, "step": 5245000 }, { "epoch": 3.14, "learning_rate": 3.401950928004781e-05, "loss": 1.1545, "step": 5245500 }, { "epoch": 3.15, "learning_rate": 3.401740931448724e-05, "loss": 1.1413, "step": 5246000 }, { "epoch": 3.15, "learning_rate": 3.40153135488578e-05, "loss": 1.1474, "step": 5246500 }, { "epoch": 3.15, "learning_rate": 3.4013213583297235e-05, "loss": 1.1267, "step": 5247000 }, { "epoch": 3.15, "learning_rate": 3.401111361773667e-05, "loss": 1.1732, "step": 5247500 }, { "epoch": 3.15, "learning_rate": 3.400901365217611e-05, "loss": 1.1621, "step": 5248000 }, { "epoch": 3.15, "learning_rate": 3.4006913686615536e-05, "loss": 1.1766, "step": 5248500 }, { "epoch": 3.15, "learning_rate": 3.400481372105497e-05, "loss": 1.1352, "step": 5249000 }, { "epoch": 3.15, "learning_rate": 3.400271375549441e-05, "loss": 1.1602, "step": 5249500 }, { "epoch": 3.15, "learning_rate": 3.400061378993384e-05, "loss": 1.1443, "step": 5250000 }, { "epoch": 3.15, "learning_rate": 3.39985180243044e-05, "loss": 1.198, "step": 5250500 }, { "epoch": 3.15, "learning_rate": 3.399641805874384e-05, "loss": 1.1858, "step": 5251000 }, { "epoch": 3.15, "learning_rate": 3.399431809318327e-05, "loss": 1.1506, "step": 5251500 }, { "epoch": 3.15, "learning_rate": 3.399222232755383e-05, "loss": 1.1801, "step": 5252000 }, { "epoch": 3.15, "learning_rate": 3.3990122361993264e-05, "loss": 1.1862, "step": 5252500 }, { "epoch": 3.15, "learning_rate": 3.39880223964327e-05, "loss": 1.1771, "step": 5253000 }, { "epoch": 3.15, "learning_rate": 3.398592243087213e-05, "loss": 1.181, "step": 5253500 }, { "epoch": 3.15, "learning_rate": 3.3983822465311564e-05, "loss": 1.1685, "step": 5254000 }, { "epoch": 3.15, "learning_rate": 3.3981726699682125e-05, "loss": 1.1605, "step": 5254500 }, { "epoch": 3.15, "learning_rate": 3.3979626734121565e-05, "loss": 1.166, "step": 5255000 }, { "epoch": 3.15, "learning_rate": 3.397752676856099e-05, "loss": 1.1554, "step": 5255500 }, { "epoch": 3.15, "learning_rate": 3.3975426803000425e-05, "loss": 1.1752, "step": 5256000 }, { "epoch": 3.15, "learning_rate": 3.3973331037370985e-05, "loss": 1.171, "step": 5256500 }, { "epoch": 3.15, "learning_rate": 3.3971231071810426e-05, "loss": 1.1568, "step": 5257000 }, { "epoch": 3.15, "learning_rate": 3.396913110624986e-05, "loss": 1.1652, "step": 5257500 }, { "epoch": 3.15, "learning_rate": 3.396703114068929e-05, "loss": 1.1686, "step": 5258000 }, { "epoch": 3.15, "learning_rate": 3.396493537505985e-05, "loss": 1.1715, "step": 5258500 }, { "epoch": 3.15, "learning_rate": 3.3962835409499286e-05, "loss": 1.1795, "step": 5259000 }, { "epoch": 3.15, "learning_rate": 3.396073544393872e-05, "loss": 1.178, "step": 5259500 }, { "epoch": 3.15, "learning_rate": 3.395863547837816e-05, "loss": 1.148, "step": 5260000 }, { "epoch": 3.15, "learning_rate": 3.395653551281759e-05, "loss": 1.1626, "step": 5260500 }, { "epoch": 3.15, "learning_rate": 3.395443554725702e-05, "loss": 1.176, "step": 5261000 }, { "epoch": 3.15, "learning_rate": 3.395233558169646e-05, "loss": 1.172, "step": 5261500 }, { "epoch": 3.15, "learning_rate": 3.3950235616135894e-05, "loss": 1.1641, "step": 5262000 }, { "epoch": 3.16, "learning_rate": 3.394813985050645e-05, "loss": 1.1709, "step": 5262500 }, { "epoch": 3.16, "learning_rate": 3.394603988494588e-05, "loss": 1.1976, "step": 5263000 }, { "epoch": 3.16, "learning_rate": 3.394393991938532e-05, "loss": 1.179, "step": 5263500 }, { "epoch": 3.16, "learning_rate": 3.3941839953824755e-05, "loss": 1.2022, "step": 5264000 }, { "epoch": 3.16, "learning_rate": 3.393973998826419e-05, "loss": 1.1834, "step": 5264500 }, { "epoch": 3.16, "learning_rate": 3.393764002270363e-05, "loss": 1.1586, "step": 5265000 }, { "epoch": 3.16, "learning_rate": 3.393554425707418e-05, "loss": 1.1665, "step": 5265500 }, { "epoch": 3.16, "learning_rate": 3.3933444291513615e-05, "loss": 1.1882, "step": 5266000 }, { "epoch": 3.16, "learning_rate": 3.3931344325953056e-05, "loss": 1.155, "step": 5266500 }, { "epoch": 3.16, "learning_rate": 3.392924436039249e-05, "loss": 1.1745, "step": 5267000 }, { "epoch": 3.16, "learning_rate": 3.392714439483192e-05, "loss": 1.1598, "step": 5267500 }, { "epoch": 3.16, "learning_rate": 3.392504442927136e-05, "loss": 1.1596, "step": 5268000 }, { "epoch": 3.16, "learning_rate": 3.3922944463710796e-05, "loss": 1.174, "step": 5268500 }, { "epoch": 3.16, "learning_rate": 3.392084449815023e-05, "loss": 1.1552, "step": 5269000 }, { "epoch": 3.16, "learning_rate": 3.391874873252078e-05, "loss": 1.1933, "step": 5269500 }, { "epoch": 3.16, "learning_rate": 3.3916648766960224e-05, "loss": 1.1477, "step": 5270000 }, { "epoch": 3.16, "learning_rate": 3.39145572012619e-05, "loss": 1.1744, "step": 5270500 }, { "epoch": 3.16, "learning_rate": 3.391245723570134e-05, "loss": 1.1997, "step": 5271000 }, { "epoch": 3.16, "learning_rate": 3.391035727014077e-05, "loss": 1.154, "step": 5271500 }, { "epoch": 3.16, "learning_rate": 3.3908257304580204e-05, "loss": 1.1838, "step": 5272000 }, { "epoch": 3.16, "learning_rate": 3.390615733901964e-05, "loss": 1.1571, "step": 5272500 }, { "epoch": 3.16, "learning_rate": 3.390405737345907e-05, "loss": 1.1455, "step": 5273000 }, { "epoch": 3.16, "learning_rate": 3.390195740789851e-05, "loss": 1.1389, "step": 5273500 }, { "epoch": 3.16, "learning_rate": 3.3899857442337945e-05, "loss": 1.1551, "step": 5274000 }, { "epoch": 3.16, "learning_rate": 3.389775747677738e-05, "loss": 1.1707, "step": 5274500 }, { "epoch": 3.16, "learning_rate": 3.389565751121682e-05, "loss": 1.1959, "step": 5275000 }, { "epoch": 3.16, "learning_rate": 3.389355754565625e-05, "loss": 1.19, "step": 5275500 }, { "epoch": 3.16, "learning_rate": 3.3891457580095686e-05, "loss": 1.1814, "step": 5276000 }, { "epoch": 3.16, "learning_rate": 3.388936181446624e-05, "loss": 1.1413, "step": 5276500 }, { "epoch": 3.16, "learning_rate": 3.388726604883679e-05, "loss": 1.171, "step": 5277000 }, { "epoch": 3.16, "learning_rate": 3.388516608327623e-05, "loss": 1.1502, "step": 5277500 }, { "epoch": 3.16, "learning_rate": 3.3883066117715666e-05, "loss": 1.1542, "step": 5278000 }, { "epoch": 3.16, "learning_rate": 3.38809661521551e-05, "loss": 1.1897, "step": 5278500 }, { "epoch": 3.16, "learning_rate": 3.387886618659454e-05, "loss": 1.1769, "step": 5279000 }, { "epoch": 3.17, "learning_rate": 3.3876770420965094e-05, "loss": 1.1686, "step": 5279500 }, { "epoch": 3.17, "learning_rate": 3.387467045540453e-05, "loss": 1.1454, "step": 5280000 }, { "epoch": 3.17, "learning_rate": 3.387257048984397e-05, "loss": 1.1846, "step": 5280500 }, { "epoch": 3.17, "learning_rate": 3.38704705242834e-05, "loss": 1.1737, "step": 5281000 }, { "epoch": 3.17, "learning_rate": 3.3868370558722834e-05, "loss": 1.1612, "step": 5281500 }, { "epoch": 3.17, "learning_rate": 3.3866270593162275e-05, "loss": 1.1786, "step": 5282000 }, { "epoch": 3.17, "learning_rate": 3.386417062760171e-05, "loss": 1.1634, "step": 5282500 }, { "epoch": 3.17, "learning_rate": 3.386207066204114e-05, "loss": 1.1766, "step": 5283000 }, { "epoch": 3.17, "learning_rate": 3.385997069648058e-05, "loss": 1.1729, "step": 5283500 }, { "epoch": 3.17, "learning_rate": 3.3857870730920015e-05, "loss": 1.1702, "step": 5284000 }, { "epoch": 3.17, "learning_rate": 3.385577496529057e-05, "loss": 1.158, "step": 5284500 }, { "epoch": 3.17, "learning_rate": 3.385367499973e-05, "loss": 1.1882, "step": 5285000 }, { "epoch": 3.17, "learning_rate": 3.385157503416944e-05, "loss": 1.1644, "step": 5285500 }, { "epoch": 3.17, "learning_rate": 3.3849475068608876e-05, "loss": 1.1724, "step": 5286000 }, { "epoch": 3.17, "learning_rate": 3.384737510304831e-05, "loss": 1.1693, "step": 5286500 }, { "epoch": 3.17, "learning_rate": 3.384527513748775e-05, "loss": 1.1537, "step": 5287000 }, { "epoch": 3.17, "learning_rate": 3.38431793718583e-05, "loss": 1.1891, "step": 5287500 }, { "epoch": 3.17, "learning_rate": 3.384107940629774e-05, "loss": 1.1749, "step": 5288000 }, { "epoch": 3.17, "learning_rate": 3.383897944073718e-05, "loss": 1.1731, "step": 5288500 }, { "epoch": 3.17, "learning_rate": 3.383687947517661e-05, "loss": 1.152, "step": 5289000 }, { "epoch": 3.17, "learning_rate": 3.383477950961604e-05, "loss": 1.1807, "step": 5289500 }, { "epoch": 3.17, "learning_rate": 3.383267954405548e-05, "loss": 1.1774, "step": 5290000 }, { "epoch": 3.17, "learning_rate": 3.383058377842604e-05, "loss": 1.1496, "step": 5290500 }, { "epoch": 3.17, "learning_rate": 3.382848381286547e-05, "loss": 1.1536, "step": 5291000 }, { "epoch": 3.17, "learning_rate": 3.3826383847304905e-05, "loss": 1.2075, "step": 5291500 }, { "epoch": 3.17, "learning_rate": 3.382428388174434e-05, "loss": 1.1642, "step": 5292000 }, { "epoch": 3.17, "learning_rate": 3.382218391618377e-05, "loss": 1.1535, "step": 5292500 }, { "epoch": 3.17, "learning_rate": 3.382008815055433e-05, "loss": 1.1653, "step": 5293000 }, { "epoch": 3.17, "learning_rate": 3.3817988184993765e-05, "loss": 1.1669, "step": 5293500 }, { "epoch": 3.17, "learning_rate": 3.3815888219433205e-05, "loss": 1.1386, "step": 5294000 }, { "epoch": 3.17, "learning_rate": 3.381378825387263e-05, "loss": 1.1548, "step": 5294500 }, { "epoch": 3.17, "learning_rate": 3.3811688288312066e-05, "loss": 1.1754, "step": 5295000 }, { "epoch": 3.17, "learning_rate": 3.3809588322751506e-05, "loss": 1.1608, "step": 5295500 }, { "epoch": 3.18, "learning_rate": 3.380748835719094e-05, "loss": 1.1903, "step": 5296000 }, { "epoch": 3.18, "learning_rate": 3.380538839163038e-05, "loss": 1.1513, "step": 5296500 }, { "epoch": 3.18, "learning_rate": 3.380329682593205e-05, "loss": 1.1523, "step": 5297000 }, { "epoch": 3.18, "learning_rate": 3.3801196860371493e-05, "loss": 1.1539, "step": 5297500 }, { "epoch": 3.18, "learning_rate": 3.379909689481093e-05, "loss": 1.1538, "step": 5298000 }, { "epoch": 3.18, "learning_rate": 3.379699692925036e-05, "loss": 1.15, "step": 5298500 }, { "epoch": 3.18, "learning_rate": 3.3794896963689794e-05, "loss": 1.1573, "step": 5299000 }, { "epoch": 3.18, "learning_rate": 3.3792801198060354e-05, "loss": 1.1431, "step": 5299500 }, { "epoch": 3.18, "learning_rate": 3.379070123249979e-05, "loss": 1.1464, "step": 5300000 }, { "epoch": 3.18, "eval_loss": 1.144875407218933, "eval_runtime": 1110.2882, "eval_samples_per_second": 474.399, "eval_steps_per_second": 79.067, "step": 5300000 }, { "epoch": 3.18, "learning_rate": 3.378860546687034e-05, "loss": 1.1742, "step": 5300500 }, { "epoch": 3.18, "learning_rate": 3.3786505501309775e-05, "loss": 1.2002, "step": 5301000 }, { "epoch": 3.18, "learning_rate": 3.3784405535749215e-05, "loss": 1.1369, "step": 5301500 }, { "epoch": 3.18, "learning_rate": 3.378230557018865e-05, "loss": 1.1692, "step": 5302000 }, { "epoch": 3.18, "learning_rate": 3.378020560462809e-05, "loss": 1.1997, "step": 5302500 }, { "epoch": 3.18, "learning_rate": 3.377810563906752e-05, "loss": 1.1565, "step": 5303000 }, { "epoch": 3.18, "learning_rate": 3.3776005673506956e-05, "loss": 1.1792, "step": 5303500 }, { "epoch": 3.18, "learning_rate": 3.377390570794639e-05, "loss": 1.1762, "step": 5304000 }, { "epoch": 3.18, "learning_rate": 3.377180574238582e-05, "loss": 1.179, "step": 5304500 }, { "epoch": 3.18, "learning_rate": 3.376970997675638e-05, "loss": 1.1768, "step": 5305000 }, { "epoch": 3.18, "learning_rate": 3.3767610011195816e-05, "loss": 1.1532, "step": 5305500 }, { "epoch": 3.18, "learning_rate": 3.3765510045635257e-05, "loss": 1.1553, "step": 5306000 }, { "epoch": 3.18, "learning_rate": 3.376341008007468e-05, "loss": 1.1361, "step": 5306500 }, { "epoch": 3.18, "learning_rate": 3.376131011451412e-05, "loss": 1.1716, "step": 5307000 }, { "epoch": 3.18, "learning_rate": 3.375921434888468e-05, "loss": 1.147, "step": 5307500 }, { "epoch": 3.18, "learning_rate": 3.375711438332412e-05, "loss": 1.174, "step": 5308000 }, { "epoch": 3.18, "learning_rate": 3.3755014417763544e-05, "loss": 1.1645, "step": 5308500 }, { "epoch": 3.18, "learning_rate": 3.3752914452202984e-05, "loss": 1.1649, "step": 5309000 }, { "epoch": 3.18, "learning_rate": 3.375081448664242e-05, "loss": 1.1613, "step": 5309500 }, { "epoch": 3.18, "learning_rate": 3.374871452108185e-05, "loss": 1.1791, "step": 5310000 }, { "epoch": 3.18, "learning_rate": 3.374661455552129e-05, "loss": 1.1696, "step": 5310500 }, { "epoch": 3.18, "learning_rate": 3.3744518789891845e-05, "loss": 1.167, "step": 5311000 }, { "epoch": 3.18, "learning_rate": 3.374241882433128e-05, "loss": 1.1543, "step": 5311500 }, { "epoch": 3.18, "learning_rate": 3.374031885877071e-05, "loss": 1.1633, "step": 5312000 }, { "epoch": 3.19, "learning_rate": 3.373821889321015e-05, "loss": 1.1657, "step": 5312500 }, { "epoch": 3.19, "learning_rate": 3.3736118927649586e-05, "loss": 1.1861, "step": 5313000 }, { "epoch": 3.19, "learning_rate": 3.373402316202014e-05, "loss": 1.2088, "step": 5313500 }, { "epoch": 3.19, "learning_rate": 3.373192319645957e-05, "loss": 1.1784, "step": 5314000 }, { "epoch": 3.19, "learning_rate": 3.372982323089901e-05, "loss": 1.1717, "step": 5314500 }, { "epoch": 3.19, "learning_rate": 3.3727723265338446e-05, "loss": 1.1704, "step": 5315000 }, { "epoch": 3.19, "learning_rate": 3.372562329977788e-05, "loss": 1.1454, "step": 5315500 }, { "epoch": 3.19, "learning_rate": 3.372352333421732e-05, "loss": 1.1564, "step": 5316000 }, { "epoch": 3.19, "learning_rate": 3.3721423368656753e-05, "loss": 1.1685, "step": 5316500 }, { "epoch": 3.19, "learning_rate": 3.371932760302731e-05, "loss": 1.1875, "step": 5317000 }, { "epoch": 3.19, "learning_rate": 3.371722763746675e-05, "loss": 1.1784, "step": 5317500 }, { "epoch": 3.19, "learning_rate": 3.37151318718373e-05, "loss": 1.1826, "step": 5318000 }, { "epoch": 3.19, "learning_rate": 3.3713031906276734e-05, "loss": 1.1569, "step": 5318500 }, { "epoch": 3.19, "learning_rate": 3.371093194071617e-05, "loss": 1.1666, "step": 5319000 }, { "epoch": 3.19, "learning_rate": 3.370883197515561e-05, "loss": 1.1509, "step": 5319500 }, { "epoch": 3.19, "learning_rate": 3.370673200959504e-05, "loss": 1.1687, "step": 5320000 }, { "epoch": 3.19, "learning_rate": 3.3704632044034475e-05, "loss": 1.1747, "step": 5320500 }, { "epoch": 3.19, "learning_rate": 3.3702532078473915e-05, "loss": 1.1714, "step": 5321000 }, { "epoch": 3.19, "learning_rate": 3.370043211291335e-05, "loss": 1.1413, "step": 5321500 }, { "epoch": 3.19, "learning_rate": 3.369833214735278e-05, "loss": 1.1586, "step": 5322000 }, { "epoch": 3.19, "learning_rate": 3.3696236381723336e-05, "loss": 1.168, "step": 5322500 }, { "epoch": 3.19, "learning_rate": 3.3694136416162776e-05, "loss": 1.1707, "step": 5323000 }, { "epoch": 3.19, "learning_rate": 3.369203645060221e-05, "loss": 1.2057, "step": 5323500 }, { "epoch": 3.19, "learning_rate": 3.368993648504164e-05, "loss": 1.1536, "step": 5324000 }, { "epoch": 3.19, "learning_rate": 3.368783651948108e-05, "loss": 1.1697, "step": 5324500 }, { "epoch": 3.19, "learning_rate": 3.3685736553920516e-05, "loss": 1.1584, "step": 5325000 }, { "epoch": 3.19, "learning_rate": 3.368363658835995e-05, "loss": 1.1843, "step": 5325500 }, { "epoch": 3.19, "learning_rate": 3.368153662279938e-05, "loss": 1.1776, "step": 5326000 }, { "epoch": 3.19, "learning_rate": 3.3679440857169944e-05, "loss": 1.1261, "step": 5326500 }, { "epoch": 3.19, "learning_rate": 3.367734089160938e-05, "loss": 1.198, "step": 5327000 }, { "epoch": 3.19, "learning_rate": 3.367524512597993e-05, "loss": 1.1932, "step": 5327500 }, { "epoch": 3.19, "learning_rate": 3.367314516041937e-05, "loss": 1.167, "step": 5328000 }, { "epoch": 3.19, "learning_rate": 3.3671049394789925e-05, "loss": 1.1424, "step": 5328500 }, { "epoch": 3.19, "learning_rate": 3.366894942922936e-05, "loss": 1.1492, "step": 5329000 }, { "epoch": 3.2, "learning_rate": 3.366684946366879e-05, "loss": 1.1446, "step": 5329500 }, { "epoch": 3.2, "learning_rate": 3.366474949810823e-05, "loss": 1.1653, "step": 5330000 }, { "epoch": 3.2, "learning_rate": 3.3662649532547665e-05, "loss": 1.1483, "step": 5330500 }, { "epoch": 3.2, "learning_rate": 3.36605495669871e-05, "loss": 1.163, "step": 5331000 }, { "epoch": 3.2, "learning_rate": 3.365844960142654e-05, "loss": 1.1611, "step": 5331500 }, { "epoch": 3.2, "learning_rate": 3.365634963586597e-05, "loss": 1.1659, "step": 5332000 }, { "epoch": 3.2, "learning_rate": 3.3654249670305406e-05, "loss": 1.1664, "step": 5332500 }, { "epoch": 3.2, "learning_rate": 3.3652149704744846e-05, "loss": 1.2027, "step": 5333000 }, { "epoch": 3.2, "learning_rate": 3.365004973918427e-05, "loss": 1.1881, "step": 5333500 }, { "epoch": 3.2, "learning_rate": 3.364794977362371e-05, "loss": 1.1847, "step": 5334000 }, { "epoch": 3.2, "learning_rate": 3.364585400799427e-05, "loss": 1.1891, "step": 5334500 }, { "epoch": 3.2, "learning_rate": 3.364375824236483e-05, "loss": 1.1615, "step": 5335000 }, { "epoch": 3.2, "learning_rate": 3.364165827680426e-05, "loss": 1.1784, "step": 5335500 }, { "epoch": 3.2, "learning_rate": 3.3639558311243694e-05, "loss": 1.1849, "step": 5336000 }, { "epoch": 3.2, "learning_rate": 3.3637458345683134e-05, "loss": 1.1648, "step": 5336500 }, { "epoch": 3.2, "learning_rate": 3.363535838012257e-05, "loss": 1.1437, "step": 5337000 }, { "epoch": 3.2, "learning_rate": 3.3633258414562e-05, "loss": 1.1634, "step": 5337500 }, { "epoch": 3.2, "learning_rate": 3.3631162648932554e-05, "loss": 1.1636, "step": 5338000 }, { "epoch": 3.2, "learning_rate": 3.3629062683371995e-05, "loss": 1.1512, "step": 5338500 }, { "epoch": 3.2, "learning_rate": 3.362696271781143e-05, "loss": 1.1552, "step": 5339000 }, { "epoch": 3.2, "learning_rate": 3.362486275225086e-05, "loss": 1.1736, "step": 5339500 }, { "epoch": 3.2, "learning_rate": 3.362276698662142e-05, "loss": 1.1628, "step": 5340000 }, { "epoch": 3.2, "learning_rate": 3.3620667021060855e-05, "loss": 1.2001, "step": 5340500 }, { "epoch": 3.2, "learning_rate": 3.361856705550029e-05, "loss": 1.1654, "step": 5341000 }, { "epoch": 3.2, "learning_rate": 3.361646708993973e-05, "loss": 1.1589, "step": 5341500 }, { "epoch": 3.2, "learning_rate": 3.361436712437916e-05, "loss": 1.1443, "step": 5342000 }, { "epoch": 3.2, "learning_rate": 3.3612267158818596e-05, "loss": 1.1866, "step": 5342500 }, { "epoch": 3.2, "learning_rate": 3.361016719325803e-05, "loss": 1.1784, "step": 5343000 }, { "epoch": 3.2, "learning_rate": 3.360806722769746e-05, "loss": 1.1663, "step": 5343500 }, { "epoch": 3.2, "learning_rate": 3.360597146206802e-05, "loss": 1.1434, "step": 5344000 }, { "epoch": 3.2, "learning_rate": 3.360387149650746e-05, "loss": 1.1535, "step": 5344500 }, { "epoch": 3.2, "learning_rate": 3.360177153094689e-05, "loss": 1.1903, "step": 5345000 }, { "epoch": 3.2, "learning_rate": 3.359967576531745e-05, "loss": 1.1557, "step": 5345500 }, { "epoch": 3.21, "learning_rate": 3.3597575799756884e-05, "loss": 1.1887, "step": 5346000 }, { "epoch": 3.21, "learning_rate": 3.359547583419632e-05, "loss": 1.1675, "step": 5346500 }, { "epoch": 3.21, "learning_rate": 3.359337586863576e-05, "loss": 1.1825, "step": 5347000 }, { "epoch": 3.21, "learning_rate": 3.3591275903075184e-05, "loss": 1.181, "step": 5347500 }, { "epoch": 3.21, "learning_rate": 3.3589175937514625e-05, "loss": 1.1513, "step": 5348000 }, { "epoch": 3.21, "learning_rate": 3.358707597195406e-05, "loss": 1.1662, "step": 5348500 }, { "epoch": 3.21, "learning_rate": 3.358497600639349e-05, "loss": 1.1656, "step": 5349000 }, { "epoch": 3.21, "learning_rate": 3.358288024076405e-05, "loss": 1.1638, "step": 5349500 }, { "epoch": 3.21, "learning_rate": 3.3580780275203485e-05, "loss": 1.1872, "step": 5350000 }, { "epoch": 3.21, "learning_rate": 3.357868030964292e-05, "loss": 1.1764, "step": 5350500 }, { "epoch": 3.21, "learning_rate": 3.357658034408235e-05, "loss": 1.1776, "step": 5351000 }, { "epoch": 3.21, "learning_rate": 3.357448037852179e-05, "loss": 1.146, "step": 5351500 }, { "epoch": 3.21, "learning_rate": 3.357238461289235e-05, "loss": 1.1664, "step": 5352000 }, { "epoch": 3.21, "learning_rate": 3.357028464733178e-05, "loss": 1.183, "step": 5352500 }, { "epoch": 3.21, "learning_rate": 3.356818468177121e-05, "loss": 1.1656, "step": 5353000 }, { "epoch": 3.21, "learning_rate": 3.356608471621065e-05, "loss": 1.1752, "step": 5353500 }, { "epoch": 3.21, "learning_rate": 3.356398475065009e-05, "loss": 1.1802, "step": 5354000 }, { "epoch": 3.21, "learning_rate": 3.356188478508953e-05, "loss": 1.158, "step": 5354500 }, { "epoch": 3.21, "learning_rate": 3.355978481952896e-05, "loss": 1.1878, "step": 5355000 }, { "epoch": 3.21, "learning_rate": 3.3557684853968394e-05, "loss": 1.1831, "step": 5355500 }, { "epoch": 3.21, "learning_rate": 3.355558908833895e-05, "loss": 1.1679, "step": 5356000 }, { "epoch": 3.21, "learning_rate": 3.355348912277839e-05, "loss": 1.1638, "step": 5356500 }, { "epoch": 3.21, "learning_rate": 3.355138915721782e-05, "loss": 1.1638, "step": 5357000 }, { "epoch": 3.21, "learning_rate": 3.3549289191657255e-05, "loss": 1.1934, "step": 5357500 }, { "epoch": 3.21, "learning_rate": 3.3547189226096695e-05, "loss": 1.1778, "step": 5358000 }, { "epoch": 3.21, "learning_rate": 3.354508926053613e-05, "loss": 1.1576, "step": 5358500 }, { "epoch": 3.21, "learning_rate": 3.354298929497556e-05, "loss": 1.1737, "step": 5359000 }, { "epoch": 3.21, "learning_rate": 3.3540889329415e-05, "loss": 1.1674, "step": 5359500 }, { "epoch": 3.21, "learning_rate": 3.3538793563785556e-05, "loss": 1.17, "step": 5360000 }, { "epoch": 3.21, "learning_rate": 3.353669359822499e-05, "loss": 1.1683, "step": 5360500 }, { "epoch": 3.21, "learning_rate": 3.353459783259554e-05, "loss": 1.1624, "step": 5361000 }, { "epoch": 3.21, "learning_rate": 3.353249786703498e-05, "loss": 1.1736, "step": 5361500 }, { "epoch": 3.21, "learning_rate": 3.3530397901474416e-05, "loss": 1.1524, "step": 5362000 }, { "epoch": 3.22, "learning_rate": 3.352829793591385e-05, "loss": 1.1611, "step": 5362500 }, { "epoch": 3.22, "learning_rate": 3.352619797035329e-05, "loss": 1.1708, "step": 5363000 }, { "epoch": 3.22, "learning_rate": 3.3524098004792723e-05, "loss": 1.1678, "step": 5363500 }, { "epoch": 3.22, "learning_rate": 3.352199803923216e-05, "loss": 1.1549, "step": 5364000 }, { "epoch": 3.22, "learning_rate": 3.35198980736716e-05, "loss": 1.2398, "step": 5364500 }, { "epoch": 3.22, "learning_rate": 3.351780230804215e-05, "loss": 1.1568, "step": 5365000 }, { "epoch": 3.22, "learning_rate": 3.3515702342481584e-05, "loss": 1.1711, "step": 5365500 }, { "epoch": 3.22, "learning_rate": 3.351360237692102e-05, "loss": 1.1801, "step": 5366000 }, { "epoch": 3.22, "learning_rate": 3.351150241136046e-05, "loss": 1.1759, "step": 5366500 }, { "epoch": 3.22, "learning_rate": 3.350940664573101e-05, "loss": 1.157, "step": 5367000 }, { "epoch": 3.22, "learning_rate": 3.3507306680170445e-05, "loss": 1.1495, "step": 5367500 }, { "epoch": 3.22, "learning_rate": 3.350520671460988e-05, "loss": 1.1261, "step": 5368000 }, { "epoch": 3.22, "learning_rate": 3.350310674904932e-05, "loss": 1.1927, "step": 5368500 }, { "epoch": 3.22, "learning_rate": 3.350101098341987e-05, "loss": 1.1402, "step": 5369000 }, { "epoch": 3.22, "learning_rate": 3.3498911017859306e-05, "loss": 1.159, "step": 5369500 }, { "epoch": 3.22, "learning_rate": 3.3496811052298746e-05, "loss": 1.1661, "step": 5370000 }, { "epoch": 3.22, "learning_rate": 3.349471108673818e-05, "loss": 1.178, "step": 5370500 }, { "epoch": 3.22, "learning_rate": 3.349261112117761e-05, "loss": 1.1467, "step": 5371000 }, { "epoch": 3.22, "learning_rate": 3.349051115561705e-05, "loss": 1.1741, "step": 5371500 }, { "epoch": 3.22, "learning_rate": 3.348841119005648e-05, "loss": 1.1815, "step": 5372000 }, { "epoch": 3.22, "learning_rate": 3.348631122449591e-05, "loss": 1.1874, "step": 5372500 }, { "epoch": 3.22, "learning_rate": 3.3484215458866474e-05, "loss": 1.1452, "step": 5373000 }, { "epoch": 3.22, "learning_rate": 3.3482115493305914e-05, "loss": 1.1737, "step": 5373500 }, { "epoch": 3.22, "learning_rate": 3.348001552774535e-05, "loss": 1.1563, "step": 5374000 }, { "epoch": 3.22, "learning_rate": 3.3477915562184774e-05, "loss": 1.169, "step": 5374500 }, { "epoch": 3.22, "learning_rate": 3.3475815596624214e-05, "loss": 1.1668, "step": 5375000 }, { "epoch": 3.22, "learning_rate": 3.347371563106365e-05, "loss": 1.1901, "step": 5375500 }, { "epoch": 3.22, "learning_rate": 3.347161566550308e-05, "loss": 1.1438, "step": 5376000 }, { "epoch": 3.22, "learning_rate": 3.346951569994252e-05, "loss": 1.1613, "step": 5376500 }, { "epoch": 3.22, "learning_rate": 3.3467419934313075e-05, "loss": 1.1709, "step": 5377000 }, { "epoch": 3.22, "learning_rate": 3.3465324168683635e-05, "loss": 1.178, "step": 5377500 }, { "epoch": 3.22, "learning_rate": 3.346322420312307e-05, "loss": 1.167, "step": 5378000 }, { "epoch": 3.22, "learning_rate": 3.346112423756251e-05, "loss": 1.1412, "step": 5378500 }, { "epoch": 3.22, "learning_rate": 3.345902427200194e-05, "loss": 1.1629, "step": 5379000 }, { "epoch": 3.23, "learning_rate": 3.345692430644137e-05, "loss": 1.1608, "step": 5379500 }, { "epoch": 3.23, "learning_rate": 3.345482854081193e-05, "loss": 1.1547, "step": 5380000 }, { "epoch": 3.23, "learning_rate": 3.345272857525137e-05, "loss": 1.1811, "step": 5380500 }, { "epoch": 3.23, "learning_rate": 3.34506286096908e-05, "loss": 1.1707, "step": 5381000 }, { "epoch": 3.23, "learning_rate": 3.344853284406136e-05, "loss": 1.158, "step": 5381500 }, { "epoch": 3.23, "learning_rate": 3.344643287850079e-05, "loss": 1.1686, "step": 5382000 }, { "epoch": 3.23, "learning_rate": 3.344433291294023e-05, "loss": 1.1531, "step": 5382500 }, { "epoch": 3.23, "learning_rate": 3.3442232947379664e-05, "loss": 1.1236, "step": 5383000 }, { "epoch": 3.23, "learning_rate": 3.34401329818191e-05, "loss": 1.1588, "step": 5383500 }, { "epoch": 3.23, "learning_rate": 3.343803301625853e-05, "loss": 1.1649, "step": 5384000 }, { "epoch": 3.23, "learning_rate": 3.3435933050697964e-05, "loss": 1.1502, "step": 5384500 }, { "epoch": 3.23, "learning_rate": 3.3433833085137404e-05, "loss": 1.1575, "step": 5385000 }, { "epoch": 3.23, "learning_rate": 3.3431737319507965e-05, "loss": 1.1721, "step": 5385500 }, { "epoch": 3.23, "learning_rate": 3.342964155387852e-05, "loss": 1.1654, "step": 5386000 }, { "epoch": 3.23, "learning_rate": 3.342754158831795e-05, "loss": 1.1513, "step": 5386500 }, { "epoch": 3.23, "learning_rate": 3.3425441622757385e-05, "loss": 1.1709, "step": 5387000 }, { "epoch": 3.23, "learning_rate": 3.3423341657196826e-05, "loss": 1.1836, "step": 5387500 }, { "epoch": 3.23, "learning_rate": 3.342124169163626e-05, "loss": 1.1345, "step": 5388000 }, { "epoch": 3.23, "learning_rate": 3.341914172607569e-05, "loss": 1.197, "step": 5388500 }, { "epoch": 3.23, "learning_rate": 3.3417041760515126e-05, "loss": 1.165, "step": 5389000 }, { "epoch": 3.23, "learning_rate": 3.341494179495456e-05, "loss": 1.14, "step": 5389500 }, { "epoch": 3.23, "learning_rate": 3.341284182939399e-05, "loss": 1.1837, "step": 5390000 }, { "epoch": 3.23, "learning_rate": 3.341074186383343e-05, "loss": 1.1634, "step": 5390500 }, { "epoch": 3.23, "learning_rate": 3.3408641898272867e-05, "loss": 1.183, "step": 5391000 }, { "epoch": 3.23, "learning_rate": 3.34065419327123e-05, "loss": 1.1713, "step": 5391500 }, { "epoch": 3.23, "learning_rate": 3.340444616708286e-05, "loss": 1.1559, "step": 5392000 }, { "epoch": 3.23, "learning_rate": 3.340235040145342e-05, "loss": 1.1913, "step": 5392500 }, { "epoch": 3.23, "learning_rate": 3.3400250435892854e-05, "loss": 1.1692, "step": 5393000 }, { "epoch": 3.23, "learning_rate": 3.339815047033228e-05, "loss": 1.1881, "step": 5393500 }, { "epoch": 3.23, "learning_rate": 3.339605050477172e-05, "loss": 1.1612, "step": 5394000 }, { "epoch": 3.23, "learning_rate": 3.3393950539211155e-05, "loss": 1.1806, "step": 5394500 }, { "epoch": 3.23, "learning_rate": 3.3391854773581715e-05, "loss": 1.1496, "step": 5395000 }, { "epoch": 3.23, "learning_rate": 3.338975480802115e-05, "loss": 1.1743, "step": 5395500 }, { "epoch": 3.24, "learning_rate": 3.338765484246058e-05, "loss": 1.1724, "step": 5396000 }, { "epoch": 3.24, "learning_rate": 3.3385554876900015e-05, "loss": 1.1768, "step": 5396500 }, { "epoch": 3.24, "learning_rate": 3.3383459111270576e-05, "loss": 1.1587, "step": 5397000 }, { "epoch": 3.24, "learning_rate": 3.338135914571001e-05, "loss": 1.1746, "step": 5397500 }, { "epoch": 3.24, "learning_rate": 3.337925918014945e-05, "loss": 1.1671, "step": 5398000 }, { "epoch": 3.24, "learning_rate": 3.3377159214588876e-05, "loss": 1.187, "step": 5398500 }, { "epoch": 3.24, "learning_rate": 3.3375059249028316e-05, "loss": 1.1635, "step": 5399000 }, { "epoch": 3.24, "learning_rate": 3.337295928346775e-05, "loss": 1.2195, "step": 5399500 }, { "epoch": 3.24, "learning_rate": 3.337085931790718e-05, "loss": 1.1819, "step": 5400000 }, { "epoch": 3.24, "eval_loss": 1.1395820379257202, "eval_runtime": 1108.0224, "eval_samples_per_second": 475.369, "eval_steps_per_second": 79.229, "step": 5400000 }, { "epoch": 3.24, "learning_rate": 3.336875935234662e-05, "loss": 1.1601, "step": 5400500 }, { "epoch": 3.24, "learning_rate": 3.336665938678606e-05, "loss": 1.1496, "step": 5401000 }, { "epoch": 3.24, "learning_rate": 3.336456362115661e-05, "loss": 1.1535, "step": 5401500 }, { "epoch": 3.24, "learning_rate": 3.3362463655596044e-05, "loss": 1.205, "step": 5402000 }, { "epoch": 3.24, "learning_rate": 3.3360363690035484e-05, "loss": 1.1644, "step": 5402500 }, { "epoch": 3.24, "learning_rate": 3.335826372447492e-05, "loss": 1.1973, "step": 5403000 }, { "epoch": 3.24, "learning_rate": 3.335616375891435e-05, "loss": 1.1611, "step": 5403500 }, { "epoch": 3.24, "learning_rate": 3.335406379335379e-05, "loss": 1.1469, "step": 5404000 }, { "epoch": 3.24, "learning_rate": 3.3351968027724345e-05, "loss": 1.1849, "step": 5404500 }, { "epoch": 3.24, "learning_rate": 3.334986806216378e-05, "loss": 1.1697, "step": 5405000 }, { "epoch": 3.24, "learning_rate": 3.334776809660321e-05, "loss": 1.1596, "step": 5405500 }, { "epoch": 3.24, "learning_rate": 3.334566813104265e-05, "loss": 1.1605, "step": 5406000 }, { "epoch": 3.24, "learning_rate": 3.3343572365413206e-05, "loss": 1.1731, "step": 5406500 }, { "epoch": 3.24, "learning_rate": 3.334147239985264e-05, "loss": 1.1637, "step": 5407000 }, { "epoch": 3.24, "learning_rate": 3.333937243429208e-05, "loss": 1.1695, "step": 5407500 }, { "epoch": 3.24, "learning_rate": 3.333727246873151e-05, "loss": 1.1654, "step": 5408000 }, { "epoch": 3.24, "learning_rate": 3.3335172503170946e-05, "loss": 1.1595, "step": 5408500 }, { "epoch": 3.24, "learning_rate": 3.3333072537610386e-05, "loss": 1.1599, "step": 5409000 }, { "epoch": 3.24, "learning_rate": 3.333097257204982e-05, "loss": 1.1603, "step": 5409500 }, { "epoch": 3.24, "learning_rate": 3.332887260648925e-05, "loss": 1.1606, "step": 5410000 }, { "epoch": 3.24, "learning_rate": 3.332677684085981e-05, "loss": 1.1638, "step": 5410500 }, { "epoch": 3.24, "learning_rate": 3.332467687529925e-05, "loss": 1.161, "step": 5411000 }, { "epoch": 3.24, "learning_rate": 3.332257690973868e-05, "loss": 1.18, "step": 5411500 }, { "epoch": 3.24, "learning_rate": 3.3320476944178114e-05, "loss": 1.1594, "step": 5412000 }, { "epoch": 3.25, "learning_rate": 3.331838117854867e-05, "loss": 1.1587, "step": 5412500 }, { "epoch": 3.25, "learning_rate": 3.331628121298811e-05, "loss": 1.1667, "step": 5413000 }, { "epoch": 3.25, "learning_rate": 3.331418544735866e-05, "loss": 1.1327, "step": 5413500 }, { "epoch": 3.25, "learning_rate": 3.3312085481798095e-05, "loss": 1.167, "step": 5414000 }, { "epoch": 3.25, "learning_rate": 3.3309985516237535e-05, "loss": 1.155, "step": 5414500 }, { "epoch": 3.25, "learning_rate": 3.330788555067697e-05, "loss": 1.1407, "step": 5415000 }, { "epoch": 3.25, "learning_rate": 3.330578978504752e-05, "loss": 1.1831, "step": 5415500 }, { "epoch": 3.25, "learning_rate": 3.3303689819486956e-05, "loss": 1.1503, "step": 5416000 }, { "epoch": 3.25, "learning_rate": 3.3301589853926396e-05, "loss": 1.1606, "step": 5416500 }, { "epoch": 3.25, "learning_rate": 3.329948988836583e-05, "loss": 1.1233, "step": 5417000 }, { "epoch": 3.25, "learning_rate": 3.329738992280526e-05, "loss": 1.159, "step": 5417500 }, { "epoch": 3.25, "learning_rate": 3.32952899572447e-05, "loss": 1.1773, "step": 5418000 }, { "epoch": 3.25, "learning_rate": 3.3293194191615257e-05, "loss": 1.1834, "step": 5418500 }, { "epoch": 3.25, "learning_rate": 3.329109422605469e-05, "loss": 1.1522, "step": 5419000 }, { "epoch": 3.25, "learning_rate": 3.3288994260494123e-05, "loss": 1.1777, "step": 5419500 }, { "epoch": 3.25, "learning_rate": 3.3286894294933564e-05, "loss": 1.1646, "step": 5420000 }, { "epoch": 3.25, "learning_rate": 3.3284794329373e-05, "loss": 1.1515, "step": 5420500 }, { "epoch": 3.25, "learning_rate": 3.328269856374355e-05, "loss": 1.1313, "step": 5421000 }, { "epoch": 3.25, "learning_rate": 3.328059859818299e-05, "loss": 1.1284, "step": 5421500 }, { "epoch": 3.25, "learning_rate": 3.3278498632622424e-05, "loss": 1.1759, "step": 5422000 }, { "epoch": 3.25, "learning_rate": 3.327639866706186e-05, "loss": 1.1636, "step": 5422500 }, { "epoch": 3.25, "learning_rate": 3.32742987015013e-05, "loss": 1.1684, "step": 5423000 }, { "epoch": 3.25, "learning_rate": 3.327219873594073e-05, "loss": 1.1606, "step": 5423500 }, { "epoch": 3.25, "learning_rate": 3.3270098770380165e-05, "loss": 1.1744, "step": 5424000 }, { "epoch": 3.25, "learning_rate": 3.3267998804819605e-05, "loss": 1.182, "step": 5424500 }, { "epoch": 3.25, "learning_rate": 3.326590303919016e-05, "loss": 1.1715, "step": 5425000 }, { "epoch": 3.25, "learning_rate": 3.326380307362959e-05, "loss": 1.1513, "step": 5425500 }, { "epoch": 3.25, "learning_rate": 3.3261703108069026e-05, "loss": 1.147, "step": 5426000 }, { "epoch": 3.25, "learning_rate": 3.3259603142508466e-05, "loss": 1.1498, "step": 5426500 }, { "epoch": 3.25, "learning_rate": 3.325750737687902e-05, "loss": 1.1705, "step": 5427000 }, { "epoch": 3.25, "learning_rate": 3.325540741131845e-05, "loss": 1.1551, "step": 5427500 }, { "epoch": 3.25, "learning_rate": 3.325330744575789e-05, "loss": 1.1418, "step": 5428000 }, { "epoch": 3.25, "learning_rate": 3.325120748019733e-05, "loss": 1.1837, "step": 5428500 }, { "epoch": 3.25, "learning_rate": 3.324911171456788e-05, "loss": 1.1873, "step": 5429000 }, { "epoch": 3.26, "learning_rate": 3.3247011749007314e-05, "loss": 1.1557, "step": 5429500 }, { "epoch": 3.26, "learning_rate": 3.3244911783446754e-05, "loss": 1.1508, "step": 5430000 }, { "epoch": 3.26, "learning_rate": 3.324281181788619e-05, "loss": 1.1694, "step": 5430500 }, { "epoch": 3.26, "learning_rate": 3.324071605225674e-05, "loss": 1.1638, "step": 5431000 }, { "epoch": 3.26, "learning_rate": 3.32386202866273e-05, "loss": 1.161, "step": 5431500 }, { "epoch": 3.26, "learning_rate": 3.323652032106673e-05, "loss": 1.2012, "step": 5432000 }, { "epoch": 3.26, "learning_rate": 3.323442035550617e-05, "loss": 1.1718, "step": 5432500 }, { "epoch": 3.26, "learning_rate": 3.32323203899456e-05, "loss": 1.1588, "step": 5433000 }, { "epoch": 3.26, "learning_rate": 3.323022042438504e-05, "loss": 1.1767, "step": 5433500 }, { "epoch": 3.26, "learning_rate": 3.3228124658755596e-05, "loss": 1.1446, "step": 5434000 }, { "epoch": 3.26, "learning_rate": 3.322602469319503e-05, "loss": 1.1679, "step": 5434500 }, { "epoch": 3.26, "learning_rate": 3.322392472763446e-05, "loss": 1.1593, "step": 5435000 }, { "epoch": 3.26, "learning_rate": 3.32218247620739e-05, "loss": 1.1671, "step": 5435500 }, { "epoch": 3.26, "learning_rate": 3.3219724796513336e-05, "loss": 1.1591, "step": 5436000 }, { "epoch": 3.26, "learning_rate": 3.321762483095277e-05, "loss": 1.1673, "step": 5436500 }, { "epoch": 3.26, "learning_rate": 3.321552486539221e-05, "loss": 1.1723, "step": 5437000 }, { "epoch": 3.26, "learning_rate": 3.321342489983164e-05, "loss": 1.1452, "step": 5437500 }, { "epoch": 3.26, "learning_rate": 3.321132493427108e-05, "loss": 1.1548, "step": 5438000 }, { "epoch": 3.26, "learning_rate": 3.320922916864163e-05, "loss": 1.1962, "step": 5438500 }, { "epoch": 3.26, "learning_rate": 3.320712920308107e-05, "loss": 1.1682, "step": 5439000 }, { "epoch": 3.26, "learning_rate": 3.3205033437451624e-05, "loss": 1.1735, "step": 5439500 }, { "epoch": 3.26, "learning_rate": 3.320293347189106e-05, "loss": 1.1659, "step": 5440000 }, { "epoch": 3.26, "learning_rate": 3.32008335063305e-05, "loss": 1.1713, "step": 5440500 }, { "epoch": 3.26, "learning_rate": 3.319873354076993e-05, "loss": 1.1762, "step": 5441000 }, { "epoch": 3.26, "learning_rate": 3.3196633575209365e-05, "loss": 1.1634, "step": 5441500 }, { "epoch": 3.26, "learning_rate": 3.3194533609648805e-05, "loss": 1.1536, "step": 5442000 }, { "epoch": 3.26, "learning_rate": 3.319243364408824e-05, "loss": 1.1683, "step": 5442500 }, { "epoch": 3.26, "learning_rate": 3.319033367852767e-05, "loss": 1.1876, "step": 5443000 }, { "epoch": 3.26, "learning_rate": 3.3188237912898226e-05, "loss": 1.1797, "step": 5443500 }, { "epoch": 3.26, "learning_rate": 3.3186137947337666e-05, "loss": 1.1658, "step": 5444000 }, { "epoch": 3.26, "learning_rate": 3.318404218170822e-05, "loss": 1.1528, "step": 5444500 }, { "epoch": 3.26, "learning_rate": 3.318194221614765e-05, "loss": 1.1678, "step": 5445000 }, { "epoch": 3.26, "learning_rate": 3.3179842250587086e-05, "loss": 1.1561, "step": 5445500 }, { "epoch": 3.27, "learning_rate": 3.3177742285026526e-05, "loss": 1.1711, "step": 5446000 }, { "epoch": 3.27, "learning_rate": 3.317564231946596e-05, "loss": 1.1713, "step": 5446500 }, { "epoch": 3.27, "learning_rate": 3.3173542353905393e-05, "loss": 1.1589, "step": 5447000 }, { "epoch": 3.27, "learning_rate": 3.3171446588275954e-05, "loss": 1.1698, "step": 5447500 }, { "epoch": 3.27, "learning_rate": 3.316934662271539e-05, "loss": 1.1669, "step": 5448000 }, { "epoch": 3.27, "learning_rate": 3.316724665715482e-05, "loss": 1.1636, "step": 5448500 }, { "epoch": 3.27, "learning_rate": 3.316514669159426e-05, "loss": 1.1744, "step": 5449000 }, { "epoch": 3.27, "learning_rate": 3.3163046726033694e-05, "loss": 1.1552, "step": 5449500 }, { "epoch": 3.27, "learning_rate": 3.316094676047313e-05, "loss": 1.1609, "step": 5450000 }, { "epoch": 3.27, "learning_rate": 3.315885099484368e-05, "loss": 1.1952, "step": 5450500 }, { "epoch": 3.27, "learning_rate": 3.315675102928312e-05, "loss": 1.1671, "step": 5451000 }, { "epoch": 3.27, "learning_rate": 3.3154651063722555e-05, "loss": 1.1823, "step": 5451500 }, { "epoch": 3.27, "learning_rate": 3.315255529809311e-05, "loss": 1.172, "step": 5452000 }, { "epoch": 3.27, "learning_rate": 3.315045533253254e-05, "loss": 1.1717, "step": 5452500 }, { "epoch": 3.27, "learning_rate": 3.314835536697198e-05, "loss": 1.1687, "step": 5453000 }, { "epoch": 3.27, "learning_rate": 3.3146255401411416e-05, "loss": 1.1762, "step": 5453500 }, { "epoch": 3.27, "learning_rate": 3.314415543585085e-05, "loss": 1.1514, "step": 5454000 }, { "epoch": 3.27, "learning_rate": 3.314205547029029e-05, "loss": 1.1831, "step": 5454500 }, { "epoch": 3.27, "learning_rate": 3.313995550472972e-05, "loss": 1.1613, "step": 5455000 }, { "epoch": 3.27, "learning_rate": 3.3137855539169156e-05, "loss": 1.1873, "step": 5455500 }, { "epoch": 3.27, "learning_rate": 3.31357555736086e-05, "loss": 1.1413, "step": 5456000 }, { "epoch": 3.27, "learning_rate": 3.313365980797915e-05, "loss": 1.1679, "step": 5456500 }, { "epoch": 3.27, "learning_rate": 3.3131559842418584e-05, "loss": 1.1777, "step": 5457000 }, { "epoch": 3.27, "learning_rate": 3.3129459876858024e-05, "loss": 1.1392, "step": 5457500 }, { "epoch": 3.27, "learning_rate": 3.312735991129746e-05, "loss": 1.2143, "step": 5458000 }, { "epoch": 3.27, "learning_rate": 3.312526414566801e-05, "loss": 1.1879, "step": 5458500 }, { "epoch": 3.27, "learning_rate": 3.3123164180107444e-05, "loss": 1.1868, "step": 5459000 }, { "epoch": 3.27, "learning_rate": 3.3121064214546885e-05, "loss": 1.185, "step": 5459500 }, { "epoch": 3.27, "learning_rate": 3.311896424898632e-05, "loss": 1.1529, "step": 5460000 }, { "epoch": 3.27, "learning_rate": 3.311686428342575e-05, "loss": 1.1518, "step": 5460500 }, { "epoch": 3.27, "learning_rate": 3.3114764317865185e-05, "loss": 1.188, "step": 5461000 }, { "epoch": 3.27, "learning_rate": 3.311266435230462e-05, "loss": 1.1941, "step": 5461500 }, { "epoch": 3.27, "learning_rate": 3.311056438674405e-05, "loss": 1.1534, "step": 5462000 }, { "epoch": 3.27, "learning_rate": 3.310847282104573e-05, "loss": 1.1342, "step": 5462500 }, { "epoch": 3.28, "learning_rate": 3.310637285548517e-05, "loss": 1.1565, "step": 5463000 }, { "epoch": 3.28, "learning_rate": 3.3104272889924606e-05, "loss": 1.1423, "step": 5463500 }, { "epoch": 3.28, "learning_rate": 3.310217292436404e-05, "loss": 1.1779, "step": 5464000 }, { "epoch": 3.28, "learning_rate": 3.310007295880348e-05, "loss": 1.1334, "step": 5464500 }, { "epoch": 3.28, "learning_rate": 3.309797299324291e-05, "loss": 1.1437, "step": 5465000 }, { "epoch": 3.28, "learning_rate": 3.309587722761347e-05, "loss": 1.1339, "step": 5465500 }, { "epoch": 3.28, "learning_rate": 3.30937772620529e-05, "loss": 1.1452, "step": 5466000 }, { "epoch": 3.28, "learning_rate": 3.309167729649234e-05, "loss": 1.1541, "step": 5466500 }, { "epoch": 3.28, "learning_rate": 3.3089577330931774e-05, "loss": 1.1505, "step": 5467000 }, { "epoch": 3.28, "learning_rate": 3.308747736537121e-05, "loss": 1.1438, "step": 5467500 }, { "epoch": 3.28, "learning_rate": 3.308537739981065e-05, "loss": 1.1637, "step": 5468000 }, { "epoch": 3.28, "learning_rate": 3.30832816341812e-05, "loss": 1.1919, "step": 5468500 }, { "epoch": 3.28, "learning_rate": 3.3081181668620635e-05, "loss": 1.1632, "step": 5469000 }, { "epoch": 3.28, "learning_rate": 3.307908170306007e-05, "loss": 1.1246, "step": 5469500 }, { "epoch": 3.28, "learning_rate": 3.307698173749951e-05, "loss": 1.1809, "step": 5470000 }, { "epoch": 3.28, "learning_rate": 3.3074881771938935e-05, "loss": 1.1424, "step": 5470500 }, { "epoch": 3.28, "learning_rate": 3.3072786006309495e-05, "loss": 1.1798, "step": 5471000 }, { "epoch": 3.28, "learning_rate": 3.3070686040748936e-05, "loss": 1.155, "step": 5471500 }, { "epoch": 3.28, "learning_rate": 3.306858607518837e-05, "loss": 1.1303, "step": 5472000 }, { "epoch": 3.28, "learning_rate": 3.30664861096278e-05, "loss": 1.1962, "step": 5472500 }, { "epoch": 3.28, "learning_rate": 3.3064386144067236e-05, "loss": 1.1701, "step": 5473000 }, { "epoch": 3.28, "learning_rate": 3.306228617850667e-05, "loss": 1.1438, "step": 5473500 }, { "epoch": 3.28, "learning_rate": 3.30601862129461e-05, "loss": 1.1357, "step": 5474000 }, { "epoch": 3.28, "learning_rate": 3.305808624738554e-05, "loss": 1.1799, "step": 5474500 }, { "epoch": 3.28, "learning_rate": 3.305598628182498e-05, "loss": 1.1823, "step": 5475000 }, { "epoch": 3.28, "learning_rate": 3.305389051619553e-05, "loss": 1.1611, "step": 5475500 }, { "epoch": 3.28, "learning_rate": 3.305179895049721e-05, "loss": 1.2026, "step": 5476000 }, { "epoch": 3.28, "learning_rate": 3.3049698984936644e-05, "loss": 1.1814, "step": 5476500 }, { "epoch": 3.28, "learning_rate": 3.3047599019376084e-05, "loss": 1.1593, "step": 5477000 }, { "epoch": 3.28, "learning_rate": 3.304549905381552e-05, "loss": 1.1712, "step": 5477500 }, { "epoch": 3.28, "learning_rate": 3.304339908825495e-05, "loss": 1.1595, "step": 5478000 }, { "epoch": 3.28, "learning_rate": 3.304129912269439e-05, "loss": 1.1467, "step": 5478500 }, { "epoch": 3.28, "learning_rate": 3.3039199157133825e-05, "loss": 1.1679, "step": 5479000 }, { "epoch": 3.29, "learning_rate": 3.303709919157326e-05, "loss": 1.1644, "step": 5479500 }, { "epoch": 3.29, "learning_rate": 3.303499922601269e-05, "loss": 1.1744, "step": 5480000 }, { "epoch": 3.29, "learning_rate": 3.303290346038325e-05, "loss": 1.1404, "step": 5480500 }, { "epoch": 3.29, "learning_rate": 3.3030803494822686e-05, "loss": 1.1551, "step": 5481000 }, { "epoch": 3.29, "learning_rate": 3.302870352926212e-05, "loss": 1.199, "step": 5481500 }, { "epoch": 3.29, "learning_rate": 3.302660356370156e-05, "loss": 1.1867, "step": 5482000 }, { "epoch": 3.29, "learning_rate": 3.3024503598140986e-05, "loss": 1.1575, "step": 5482500 }, { "epoch": 3.29, "learning_rate": 3.3022407832511547e-05, "loss": 1.1857, "step": 5483000 }, { "epoch": 3.29, "learning_rate": 3.302030786695098e-05, "loss": 1.1904, "step": 5483500 }, { "epoch": 3.29, "learning_rate": 3.301820790139042e-05, "loss": 1.1542, "step": 5484000 }, { "epoch": 3.29, "learning_rate": 3.3016107935829854e-05, "loss": 1.1606, "step": 5484500 }, { "epoch": 3.29, "learning_rate": 3.301400797026929e-05, "loss": 1.185, "step": 5485000 }, { "epoch": 3.29, "learning_rate": 3.301190800470872e-05, "loss": 1.1759, "step": 5485500 }, { "epoch": 3.29, "learning_rate": 3.300981223907928e-05, "loss": 1.1896, "step": 5486000 }, { "epoch": 3.29, "learning_rate": 3.3007712273518714e-05, "loss": 1.1442, "step": 5486500 }, { "epoch": 3.29, "learning_rate": 3.3005612307958155e-05, "loss": 1.1471, "step": 5487000 }, { "epoch": 3.29, "learning_rate": 3.300351234239758e-05, "loss": 1.1923, "step": 5487500 }, { "epoch": 3.29, "learning_rate": 3.300141657676814e-05, "loss": 1.2072, "step": 5488000 }, { "epoch": 3.29, "learning_rate": 3.2999316611207575e-05, "loss": 1.1648, "step": 5488500 }, { "epoch": 3.29, "learning_rate": 3.2997216645647015e-05, "loss": 1.1729, "step": 5489000 }, { "epoch": 3.29, "learning_rate": 3.299511668008644e-05, "loss": 1.1857, "step": 5489500 }, { "epoch": 3.29, "learning_rate": 3.2993016714525875e-05, "loss": 1.1502, "step": 5490000 }, { "epoch": 3.29, "learning_rate": 3.2990916748965316e-05, "loss": 1.1833, "step": 5490500 }, { "epoch": 3.29, "learning_rate": 3.298881678340475e-05, "loss": 1.1546, "step": 5491000 }, { "epoch": 3.29, "learning_rate": 3.298671681784418e-05, "loss": 1.1611, "step": 5491500 }, { "epoch": 3.29, "learning_rate": 3.298462105221474e-05, "loss": 1.1693, "step": 5492000 }, { "epoch": 3.29, "learning_rate": 3.2982521086654176e-05, "loss": 1.1526, "step": 5492500 }, { "epoch": 3.29, "learning_rate": 3.298042112109361e-05, "loss": 1.1717, "step": 5493000 }, { "epoch": 3.29, "learning_rate": 3.297832115553305e-05, "loss": 1.1482, "step": 5493500 }, { "epoch": 3.29, "learning_rate": 3.2976221189972484e-05, "loss": 1.1426, "step": 5494000 }, { "epoch": 3.29, "learning_rate": 3.297412542434304e-05, "loss": 1.1362, "step": 5494500 }, { "epoch": 3.29, "learning_rate": 3.297202545878247e-05, "loss": 1.1979, "step": 5495000 }, { "epoch": 3.29, "learning_rate": 3.296992549322191e-05, "loss": 1.1556, "step": 5495500 }, { "epoch": 3.3, "learning_rate": 3.2967825527661344e-05, "loss": 1.1764, "step": 5496000 }, { "epoch": 3.3, "learning_rate": 3.296572556210078e-05, "loss": 1.1845, "step": 5496500 }, { "epoch": 3.3, "learning_rate": 3.296362979647133e-05, "loss": 1.1422, "step": 5497000 }, { "epoch": 3.3, "learning_rate": 3.296152983091077e-05, "loss": 1.1756, "step": 5497500 }, { "epoch": 3.3, "learning_rate": 3.2959429865350205e-05, "loss": 1.1572, "step": 5498000 }, { "epoch": 3.3, "learning_rate": 3.2957329899789645e-05, "loss": 1.167, "step": 5498500 }, { "epoch": 3.3, "learning_rate": 3.295522993422908e-05, "loss": 1.1462, "step": 5499000 }, { "epoch": 3.3, "learning_rate": 3.295312996866851e-05, "loss": 1.1485, "step": 5499500 }, { "epoch": 3.3, "learning_rate": 3.295103000310795e-05, "loss": 1.1524, "step": 5500000 }, { "epoch": 3.3, "eval_loss": 1.1378905773162842, "eval_runtime": 1099.047, "eval_samples_per_second": 479.252, "eval_steps_per_second": 79.876, "step": 5500000 }, { "epoch": 3.3, "learning_rate": 3.2948930037547386e-05, "loss": 1.1625, "step": 5500500 }, { "epoch": 3.3, "learning_rate": 3.2946838471849066e-05, "loss": 1.1708, "step": 5501000 }, { "epoch": 3.3, "learning_rate": 3.294473850628849e-05, "loss": 1.1493, "step": 5501500 }, { "epoch": 3.3, "learning_rate": 3.2942638540727927e-05, "loss": 1.1854, "step": 5502000 }, { "epoch": 3.3, "learning_rate": 3.294053857516737e-05, "loss": 1.1913, "step": 5502500 }, { "epoch": 3.3, "learning_rate": 3.29384386096068e-05, "loss": 1.1578, "step": 5503000 }, { "epoch": 3.3, "learning_rate": 3.2936338644046234e-05, "loss": 1.1537, "step": 5503500 }, { "epoch": 3.3, "learning_rate": 3.2934238678485674e-05, "loss": 1.1585, "step": 5504000 }, { "epoch": 3.3, "learning_rate": 3.293213871292511e-05, "loss": 1.1688, "step": 5504500 }, { "epoch": 3.3, "learning_rate": 3.293003874736454e-05, "loss": 1.1186, "step": 5505000 }, { "epoch": 3.3, "learning_rate": 3.29279429817351e-05, "loss": 1.1879, "step": 5505500 }, { "epoch": 3.3, "learning_rate": 3.2925843016174535e-05, "loss": 1.1895, "step": 5506000 }, { "epoch": 3.3, "learning_rate": 3.292374725054509e-05, "loss": 1.1507, "step": 5506500 }, { "epoch": 3.3, "learning_rate": 3.292164728498452e-05, "loss": 1.1481, "step": 5507000 }, { "epoch": 3.3, "learning_rate": 3.291954731942396e-05, "loss": 1.1713, "step": 5507500 }, { "epoch": 3.3, "learning_rate": 3.2917447353863395e-05, "loss": 1.138, "step": 5508000 }, { "epoch": 3.3, "learning_rate": 3.291534738830283e-05, "loss": 1.1811, "step": 5508500 }, { "epoch": 3.3, "learning_rate": 3.291324742274227e-05, "loss": 1.133, "step": 5509000 }, { "epoch": 3.3, "learning_rate": 3.29111474571817e-05, "loss": 1.155, "step": 5509500 }, { "epoch": 3.3, "learning_rate": 3.2909047491621136e-05, "loss": 1.1687, "step": 5510000 }, { "epoch": 3.3, "learning_rate": 3.290695172599169e-05, "loss": 1.1658, "step": 5510500 }, { "epoch": 3.3, "learning_rate": 3.290485176043113e-05, "loss": 1.1992, "step": 5511000 }, { "epoch": 3.3, "learning_rate": 3.290275179487056e-05, "loss": 1.1878, "step": 5511500 }, { "epoch": 3.3, "learning_rate": 3.290065182931e-05, "loss": 1.1614, "step": 5512000 }, { "epoch": 3.3, "learning_rate": 3.289855606368056e-05, "loss": 1.145, "step": 5512500 }, { "epoch": 3.31, "learning_rate": 3.289646029805112e-05, "loss": 1.1661, "step": 5513000 }, { "epoch": 3.31, "learning_rate": 3.2894360332490544e-05, "loss": 1.17, "step": 5513500 }, { "epoch": 3.31, "learning_rate": 3.289226036692998e-05, "loss": 1.1862, "step": 5514000 }, { "epoch": 3.31, "learning_rate": 3.289016040136942e-05, "loss": 1.186, "step": 5514500 }, { "epoch": 3.31, "learning_rate": 3.288806043580885e-05, "loss": 1.1473, "step": 5515000 }, { "epoch": 3.31, "learning_rate": 3.2885960470248285e-05, "loss": 1.1498, "step": 5515500 }, { "epoch": 3.31, "learning_rate": 3.288386470461884e-05, "loss": 1.1522, "step": 5516000 }, { "epoch": 3.31, "learning_rate": 3.288176473905828e-05, "loss": 1.1552, "step": 5516500 }, { "epoch": 3.31, "learning_rate": 3.287966477349771e-05, "loss": 1.1527, "step": 5517000 }, { "epoch": 3.31, "learning_rate": 3.2877564807937145e-05, "loss": 1.1367, "step": 5517500 }, { "epoch": 3.31, "learning_rate": 3.2875464842376586e-05, "loss": 1.1411, "step": 5518000 }, { "epoch": 3.31, "learning_rate": 3.287336907674714e-05, "loss": 1.1738, "step": 5518500 }, { "epoch": 3.31, "learning_rate": 3.287126911118657e-05, "loss": 1.1583, "step": 5519000 }, { "epoch": 3.31, "learning_rate": 3.286916914562601e-05, "loss": 1.1476, "step": 5519500 }, { "epoch": 3.31, "learning_rate": 3.2867069180065446e-05, "loss": 1.1541, "step": 5520000 }, { "epoch": 3.31, "learning_rate": 3.2864973414436e-05, "loss": 1.1742, "step": 5520500 }, { "epoch": 3.31, "learning_rate": 3.2862873448875433e-05, "loss": 1.1569, "step": 5521000 }, { "epoch": 3.31, "learning_rate": 3.2860773483314874e-05, "loss": 1.1619, "step": 5521500 }, { "epoch": 3.31, "learning_rate": 3.285867351775431e-05, "loss": 1.1665, "step": 5522000 }, { "epoch": 3.31, "learning_rate": 3.285657355219374e-05, "loss": 1.141, "step": 5522500 }, { "epoch": 3.31, "learning_rate": 3.2854477786564294e-05, "loss": 1.14, "step": 5523000 }, { "epoch": 3.31, "learning_rate": 3.2852377821003734e-05, "loss": 1.1496, "step": 5523500 }, { "epoch": 3.31, "learning_rate": 3.285027785544317e-05, "loss": 1.1462, "step": 5524000 }, { "epoch": 3.31, "learning_rate": 3.28481778898826e-05, "loss": 1.2024, "step": 5524500 }, { "epoch": 3.31, "learning_rate": 3.284607792432204e-05, "loss": 1.1859, "step": 5525000 }, { "epoch": 3.31, "learning_rate": 3.2843977958761475e-05, "loss": 1.153, "step": 5525500 }, { "epoch": 3.31, "learning_rate": 3.284187799320091e-05, "loss": 1.1873, "step": 5526000 }, { "epoch": 3.31, "learning_rate": 3.283977802764035e-05, "loss": 1.1425, "step": 5526500 }, { "epoch": 3.31, "learning_rate": 3.283768646194203e-05, "loss": 1.1748, "step": 5527000 }, { "epoch": 3.31, "learning_rate": 3.283558649638146e-05, "loss": 1.1748, "step": 5527500 }, { "epoch": 3.31, "learning_rate": 3.283348653082089e-05, "loss": 1.1708, "step": 5528000 }, { "epoch": 3.31, "learning_rate": 3.283138656526033e-05, "loss": 1.1719, "step": 5528500 }, { "epoch": 3.31, "learning_rate": 3.282928659969976e-05, "loss": 1.1561, "step": 5529000 }, { "epoch": 3.32, "learning_rate": 3.282719083407032e-05, "loss": 1.1466, "step": 5529500 }, { "epoch": 3.32, "learning_rate": 3.282509086850976e-05, "loss": 1.1463, "step": 5530000 }, { "epoch": 3.32, "learning_rate": 3.282299090294919e-05, "loss": 1.1693, "step": 5530500 }, { "epoch": 3.32, "learning_rate": 3.2820890937388624e-05, "loss": 1.1917, "step": 5531000 }, { "epoch": 3.32, "learning_rate": 3.281879097182806e-05, "loss": 1.1586, "step": 5531500 }, { "epoch": 3.32, "learning_rate": 3.28166910062675e-05, "loss": 1.1624, "step": 5532000 }, { "epoch": 3.32, "learning_rate": 3.281459104070693e-05, "loss": 1.1395, "step": 5532500 }, { "epoch": 3.32, "learning_rate": 3.2812491075146364e-05, "loss": 1.1747, "step": 5533000 }, { "epoch": 3.32, "learning_rate": 3.2810395309516925e-05, "loss": 1.1776, "step": 5533500 }, { "epoch": 3.32, "learning_rate": 3.2808299543887485e-05, "loss": 1.1549, "step": 5534000 }, { "epoch": 3.32, "learning_rate": 3.280619957832692e-05, "loss": 1.156, "step": 5534500 }, { "epoch": 3.32, "learning_rate": 3.2804099612766345e-05, "loss": 1.1574, "step": 5535000 }, { "epoch": 3.32, "learning_rate": 3.2801999647205785e-05, "loss": 1.1571, "step": 5535500 }, { "epoch": 3.32, "learning_rate": 3.279989968164522e-05, "loss": 1.1835, "step": 5536000 }, { "epoch": 3.32, "learning_rate": 3.279779971608465e-05, "loss": 1.1816, "step": 5536500 }, { "epoch": 3.32, "learning_rate": 3.279569975052409e-05, "loss": 1.1394, "step": 5537000 }, { "epoch": 3.32, "learning_rate": 3.2793599784963526e-05, "loss": 1.131, "step": 5537500 }, { "epoch": 3.32, "learning_rate": 3.279149981940296e-05, "loss": 1.1544, "step": 5538000 }, { "epoch": 3.32, "learning_rate": 3.27893998538424e-05, "loss": 1.182, "step": 5538500 }, { "epoch": 3.32, "learning_rate": 3.278729988828183e-05, "loss": 1.1708, "step": 5539000 }, { "epoch": 3.32, "learning_rate": 3.278519992272127e-05, "loss": 1.1644, "step": 5539500 }, { "epoch": 3.32, "learning_rate": 3.278310415709182e-05, "loss": 1.1766, "step": 5540000 }, { "epoch": 3.32, "learning_rate": 3.278100419153126e-05, "loss": 1.1715, "step": 5540500 }, { "epoch": 3.32, "learning_rate": 3.2778904225970694e-05, "loss": 1.1449, "step": 5541000 }, { "epoch": 3.32, "learning_rate": 3.277680846034125e-05, "loss": 1.1597, "step": 5541500 }, { "epoch": 3.32, "learning_rate": 3.277470849478069e-05, "loss": 1.1782, "step": 5542000 }, { "epoch": 3.32, "learning_rate": 3.277260852922012e-05, "loss": 1.1908, "step": 5542500 }, { "epoch": 3.32, "learning_rate": 3.2770508563659555e-05, "loss": 1.1709, "step": 5543000 }, { "epoch": 3.32, "learning_rate": 3.2768408598098995e-05, "loss": 1.1774, "step": 5543500 }, { "epoch": 3.32, "learning_rate": 3.276631283246955e-05, "loss": 1.1743, "step": 5544000 }, { "epoch": 3.32, "learning_rate": 3.276421286690898e-05, "loss": 1.1763, "step": 5544500 }, { "epoch": 3.32, "learning_rate": 3.2762117101279535e-05, "loss": 1.1776, "step": 5545000 }, { "epoch": 3.32, "learning_rate": 3.276001713571897e-05, "loss": 1.1648, "step": 5545500 }, { "epoch": 3.33, "learning_rate": 3.275791717015841e-05, "loss": 1.1566, "step": 5546000 }, { "epoch": 3.33, "learning_rate": 3.275581720459784e-05, "loss": 1.172, "step": 5546500 }, { "epoch": 3.33, "learning_rate": 3.2753717239037276e-05, "loss": 1.1489, "step": 5547000 }, { "epoch": 3.33, "learning_rate": 3.2751617273476716e-05, "loss": 1.181, "step": 5547500 }, { "epoch": 3.33, "learning_rate": 3.274951730791615e-05, "loss": 1.1685, "step": 5548000 }, { "epoch": 3.33, "learning_rate": 3.274741734235558e-05, "loss": 1.1771, "step": 5548500 }, { "epoch": 3.33, "learning_rate": 3.2745321576726144e-05, "loss": 1.1511, "step": 5549000 }, { "epoch": 3.33, "learning_rate": 3.274322161116558e-05, "loss": 1.1519, "step": 5549500 }, { "epoch": 3.33, "learning_rate": 3.274112164560501e-05, "loss": 1.1419, "step": 5550000 }, { "epoch": 3.33, "learning_rate": 3.273902168004445e-05, "loss": 1.1502, "step": 5550500 }, { "epoch": 3.33, "learning_rate": 3.2736921714483884e-05, "loss": 1.1489, "step": 5551000 }, { "epoch": 3.33, "learning_rate": 3.273482174892332e-05, "loss": 1.1766, "step": 5551500 }, { "epoch": 3.33, "learning_rate": 3.273272598329387e-05, "loss": 1.1568, "step": 5552000 }, { "epoch": 3.33, "learning_rate": 3.2730630217664425e-05, "loss": 1.1706, "step": 5552500 }, { "epoch": 3.33, "learning_rate": 3.2728530252103865e-05, "loss": 1.1486, "step": 5553000 }, { "epoch": 3.33, "learning_rate": 3.27264302865433e-05, "loss": 1.1537, "step": 5553500 }, { "epoch": 3.33, "learning_rate": 3.272433032098273e-05, "loss": 1.1729, "step": 5554000 }, { "epoch": 3.33, "learning_rate": 3.272223035542217e-05, "loss": 1.1485, "step": 5554500 }, { "epoch": 3.33, "learning_rate": 3.2720130389861606e-05, "loss": 1.1878, "step": 5555000 }, { "epoch": 3.33, "learning_rate": 3.271803042430104e-05, "loss": 1.1427, "step": 5555500 }, { "epoch": 3.33, "learning_rate": 3.271593045874048e-05, "loss": 1.1504, "step": 5556000 }, { "epoch": 3.33, "learning_rate": 3.271383049317991e-05, "loss": 1.1673, "step": 5556500 }, { "epoch": 3.33, "learning_rate": 3.2711730527619346e-05, "loss": 1.1653, "step": 5557000 }, { "epoch": 3.33, "learning_rate": 3.270963476198991e-05, "loss": 1.1439, "step": 5557500 }, { "epoch": 3.33, "learning_rate": 3.270753479642934e-05, "loss": 1.1659, "step": 5558000 }, { "epoch": 3.33, "learning_rate": 3.2705434830868774e-05, "loss": 1.1871, "step": 5558500 }, { "epoch": 3.33, "learning_rate": 3.270333906523933e-05, "loss": 1.1778, "step": 5559000 }, { "epoch": 3.33, "learning_rate": 3.270123909967877e-05, "loss": 1.1815, "step": 5559500 }, { "epoch": 3.33, "learning_rate": 3.26991391341182e-05, "loss": 1.1466, "step": 5560000 }, { "epoch": 3.33, "learning_rate": 3.2697039168557634e-05, "loss": 1.1849, "step": 5560500 }, { "epoch": 3.33, "learning_rate": 3.2694939202997075e-05, "loss": 1.1925, "step": 5561000 }, { "epoch": 3.33, "learning_rate": 3.269283923743651e-05, "loss": 1.1572, "step": 5561500 }, { "epoch": 3.33, "learning_rate": 3.269074347180706e-05, "loss": 1.1872, "step": 5562000 }, { "epoch": 3.33, "learning_rate": 3.2688643506246495e-05, "loss": 1.1712, "step": 5562500 }, { "epoch": 3.34, "learning_rate": 3.2686543540685935e-05, "loss": 1.1812, "step": 5563000 }, { "epoch": 3.34, "learning_rate": 3.268444357512537e-05, "loss": 1.1597, "step": 5563500 }, { "epoch": 3.34, "learning_rate": 3.268234360956481e-05, "loss": 1.1551, "step": 5564000 }, { "epoch": 3.34, "learning_rate": 3.2680243644004236e-05, "loss": 1.184, "step": 5564500 }, { "epoch": 3.34, "learning_rate": 3.267814367844367e-05, "loss": 1.1797, "step": 5565000 }, { "epoch": 3.34, "learning_rate": 3.267604371288311e-05, "loss": 1.1994, "step": 5565500 }, { "epoch": 3.34, "learning_rate": 3.267394374732254e-05, "loss": 1.1719, "step": 5566000 }, { "epoch": 3.34, "learning_rate": 3.26718479816931e-05, "loss": 1.1334, "step": 5566500 }, { "epoch": 3.34, "learning_rate": 3.266974801613253e-05, "loss": 1.1589, "step": 5567000 }, { "epoch": 3.34, "learning_rate": 3.266764805057197e-05, "loss": 1.1703, "step": 5567500 }, { "epoch": 3.34, "learning_rate": 3.2665548085011403e-05, "loss": 1.1446, "step": 5568000 }, { "epoch": 3.34, "learning_rate": 3.266344811945084e-05, "loss": 1.1726, "step": 5568500 }, { "epoch": 3.34, "learning_rate": 3.266135235382139e-05, "loss": 1.1584, "step": 5569000 }, { "epoch": 3.34, "learning_rate": 3.265925238826083e-05, "loss": 1.1766, "step": 5569500 }, { "epoch": 3.34, "learning_rate": 3.2657152422700264e-05, "loss": 1.172, "step": 5570000 }, { "epoch": 3.34, "learning_rate": 3.2655052457139704e-05, "loss": 1.176, "step": 5570500 }, { "epoch": 3.34, "learning_rate": 3.265295249157914e-05, "loss": 1.161, "step": 5571000 }, { "epoch": 3.34, "learning_rate": 3.265085252601857e-05, "loss": 1.1801, "step": 5571500 }, { "epoch": 3.34, "learning_rate": 3.264875256045801e-05, "loss": 1.1811, "step": 5572000 }, { "epoch": 3.34, "learning_rate": 3.2646656794828565e-05, "loss": 1.1385, "step": 5572500 }, { "epoch": 3.34, "learning_rate": 3.2644561029199126e-05, "loss": 1.1526, "step": 5573000 }, { "epoch": 3.34, "learning_rate": 3.264246106363856e-05, "loss": 1.1776, "step": 5573500 }, { "epoch": 3.34, "learning_rate": 3.2640361098077986e-05, "loss": 1.1654, "step": 5574000 }, { "epoch": 3.34, "learning_rate": 3.2638261132517426e-05, "loss": 1.1677, "step": 5574500 }, { "epoch": 3.34, "learning_rate": 3.263616116695686e-05, "loss": 1.1366, "step": 5575000 }, { "epoch": 3.34, "learning_rate": 3.263406120139629e-05, "loss": 1.1738, "step": 5575500 }, { "epoch": 3.34, "learning_rate": 3.263196123583573e-05, "loss": 1.1653, "step": 5576000 }, { "epoch": 3.34, "learning_rate": 3.2629861270275167e-05, "loss": 1.1197, "step": 5576500 }, { "epoch": 3.34, "learning_rate": 3.26277613047146e-05, "loss": 1.1872, "step": 5577000 }, { "epoch": 3.34, "learning_rate": 3.262566133915404e-05, "loss": 1.1891, "step": 5577500 }, { "epoch": 3.34, "learning_rate": 3.2623565573524594e-05, "loss": 1.1761, "step": 5578000 }, { "epoch": 3.34, "learning_rate": 3.262146560796403e-05, "loss": 1.173, "step": 5578500 }, { "epoch": 3.34, "learning_rate": 3.261936564240347e-05, "loss": 1.2092, "step": 5579000 }, { "epoch": 3.35, "learning_rate": 3.26172656768429e-05, "loss": 1.1165, "step": 5579500 }, { "epoch": 3.35, "learning_rate": 3.2615165711282334e-05, "loss": 1.174, "step": 5580000 }, { "epoch": 3.35, "learning_rate": 3.2613065745721775e-05, "loss": 1.1497, "step": 5580500 }, { "epoch": 3.35, "learning_rate": 3.261096578016121e-05, "loss": 1.1227, "step": 5581000 }, { "epoch": 3.35, "learning_rate": 3.260886581460064e-05, "loss": 1.1727, "step": 5581500 }, { "epoch": 3.35, "learning_rate": 3.2606774248902315e-05, "loss": 1.1553, "step": 5582000 }, { "epoch": 3.35, "learning_rate": 3.260467428334175e-05, "loss": 1.1526, "step": 5582500 }, { "epoch": 3.35, "learning_rate": 3.260257431778119e-05, "loss": 1.1357, "step": 5583000 }, { "epoch": 3.35, "learning_rate": 3.260047435222062e-05, "loss": 1.1499, "step": 5583500 }, { "epoch": 3.35, "learning_rate": 3.2598374386660056e-05, "loss": 1.1476, "step": 5584000 }, { "epoch": 3.35, "learning_rate": 3.2596274421099496e-05, "loss": 1.177, "step": 5584500 }, { "epoch": 3.35, "learning_rate": 3.259417445553893e-05, "loss": 1.1596, "step": 5585000 }, { "epoch": 3.35, "learning_rate": 3.259207868990948e-05, "loss": 1.1741, "step": 5585500 }, { "epoch": 3.35, "learning_rate": 3.258997872434892e-05, "loss": 1.1765, "step": 5586000 }, { "epoch": 3.35, "learning_rate": 3.258787875878836e-05, "loss": 1.1748, "step": 5586500 }, { "epoch": 3.35, "learning_rate": 3.258577879322779e-05, "loss": 1.1551, "step": 5587000 }, { "epoch": 3.35, "learning_rate": 3.258367882766723e-05, "loss": 1.1721, "step": 5587500 }, { "epoch": 3.35, "learning_rate": 3.2581578862106664e-05, "loss": 1.1569, "step": 5588000 }, { "epoch": 3.35, "learning_rate": 3.25794788965461e-05, "loss": 1.1801, "step": 5588500 }, { "epoch": 3.35, "learning_rate": 3.257737893098553e-05, "loss": 1.1581, "step": 5589000 }, { "epoch": 3.35, "learning_rate": 3.2575278965424964e-05, "loss": 1.1685, "step": 5589500 }, { "epoch": 3.35, "learning_rate": 3.25731789998644e-05, "loss": 1.1671, "step": 5590000 }, { "epoch": 3.35, "learning_rate": 3.257107903430384e-05, "loss": 1.181, "step": 5590500 }, { "epoch": 3.35, "learning_rate": 3.256897906874327e-05, "loss": 1.1481, "step": 5591000 }, { "epoch": 3.35, "learning_rate": 3.2566883303113825e-05, "loss": 1.1585, "step": 5591500 }, { "epoch": 3.35, "learning_rate": 3.256478333755326e-05, "loss": 1.1532, "step": 5592000 }, { "epoch": 3.35, "learning_rate": 3.256268757192382e-05, "loss": 1.1392, "step": 5592500 }, { "epoch": 3.35, "learning_rate": 3.256058760636326e-05, "loss": 1.1482, "step": 5593000 }, { "epoch": 3.35, "learning_rate": 3.2558487640802686e-05, "loss": 1.1802, "step": 5593500 }, { "epoch": 3.35, "learning_rate": 3.2556387675242126e-05, "loss": 1.1745, "step": 5594000 }, { "epoch": 3.35, "learning_rate": 3.255428770968156e-05, "loss": 1.1544, "step": 5594500 }, { "epoch": 3.35, "learning_rate": 3.255219194405212e-05, "loss": 1.177, "step": 5595000 }, { "epoch": 3.35, "learning_rate": 3.255009197849155e-05, "loss": 1.1666, "step": 5595500 }, { "epoch": 3.36, "learning_rate": 3.254799201293099e-05, "loss": 1.149, "step": 5596000 }, { "epoch": 3.36, "learning_rate": 3.254589204737042e-05, "loss": 1.1711, "step": 5596500 }, { "epoch": 3.36, "learning_rate": 3.2543792081809854e-05, "loss": 1.1619, "step": 5597000 }, { "epoch": 3.36, "learning_rate": 3.2541692116249294e-05, "loss": 1.1795, "step": 5597500 }, { "epoch": 3.36, "learning_rate": 3.253959215068873e-05, "loss": 1.1767, "step": 5598000 }, { "epoch": 3.36, "learning_rate": 3.253749218512816e-05, "loss": 1.156, "step": 5598500 }, { "epoch": 3.36, "learning_rate": 3.2535396419498714e-05, "loss": 1.1811, "step": 5599000 }, { "epoch": 3.36, "learning_rate": 3.2533296453938155e-05, "loss": 1.1904, "step": 5599500 }, { "epoch": 3.36, "learning_rate": 3.253119648837759e-05, "loss": 1.1792, "step": 5600000 }, { "epoch": 3.36, "eval_loss": 1.133358359336853, "eval_runtime": 1102.5182, "eval_samples_per_second": 477.743, "eval_steps_per_second": 79.624, "step": 5600000 }, { "epoch": 3.36, "learning_rate": 3.252909652281702e-05, "loss": 1.1808, "step": 5600500 }, { "epoch": 3.36, "learning_rate": 3.252699655725646e-05, "loss": 1.1619, "step": 5601000 }, { "epoch": 3.36, "learning_rate": 3.2524896591695895e-05, "loss": 1.1489, "step": 5601500 }, { "epoch": 3.36, "learning_rate": 3.252279662613533e-05, "loss": 1.1419, "step": 5602000 }, { "epoch": 3.36, "learning_rate": 3.252069666057477e-05, "loss": 1.1486, "step": 5602500 }, { "epoch": 3.36, "learning_rate": 3.251860509487645e-05, "loss": 1.1613, "step": 5603000 }, { "epoch": 3.36, "learning_rate": 3.2516505129315876e-05, "loss": 1.1704, "step": 5603500 }, { "epoch": 3.36, "learning_rate": 3.251440516375531e-05, "loss": 1.1719, "step": 5604000 }, { "epoch": 3.36, "learning_rate": 3.251230519819475e-05, "loss": 1.1553, "step": 5604500 }, { "epoch": 3.36, "learning_rate": 3.251020943256531e-05, "loss": 1.1717, "step": 5605000 }, { "epoch": 3.36, "learning_rate": 3.250810946700474e-05, "loss": 1.1541, "step": 5605500 }, { "epoch": 3.36, "learning_rate": 3.250600950144417e-05, "loss": 1.1557, "step": 5606000 }, { "epoch": 3.36, "learning_rate": 3.250390953588361e-05, "loss": 1.1617, "step": 5606500 }, { "epoch": 3.36, "learning_rate": 3.2501809570323044e-05, "loss": 1.1977, "step": 5607000 }, { "epoch": 3.36, "learning_rate": 3.249970960476248e-05, "loss": 1.1225, "step": 5607500 }, { "epoch": 3.36, "learning_rate": 3.249761383913304e-05, "loss": 1.1355, "step": 5608000 }, { "epoch": 3.36, "learning_rate": 3.249551387357247e-05, "loss": 1.2044, "step": 5608500 }, { "epoch": 3.36, "learning_rate": 3.2493413908011905e-05, "loss": 1.1625, "step": 5609000 }, { "epoch": 3.36, "learning_rate": 3.2491313942451345e-05, "loss": 1.218, "step": 5609500 }, { "epoch": 3.36, "learning_rate": 3.248921397689078e-05, "loss": 1.1813, "step": 5610000 }, { "epoch": 3.36, "learning_rate": 3.248711401133021e-05, "loss": 1.1655, "step": 5610500 }, { "epoch": 3.36, "learning_rate": 3.248501404576965e-05, "loss": 1.1774, "step": 5611000 }, { "epoch": 3.36, "learning_rate": 3.2482914080209086e-05, "loss": 1.1804, "step": 5611500 }, { "epoch": 3.36, "learning_rate": 3.248081411464852e-05, "loss": 1.1701, "step": 5612000 }, { "epoch": 3.36, "learning_rate": 3.247871414908796e-05, "loss": 1.1715, "step": 5612500 }, { "epoch": 3.37, "learning_rate": 3.247661418352739e-05, "loss": 1.1438, "step": 5613000 }, { "epoch": 3.37, "learning_rate": 3.247451421796682e-05, "loss": 1.1704, "step": 5613500 }, { "epoch": 3.37, "learning_rate": 3.247241845233738e-05, "loss": 1.1741, "step": 5614000 }, { "epoch": 3.37, "learning_rate": 3.247032268670793e-05, "loss": 1.1767, "step": 5614500 }, { "epoch": 3.37, "learning_rate": 3.2468222721147374e-05, "loss": 1.1701, "step": 5615000 }, { "epoch": 3.37, "learning_rate": 3.246612275558681e-05, "loss": 1.1704, "step": 5615500 }, { "epoch": 3.37, "learning_rate": 3.246402279002624e-05, "loss": 1.1816, "step": 5616000 }, { "epoch": 3.37, "learning_rate": 3.246192282446568e-05, "loss": 1.1832, "step": 5616500 }, { "epoch": 3.37, "learning_rate": 3.2459822858905114e-05, "loss": 1.1316, "step": 5617000 }, { "epoch": 3.37, "learning_rate": 3.2457722893344554e-05, "loss": 1.1756, "step": 5617500 }, { "epoch": 3.37, "learning_rate": 3.245562292778399e-05, "loss": 1.1507, "step": 5618000 }, { "epoch": 3.37, "learning_rate": 3.245352716215454e-05, "loss": 1.1894, "step": 5618500 }, { "epoch": 3.37, "learning_rate": 3.2451427196593975e-05, "loss": 1.1887, "step": 5619000 }, { "epoch": 3.37, "learning_rate": 3.2449327231033415e-05, "loss": 1.1716, "step": 5619500 }, { "epoch": 3.37, "learning_rate": 3.244722726547285e-05, "loss": 1.1675, "step": 5620000 }, { "epoch": 3.37, "learning_rate": 3.24451314998434e-05, "loss": 1.162, "step": 5620500 }, { "epoch": 3.37, "learning_rate": 3.2443035734213956e-05, "loss": 1.1518, "step": 5621000 }, { "epoch": 3.37, "learning_rate": 3.244093576865339e-05, "loss": 1.1753, "step": 5621500 }, { "epoch": 3.37, "learning_rate": 3.243883580309283e-05, "loss": 1.174, "step": 5622000 }, { "epoch": 3.37, "learning_rate": 3.243673583753226e-05, "loss": 1.1792, "step": 5622500 }, { "epoch": 3.37, "learning_rate": 3.24346358719717e-05, "loss": 1.1855, "step": 5623000 }, { "epoch": 3.37, "learning_rate": 3.2432535906411137e-05, "loss": 1.1751, "step": 5623500 }, { "epoch": 3.37, "learning_rate": 3.243043594085057e-05, "loss": 1.1527, "step": 5624000 }, { "epoch": 3.37, "learning_rate": 3.242833597529001e-05, "loss": 1.1513, "step": 5624500 }, { "epoch": 3.37, "learning_rate": 3.2426240209660564e-05, "loss": 1.1684, "step": 5625000 }, { "epoch": 3.37, "learning_rate": 3.242414444403112e-05, "loss": 1.1693, "step": 5625500 }, { "epoch": 3.37, "learning_rate": 3.242204447847055e-05, "loss": 1.1837, "step": 5626000 }, { "epoch": 3.37, "learning_rate": 3.2419944512909984e-05, "loss": 1.1661, "step": 5626500 }, { "epoch": 3.37, "learning_rate": 3.2417844547349425e-05, "loss": 1.1463, "step": 5627000 }, { "epoch": 3.37, "learning_rate": 3.241574878171998e-05, "loss": 1.1676, "step": 5627500 }, { "epoch": 3.37, "learning_rate": 3.241364881615941e-05, "loss": 1.1473, "step": 5628000 }, { "epoch": 3.37, "learning_rate": 3.2411548850598845e-05, "loss": 1.1347, "step": 5628500 }, { "epoch": 3.37, "learning_rate": 3.2409448885038285e-05, "loss": 1.1763, "step": 5629000 }, { "epoch": 3.38, "learning_rate": 3.240734891947772e-05, "loss": 1.1725, "step": 5629500 }, { "epoch": 3.38, "learning_rate": 3.240524895391716e-05, "loss": 1.1698, "step": 5630000 }, { "epoch": 3.38, "learning_rate": 3.240315318828771e-05, "loss": 1.179, "step": 5630500 }, { "epoch": 3.38, "learning_rate": 3.2401053222727146e-05, "loss": 1.1706, "step": 5631000 }, { "epoch": 3.38, "learning_rate": 3.239895325716658e-05, "loss": 1.1759, "step": 5631500 }, { "epoch": 3.38, "learning_rate": 3.239685329160602e-05, "loss": 1.1692, "step": 5632000 }, { "epoch": 3.38, "learning_rate": 3.239475332604545e-05, "loss": 1.1353, "step": 5632500 }, { "epoch": 3.38, "learning_rate": 3.239265336048489e-05, "loss": 1.1444, "step": 5633000 }, { "epoch": 3.38, "learning_rate": 3.239055759485544e-05, "loss": 1.1692, "step": 5633500 }, { "epoch": 3.38, "learning_rate": 3.238845762929488e-05, "loss": 1.1608, "step": 5634000 }, { "epoch": 3.38, "learning_rate": 3.2386357663734314e-05, "loss": 1.1699, "step": 5634500 }, { "epoch": 3.38, "learning_rate": 3.238425769817375e-05, "loss": 1.1614, "step": 5635000 }, { "epoch": 3.38, "learning_rate": 3.238216193254431e-05, "loss": 1.15, "step": 5635500 }, { "epoch": 3.38, "learning_rate": 3.238006196698374e-05, "loss": 1.1875, "step": 5636000 }, { "epoch": 3.38, "learning_rate": 3.2377962001423175e-05, "loss": 1.1752, "step": 5636500 }, { "epoch": 3.38, "learning_rate": 3.2375862035862615e-05, "loss": 1.1718, "step": 5637000 }, { "epoch": 3.38, "learning_rate": 3.237376207030205e-05, "loss": 1.1659, "step": 5637500 }, { "epoch": 3.38, "learning_rate": 3.23716663046726e-05, "loss": 1.1794, "step": 5638000 }, { "epoch": 3.38, "learning_rate": 3.2369566339112035e-05, "loss": 1.1792, "step": 5638500 }, { "epoch": 3.38, "learning_rate": 3.2367466373551476e-05, "loss": 1.1934, "step": 5639000 }, { "epoch": 3.38, "learning_rate": 3.236537060792203e-05, "loss": 1.1655, "step": 5639500 }, { "epoch": 3.38, "learning_rate": 3.236327064236146e-05, "loss": 1.1692, "step": 5640000 }, { "epoch": 3.38, "learning_rate": 3.2361170676800896e-05, "loss": 1.1593, "step": 5640500 }, { "epoch": 3.38, "learning_rate": 3.2359070711240336e-05, "loss": 1.1541, "step": 5641000 }, { "epoch": 3.38, "learning_rate": 3.235697074567977e-05, "loss": 1.1514, "step": 5641500 }, { "epoch": 3.38, "learning_rate": 3.23548707801192e-05, "loss": 1.1655, "step": 5642000 }, { "epoch": 3.38, "learning_rate": 3.2352770814558644e-05, "loss": 1.1697, "step": 5642500 }, { "epoch": 3.38, "learning_rate": 3.235067084899808e-05, "loss": 1.1728, "step": 5643000 }, { "epoch": 3.38, "learning_rate": 3.234857088343751e-05, "loss": 1.1941, "step": 5643500 }, { "epoch": 3.38, "learning_rate": 3.234647091787695e-05, "loss": 1.1786, "step": 5644000 }, { "epoch": 3.38, "learning_rate": 3.234437095231638e-05, "loss": 1.1452, "step": 5644500 }, { "epoch": 3.38, "learning_rate": 3.234227098675582e-05, "loss": 1.167, "step": 5645000 }, { "epoch": 3.38, "learning_rate": 3.234017522112638e-05, "loss": 1.1921, "step": 5645500 }, { "epoch": 3.39, "learning_rate": 3.233807525556581e-05, "loss": 1.174, "step": 5646000 }, { "epoch": 3.39, "learning_rate": 3.2335979489936365e-05, "loss": 1.1794, "step": 5646500 }, { "epoch": 3.39, "learning_rate": 3.23338795243758e-05, "loss": 1.1822, "step": 5647000 }, { "epoch": 3.39, "learning_rate": 3.233177955881524e-05, "loss": 1.1585, "step": 5647500 }, { "epoch": 3.39, "learning_rate": 3.232967959325467e-05, "loss": 1.1533, "step": 5648000 }, { "epoch": 3.39, "learning_rate": 3.2327579627694106e-05, "loss": 1.2131, "step": 5648500 }, { "epoch": 3.39, "learning_rate": 3.2325479662133546e-05, "loss": 1.171, "step": 5649000 }, { "epoch": 3.39, "learning_rate": 3.23233838965041e-05, "loss": 1.1628, "step": 5649500 }, { "epoch": 3.39, "learning_rate": 3.232128813087465e-05, "loss": 1.1329, "step": 5650000 }, { "epoch": 3.39, "learning_rate": 3.2319188165314086e-05, "loss": 1.1574, "step": 5650500 }, { "epoch": 3.39, "learning_rate": 3.231708819975353e-05, "loss": 1.1651, "step": 5651000 }, { "epoch": 3.39, "learning_rate": 3.231498823419296e-05, "loss": 1.1865, "step": 5651500 }, { "epoch": 3.39, "learning_rate": 3.2312888268632394e-05, "loss": 1.185, "step": 5652000 }, { "epoch": 3.39, "learning_rate": 3.2310788303071834e-05, "loss": 1.1538, "step": 5652500 }, { "epoch": 3.39, "learning_rate": 3.230868833751127e-05, "loss": 1.1288, "step": 5653000 }, { "epoch": 3.39, "learning_rate": 3.23065883719507e-05, "loss": 1.1766, "step": 5653500 }, { "epoch": 3.39, "learning_rate": 3.2304492606321254e-05, "loss": 1.1806, "step": 5654000 }, { "epoch": 3.39, "learning_rate": 3.2302392640760695e-05, "loss": 1.126, "step": 5654500 }, { "epoch": 3.39, "learning_rate": 3.230029267520013e-05, "loss": 1.2017, "step": 5655000 }, { "epoch": 3.39, "learning_rate": 3.229819270963956e-05, "loss": 1.1871, "step": 5655500 }, { "epoch": 3.39, "learning_rate": 3.2296092744079e-05, "loss": 1.156, "step": 5656000 }, { "epoch": 3.39, "learning_rate": 3.2293996978449555e-05, "loss": 1.2061, "step": 5656500 }, { "epoch": 3.39, "learning_rate": 3.229189701288899e-05, "loss": 1.1497, "step": 5657000 }, { "epoch": 3.39, "learning_rate": 3.228979704732842e-05, "loss": 1.1775, "step": 5657500 }, { "epoch": 3.39, "learning_rate": 3.228769708176786e-05, "loss": 1.1785, "step": 5658000 }, { "epoch": 3.39, "learning_rate": 3.2285597116207296e-05, "loss": 1.1697, "step": 5658500 }, { "epoch": 3.39, "learning_rate": 3.228349715064673e-05, "loss": 1.144, "step": 5659000 }, { "epoch": 3.39, "learning_rate": 3.228140138501729e-05, "loss": 1.1763, "step": 5659500 }, { "epoch": 3.39, "learning_rate": 3.227930141945672e-05, "loss": 1.1717, "step": 5660000 }, { "epoch": 3.39, "learning_rate": 3.2277201453896157e-05, "loss": 1.1606, "step": 5660500 }, { "epoch": 3.39, "learning_rate": 3.227510148833559e-05, "loss": 1.1757, "step": 5661000 }, { "epoch": 3.39, "learning_rate": 3.2273001522775024e-05, "loss": 1.1664, "step": 5661500 }, { "epoch": 3.39, "learning_rate": 3.227090155721446e-05, "loss": 1.1787, "step": 5662000 }, { "epoch": 3.39, "learning_rate": 3.22688015916539e-05, "loss": 1.1451, "step": 5662500 }, { "epoch": 3.4, "learning_rate": 3.226670582602446e-05, "loss": 1.1424, "step": 5663000 }, { "epoch": 3.4, "learning_rate": 3.2264605860463884e-05, "loss": 1.1706, "step": 5663500 }, { "epoch": 3.4, "learning_rate": 3.226250589490332e-05, "loss": 1.1625, "step": 5664000 }, { "epoch": 3.4, "learning_rate": 3.226040592934276e-05, "loss": 1.1533, "step": 5664500 }, { "epoch": 3.4, "learning_rate": 3.225830596378219e-05, "loss": 1.1547, "step": 5665000 }, { "epoch": 3.4, "learning_rate": 3.2256205998221625e-05, "loss": 1.1701, "step": 5665500 }, { "epoch": 3.4, "learning_rate": 3.2254110232592185e-05, "loss": 1.1818, "step": 5666000 }, { "epoch": 3.4, "learning_rate": 3.225201026703162e-05, "loss": 1.161, "step": 5666500 }, { "epoch": 3.4, "learning_rate": 3.224991030147105e-05, "loss": 1.1827, "step": 5667000 }, { "epoch": 3.4, "learning_rate": 3.224781033591049e-05, "loss": 1.1575, "step": 5667500 }, { "epoch": 3.4, "learning_rate": 3.2245710370349926e-05, "loss": 1.1496, "step": 5668000 }, { "epoch": 3.4, "learning_rate": 3.224361040478936e-05, "loss": 1.1928, "step": 5668500 }, { "epoch": 3.4, "learning_rate": 3.224151463915991e-05, "loss": 1.1721, "step": 5669000 }, { "epoch": 3.4, "learning_rate": 3.223941467359935e-05, "loss": 1.1539, "step": 5669500 }, { "epoch": 3.4, "learning_rate": 3.2237314708038787e-05, "loss": 1.1492, "step": 5670000 }, { "epoch": 3.4, "learning_rate": 3.223521474247822e-05, "loss": 1.1445, "step": 5670500 }, { "epoch": 3.4, "learning_rate": 3.223311477691766e-05, "loss": 1.1622, "step": 5671000 }, { "epoch": 3.4, "learning_rate": 3.2231019011288214e-05, "loss": 1.165, "step": 5671500 }, { "epoch": 3.4, "learning_rate": 3.222891904572765e-05, "loss": 1.1525, "step": 5672000 }, { "epoch": 3.4, "learning_rate": 3.222682328009821e-05, "loss": 1.175, "step": 5672500 }, { "epoch": 3.4, "learning_rate": 3.222472331453764e-05, "loss": 1.1524, "step": 5673000 }, { "epoch": 3.4, "learning_rate": 3.2222623348977075e-05, "loss": 1.1751, "step": 5673500 }, { "epoch": 3.4, "learning_rate": 3.222052338341651e-05, "loss": 1.1471, "step": 5674000 }, { "epoch": 3.4, "learning_rate": 3.221842341785595e-05, "loss": 1.185, "step": 5674500 }, { "epoch": 3.4, "learning_rate": 3.221632345229538e-05, "loss": 1.1564, "step": 5675000 }, { "epoch": 3.4, "learning_rate": 3.2214223486734815e-05, "loss": 1.1444, "step": 5675500 }, { "epoch": 3.4, "learning_rate": 3.2212123521174255e-05, "loss": 1.1681, "step": 5676000 }, { "epoch": 3.4, "learning_rate": 3.221002355561369e-05, "loss": 1.1551, "step": 5676500 }, { "epoch": 3.4, "learning_rate": 3.220792359005312e-05, "loss": 1.1717, "step": 5677000 }, { "epoch": 3.4, "learning_rate": 3.220582362449256e-05, "loss": 1.1558, "step": 5677500 }, { "epoch": 3.4, "learning_rate": 3.2203727858863116e-05, "loss": 1.1918, "step": 5678000 }, { "epoch": 3.4, "learning_rate": 3.220162789330255e-05, "loss": 1.1825, "step": 5678500 }, { "epoch": 3.4, "learning_rate": 3.219952792774198e-05, "loss": 1.1828, "step": 5679000 }, { "epoch": 3.41, "learning_rate": 3.219742796218142e-05, "loss": 1.1944, "step": 5679500 }, { "epoch": 3.41, "learning_rate": 3.219533219655198e-05, "loss": 1.1802, "step": 5680000 }, { "epoch": 3.41, "learning_rate": 3.219323223099141e-05, "loss": 1.1518, "step": 5680500 }, { "epoch": 3.41, "learning_rate": 3.2191132265430844e-05, "loss": 1.1576, "step": 5681000 }, { "epoch": 3.41, "learning_rate": 3.2189032299870284e-05, "loss": 1.1639, "step": 5681500 }, { "epoch": 3.41, "learning_rate": 3.218693233430972e-05, "loss": 1.1385, "step": 5682000 }, { "epoch": 3.41, "learning_rate": 3.218483236874916e-05, "loss": 1.1536, "step": 5682500 }, { "epoch": 3.41, "learning_rate": 3.218273240318859e-05, "loss": 1.1451, "step": 5683000 }, { "epoch": 3.41, "learning_rate": 3.218063243762802e-05, "loss": 1.1563, "step": 5683500 }, { "epoch": 3.41, "learning_rate": 3.217853667199858e-05, "loss": 1.1711, "step": 5684000 }, { "epoch": 3.41, "learning_rate": 3.217643670643802e-05, "loss": 1.168, "step": 5684500 }, { "epoch": 3.41, "learning_rate": 3.217434094080857e-05, "loss": 1.1343, "step": 5685000 }, { "epoch": 3.41, "learning_rate": 3.2172240975248005e-05, "loss": 1.1616, "step": 5685500 }, { "epoch": 3.41, "learning_rate": 3.217014100968744e-05, "loss": 1.1852, "step": 5686000 }, { "epoch": 3.41, "learning_rate": 3.216804104412688e-05, "loss": 1.1697, "step": 5686500 }, { "epoch": 3.41, "learning_rate": 3.216594527849743e-05, "loss": 1.1901, "step": 5687000 }, { "epoch": 3.41, "learning_rate": 3.2163845312936866e-05, "loss": 1.139, "step": 5687500 }, { "epoch": 3.41, "learning_rate": 3.2161745347376306e-05, "loss": 1.1687, "step": 5688000 }, { "epoch": 3.41, "learning_rate": 3.215964538181574e-05, "loss": 1.1308, "step": 5688500 }, { "epoch": 3.41, "learning_rate": 3.215754541625517e-05, "loss": 1.1691, "step": 5689000 }, { "epoch": 3.41, "learning_rate": 3.215544965062573e-05, "loss": 1.1597, "step": 5689500 }, { "epoch": 3.41, "learning_rate": 3.215334968506517e-05, "loss": 1.1738, "step": 5690000 }, { "epoch": 3.41, "learning_rate": 3.21512497195046e-05, "loss": 1.1635, "step": 5690500 }, { "epoch": 3.41, "learning_rate": 3.2149153953875154e-05, "loss": 1.1641, "step": 5691000 }, { "epoch": 3.41, "learning_rate": 3.214705398831459e-05, "loss": 1.1459, "step": 5691500 }, { "epoch": 3.41, "learning_rate": 3.214495402275403e-05, "loss": 1.1648, "step": 5692000 }, { "epoch": 3.41, "learning_rate": 3.214285405719346e-05, "loss": 1.1644, "step": 5692500 }, { "epoch": 3.41, "learning_rate": 3.2140754091632895e-05, "loss": 1.1855, "step": 5693000 }, { "epoch": 3.41, "learning_rate": 3.2138654126072335e-05, "loss": 1.1784, "step": 5693500 }, { "epoch": 3.41, "learning_rate": 3.213655416051177e-05, "loss": 1.1485, "step": 5694000 }, { "epoch": 3.41, "learning_rate": 3.21344541949512e-05, "loss": 1.1618, "step": 5694500 }, { "epoch": 3.41, "learning_rate": 3.213235422939064e-05, "loss": 1.1597, "step": 5695000 }, { "epoch": 3.41, "learning_rate": 3.213025426383007e-05, "loss": 1.131, "step": 5695500 }, { "epoch": 3.41, "learning_rate": 3.212815429826951e-05, "loss": 1.1581, "step": 5696000 }, { "epoch": 3.42, "learning_rate": 3.212605433270894e-05, "loss": 1.1535, "step": 5696500 }, { "epoch": 3.42, "learning_rate": 3.21239585670795e-05, "loss": 1.1674, "step": 5697000 }, { "epoch": 3.42, "learning_rate": 3.212185860151893e-05, "loss": 1.1664, "step": 5697500 }, { "epoch": 3.42, "learning_rate": 3.211975863595837e-05, "loss": 1.1804, "step": 5698000 }, { "epoch": 3.42, "learning_rate": 3.211766287032893e-05, "loss": 1.1701, "step": 5698500 }, { "epoch": 3.42, "learning_rate": 3.2115562904768364e-05, "loss": 1.1658, "step": 5699000 }, { "epoch": 3.42, "learning_rate": 3.21134629392078e-05, "loss": 1.1671, "step": 5699500 }, { "epoch": 3.42, "learning_rate": 3.211136297364723e-05, "loss": 1.1312, "step": 5700000 }, { "epoch": 3.42, "eval_loss": 1.132497787475586, "eval_runtime": 1107.8438, "eval_samples_per_second": 475.446, "eval_steps_per_second": 79.241, "step": 5700000 }, { "epoch": 3.42, "learning_rate": 3.210926720801779e-05, "loss": 1.1698, "step": 5700500 }, { "epoch": 3.42, "learning_rate": 3.2107167242457224e-05, "loss": 1.141, "step": 5701000 }, { "epoch": 3.42, "learning_rate": 3.210506727689666e-05, "loss": 1.1997, "step": 5701500 }, { "epoch": 3.42, "learning_rate": 3.21029673113361e-05, "loss": 1.1503, "step": 5702000 }, { "epoch": 3.42, "learning_rate": 3.2100867345775525e-05, "loss": 1.1567, "step": 5702500 }, { "epoch": 3.42, "learning_rate": 3.2098767380214965e-05, "loss": 1.1632, "step": 5703000 }, { "epoch": 3.42, "learning_rate": 3.20966674146544e-05, "loss": 1.1549, "step": 5703500 }, { "epoch": 3.42, "learning_rate": 3.209456744909383e-05, "loss": 1.1559, "step": 5704000 }, { "epoch": 3.42, "learning_rate": 3.209247168346439e-05, "loss": 1.1703, "step": 5704500 }, { "epoch": 3.42, "learning_rate": 3.2090371717903826e-05, "loss": 1.1443, "step": 5705000 }, { "epoch": 3.42, "learning_rate": 3.208827175234326e-05, "loss": 1.1661, "step": 5705500 }, { "epoch": 3.42, "learning_rate": 3.208617178678269e-05, "loss": 1.1596, "step": 5706000 }, { "epoch": 3.42, "learning_rate": 3.208407602115325e-05, "loss": 1.1798, "step": 5706500 }, { "epoch": 3.42, "learning_rate": 3.208197605559269e-05, "loss": 1.1757, "step": 5707000 }, { "epoch": 3.42, "learning_rate": 3.207988028996325e-05, "loss": 1.1434, "step": 5707500 }, { "epoch": 3.42, "learning_rate": 3.207778032440268e-05, "loss": 1.161, "step": 5708000 }, { "epoch": 3.42, "learning_rate": 3.2075680358842114e-05, "loss": 1.1387, "step": 5708500 }, { "epoch": 3.42, "learning_rate": 3.2073580393281554e-05, "loss": 1.1582, "step": 5709000 }, { "epoch": 3.42, "learning_rate": 3.207148042772098e-05, "loss": 1.1697, "step": 5709500 }, { "epoch": 3.42, "learning_rate": 3.206938046216042e-05, "loss": 1.1606, "step": 5710000 }, { "epoch": 3.42, "learning_rate": 3.2067280496599854e-05, "loss": 1.1694, "step": 5710500 }, { "epoch": 3.42, "learning_rate": 3.206518053103929e-05, "loss": 1.1898, "step": 5711000 }, { "epoch": 3.42, "learning_rate": 3.206308056547873e-05, "loss": 1.1401, "step": 5711500 }, { "epoch": 3.42, "learning_rate": 3.206098479984928e-05, "loss": 1.1683, "step": 5712000 }, { "epoch": 3.42, "learning_rate": 3.2058884834288715e-05, "loss": 1.1466, "step": 5712500 }, { "epoch": 3.43, "learning_rate": 3.205678486872815e-05, "loss": 1.1438, "step": 5713000 }, { "epoch": 3.43, "learning_rate": 3.205468490316759e-05, "loss": 1.1322, "step": 5713500 }, { "epoch": 3.43, "learning_rate": 3.205259333746926e-05, "loss": 1.1568, "step": 5714000 }, { "epoch": 3.43, "learning_rate": 3.20504933719087e-05, "loss": 1.1858, "step": 5714500 }, { "epoch": 3.43, "learning_rate": 3.2048397606279256e-05, "loss": 1.1724, "step": 5715000 }, { "epoch": 3.43, "learning_rate": 3.204629764071869e-05, "loss": 1.1717, "step": 5715500 }, { "epoch": 3.43, "learning_rate": 3.204419767515813e-05, "loss": 1.1658, "step": 5716000 }, { "epoch": 3.43, "learning_rate": 3.2042097709597563e-05, "loss": 1.1671, "step": 5716500 }, { "epoch": 3.43, "learning_rate": 3.204000194396812e-05, "loss": 1.1767, "step": 5717000 }, { "epoch": 3.43, "learning_rate": 3.203790197840755e-05, "loss": 1.1672, "step": 5717500 }, { "epoch": 3.43, "learning_rate": 3.203580201284699e-05, "loss": 1.1223, "step": 5718000 }, { "epoch": 3.43, "learning_rate": 3.2033702047286424e-05, "loss": 1.1336, "step": 5718500 }, { "epoch": 3.43, "learning_rate": 3.203160208172586e-05, "loss": 1.1758, "step": 5719000 }, { "epoch": 3.43, "learning_rate": 3.20295021161653e-05, "loss": 1.1549, "step": 5719500 }, { "epoch": 3.43, "learning_rate": 3.202740215060473e-05, "loss": 1.1436, "step": 5720000 }, { "epoch": 3.43, "learning_rate": 3.2025302185044165e-05, "loss": 1.1579, "step": 5720500 }, { "epoch": 3.43, "learning_rate": 3.2023202219483605e-05, "loss": 1.158, "step": 5721000 }, { "epoch": 3.43, "learning_rate": 3.202110225392303e-05, "loss": 1.1989, "step": 5721500 }, { "epoch": 3.43, "learning_rate": 3.2019002288362465e-05, "loss": 1.1657, "step": 5722000 }, { "epoch": 3.43, "learning_rate": 3.2016902322801905e-05, "loss": 1.1528, "step": 5722500 }, { "epoch": 3.43, "learning_rate": 3.201480235724134e-05, "loss": 1.1252, "step": 5723000 }, { "epoch": 3.43, "learning_rate": 3.20127065916119e-05, "loss": 1.1798, "step": 5723500 }, { "epoch": 3.43, "learning_rate": 3.201060662605133e-05, "loss": 1.1683, "step": 5724000 }, { "epoch": 3.43, "learning_rate": 3.2008506660490766e-05, "loss": 1.1565, "step": 5724500 }, { "epoch": 3.43, "learning_rate": 3.2006410894861326e-05, "loss": 1.1366, "step": 5725000 }, { "epoch": 3.43, "learning_rate": 3.200431092930076e-05, "loss": 1.1843, "step": 5725500 }, { "epoch": 3.43, "learning_rate": 3.20022109637402e-05, "loss": 1.1941, "step": 5726000 }, { "epoch": 3.43, "learning_rate": 3.200011099817963e-05, "loss": 1.1581, "step": 5726500 }, { "epoch": 3.43, "learning_rate": 3.199801103261906e-05, "loss": 1.1518, "step": 5727000 }, { "epoch": 3.43, "learning_rate": 3.19959110670585e-05, "loss": 1.1554, "step": 5727500 }, { "epoch": 3.43, "learning_rate": 3.1993811101497934e-05, "loss": 1.1484, "step": 5728000 }, { "epoch": 3.43, "learning_rate": 3.199171113593737e-05, "loss": 1.1363, "step": 5728500 }, { "epoch": 3.43, "learning_rate": 3.198961117037681e-05, "loss": 1.1542, "step": 5729000 }, { "epoch": 3.44, "learning_rate": 3.198751120481624e-05, "loss": 1.1512, "step": 5729500 }, { "epoch": 3.44, "learning_rate": 3.1985411239255675e-05, "loss": 1.156, "step": 5730000 }, { "epoch": 3.44, "learning_rate": 3.1983311273695115e-05, "loss": 1.1595, "step": 5730500 }, { "epoch": 3.44, "learning_rate": 3.198121550806567e-05, "loss": 1.1697, "step": 5731000 }, { "epoch": 3.44, "learning_rate": 3.19791155425051e-05, "loss": 1.1599, "step": 5731500 }, { "epoch": 3.44, "learning_rate": 3.1977015576944535e-05, "loss": 1.164, "step": 5732000 }, { "epoch": 3.44, "learning_rate": 3.1974915611383976e-05, "loss": 1.1451, "step": 5732500 }, { "epoch": 3.44, "learning_rate": 3.197281984575453e-05, "loss": 1.1526, "step": 5733000 }, { "epoch": 3.44, "learning_rate": 3.197072408012508e-05, "loss": 1.1653, "step": 5733500 }, { "epoch": 3.44, "learning_rate": 3.1968624114564516e-05, "loss": 1.1628, "step": 5734000 }, { "epoch": 3.44, "learning_rate": 3.1966524149003956e-05, "loss": 1.1765, "step": 5734500 }, { "epoch": 3.44, "learning_rate": 3.196442418344339e-05, "loss": 1.1748, "step": 5735000 }, { "epoch": 3.44, "learning_rate": 3.196232421788282e-05, "loss": 1.1739, "step": 5735500 }, { "epoch": 3.44, "learning_rate": 3.1960224252322264e-05, "loss": 1.1745, "step": 5736000 }, { "epoch": 3.44, "learning_rate": 3.19581242867617e-05, "loss": 1.1649, "step": 5736500 }, { "epoch": 3.44, "learning_rate": 3.195602432120113e-05, "loss": 1.1575, "step": 5737000 }, { "epoch": 3.44, "learning_rate": 3.1953928555571684e-05, "loss": 1.1695, "step": 5737500 }, { "epoch": 3.44, "learning_rate": 3.1951828590011124e-05, "loss": 1.1413, "step": 5738000 }, { "epoch": 3.44, "learning_rate": 3.194972862445056e-05, "loss": 1.173, "step": 5738500 }, { "epoch": 3.44, "learning_rate": 3.194762865888999e-05, "loss": 1.1741, "step": 5739000 }, { "epoch": 3.44, "learning_rate": 3.194552869332943e-05, "loss": 1.1698, "step": 5739500 }, { "epoch": 3.44, "learning_rate": 3.1943428727768865e-05, "loss": 1.1832, "step": 5740000 }, { "epoch": 3.44, "learning_rate": 3.19413287622083e-05, "loss": 1.166, "step": 5740500 }, { "epoch": 3.44, "learning_rate": 3.193922879664774e-05, "loss": 1.1473, "step": 5741000 }, { "epoch": 3.44, "learning_rate": 3.193713723094941e-05, "loss": 1.1404, "step": 5741500 }, { "epoch": 3.44, "learning_rate": 3.1935037265388846e-05, "loss": 1.1378, "step": 5742000 }, { "epoch": 3.44, "learning_rate": 3.193293729982828e-05, "loss": 1.1562, "step": 5742500 }, { "epoch": 3.44, "learning_rate": 3.193083733426772e-05, "loss": 1.1554, "step": 5743000 }, { "epoch": 3.44, "learning_rate": 3.192873736870715e-05, "loss": 1.1599, "step": 5743500 }, { "epoch": 3.44, "learning_rate": 3.1926637403146586e-05, "loss": 1.1747, "step": 5744000 }, { "epoch": 3.44, "learning_rate": 3.192454163751714e-05, "loss": 1.1836, "step": 5744500 }, { "epoch": 3.44, "learning_rate": 3.192244167195658e-05, "loss": 1.1401, "step": 5745000 }, { "epoch": 3.44, "learning_rate": 3.1920341706396014e-05, "loss": 1.185, "step": 5745500 }, { "epoch": 3.44, "learning_rate": 3.191824174083545e-05, "loss": 1.1379, "step": 5746000 }, { "epoch": 3.45, "learning_rate": 3.191614177527489e-05, "loss": 1.16, "step": 5746500 }, { "epoch": 3.45, "learning_rate": 3.191404180971432e-05, "loss": 1.1609, "step": 5747000 }, { "epoch": 3.45, "learning_rate": 3.191194184415376e-05, "loss": 1.1223, "step": 5747500 }, { "epoch": 3.45, "learning_rate": 3.1909841878593194e-05, "loss": 1.1856, "step": 5748000 }, { "epoch": 3.45, "learning_rate": 3.190774611296375e-05, "loss": 1.1863, "step": 5748500 }, { "epoch": 3.45, "learning_rate": 3.190564614740318e-05, "loss": 1.1641, "step": 5749000 }, { "epoch": 3.45, "learning_rate": 3.190354618184262e-05, "loss": 1.159, "step": 5749500 }, { "epoch": 3.45, "learning_rate": 3.1901446216282055e-05, "loss": 1.16, "step": 5750000 }, { "epoch": 3.45, "learning_rate": 3.189935045065261e-05, "loss": 1.1428, "step": 5750500 }, { "epoch": 3.45, "learning_rate": 3.189725048509204e-05, "loss": 1.1544, "step": 5751000 }, { "epoch": 3.45, "learning_rate": 3.189515051953148e-05, "loss": 1.1539, "step": 5751500 }, { "epoch": 3.45, "learning_rate": 3.1893050553970916e-05, "loss": 1.1577, "step": 5752000 }, { "epoch": 3.45, "learning_rate": 3.189095478834147e-05, "loss": 1.1709, "step": 5752500 }, { "epoch": 3.45, "learning_rate": 3.188885902271202e-05, "loss": 1.1955, "step": 5753000 }, { "epoch": 3.45, "learning_rate": 3.188675905715146e-05, "loss": 1.1875, "step": 5753500 }, { "epoch": 3.45, "learning_rate": 3.18846590915909e-05, "loss": 1.1712, "step": 5754000 }, { "epoch": 3.45, "learning_rate": 3.188255912603033e-05, "loss": 1.1749, "step": 5754500 }, { "epoch": 3.45, "learning_rate": 3.188045916046977e-05, "loss": 1.1689, "step": 5755000 }, { "epoch": 3.45, "learning_rate": 3.1878359194909204e-05, "loss": 1.1436, "step": 5755500 }, { "epoch": 3.45, "learning_rate": 3.187625922934864e-05, "loss": 1.1434, "step": 5756000 }, { "epoch": 3.45, "learning_rate": 3.187415926378808e-05, "loss": 1.1472, "step": 5756500 }, { "epoch": 3.45, "learning_rate": 3.187206349815863e-05, "loss": 1.1759, "step": 5757000 }, { "epoch": 3.45, "learning_rate": 3.1869963532598065e-05, "loss": 1.1698, "step": 5757500 }, { "epoch": 3.45, "learning_rate": 3.18678635670375e-05, "loss": 1.1382, "step": 5758000 }, { "epoch": 3.45, "learning_rate": 3.186576360147694e-05, "loss": 1.1299, "step": 5758500 }, { "epoch": 3.45, "learning_rate": 3.186366363591637e-05, "loss": 1.1768, "step": 5759000 }, { "epoch": 3.45, "learning_rate": 3.1861563670355805e-05, "loss": 1.1664, "step": 5759500 }, { "epoch": 3.45, "learning_rate": 3.1859463704795245e-05, "loss": 1.1634, "step": 5760000 }, { "epoch": 3.45, "learning_rate": 3.185736373923467e-05, "loss": 1.1438, "step": 5760500 }, { "epoch": 3.45, "learning_rate": 3.185526797360523e-05, "loss": 1.1761, "step": 5761000 }, { "epoch": 3.45, "learning_rate": 3.1853172207975786e-05, "loss": 1.1601, "step": 5761500 }, { "epoch": 3.45, "learning_rate": 3.1851072242415226e-05, "loss": 1.1417, "step": 5762000 }, { "epoch": 3.45, "learning_rate": 3.184897227685466e-05, "loss": 1.1427, "step": 5762500 }, { "epoch": 3.46, "learning_rate": 3.184687231129409e-05, "loss": 1.1951, "step": 5763000 }, { "epoch": 3.46, "learning_rate": 3.1844772345733533e-05, "loss": 1.2043, "step": 5763500 }, { "epoch": 3.46, "learning_rate": 3.184267238017297e-05, "loss": 1.1861, "step": 5764000 }, { "epoch": 3.46, "learning_rate": 3.18405724146124e-05, "loss": 1.1932, "step": 5764500 }, { "epoch": 3.46, "learning_rate": 3.1838472449051834e-05, "loss": 1.1597, "step": 5765000 }, { "epoch": 3.46, "learning_rate": 3.1836376683422394e-05, "loss": 1.1548, "step": 5765500 }, { "epoch": 3.46, "learning_rate": 3.183427671786183e-05, "loss": 1.1686, "step": 5766000 }, { "epoch": 3.46, "learning_rate": 3.183217675230126e-05, "loss": 1.169, "step": 5766500 }, { "epoch": 3.46, "learning_rate": 3.18300767867407e-05, "loss": 1.1618, "step": 5767000 }, { "epoch": 3.46, "learning_rate": 3.182797682118013e-05, "loss": 1.1584, "step": 5767500 }, { "epoch": 3.46, "learning_rate": 3.182587685561957e-05, "loss": 1.1554, "step": 5768000 }, { "epoch": 3.46, "learning_rate": 3.1823776890059e-05, "loss": 1.1216, "step": 5768500 }, { "epoch": 3.46, "learning_rate": 3.1821676924498435e-05, "loss": 1.1736, "step": 5769000 }, { "epoch": 3.46, "learning_rate": 3.1819581158868996e-05, "loss": 1.1407, "step": 5769500 }, { "epoch": 3.46, "learning_rate": 3.181748119330843e-05, "loss": 1.1424, "step": 5770000 }, { "epoch": 3.46, "learning_rate": 3.181538122774786e-05, "loss": 1.1856, "step": 5770500 }, { "epoch": 3.46, "learning_rate": 3.1813281262187296e-05, "loss": 1.1468, "step": 5771000 }, { "epoch": 3.46, "learning_rate": 3.1811185496557856e-05, "loss": 1.1634, "step": 5771500 }, { "epoch": 3.46, "learning_rate": 3.1809085530997296e-05, "loss": 1.181, "step": 5772000 }, { "epoch": 3.46, "learning_rate": 3.180698556543672e-05, "loss": 1.1913, "step": 5772500 }, { "epoch": 3.46, "learning_rate": 3.180488559987616e-05, "loss": 1.1863, "step": 5773000 }, { "epoch": 3.46, "learning_rate": 3.180278983424672e-05, "loss": 1.1734, "step": 5773500 }, { "epoch": 3.46, "learning_rate": 3.180068986868616e-05, "loss": 1.141, "step": 5774000 }, { "epoch": 3.46, "learning_rate": 3.179859410305671e-05, "loss": 1.1709, "step": 5774500 }, { "epoch": 3.46, "learning_rate": 3.1796494137496144e-05, "loss": 1.1674, "step": 5775000 }, { "epoch": 3.46, "learning_rate": 3.1794394171935584e-05, "loss": 1.1371, "step": 5775500 }, { "epoch": 3.46, "learning_rate": 3.179229420637502e-05, "loss": 1.1782, "step": 5776000 }, { "epoch": 3.46, "learning_rate": 3.179019424081445e-05, "loss": 1.1681, "step": 5776500 }, { "epoch": 3.46, "learning_rate": 3.1788094275253885e-05, "loss": 1.1641, "step": 5777000 }, { "epoch": 3.46, "learning_rate": 3.1785998509624445e-05, "loss": 1.1706, "step": 5777500 }, { "epoch": 3.46, "learning_rate": 3.178389854406388e-05, "loss": 1.1547, "step": 5778000 }, { "epoch": 3.46, "learning_rate": 3.178179857850331e-05, "loss": 1.1442, "step": 5778500 }, { "epoch": 3.46, "learning_rate": 3.177969861294275e-05, "loss": 1.1858, "step": 5779000 }, { "epoch": 3.47, "learning_rate": 3.177759864738218e-05, "loss": 1.183, "step": 5779500 }, { "epoch": 3.47, "learning_rate": 3.177550288175274e-05, "loss": 1.1728, "step": 5780000 }, { "epoch": 3.47, "learning_rate": 3.177340291619217e-05, "loss": 1.1735, "step": 5780500 }, { "epoch": 3.47, "learning_rate": 3.177130295063161e-05, "loss": 1.1416, "step": 5781000 }, { "epoch": 3.47, "learning_rate": 3.1769202985071047e-05, "loss": 1.1453, "step": 5781500 }, { "epoch": 3.47, "learning_rate": 3.176710301951048e-05, "loss": 1.186, "step": 5782000 }, { "epoch": 3.47, "learning_rate": 3.176500725388104e-05, "loss": 1.1518, "step": 5782500 }, { "epoch": 3.47, "learning_rate": 3.1762911488251594e-05, "loss": 1.1559, "step": 5783000 }, { "epoch": 3.47, "learning_rate": 3.176081152269103e-05, "loss": 1.1769, "step": 5783500 }, { "epoch": 3.47, "learning_rate": 3.175871155713046e-05, "loss": 1.1566, "step": 5784000 }, { "epoch": 3.47, "learning_rate": 3.17566115915699e-05, "loss": 1.1915, "step": 5784500 }, { "epoch": 3.47, "learning_rate": 3.1754511626009335e-05, "loss": 1.1506, "step": 5785000 }, { "epoch": 3.47, "learning_rate": 3.175241166044877e-05, "loss": 1.1684, "step": 5785500 }, { "epoch": 3.47, "learning_rate": 3.175031169488821e-05, "loss": 1.1573, "step": 5786000 }, { "epoch": 3.47, "learning_rate": 3.1748211729327635e-05, "loss": 1.1638, "step": 5786500 }, { "epoch": 3.47, "learning_rate": 3.174611176376707e-05, "loss": 1.1645, "step": 5787000 }, { "epoch": 3.47, "learning_rate": 3.174401599813763e-05, "loss": 1.1609, "step": 5787500 }, { "epoch": 3.47, "learning_rate": 3.174191603257707e-05, "loss": 1.1569, "step": 5788000 }, { "epoch": 3.47, "learning_rate": 3.17398160670165e-05, "loss": 1.1569, "step": 5788500 }, { "epoch": 3.47, "learning_rate": 3.1737716101455936e-05, "loss": 1.1638, "step": 5789000 }, { "epoch": 3.47, "learning_rate": 3.1735620335826496e-05, "loss": 1.1536, "step": 5789500 }, { "epoch": 3.47, "learning_rate": 3.173352037026593e-05, "loss": 1.1527, "step": 5790000 }, { "epoch": 3.47, "learning_rate": 3.173142040470536e-05, "loss": 1.1528, "step": 5790500 }, { "epoch": 3.47, "learning_rate": 3.1729320439144803e-05, "loss": 1.1453, "step": 5791000 }, { "epoch": 3.47, "learning_rate": 3.172722047358423e-05, "loss": 1.1826, "step": 5791500 }, { "epoch": 3.47, "learning_rate": 3.1725120508023664e-05, "loss": 1.1846, "step": 5792000 }, { "epoch": 3.47, "learning_rate": 3.1723020542463104e-05, "loss": 1.1534, "step": 5792500 }, { "epoch": 3.47, "learning_rate": 3.1720924776833664e-05, "loss": 1.1646, "step": 5793000 }, { "epoch": 3.47, "learning_rate": 3.17188248112731e-05, "loss": 1.1722, "step": 5793500 }, { "epoch": 3.47, "learning_rate": 3.1716724845712524e-05, "loss": 1.1695, "step": 5794000 }, { "epoch": 3.47, "learning_rate": 3.1714624880151965e-05, "loss": 1.1833, "step": 5794500 }, { "epoch": 3.47, "learning_rate": 3.17125249145914e-05, "loss": 1.1601, "step": 5795000 }, { "epoch": 3.47, "learning_rate": 3.171042494903083e-05, "loss": 1.1613, "step": 5795500 }, { "epoch": 3.47, "learning_rate": 3.170833338333251e-05, "loss": 1.1725, "step": 5796000 }, { "epoch": 3.48, "learning_rate": 3.170623341777195e-05, "loss": 1.1689, "step": 5796500 }, { "epoch": 3.48, "learning_rate": 3.1704133452211386e-05, "loss": 1.152, "step": 5797000 }, { "epoch": 3.48, "learning_rate": 3.170203348665082e-05, "loss": 1.1679, "step": 5797500 }, { "epoch": 3.48, "learning_rate": 3.169993352109026e-05, "loss": 1.1857, "step": 5798000 }, { "epoch": 3.48, "learning_rate": 3.1697833555529686e-05, "loss": 1.1694, "step": 5798500 }, { "epoch": 3.48, "learning_rate": 3.169573358996912e-05, "loss": 1.1209, "step": 5799000 }, { "epoch": 3.48, "learning_rate": 3.169363362440856e-05, "loss": 1.157, "step": 5799500 }, { "epoch": 3.48, "learning_rate": 3.169153365884799e-05, "loss": 1.1589, "step": 5800000 }, { "epoch": 3.48, "eval_loss": 1.126969337463379, "eval_runtime": 1104.9848, "eval_samples_per_second": 476.676, "eval_steps_per_second": 79.446, "step": 5800000 }, { "epoch": 3.48, "learning_rate": 3.1689437893218553e-05, "loss": 1.1474, "step": 5800500 }, { "epoch": 3.48, "learning_rate": 3.168733792765798e-05, "loss": 1.1733, "step": 5801000 }, { "epoch": 3.48, "learning_rate": 3.168523796209742e-05, "loss": 1.1411, "step": 5801500 }, { "epoch": 3.48, "learning_rate": 3.1683137996536854e-05, "loss": 1.1551, "step": 5802000 }, { "epoch": 3.48, "learning_rate": 3.168103803097629e-05, "loss": 1.1822, "step": 5802500 }, { "epoch": 3.48, "learning_rate": 3.167893806541573e-05, "loss": 1.151, "step": 5803000 }, { "epoch": 3.48, "learning_rate": 3.167683809985516e-05, "loss": 1.1747, "step": 5803500 }, { "epoch": 3.48, "learning_rate": 3.1674738134294594e-05, "loss": 1.136, "step": 5804000 }, { "epoch": 3.48, "learning_rate": 3.1672638168734035e-05, "loss": 1.1617, "step": 5804500 }, { "epoch": 3.48, "learning_rate": 3.167054240310459e-05, "loss": 1.1403, "step": 5805000 }, { "epoch": 3.48, "learning_rate": 3.166844243754402e-05, "loss": 1.1473, "step": 5805500 }, { "epoch": 3.48, "learning_rate": 3.166634247198346e-05, "loss": 1.155, "step": 5806000 }, { "epoch": 3.48, "learning_rate": 3.1664242506422895e-05, "loss": 1.1345, "step": 5806500 }, { "epoch": 3.48, "learning_rate": 3.166214674079345e-05, "loss": 1.176, "step": 5807000 }, { "epoch": 3.48, "learning_rate": 3.166004677523288e-05, "loss": 1.179, "step": 5807500 }, { "epoch": 3.48, "learning_rate": 3.165794680967232e-05, "loss": 1.1442, "step": 5808000 }, { "epoch": 3.48, "learning_rate": 3.1655851044042876e-05, "loss": 1.1426, "step": 5808500 }, { "epoch": 3.48, "learning_rate": 3.165375107848231e-05, "loss": 1.1678, "step": 5809000 }, { "epoch": 3.48, "learning_rate": 3.165165531285287e-05, "loss": 1.1509, "step": 5809500 }, { "epoch": 3.48, "learning_rate": 3.1649555347292304e-05, "loss": 1.1512, "step": 5810000 }, { "epoch": 3.48, "learning_rate": 3.1647459581662864e-05, "loss": 1.1695, "step": 5810500 }, { "epoch": 3.48, "learning_rate": 3.16453596161023e-05, "loss": 1.1764, "step": 5811000 }, { "epoch": 3.48, "learning_rate": 3.164325965054173e-05, "loss": 1.1493, "step": 5811500 }, { "epoch": 3.48, "learning_rate": 3.164115968498117e-05, "loss": 1.1406, "step": 5812000 }, { "epoch": 3.48, "learning_rate": 3.1639059719420604e-05, "loss": 1.1742, "step": 5812500 }, { "epoch": 3.49, "learning_rate": 3.163696395379116e-05, "loss": 1.1853, "step": 5813000 }, { "epoch": 3.49, "learning_rate": 3.163486398823059e-05, "loss": 1.1598, "step": 5813500 }, { "epoch": 3.49, "learning_rate": 3.163276402267003e-05, "loss": 1.1819, "step": 5814000 }, { "epoch": 3.49, "learning_rate": 3.1630664057109465e-05, "loss": 1.1759, "step": 5814500 }, { "epoch": 3.49, "learning_rate": 3.16285640915489e-05, "loss": 1.1665, "step": 5815000 }, { "epoch": 3.49, "learning_rate": 3.162646412598833e-05, "loss": 1.1531, "step": 5815500 }, { "epoch": 3.49, "learning_rate": 3.1624364160427766e-05, "loss": 1.1687, "step": 5816000 }, { "epoch": 3.49, "learning_rate": 3.16222641948672e-05, "loss": 1.1561, "step": 5816500 }, { "epoch": 3.49, "learning_rate": 3.162016422930664e-05, "loss": 1.1491, "step": 5817000 }, { "epoch": 3.49, "learning_rate": 3.161806426374607e-05, "loss": 1.1724, "step": 5817500 }, { "epoch": 3.49, "learning_rate": 3.1615964298185506e-05, "loss": 1.1631, "step": 5818000 }, { "epoch": 3.49, "learning_rate": 3.1613864332624946e-05, "loss": 1.1127, "step": 5818500 }, { "epoch": 3.49, "learning_rate": 3.161176436706438e-05, "loss": 1.163, "step": 5819000 }, { "epoch": 3.49, "learning_rate": 3.1609668601434933e-05, "loss": 1.1589, "step": 5819500 }, { "epoch": 3.49, "learning_rate": 3.1607568635874374e-05, "loss": 1.1583, "step": 5820000 }, { "epoch": 3.49, "learning_rate": 3.160546867031381e-05, "loss": 1.1588, "step": 5820500 }, { "epoch": 3.49, "learning_rate": 3.160336870475324e-05, "loss": 1.1696, "step": 5821000 }, { "epoch": 3.49, "learning_rate": 3.160126873919268e-05, "loss": 1.1645, "step": 5821500 }, { "epoch": 3.49, "learning_rate": 3.1599168773632114e-05, "loss": 1.1631, "step": 5822000 }, { "epoch": 3.49, "learning_rate": 3.159706880807155e-05, "loss": 1.1635, "step": 5822500 }, { "epoch": 3.49, "learning_rate": 3.159496884251098e-05, "loss": 1.1264, "step": 5823000 }, { "epoch": 3.49, "learning_rate": 3.159287307688154e-05, "loss": 1.1745, "step": 5823500 }, { "epoch": 3.49, "learning_rate": 3.1590773111320975e-05, "loss": 1.1581, "step": 5824000 }, { "epoch": 3.49, "learning_rate": 3.158867314576041e-05, "loss": 1.1759, "step": 5824500 }, { "epoch": 3.49, "learning_rate": 3.158657318019985e-05, "loss": 1.1641, "step": 5825000 }, { "epoch": 3.49, "learning_rate": 3.15844774145704e-05, "loss": 1.1687, "step": 5825500 }, { "epoch": 3.49, "learning_rate": 3.1582377449009836e-05, "loss": 1.1481, "step": 5826000 }, { "epoch": 3.49, "learning_rate": 3.1580277483449276e-05, "loss": 1.1726, "step": 5826500 }, { "epoch": 3.49, "learning_rate": 3.157818171781983e-05, "loss": 1.1738, "step": 5827000 }, { "epoch": 3.49, "learning_rate": 3.157608175225926e-05, "loss": 1.1579, "step": 5827500 }, { "epoch": 3.49, "learning_rate": 3.1573981786698697e-05, "loss": 1.1702, "step": 5828000 }, { "epoch": 3.49, "learning_rate": 3.157188182113814e-05, "loss": 1.1387, "step": 5828500 }, { "epoch": 3.49, "learning_rate": 3.156978185557757e-05, "loss": 1.1632, "step": 5829000 }, { "epoch": 3.5, "learning_rate": 3.1567681890017004e-05, "loss": 1.1309, "step": 5829500 }, { "epoch": 3.5, "learning_rate": 3.156559032431868e-05, "loss": 1.178, "step": 5830000 }, { "epoch": 3.5, "learning_rate": 3.156349035875811e-05, "loss": 1.1572, "step": 5830500 }, { "epoch": 3.5, "learning_rate": 3.156139039319755e-05, "loss": 1.1894, "step": 5831000 }, { "epoch": 3.5, "learning_rate": 3.155929462756811e-05, "loss": 1.1866, "step": 5831500 }, { "epoch": 3.5, "learning_rate": 3.155719466200754e-05, "loss": 1.1443, "step": 5832000 }, { "epoch": 3.5, "learning_rate": 3.155509469644698e-05, "loss": 1.1382, "step": 5832500 }, { "epoch": 3.5, "learning_rate": 3.155299473088641e-05, "loss": 1.1823, "step": 5833000 }, { "epoch": 3.5, "learning_rate": 3.155089896525697e-05, "loss": 1.1531, "step": 5833500 }, { "epoch": 3.5, "learning_rate": 3.1548798999696406e-05, "loss": 1.1411, "step": 5834000 }, { "epoch": 3.5, "learning_rate": 3.154669903413584e-05, "loss": 1.1435, "step": 5834500 }, { "epoch": 3.5, "learning_rate": 3.154459906857527e-05, "loss": 1.148, "step": 5835000 }, { "epoch": 3.5, "learning_rate": 3.1542499103014706e-05, "loss": 1.1628, "step": 5835500 }, { "epoch": 3.5, "learning_rate": 3.1540399137454146e-05, "loss": 1.1548, "step": 5836000 }, { "epoch": 3.5, "learning_rate": 3.153829917189358e-05, "loss": 1.1289, "step": 5836500 }, { "epoch": 3.5, "learning_rate": 3.153619920633301e-05, "loss": 1.1408, "step": 5837000 }, { "epoch": 3.5, "learning_rate": 3.153409924077245e-05, "loss": 1.1604, "step": 5837500 }, { "epoch": 3.5, "learning_rate": 3.153199927521189e-05, "loss": 1.1388, "step": 5838000 }, { "epoch": 3.5, "learning_rate": 3.152989930965132e-05, "loss": 1.1554, "step": 5838500 }, { "epoch": 3.5, "learning_rate": 3.152779934409076e-05, "loss": 1.1423, "step": 5839000 }, { "epoch": 3.5, "learning_rate": 3.1525703578461314e-05, "loss": 1.1777, "step": 5839500 }, { "epoch": 3.5, "learning_rate": 3.152360361290075e-05, "loss": 1.148, "step": 5840000 }, { "epoch": 3.5, "learning_rate": 3.152150364734019e-05, "loss": 1.1437, "step": 5840500 }, { "epoch": 3.5, "learning_rate": 3.151940368177962e-05, "loss": 1.1623, "step": 5841000 }, { "epoch": 3.5, "learning_rate": 3.1517303716219055e-05, "loss": 1.1405, "step": 5841500 }, { "epoch": 3.5, "learning_rate": 3.151520795058961e-05, "loss": 1.1685, "step": 5842000 }, { "epoch": 3.5, "learning_rate": 3.151310798502905e-05, "loss": 1.1948, "step": 5842500 }, { "epoch": 3.5, "learning_rate": 3.151100801946848e-05, "loss": 1.1379, "step": 5843000 }, { "epoch": 3.5, "learning_rate": 3.1508908053907915e-05, "loss": 1.1651, "step": 5843500 }, { "epoch": 3.5, "learning_rate": 3.1506808088347356e-05, "loss": 1.1415, "step": 5844000 }, { "epoch": 3.5, "learning_rate": 3.150471232271791e-05, "loss": 1.1794, "step": 5844500 }, { "epoch": 3.5, "learning_rate": 3.150261235715734e-05, "loss": 1.1576, "step": 5845000 }, { "epoch": 3.5, "learning_rate": 3.1500512391596776e-05, "loss": 1.1421, "step": 5845500 }, { "epoch": 3.5, "learning_rate": 3.1498412426036216e-05, "loss": 1.1453, "step": 5846000 }, { "epoch": 3.51, "learning_rate": 3.149631246047565e-05, "loss": 1.1576, "step": 5846500 }, { "epoch": 3.51, "learning_rate": 3.149421249491508e-05, "loss": 1.1578, "step": 5847000 }, { "epoch": 3.51, "learning_rate": 3.149211252935452e-05, "loss": 1.1643, "step": 5847500 }, { "epoch": 3.51, "learning_rate": 3.149001256379395e-05, "loss": 1.1605, "step": 5848000 }, { "epoch": 3.51, "learning_rate": 3.148791679816451e-05, "loss": 1.157, "step": 5848500 }, { "epoch": 3.51, "learning_rate": 3.148581683260395e-05, "loss": 1.1816, "step": 5849000 }, { "epoch": 3.51, "learning_rate": 3.1483721066974504e-05, "loss": 1.1649, "step": 5849500 }, { "epoch": 3.51, "learning_rate": 3.148162110141394e-05, "loss": 1.208, "step": 5850000 }, { "epoch": 3.51, "learning_rate": 3.147952113585337e-05, "loss": 1.1497, "step": 5850500 }, { "epoch": 3.51, "learning_rate": 3.1477425370223925e-05, "loss": 1.1664, "step": 5851000 }, { "epoch": 3.51, "learning_rate": 3.1475325404663365e-05, "loss": 1.141, "step": 5851500 }, { "epoch": 3.51, "learning_rate": 3.14732254391028e-05, "loss": 1.1654, "step": 5852000 }, { "epoch": 3.51, "learning_rate": 3.147112547354223e-05, "loss": 1.1721, "step": 5852500 }, { "epoch": 3.51, "learning_rate": 3.146902550798167e-05, "loss": 1.1481, "step": 5853000 }, { "epoch": 3.51, "learning_rate": 3.1466925542421106e-05, "loss": 1.153, "step": 5853500 }, { "epoch": 3.51, "learning_rate": 3.146482557686054e-05, "loss": 1.185, "step": 5854000 }, { "epoch": 3.51, "learning_rate": 3.14627298112311e-05, "loss": 1.1701, "step": 5854500 }, { "epoch": 3.51, "learning_rate": 3.146062984567053e-05, "loss": 1.1651, "step": 5855000 }, { "epoch": 3.51, "learning_rate": 3.1458529880109966e-05, "loss": 1.1614, "step": 5855500 }, { "epoch": 3.51, "learning_rate": 3.145642991454941e-05, "loss": 1.166, "step": 5856000 }, { "epoch": 3.51, "learning_rate": 3.145432994898883e-05, "loss": 1.1691, "step": 5856500 }, { "epoch": 3.51, "learning_rate": 3.145222998342827e-05, "loss": 1.1458, "step": 5857000 }, { "epoch": 3.51, "learning_rate": 3.145013001786771e-05, "loss": 1.1614, "step": 5857500 }, { "epoch": 3.51, "learning_rate": 3.144803005230714e-05, "loss": 1.1673, "step": 5858000 }, { "epoch": 3.51, "learning_rate": 3.1445930086746574e-05, "loss": 1.1499, "step": 5858500 }, { "epoch": 3.51, "learning_rate": 3.1443830121186014e-05, "loss": 1.1274, "step": 5859000 }, { "epoch": 3.51, "learning_rate": 3.144173015562545e-05, "loss": 1.1846, "step": 5859500 }, { "epoch": 3.51, "learning_rate": 3.143963019006488e-05, "loss": 1.1797, "step": 5860000 }, { "epoch": 3.51, "learning_rate": 3.143753862436656e-05, "loss": 1.1241, "step": 5860500 }, { "epoch": 3.51, "learning_rate": 3.1435438658805995e-05, "loss": 1.1764, "step": 5861000 }, { "epoch": 3.51, "learning_rate": 3.1433342893176555e-05, "loss": 1.1689, "step": 5861500 }, { "epoch": 3.51, "learning_rate": 3.143124292761599e-05, "loss": 1.1577, "step": 5862000 }, { "epoch": 3.51, "learning_rate": 3.142914296205542e-05, "loss": 1.1434, "step": 5862500 }, { "epoch": 3.52, "learning_rate": 3.142704299649486e-05, "loss": 1.1596, "step": 5863000 }, { "epoch": 3.52, "learning_rate": 3.142494303093429e-05, "loss": 1.1496, "step": 5863500 }, { "epoch": 3.52, "learning_rate": 3.142284306537372e-05, "loss": 1.1566, "step": 5864000 }, { "epoch": 3.52, "learning_rate": 3.142074309981316e-05, "loss": 1.1699, "step": 5864500 }, { "epoch": 3.52, "learning_rate": 3.1418643134252596e-05, "loss": 1.1789, "step": 5865000 }, { "epoch": 3.52, "learning_rate": 3.141654736862316e-05, "loss": 1.1931, "step": 5865500 }, { "epoch": 3.52, "learning_rate": 3.1414447403062583e-05, "loss": 1.1471, "step": 5866000 }, { "epoch": 3.52, "learning_rate": 3.1412347437502024e-05, "loss": 1.1312, "step": 5866500 }, { "epoch": 3.52, "learning_rate": 3.141024747194146e-05, "loss": 1.1229, "step": 5867000 }, { "epoch": 3.52, "learning_rate": 3.140814750638089e-05, "loss": 1.1285, "step": 5867500 }, { "epoch": 3.52, "learning_rate": 3.140604754082033e-05, "loss": 1.1751, "step": 5868000 }, { "epoch": 3.52, "learning_rate": 3.1403951775190884e-05, "loss": 1.1562, "step": 5868500 }, { "epoch": 3.52, "learning_rate": 3.140185180963032e-05, "loss": 1.1648, "step": 5869000 }, { "epoch": 3.52, "learning_rate": 3.139975184406976e-05, "loss": 1.1274, "step": 5869500 }, { "epoch": 3.52, "learning_rate": 3.139765187850919e-05, "loss": 1.1679, "step": 5870000 }, { "epoch": 3.52, "learning_rate": 3.1395551912948625e-05, "loss": 1.1176, "step": 5870500 }, { "epoch": 3.52, "learning_rate": 3.1393451947388065e-05, "loss": 1.1843, "step": 5871000 }, { "epoch": 3.52, "learning_rate": 3.13913519818275e-05, "loss": 1.1483, "step": 5871500 }, { "epoch": 3.52, "learning_rate": 3.138925621619805e-05, "loss": 1.1767, "step": 5872000 }, { "epoch": 3.52, "learning_rate": 3.1387156250637486e-05, "loss": 1.1732, "step": 5872500 }, { "epoch": 3.52, "learning_rate": 3.1385056285076926e-05, "loss": 1.1732, "step": 5873000 }, { "epoch": 3.52, "learning_rate": 3.138295631951636e-05, "loss": 1.16, "step": 5873500 }, { "epoch": 3.52, "learning_rate": 3.138085635395579e-05, "loss": 1.1241, "step": 5874000 }, { "epoch": 3.52, "learning_rate": 3.137875638839523e-05, "loss": 1.1589, "step": 5874500 }, { "epoch": 3.52, "learning_rate": 3.1376656422834667e-05, "loss": 1.1451, "step": 5875000 }, { "epoch": 3.52, "learning_rate": 3.13745564572741e-05, "loss": 1.1634, "step": 5875500 }, { "epoch": 3.52, "learning_rate": 3.1372460691644654e-05, "loss": 1.1576, "step": 5876000 }, { "epoch": 3.52, "learning_rate": 3.1370360726084094e-05, "loss": 1.1629, "step": 5876500 }, { "epoch": 3.52, "learning_rate": 3.136826496045465e-05, "loss": 1.1765, "step": 5877000 }, { "epoch": 3.52, "learning_rate": 3.136616499489408e-05, "loss": 1.1559, "step": 5877500 }, { "epoch": 3.52, "learning_rate": 3.136406502933352e-05, "loss": 1.169, "step": 5878000 }, { "epoch": 3.52, "learning_rate": 3.1361965063772955e-05, "loss": 1.1637, "step": 5878500 }, { "epoch": 3.52, "learning_rate": 3.135986509821239e-05, "loss": 1.1497, "step": 5879000 }, { "epoch": 3.52, "learning_rate": 3.135776513265183e-05, "loss": 1.1297, "step": 5879500 }, { "epoch": 3.53, "learning_rate": 3.135566516709126e-05, "loss": 1.1368, "step": 5880000 }, { "epoch": 3.53, "learning_rate": 3.1353565201530695e-05, "loss": 1.1574, "step": 5880500 }, { "epoch": 3.53, "learning_rate": 3.135146523597013e-05, "loss": 1.1638, "step": 5881000 }, { "epoch": 3.53, "learning_rate": 3.134936527040956e-05, "loss": 1.1624, "step": 5881500 }, { "epoch": 3.53, "learning_rate": 3.1347265304848996e-05, "loss": 1.1721, "step": 5882000 }, { "epoch": 3.53, "learning_rate": 3.1345165339288436e-05, "loss": 1.1526, "step": 5882500 }, { "epoch": 3.53, "learning_rate": 3.1343069573658996e-05, "loss": 1.1727, "step": 5883000 }, { "epoch": 3.53, "learning_rate": 3.134096960809842e-05, "loss": 1.1446, "step": 5883500 }, { "epoch": 3.53, "learning_rate": 3.1338869642537856e-05, "loss": 1.1466, "step": 5884000 }, { "epoch": 3.53, "learning_rate": 3.1336769676977297e-05, "loss": 1.1409, "step": 5884500 }, { "epoch": 3.53, "learning_rate": 3.133467391134786e-05, "loss": 1.1721, "step": 5885000 }, { "epoch": 3.53, "learning_rate": 3.133257394578729e-05, "loss": 1.1691, "step": 5885500 }, { "epoch": 3.53, "learning_rate": 3.1330473980226724e-05, "loss": 1.1641, "step": 5886000 }, { "epoch": 3.53, "learning_rate": 3.132837401466616e-05, "loss": 1.1508, "step": 5886500 }, { "epoch": 3.53, "learning_rate": 3.132627404910559e-05, "loss": 1.1638, "step": 5887000 }, { "epoch": 3.53, "learning_rate": 3.132417408354503e-05, "loss": 1.16, "step": 5887500 }, { "epoch": 3.53, "learning_rate": 3.1322074117984464e-05, "loss": 1.1597, "step": 5888000 }, { "epoch": 3.53, "learning_rate": 3.131997835235502e-05, "loss": 1.146, "step": 5888500 }, { "epoch": 3.53, "learning_rate": 3.131787838679445e-05, "loss": 1.1556, "step": 5889000 }, { "epoch": 3.53, "learning_rate": 3.131577842123389e-05, "loss": 1.1483, "step": 5889500 }, { "epoch": 3.53, "learning_rate": 3.1313678455673325e-05, "loss": 1.1471, "step": 5890000 }, { "epoch": 3.53, "learning_rate": 3.131157849011276e-05, "loss": 1.1895, "step": 5890500 }, { "epoch": 3.53, "learning_rate": 3.13094785245522e-05, "loss": 1.1553, "step": 5891000 }, { "epoch": 3.53, "learning_rate": 3.130738275892275e-05, "loss": 1.1956, "step": 5891500 }, { "epoch": 3.53, "learning_rate": 3.1305282793362186e-05, "loss": 1.1658, "step": 5892000 }, { "epoch": 3.53, "learning_rate": 3.1303182827801626e-05, "loss": 1.1574, "step": 5892500 }, { "epoch": 3.53, "learning_rate": 3.130108286224106e-05, "loss": 1.1656, "step": 5893000 }, { "epoch": 3.53, "learning_rate": 3.129898709661161e-05, "loss": 1.148, "step": 5893500 }, { "epoch": 3.53, "learning_rate": 3.1296887131051047e-05, "loss": 1.1602, "step": 5894000 }, { "epoch": 3.53, "learning_rate": 3.129479136542161e-05, "loss": 1.1267, "step": 5894500 }, { "epoch": 3.53, "learning_rate": 3.129269139986105e-05, "loss": 1.1904, "step": 5895000 }, { "epoch": 3.53, "learning_rate": 3.1290591434300474e-05, "loss": 1.137, "step": 5895500 }, { "epoch": 3.53, "learning_rate": 3.128849146873991e-05, "loss": 1.158, "step": 5896000 }, { "epoch": 3.54, "learning_rate": 3.128639150317935e-05, "loss": 1.1577, "step": 5896500 }, { "epoch": 3.54, "learning_rate": 3.128429153761878e-05, "loss": 1.1484, "step": 5897000 }, { "epoch": 3.54, "learning_rate": 3.1282191572058214e-05, "loss": 1.1578, "step": 5897500 }, { "epoch": 3.54, "learning_rate": 3.1280091606497655e-05, "loss": 1.1826, "step": 5898000 }, { "epoch": 3.54, "learning_rate": 3.127799164093709e-05, "loss": 1.1629, "step": 5898500 }, { "epoch": 3.54, "learning_rate": 3.127589167537652e-05, "loss": 1.1477, "step": 5899000 }, { "epoch": 3.54, "learning_rate": 3.127379590974708e-05, "loss": 1.1423, "step": 5899500 }, { "epoch": 3.54, "learning_rate": 3.1271695944186515e-05, "loss": 1.2008, "step": 5900000 }, { "epoch": 3.54, "eval_loss": 1.1256732940673828, "eval_runtime": 1107.8778, "eval_samples_per_second": 475.431, "eval_steps_per_second": 79.239, "step": 5900000 }, { "epoch": 3.54, "learning_rate": 3.126959597862595e-05, "loss": 1.1658, "step": 5900500 }, { "epoch": 3.54, "learning_rate": 3.126749601306539e-05, "loss": 1.1671, "step": 5901000 }, { "epoch": 3.54, "learning_rate": 3.126539604750482e-05, "loss": 1.1394, "step": 5901500 }, { "epoch": 3.54, "learning_rate": 3.1263296081944256e-05, "loss": 1.1411, "step": 5902000 }, { "epoch": 3.54, "learning_rate": 3.126120031631481e-05, "loss": 1.1527, "step": 5902500 }, { "epoch": 3.54, "learning_rate": 3.125910035075425e-05, "loss": 1.102, "step": 5903000 }, { "epoch": 3.54, "learning_rate": 3.125700038519368e-05, "loss": 1.1446, "step": 5903500 }, { "epoch": 3.54, "learning_rate": 3.125490041963312e-05, "loss": 1.1394, "step": 5904000 }, { "epoch": 3.54, "learning_rate": 3.125280465400367e-05, "loss": 1.1653, "step": 5904500 }, { "epoch": 3.54, "learning_rate": 3.125070468844311e-05, "loss": 1.1468, "step": 5905000 }, { "epoch": 3.54, "learning_rate": 3.1248604722882544e-05, "loss": 1.1894, "step": 5905500 }, { "epoch": 3.54, "learning_rate": 3.124650475732198e-05, "loss": 1.1312, "step": 5906000 }, { "epoch": 3.54, "learning_rate": 3.124440479176142e-05, "loss": 1.1552, "step": 5906500 }, { "epoch": 3.54, "learning_rate": 3.124230482620085e-05, "loss": 1.1743, "step": 5907000 }, { "epoch": 3.54, "learning_rate": 3.1240204860640285e-05, "loss": 1.1693, "step": 5907500 }, { "epoch": 3.54, "learning_rate": 3.123810489507972e-05, "loss": 1.1624, "step": 5908000 }, { "epoch": 3.54, "learning_rate": 3.123600912945028e-05, "loss": 1.1454, "step": 5908500 }, { "epoch": 3.54, "learning_rate": 3.123390916388971e-05, "loss": 1.1572, "step": 5909000 }, { "epoch": 3.54, "learning_rate": 3.123180919832915e-05, "loss": 1.1449, "step": 5909500 }, { "epoch": 3.54, "learning_rate": 3.1229713432699706e-05, "loss": 1.1558, "step": 5910000 }, { "epoch": 3.54, "learning_rate": 3.122761346713914e-05, "loss": 1.1809, "step": 5910500 }, { "epoch": 3.54, "learning_rate": 3.122551350157857e-05, "loss": 1.1526, "step": 5911000 }, { "epoch": 3.54, "learning_rate": 3.122341353601801e-05, "loss": 1.1782, "step": 5911500 }, { "epoch": 3.54, "learning_rate": 3.1221317770388566e-05, "loss": 1.171, "step": 5912000 }, { "epoch": 3.54, "learning_rate": 3.1219217804828e-05, "loss": 1.149, "step": 5912500 }, { "epoch": 3.55, "learning_rate": 3.1217117839267433e-05, "loss": 1.1716, "step": 5913000 }, { "epoch": 3.55, "learning_rate": 3.1215022073637994e-05, "loss": 1.1784, "step": 5913500 }, { "epoch": 3.55, "learning_rate": 3.121292210807743e-05, "loss": 1.1595, "step": 5914000 }, { "epoch": 3.55, "learning_rate": 3.121082214251686e-05, "loss": 1.1569, "step": 5914500 }, { "epoch": 3.55, "learning_rate": 3.12087221769563e-05, "loss": 1.1631, "step": 5915000 }, { "epoch": 3.55, "learning_rate": 3.1206622211395734e-05, "loss": 1.1243, "step": 5915500 }, { "epoch": 3.55, "learning_rate": 3.120452224583517e-05, "loss": 1.153, "step": 5916000 }, { "epoch": 3.55, "learning_rate": 3.120242228027461e-05, "loss": 1.1717, "step": 5916500 }, { "epoch": 3.55, "learning_rate": 3.120032231471404e-05, "loss": 1.1821, "step": 5917000 }, { "epoch": 3.55, "learning_rate": 3.119822234915347e-05, "loss": 1.1428, "step": 5917500 }, { "epoch": 3.55, "learning_rate": 3.119612238359291e-05, "loss": 1.1571, "step": 5918000 }, { "epoch": 3.55, "learning_rate": 3.119402661796347e-05, "loss": 1.1713, "step": 5918500 }, { "epoch": 3.55, "learning_rate": 3.11919266524029e-05, "loss": 1.1632, "step": 5919000 }, { "epoch": 3.55, "learning_rate": 3.1189826686842336e-05, "loss": 1.1444, "step": 5919500 }, { "epoch": 3.55, "learning_rate": 3.118772672128177e-05, "loss": 1.1484, "step": 5920000 }, { "epoch": 3.55, "learning_rate": 3.11856267557212e-05, "loss": 1.1473, "step": 5920500 }, { "epoch": 3.55, "learning_rate": 3.1183526790160636e-05, "loss": 1.1684, "step": 5921000 }, { "epoch": 3.55, "learning_rate": 3.1181426824600076e-05, "loss": 1.1445, "step": 5921500 }, { "epoch": 3.55, "learning_rate": 3.117932685903951e-05, "loss": 1.142, "step": 5922000 }, { "epoch": 3.55, "learning_rate": 3.117723109341006e-05, "loss": 1.1717, "step": 5922500 }, { "epoch": 3.55, "learning_rate": 3.1175135327780624e-05, "loss": 1.1472, "step": 5923000 }, { "epoch": 3.55, "learning_rate": 3.1173035362220064e-05, "loss": 1.1487, "step": 5923500 }, { "epoch": 3.55, "learning_rate": 3.117093959659062e-05, "loss": 1.1424, "step": 5924000 }, { "epoch": 3.55, "learning_rate": 3.116883963103005e-05, "loss": 1.1648, "step": 5924500 }, { "epoch": 3.55, "learning_rate": 3.1166739665469484e-05, "loss": 1.1606, "step": 5925000 }, { "epoch": 3.55, "learning_rate": 3.1164639699908925e-05, "loss": 1.1984, "step": 5925500 }, { "epoch": 3.55, "learning_rate": 3.116253973434836e-05, "loss": 1.1892, "step": 5926000 }, { "epoch": 3.55, "learning_rate": 3.116043976878779e-05, "loss": 1.1595, "step": 5926500 }, { "epoch": 3.55, "learning_rate": 3.1158339803227225e-05, "loss": 1.1622, "step": 5927000 }, { "epoch": 3.55, "learning_rate": 3.115623983766666e-05, "loss": 1.1702, "step": 5927500 }, { "epoch": 3.55, "learning_rate": 3.115413987210609e-05, "loss": 1.1708, "step": 5928000 }, { "epoch": 3.55, "learning_rate": 3.115203990654553e-05, "loss": 1.1384, "step": 5928500 }, { "epoch": 3.55, "learning_rate": 3.1149939940984966e-05, "loss": 1.1754, "step": 5929000 }, { "epoch": 3.55, "learning_rate": 3.11478399754244e-05, "loss": 1.1443, "step": 5929500 }, { "epoch": 3.56, "learning_rate": 3.114574420979496e-05, "loss": 1.174, "step": 5930000 }, { "epoch": 3.56, "learning_rate": 3.114364424423439e-05, "loss": 1.148, "step": 5930500 }, { "epoch": 3.56, "learning_rate": 3.1141544278673826e-05, "loss": 1.1494, "step": 5931000 }, { "epoch": 3.56, "learning_rate": 3.1139444313113267e-05, "loss": 1.1627, "step": 5931500 }, { "epoch": 3.56, "learning_rate": 3.113734854748382e-05, "loss": 1.1867, "step": 5932000 }, { "epoch": 3.56, "learning_rate": 3.113525278185438e-05, "loss": 1.1328, "step": 5932500 }, { "epoch": 3.56, "learning_rate": 3.1133152816293814e-05, "loss": 1.1611, "step": 5933000 }, { "epoch": 3.56, "learning_rate": 3.113105285073325e-05, "loss": 1.1465, "step": 5933500 }, { "epoch": 3.56, "learning_rate": 3.112895288517269e-05, "loss": 1.1771, "step": 5934000 }, { "epoch": 3.56, "learning_rate": 3.112685711954324e-05, "loss": 1.1597, "step": 5934500 }, { "epoch": 3.56, "learning_rate": 3.1124757153982675e-05, "loss": 1.1522, "step": 5935000 }, { "epoch": 3.56, "learning_rate": 3.112265718842211e-05, "loss": 1.1517, "step": 5935500 }, { "epoch": 3.56, "learning_rate": 3.112055722286155e-05, "loss": 1.1615, "step": 5936000 }, { "epoch": 3.56, "learning_rate": 3.1118457257300975e-05, "loss": 1.1654, "step": 5936500 }, { "epoch": 3.56, "learning_rate": 3.1116357291740415e-05, "loss": 1.1515, "step": 5937000 }, { "epoch": 3.56, "learning_rate": 3.1114261526110976e-05, "loss": 1.1499, "step": 5937500 }, { "epoch": 3.56, "learning_rate": 3.111216156055041e-05, "loss": 1.1658, "step": 5938000 }, { "epoch": 3.56, "learning_rate": 3.111006159498984e-05, "loss": 1.1458, "step": 5938500 }, { "epoch": 3.56, "learning_rate": 3.1107961629429276e-05, "loss": 1.1981, "step": 5939000 }, { "epoch": 3.56, "learning_rate": 3.110586166386871e-05, "loss": 1.1786, "step": 5939500 }, { "epoch": 3.56, "learning_rate": 3.110376169830814e-05, "loss": 1.1494, "step": 5940000 }, { "epoch": 3.56, "learning_rate": 3.110166173274758e-05, "loss": 1.1672, "step": 5940500 }, { "epoch": 3.56, "learning_rate": 3.1099565967118144e-05, "loss": 1.1746, "step": 5941000 }, { "epoch": 3.56, "learning_rate": 3.109746600155757e-05, "loss": 1.1339, "step": 5941500 }, { "epoch": 3.56, "learning_rate": 3.1095366035997004e-05, "loss": 1.1534, "step": 5942000 }, { "epoch": 3.56, "learning_rate": 3.1093266070436444e-05, "loss": 1.1508, "step": 5942500 }, { "epoch": 3.56, "learning_rate": 3.1091170304807004e-05, "loss": 1.1381, "step": 5943000 }, { "epoch": 3.56, "learning_rate": 3.108907033924644e-05, "loss": 1.1895, "step": 5943500 }, { "epoch": 3.56, "learning_rate": 3.108697037368587e-05, "loss": 1.1562, "step": 5944000 }, { "epoch": 3.56, "learning_rate": 3.1084870408125305e-05, "loss": 1.1579, "step": 5944500 }, { "epoch": 3.56, "learning_rate": 3.1082774642495865e-05, "loss": 1.1596, "step": 5945000 }, { "epoch": 3.56, "learning_rate": 3.10806746769353e-05, "loss": 1.1483, "step": 5945500 }, { "epoch": 3.56, "learning_rate": 3.107857471137473e-05, "loss": 1.1724, "step": 5946000 }, { "epoch": 3.57, "learning_rate": 3.1076474745814165e-05, "loss": 1.1667, "step": 5946500 }, { "epoch": 3.57, "learning_rate": 3.10743747802536e-05, "loss": 1.1498, "step": 5947000 }, { "epoch": 3.57, "learning_rate": 3.107227481469304e-05, "loss": 1.1598, "step": 5947500 }, { "epoch": 3.57, "learning_rate": 3.107017484913247e-05, "loss": 1.1706, "step": 5948000 }, { "epoch": 3.57, "learning_rate": 3.1068074883571906e-05, "loss": 1.1771, "step": 5948500 }, { "epoch": 3.57, "learning_rate": 3.1065974918011346e-05, "loss": 1.161, "step": 5949000 }, { "epoch": 3.57, "learning_rate": 3.106387495245078e-05, "loss": 1.1309, "step": 5949500 }, { "epoch": 3.57, "learning_rate": 3.106177498689021e-05, "loss": 1.1689, "step": 5950000 }, { "epoch": 3.57, "learning_rate": 3.1059675021329653e-05, "loss": 1.1624, "step": 5950500 }, { "epoch": 3.57, "learning_rate": 3.105757925570021e-05, "loss": 1.1674, "step": 5951000 }, { "epoch": 3.57, "learning_rate": 3.105547929013964e-05, "loss": 1.1829, "step": 5951500 }, { "epoch": 3.57, "learning_rate": 3.105337932457908e-05, "loss": 1.1584, "step": 5952000 }, { "epoch": 3.57, "learning_rate": 3.1051279359018514e-05, "loss": 1.1646, "step": 5952500 }, { "epoch": 3.57, "learning_rate": 3.104917939345795e-05, "loss": 1.1595, "step": 5953000 }, { "epoch": 3.57, "learning_rate": 3.104707942789739e-05, "loss": 1.1445, "step": 5953500 }, { "epoch": 3.57, "learning_rate": 3.1044979462336815e-05, "loss": 1.1636, "step": 5954000 }, { "epoch": 3.57, "learning_rate": 3.1042883696707375e-05, "loss": 1.1679, "step": 5954500 }, { "epoch": 3.57, "learning_rate": 3.104078373114681e-05, "loss": 1.1605, "step": 5955000 }, { "epoch": 3.57, "learning_rate": 3.103868376558625e-05, "loss": 1.1737, "step": 5955500 }, { "epoch": 3.57, "learning_rate": 3.103658380002568e-05, "loss": 1.1143, "step": 5956000 }, { "epoch": 3.57, "learning_rate": 3.103448383446511e-05, "loss": 1.1539, "step": 5956500 }, { "epoch": 3.57, "learning_rate": 3.103238386890455e-05, "loss": 1.1602, "step": 5957000 }, { "epoch": 3.57, "learning_rate": 3.103028390334398e-05, "loss": 1.1507, "step": 5957500 }, { "epoch": 3.57, "learning_rate": 3.1028183937783416e-05, "loss": 1.1976, "step": 5958000 }, { "epoch": 3.57, "learning_rate": 3.1026088172153976e-05, "loss": 1.1228, "step": 5958500 }, { "epoch": 3.57, "learning_rate": 3.1023992406524537e-05, "loss": 1.1585, "step": 5959000 }, { "epoch": 3.57, "learning_rate": 3.102189244096397e-05, "loss": 1.1258, "step": 5959500 }, { "epoch": 3.57, "learning_rate": 3.1019792475403403e-05, "loss": 1.1572, "step": 5960000 }, { "epoch": 3.57, "learning_rate": 3.1017692509842844e-05, "loss": 1.1701, "step": 5960500 }, { "epoch": 3.57, "learning_rate": 3.101559254428228e-05, "loss": 1.1691, "step": 5961000 }, { "epoch": 3.57, "learning_rate": 3.1013492578721704e-05, "loss": 1.1618, "step": 5961500 }, { "epoch": 3.57, "learning_rate": 3.1011396813092264e-05, "loss": 1.1777, "step": 5962000 }, { "epoch": 3.57, "learning_rate": 3.1009296847531704e-05, "loss": 1.1518, "step": 5962500 }, { "epoch": 3.58, "learning_rate": 3.100719688197114e-05, "loss": 1.166, "step": 5963000 }, { "epoch": 3.58, "learning_rate": 3.1005096916410565e-05, "loss": 1.1649, "step": 5963500 }, { "epoch": 3.58, "learning_rate": 3.1002996950850005e-05, "loss": 1.166, "step": 5964000 }, { "epoch": 3.58, "learning_rate": 3.100089698528944e-05, "loss": 1.1495, "step": 5964500 }, { "epoch": 3.58, "learning_rate": 3.099879701972887e-05, "loss": 1.1867, "step": 5965000 }, { "epoch": 3.58, "learning_rate": 3.099670125409943e-05, "loss": 1.1511, "step": 5965500 }, { "epoch": 3.58, "learning_rate": 3.0994601288538866e-05, "loss": 1.1707, "step": 5966000 }, { "epoch": 3.58, "learning_rate": 3.09925013229783e-05, "loss": 1.1873, "step": 5966500 }, { "epoch": 3.58, "learning_rate": 3.099040135741774e-05, "loss": 1.1604, "step": 5967000 }, { "epoch": 3.58, "learning_rate": 3.098830139185717e-05, "loss": 1.1714, "step": 5967500 }, { "epoch": 3.58, "learning_rate": 3.0986201426296606e-05, "loss": 1.1375, "step": 5968000 }, { "epoch": 3.58, "learning_rate": 3.0984101460736046e-05, "loss": 1.1387, "step": 5968500 }, { "epoch": 3.58, "learning_rate": 3.09820056951066e-05, "loss": 1.1694, "step": 5969000 }, { "epoch": 3.58, "learning_rate": 3.0979905729546033e-05, "loss": 1.1235, "step": 5969500 }, { "epoch": 3.58, "learning_rate": 3.097780576398547e-05, "loss": 1.1716, "step": 5970000 }, { "epoch": 3.58, "learning_rate": 3.097570579842491e-05, "loss": 1.153, "step": 5970500 }, { "epoch": 3.58, "learning_rate": 3.097360583286434e-05, "loss": 1.1445, "step": 5971000 }, { "epoch": 3.58, "learning_rate": 3.0971505867303774e-05, "loss": 1.1352, "step": 5971500 }, { "epoch": 3.58, "learning_rate": 3.0969405901743214e-05, "loss": 1.1645, "step": 5972000 }, { "epoch": 3.58, "learning_rate": 3.096730593618265e-05, "loss": 1.1357, "step": 5972500 }, { "epoch": 3.58, "learning_rate": 3.09652101705532e-05, "loss": 1.1388, "step": 5973000 }, { "epoch": 3.58, "learning_rate": 3.0963110204992635e-05, "loss": 1.1792, "step": 5973500 }, { "epoch": 3.58, "learning_rate": 3.0961010239432075e-05, "loss": 1.1273, "step": 5974000 }, { "epoch": 3.58, "learning_rate": 3.095891027387151e-05, "loss": 1.1706, "step": 5974500 }, { "epoch": 3.58, "learning_rate": 3.095681450824206e-05, "loss": 1.1705, "step": 5975000 }, { "epoch": 3.58, "learning_rate": 3.0954718742612616e-05, "loss": 1.1275, "step": 5975500 }, { "epoch": 3.58, "learning_rate": 3.0952618777052056e-05, "loss": 1.1598, "step": 5976000 }, { "epoch": 3.58, "learning_rate": 3.095051881149149e-05, "loss": 1.1587, "step": 5976500 }, { "epoch": 3.58, "learning_rate": 3.094841884593092e-05, "loss": 1.1657, "step": 5977000 }, { "epoch": 3.58, "learning_rate": 3.094632308030148e-05, "loss": 1.1642, "step": 5977500 }, { "epoch": 3.58, "learning_rate": 3.0944223114740917e-05, "loss": 1.1787, "step": 5978000 }, { "epoch": 3.58, "learning_rate": 3.094212314918035e-05, "loss": 1.1465, "step": 5978500 }, { "epoch": 3.58, "learning_rate": 3.0940023183619783e-05, "loss": 1.1526, "step": 5979000 }, { "epoch": 3.58, "learning_rate": 3.0937927417990344e-05, "loss": 1.1358, "step": 5979500 }, { "epoch": 3.59, "learning_rate": 3.0935827452429784e-05, "loss": 1.1675, "step": 5980000 }, { "epoch": 3.59, "learning_rate": 3.093372748686921e-05, "loss": 1.1751, "step": 5980500 }, { "epoch": 3.59, "learning_rate": 3.093162752130865e-05, "loss": 1.1717, "step": 5981000 }, { "epoch": 3.59, "learning_rate": 3.0929527555748084e-05, "loss": 1.154, "step": 5981500 }, { "epoch": 3.59, "learning_rate": 3.092742759018752e-05, "loss": 1.1558, "step": 5982000 }, { "epoch": 3.59, "learning_rate": 3.092532762462696e-05, "loss": 1.1396, "step": 5982500 }, { "epoch": 3.59, "learning_rate": 3.092322765906639e-05, "loss": 1.1186, "step": 5983000 }, { "epoch": 3.59, "learning_rate": 3.0921131893436945e-05, "loss": 1.164, "step": 5983500 }, { "epoch": 3.59, "learning_rate": 3.091903192787638e-05, "loss": 1.162, "step": 5984000 }, { "epoch": 3.59, "learning_rate": 3.091693196231582e-05, "loss": 1.1591, "step": 5984500 }, { "epoch": 3.59, "learning_rate": 3.091483199675525e-05, "loss": 1.1537, "step": 5985000 }, { "epoch": 3.59, "learning_rate": 3.091274043105693e-05, "loss": 1.1672, "step": 5985500 }, { "epoch": 3.59, "learning_rate": 3.0910640465496366e-05, "loss": 1.1763, "step": 5986000 }, { "epoch": 3.59, "learning_rate": 3.09085404999358e-05, "loss": 1.1476, "step": 5986500 }, { "epoch": 3.59, "learning_rate": 3.090644053437524e-05, "loss": 1.1415, "step": 5987000 }, { "epoch": 3.59, "learning_rate": 3.090434056881467e-05, "loss": 1.1441, "step": 5987500 }, { "epoch": 3.59, "learning_rate": 3.090224480318523e-05, "loss": 1.1301, "step": 5988000 }, { "epoch": 3.59, "learning_rate": 3.090014483762467e-05, "loss": 1.1673, "step": 5988500 }, { "epoch": 3.59, "learning_rate": 3.08980448720641e-05, "loss": 1.1652, "step": 5989000 }, { "epoch": 3.59, "learning_rate": 3.0895949106434654e-05, "loss": 1.1336, "step": 5989500 }, { "epoch": 3.59, "learning_rate": 3.089384914087409e-05, "loss": 1.1357, "step": 5990000 }, { "epoch": 3.59, "learning_rate": 3.089174917531353e-05, "loss": 1.172, "step": 5990500 }, { "epoch": 3.59, "learning_rate": 3.088964920975296e-05, "loss": 1.1875, "step": 5991000 }, { "epoch": 3.59, "learning_rate": 3.0887549244192395e-05, "loss": 1.1785, "step": 5991500 }, { "epoch": 3.59, "learning_rate": 3.0885449278631835e-05, "loss": 1.1766, "step": 5992000 }, { "epoch": 3.59, "learning_rate": 3.088334931307126e-05, "loss": 1.1447, "step": 5992500 }, { "epoch": 3.59, "learning_rate": 3.0881249347510695e-05, "loss": 1.1534, "step": 5993000 }, { "epoch": 3.59, "learning_rate": 3.0879149381950135e-05, "loss": 1.1705, "step": 5993500 }, { "epoch": 3.59, "learning_rate": 3.087704941638957e-05, "loss": 1.1489, "step": 5994000 }, { "epoch": 3.59, "learning_rate": 3.0874949450829e-05, "loss": 1.1322, "step": 5994500 }, { "epoch": 3.59, "learning_rate": 3.087284948526844e-05, "loss": 1.1457, "step": 5995000 }, { "epoch": 3.59, "learning_rate": 3.0870753719638996e-05, "loss": 1.156, "step": 5995500 }, { "epoch": 3.59, "learning_rate": 3.0868657954009557e-05, "loss": 1.1486, "step": 5996000 }, { "epoch": 3.6, "learning_rate": 3.086655798844899e-05, "loss": 1.1583, "step": 5996500 }, { "epoch": 3.6, "learning_rate": 3.0864458022888423e-05, "loss": 1.1577, "step": 5997000 }, { "epoch": 3.6, "learning_rate": 3.086235805732786e-05, "loss": 1.1596, "step": 5997500 }, { "epoch": 3.6, "learning_rate": 3.086025809176729e-05, "loss": 1.1564, "step": 5998000 }, { "epoch": 3.6, "learning_rate": 3.085815812620673e-05, "loss": 1.1541, "step": 5998500 }, { "epoch": 3.6, "learning_rate": 3.0856058160646164e-05, "loss": 1.1695, "step": 5999000 }, { "epoch": 3.6, "learning_rate": 3.08539581950856e-05, "loss": 1.1244, "step": 5999500 }, { "epoch": 3.6, "learning_rate": 3.085186242945615e-05, "loss": 1.1495, "step": 6000000 }, { "epoch": 3.6, "eval_loss": 1.1201045513153076, "eval_runtime": 1104.3731, "eval_samples_per_second": 476.94, "eval_steps_per_second": 79.49, "step": 6000000 }, { "epoch": 3.6, "learning_rate": 3.084976246389559e-05, "loss": 1.1418, "step": 6000500 }, { "epoch": 3.6, "learning_rate": 3.0847662498335025e-05, "loss": 1.1379, "step": 6001000 }, { "epoch": 3.6, "learning_rate": 3.084556253277446e-05, "loss": 1.1534, "step": 6001500 }, { "epoch": 3.6, "learning_rate": 3.08434625672139e-05, "loss": 1.1605, "step": 6002000 }, { "epoch": 3.6, "learning_rate": 3.084136260165333e-05, "loss": 1.162, "step": 6002500 }, { "epoch": 3.6, "learning_rate": 3.083926263609277e-05, "loss": 1.1471, "step": 6003000 }, { "epoch": 3.6, "learning_rate": 3.0837166870463326e-05, "loss": 1.1559, "step": 6003500 }, { "epoch": 3.6, "learning_rate": 3.083506690490276e-05, "loss": 1.1769, "step": 6004000 }, { "epoch": 3.6, "learning_rate": 3.083296693934219e-05, "loss": 1.1522, "step": 6004500 }, { "epoch": 3.6, "learning_rate": 3.083086697378163e-05, "loss": 1.1381, "step": 6005000 }, { "epoch": 3.6, "learning_rate": 3.0828767008221066e-05, "loss": 1.1566, "step": 6005500 }, { "epoch": 3.6, "learning_rate": 3.08266670426605e-05, "loss": 1.1572, "step": 6006000 }, { "epoch": 3.6, "learning_rate": 3.082456707709994e-05, "loss": 1.1549, "step": 6006500 }, { "epoch": 3.6, "learning_rate": 3.0822471311470494e-05, "loss": 1.1552, "step": 6007000 }, { "epoch": 3.6, "learning_rate": 3.082037134590993e-05, "loss": 1.1433, "step": 6007500 }, { "epoch": 3.6, "learning_rate": 3.081827138034936e-05, "loss": 1.1434, "step": 6008000 }, { "epoch": 3.6, "learning_rate": 3.08161714147888e-05, "loss": 1.1637, "step": 6008500 }, { "epoch": 3.6, "learning_rate": 3.0814075649159354e-05, "loss": 1.1689, "step": 6009000 }, { "epoch": 3.6, "learning_rate": 3.081197568359879e-05, "loss": 1.1448, "step": 6009500 }, { "epoch": 3.6, "learning_rate": 3.080987571803823e-05, "loss": 1.1786, "step": 6010000 }, { "epoch": 3.6, "learning_rate": 3.080777575247766e-05, "loss": 1.1791, "step": 6010500 }, { "epoch": 3.6, "learning_rate": 3.0805675786917095e-05, "loss": 1.1565, "step": 6011000 }, { "epoch": 3.6, "learning_rate": 3.0803575821356535e-05, "loss": 1.1375, "step": 6011500 }, { "epoch": 3.6, "learning_rate": 3.080147585579596e-05, "loss": 1.1463, "step": 6012000 }, { "epoch": 3.6, "learning_rate": 3.0799375890235395e-05, "loss": 1.1686, "step": 6012500 }, { "epoch": 3.61, "learning_rate": 3.0797280124605956e-05, "loss": 1.1566, "step": 6013000 }, { "epoch": 3.61, "learning_rate": 3.0795180159045396e-05, "loss": 1.165, "step": 6013500 }, { "epoch": 3.61, "learning_rate": 3.079308019348483e-05, "loss": 1.1428, "step": 6014000 }, { "epoch": 3.61, "learning_rate": 3.079098442785538e-05, "loss": 1.1633, "step": 6014500 }, { "epoch": 3.61, "learning_rate": 3.0788884462294816e-05, "loss": 1.1404, "step": 6015000 }, { "epoch": 3.61, "learning_rate": 3.078678449673426e-05, "loss": 1.1488, "step": 6015500 }, { "epoch": 3.61, "learning_rate": 3.078468453117369e-05, "loss": 1.1348, "step": 6016000 }, { "epoch": 3.61, "learning_rate": 3.0782584565613124e-05, "loss": 1.1345, "step": 6016500 }, { "epoch": 3.61, "learning_rate": 3.078048460005256e-05, "loss": 1.1492, "step": 6017000 }, { "epoch": 3.61, "learning_rate": 3.077838463449199e-05, "loss": 1.1607, "step": 6017500 }, { "epoch": 3.61, "learning_rate": 3.077628886886255e-05, "loss": 1.1618, "step": 6018000 }, { "epoch": 3.61, "learning_rate": 3.077418890330199e-05, "loss": 1.1709, "step": 6018500 }, { "epoch": 3.61, "learning_rate": 3.077208893774142e-05, "loss": 1.1431, "step": 6019000 }, { "epoch": 3.61, "learning_rate": 3.076998897218085e-05, "loss": 1.1381, "step": 6019500 }, { "epoch": 3.61, "learning_rate": 3.076788900662029e-05, "loss": 1.129, "step": 6020000 }, { "epoch": 3.61, "learning_rate": 3.0765789041059725e-05, "loss": 1.1241, "step": 6020500 }, { "epoch": 3.61, "learning_rate": 3.076368907549916e-05, "loss": 1.1424, "step": 6021000 }, { "epoch": 3.61, "learning_rate": 3.07615891099386e-05, "loss": 1.1486, "step": 6021500 }, { "epoch": 3.61, "learning_rate": 3.075949334430915e-05, "loss": 1.1554, "step": 6022000 }, { "epoch": 3.61, "learning_rate": 3.0757393378748586e-05, "loss": 1.1303, "step": 6022500 }, { "epoch": 3.61, "learning_rate": 3.0755297613119146e-05, "loss": 1.1941, "step": 6023000 }, { "epoch": 3.61, "learning_rate": 3.075319764755858e-05, "loss": 1.1647, "step": 6023500 }, { "epoch": 3.61, "learning_rate": 3.075109768199801e-05, "loss": 1.1657, "step": 6024000 }, { "epoch": 3.61, "learning_rate": 3.0748997716437446e-05, "loss": 1.1495, "step": 6024500 }, { "epoch": 3.61, "learning_rate": 3.074689775087689e-05, "loss": 1.1555, "step": 6025000 }, { "epoch": 3.61, "learning_rate": 3.074479778531632e-05, "loss": 1.131, "step": 6025500 }, { "epoch": 3.61, "learning_rate": 3.0742697819755754e-05, "loss": 1.1432, "step": 6026000 }, { "epoch": 3.61, "learning_rate": 3.0740597854195194e-05, "loss": 1.1818, "step": 6026500 }, { "epoch": 3.61, "learning_rate": 3.073850208856575e-05, "loss": 1.1621, "step": 6027000 }, { "epoch": 3.61, "learning_rate": 3.073640212300518e-05, "loss": 1.1507, "step": 6027500 }, { "epoch": 3.61, "learning_rate": 3.073430635737574e-05, "loss": 1.1735, "step": 6028000 }, { "epoch": 3.61, "learning_rate": 3.0732206391815175e-05, "loss": 1.1571, "step": 6028500 }, { "epoch": 3.61, "learning_rate": 3.073010642625461e-05, "loss": 1.1825, "step": 6029000 }, { "epoch": 3.61, "learning_rate": 3.072800646069404e-05, "loss": 1.1564, "step": 6029500 }, { "epoch": 3.62, "learning_rate": 3.0725906495133475e-05, "loss": 1.1501, "step": 6030000 }, { "epoch": 3.62, "learning_rate": 3.0723806529572915e-05, "loss": 1.1777, "step": 6030500 }, { "epoch": 3.62, "learning_rate": 3.0721714963874596e-05, "loss": 1.1715, "step": 6031000 }, { "epoch": 3.62, "learning_rate": 3.071961499831403e-05, "loss": 1.1574, "step": 6031500 }, { "epoch": 3.62, "learning_rate": 3.071751503275346e-05, "loss": 1.1526, "step": 6032000 }, { "epoch": 3.62, "learning_rate": 3.07154150671929e-05, "loss": 1.1695, "step": 6032500 }, { "epoch": 3.62, "learning_rate": 3.0713315101632336e-05, "loss": 1.1318, "step": 6033000 }, { "epoch": 3.62, "learning_rate": 3.071121513607176e-05, "loss": 1.1538, "step": 6033500 }, { "epoch": 3.62, "learning_rate": 3.07091151705112e-05, "loss": 1.1761, "step": 6034000 }, { "epoch": 3.62, "learning_rate": 3.070701520495064e-05, "loss": 1.1666, "step": 6034500 }, { "epoch": 3.62, "learning_rate": 3.070491523939007e-05, "loss": 1.128, "step": 6035000 }, { "epoch": 3.62, "learning_rate": 3.070281947376063e-05, "loss": 1.1584, "step": 6035500 }, { "epoch": 3.62, "learning_rate": 3.0700719508200064e-05, "loss": 1.1697, "step": 6036000 }, { "epoch": 3.62, "learning_rate": 3.06986195426395e-05, "loss": 1.1391, "step": 6036500 }, { "epoch": 3.62, "learning_rate": 3.069651957707893e-05, "loss": 1.1531, "step": 6037000 }, { "epoch": 3.62, "learning_rate": 3.069442381144949e-05, "loss": 1.1702, "step": 6037500 }, { "epoch": 3.62, "learning_rate": 3.069232384588893e-05, "loss": 1.1404, "step": 6038000 }, { "epoch": 3.62, "learning_rate": 3.069022388032836e-05, "loss": 1.158, "step": 6038500 }, { "epoch": 3.62, "learning_rate": 3.06881239147678e-05, "loss": 1.154, "step": 6039000 }, { "epoch": 3.62, "learning_rate": 3.068602394920723e-05, "loss": 1.1648, "step": 6039500 }, { "epoch": 3.62, "learning_rate": 3.0683923983646665e-05, "loss": 1.141, "step": 6040000 }, { "epoch": 3.62, "learning_rate": 3.0681824018086106e-05, "loss": 1.1544, "step": 6040500 }, { "epoch": 3.62, "learning_rate": 3.067972405252554e-05, "loss": 1.1476, "step": 6041000 }, { "epoch": 3.62, "learning_rate": 3.067762828689609e-05, "loss": 1.1499, "step": 6041500 }, { "epoch": 3.62, "learning_rate": 3.0675528321335526e-05, "loss": 1.1424, "step": 6042000 }, { "epoch": 3.62, "learning_rate": 3.0673428355774966e-05, "loss": 1.166, "step": 6042500 }, { "epoch": 3.62, "learning_rate": 3.06713283902144e-05, "loss": 1.1406, "step": 6043000 }, { "epoch": 3.62, "learning_rate": 3.066923262458495e-05, "loss": 1.1702, "step": 6043500 }, { "epoch": 3.62, "learning_rate": 3.0667136858955514e-05, "loss": 1.1587, "step": 6044000 }, { "epoch": 3.62, "learning_rate": 3.066503689339495e-05, "loss": 1.1714, "step": 6044500 }, { "epoch": 3.62, "learning_rate": 3.066293692783439e-05, "loss": 1.1564, "step": 6045000 }, { "epoch": 3.62, "learning_rate": 3.0660836962273814e-05, "loss": 1.1633, "step": 6045500 }, { "epoch": 3.62, "learning_rate": 3.0658736996713254e-05, "loss": 1.1154, "step": 6046000 }, { "epoch": 3.63, "learning_rate": 3.0656641231083815e-05, "loss": 1.1355, "step": 6046500 }, { "epoch": 3.63, "learning_rate": 3.065454126552325e-05, "loss": 1.1485, "step": 6047000 }, { "epoch": 3.63, "learning_rate": 3.065244129996268e-05, "loss": 1.1724, "step": 6047500 }, { "epoch": 3.63, "learning_rate": 3.0650341334402115e-05, "loss": 1.171, "step": 6048000 }, { "epoch": 3.63, "learning_rate": 3.064824136884155e-05, "loss": 1.1207, "step": 6048500 }, { "epoch": 3.63, "learning_rate": 3.064614560321211e-05, "loss": 1.1697, "step": 6049000 }, { "epoch": 3.63, "learning_rate": 3.064404563765154e-05, "loss": 1.1359, "step": 6049500 }, { "epoch": 3.63, "learning_rate": 3.0641945672090976e-05, "loss": 1.1411, "step": 6050000 }, { "epoch": 3.63, "learning_rate": 3.063984570653041e-05, "loss": 1.1821, "step": 6050500 }, { "epoch": 3.63, "learning_rate": 3.063774574096984e-05, "loss": 1.1201, "step": 6051000 }, { "epoch": 3.63, "learning_rate": 3.063564577540928e-05, "loss": 1.1454, "step": 6051500 }, { "epoch": 3.63, "learning_rate": 3.0633545809848716e-05, "loss": 1.1424, "step": 6052000 }, { "epoch": 3.63, "learning_rate": 3.063145004421927e-05, "loss": 1.1445, "step": 6052500 }, { "epoch": 3.63, "learning_rate": 3.062935007865871e-05, "loss": 1.1573, "step": 6053000 }, { "epoch": 3.63, "learning_rate": 3.0627250113098144e-05, "loss": 1.156, "step": 6053500 }, { "epoch": 3.63, "learning_rate": 3.062515014753758e-05, "loss": 1.1538, "step": 6054000 }, { "epoch": 3.63, "learning_rate": 3.062305018197702e-05, "loss": 1.1468, "step": 6054500 }, { "epoch": 3.63, "learning_rate": 3.062095021641645e-05, "loss": 1.1716, "step": 6055000 }, { "epoch": 3.63, "learning_rate": 3.0618850250855884e-05, "loss": 1.1697, "step": 6055500 }, { "epoch": 3.63, "learning_rate": 3.061675448522644e-05, "loss": 1.1826, "step": 6056000 }, { "epoch": 3.63, "learning_rate": 3.061465451966588e-05, "loss": 1.1528, "step": 6056500 }, { "epoch": 3.63, "learning_rate": 3.061255875403644e-05, "loss": 1.1555, "step": 6057000 }, { "epoch": 3.63, "learning_rate": 3.0610458788475865e-05, "loss": 1.1688, "step": 6057500 }, { "epoch": 3.63, "learning_rate": 3.06083588229153e-05, "loss": 1.1609, "step": 6058000 }, { "epoch": 3.63, "learning_rate": 3.060625885735474e-05, "loss": 1.1878, "step": 6058500 }, { "epoch": 3.63, "learning_rate": 3.060415889179417e-05, "loss": 1.1578, "step": 6059000 }, { "epoch": 3.63, "learning_rate": 3.0602058926233606e-05, "loss": 1.1734, "step": 6059500 }, { "epoch": 3.63, "learning_rate": 3.0599958960673046e-05, "loss": 1.1419, "step": 6060000 }, { "epoch": 3.63, "learning_rate": 3.059785899511248e-05, "loss": 1.18, "step": 6060500 }, { "epoch": 3.63, "learning_rate": 3.059576322948303e-05, "loss": 1.1561, "step": 6061000 }, { "epoch": 3.63, "learning_rate": 3.059366326392247e-05, "loss": 1.159, "step": 6061500 }, { "epoch": 3.63, "learning_rate": 3.059156329836191e-05, "loss": 1.1631, "step": 6062000 }, { "epoch": 3.63, "learning_rate": 3.058946333280134e-05, "loss": 1.1621, "step": 6062500 }, { "epoch": 3.64, "learning_rate": 3.058736336724078e-05, "loss": 1.1111, "step": 6063000 }, { "epoch": 3.64, "learning_rate": 3.0585263401680214e-05, "loss": 1.1738, "step": 6063500 }, { "epoch": 3.64, "learning_rate": 3.058316343611965e-05, "loss": 1.1416, "step": 6064000 }, { "epoch": 3.64, "learning_rate": 3.058106347055909e-05, "loss": 1.1578, "step": 6064500 }, { "epoch": 3.64, "learning_rate": 3.057896770492964e-05, "loss": 1.1813, "step": 6065000 }, { "epoch": 3.64, "learning_rate": 3.0576867739369075e-05, "loss": 1.1445, "step": 6065500 }, { "epoch": 3.64, "learning_rate": 3.057476777380851e-05, "loss": 1.1671, "step": 6066000 }, { "epoch": 3.64, "learning_rate": 3.057266780824795e-05, "loss": 1.1427, "step": 6066500 }, { "epoch": 3.64, "learning_rate": 3.05705720426185e-05, "loss": 1.1394, "step": 6067000 }, { "epoch": 3.64, "learning_rate": 3.0568472077057935e-05, "loss": 1.1664, "step": 6067500 }, { "epoch": 3.64, "learning_rate": 3.056637211149737e-05, "loss": 1.1623, "step": 6068000 }, { "epoch": 3.64, "learning_rate": 3.056427214593681e-05, "loss": 1.1842, "step": 6068500 }, { "epoch": 3.64, "learning_rate": 3.056217638030736e-05, "loss": 1.1929, "step": 6069000 }, { "epoch": 3.64, "learning_rate": 3.0560076414746796e-05, "loss": 1.1508, "step": 6069500 }, { "epoch": 3.64, "learning_rate": 3.0557976449186236e-05, "loss": 1.1145, "step": 6070000 }, { "epoch": 3.64, "learning_rate": 3.055588068355679e-05, "loss": 1.1495, "step": 6070500 }, { "epoch": 3.64, "learning_rate": 3.055378071799622e-05, "loss": 1.1704, "step": 6071000 }, { "epoch": 3.64, "learning_rate": 3.055168075243566e-05, "loss": 1.152, "step": 6071500 }, { "epoch": 3.64, "learning_rate": 3.05495807868751e-05, "loss": 1.1681, "step": 6072000 }, { "epoch": 3.64, "learning_rate": 3.054748082131453e-05, "loss": 1.1432, "step": 6072500 }, { "epoch": 3.64, "learning_rate": 3.0545380855753964e-05, "loss": 1.1594, "step": 6073000 }, { "epoch": 3.64, "learning_rate": 3.0543280890193404e-05, "loss": 1.1433, "step": 6073500 }, { "epoch": 3.64, "learning_rate": 3.054118092463284e-05, "loss": 1.1563, "step": 6074000 }, { "epoch": 3.64, "learning_rate": 3.053908095907227e-05, "loss": 1.1349, "step": 6074500 }, { "epoch": 3.64, "learning_rate": 3.0536980993511704e-05, "loss": 1.1558, "step": 6075000 }, { "epoch": 3.64, "learning_rate": 3.053488102795114e-05, "loss": 1.1547, "step": 6075500 }, { "epoch": 3.64, "learning_rate": 3.053278106239058e-05, "loss": 1.1457, "step": 6076000 }, { "epoch": 3.64, "learning_rate": 3.053068529676114e-05, "loss": 1.1422, "step": 6076500 }, { "epoch": 3.64, "learning_rate": 3.0528585331200565e-05, "loss": 1.1602, "step": 6077000 }, { "epoch": 3.64, "learning_rate": 3.052648536564e-05, "loss": 1.1473, "step": 6077500 }, { "epoch": 3.64, "learning_rate": 3.052438540007944e-05, "loss": 1.1515, "step": 6078000 }, { "epoch": 3.64, "learning_rate": 3.052228543451887e-05, "loss": 1.153, "step": 6078500 }, { "epoch": 3.64, "learning_rate": 3.0520185468958306e-05, "loss": 1.136, "step": 6079000 }, { "epoch": 3.64, "learning_rate": 3.051808970332886e-05, "loss": 1.134, "step": 6079500 }, { "epoch": 3.65, "learning_rate": 3.0515989737768296e-05, "loss": 1.1399, "step": 6080000 }, { "epoch": 3.65, "learning_rate": 3.0513893972138857e-05, "loss": 1.1549, "step": 6080500 }, { "epoch": 3.65, "learning_rate": 3.0511794006578293e-05, "loss": 1.138, "step": 6081000 }, { "epoch": 3.65, "learning_rate": 3.050969404101773e-05, "loss": 1.1546, "step": 6081500 }, { "epoch": 3.65, "learning_rate": 3.050759407545716e-05, "loss": 1.1754, "step": 6082000 }, { "epoch": 3.65, "learning_rate": 3.0505494109896594e-05, "loss": 1.1669, "step": 6082500 }, { "epoch": 3.65, "learning_rate": 3.050339414433603e-05, "loss": 1.1386, "step": 6083000 }, { "epoch": 3.65, "learning_rate": 3.0501294178775467e-05, "loss": 1.1635, "step": 6083500 }, { "epoch": 3.65, "learning_rate": 3.04991942132149e-05, "loss": 1.1371, "step": 6084000 }, { "epoch": 3.65, "learning_rate": 3.0497098447585458e-05, "loss": 1.1633, "step": 6084500 }, { "epoch": 3.65, "learning_rate": 3.049499848202489e-05, "loss": 1.1573, "step": 6085000 }, { "epoch": 3.65, "learning_rate": 3.0492898516464328e-05, "loss": 1.1577, "step": 6085500 }, { "epoch": 3.65, "learning_rate": 3.0490798550903765e-05, "loss": 1.1513, "step": 6086000 }, { "epoch": 3.65, "learning_rate": 3.04886985853432e-05, "loss": 1.1378, "step": 6086500 }, { "epoch": 3.65, "learning_rate": 3.0486602819713752e-05, "loss": 1.1809, "step": 6087000 }, { "epoch": 3.65, "learning_rate": 3.048450285415319e-05, "loss": 1.1462, "step": 6087500 }, { "epoch": 3.65, "learning_rate": 3.0482402888592626e-05, "loss": 1.1566, "step": 6088000 }, { "epoch": 3.65, "learning_rate": 3.0480302923032063e-05, "loss": 1.1596, "step": 6088500 }, { "epoch": 3.65, "learning_rate": 3.0478202957471496e-05, "loss": 1.1632, "step": 6089000 }, { "epoch": 3.65, "learning_rate": 3.0476102991910933e-05, "loss": 1.1371, "step": 6089500 }, { "epoch": 3.65, "learning_rate": 3.047400302635037e-05, "loss": 1.1612, "step": 6090000 }, { "epoch": 3.65, "learning_rate": 3.0471903060789803e-05, "loss": 1.1379, "step": 6090500 }, { "epoch": 3.65, "learning_rate": 3.0469807295160357e-05, "loss": 1.1444, "step": 6091000 }, { "epoch": 3.65, "learning_rate": 3.0467707329599794e-05, "loss": 1.1628, "step": 6091500 }, { "epoch": 3.65, "learning_rate": 3.046560736403923e-05, "loss": 1.1588, "step": 6092000 }, { "epoch": 3.65, "learning_rate": 3.0463507398478664e-05, "loss": 1.1528, "step": 6092500 }, { "epoch": 3.65, "learning_rate": 3.04614074329181e-05, "loss": 1.1547, "step": 6093000 }, { "epoch": 3.65, "learning_rate": 3.0459311667288654e-05, "loss": 1.1631, "step": 6093500 }, { "epoch": 3.65, "learning_rate": 3.045721170172809e-05, "loss": 1.134, "step": 6094000 }, { "epoch": 3.65, "learning_rate": 3.0455111736167528e-05, "loss": 1.1726, "step": 6094500 }, { "epoch": 3.65, "learning_rate": 3.045301177060696e-05, "loss": 1.1727, "step": 6095000 }, { "epoch": 3.65, "learning_rate": 3.04509118050464e-05, "loss": 1.1599, "step": 6095500 }, { "epoch": 3.65, "learning_rate": 3.0448811839485835e-05, "loss": 1.1706, "step": 6096000 }, { "epoch": 3.66, "learning_rate": 3.044671187392527e-05, "loss": 1.1685, "step": 6096500 }, { "epoch": 3.66, "learning_rate": 3.04446119083647e-05, "loss": 1.1744, "step": 6097000 }, { "epoch": 3.66, "learning_rate": 3.044251614273526e-05, "loss": 1.151, "step": 6097500 }, { "epoch": 3.66, "learning_rate": 3.0440416177174696e-05, "loss": 1.1316, "step": 6098000 }, { "epoch": 3.66, "learning_rate": 3.043832041154525e-05, "loss": 1.1459, "step": 6098500 }, { "epoch": 3.66, "learning_rate": 3.0436220445984686e-05, "loss": 1.1428, "step": 6099000 }, { "epoch": 3.66, "learning_rate": 3.043412048042412e-05, "loss": 1.1525, "step": 6099500 }, { "epoch": 3.66, "learning_rate": 3.0432020514863557e-05, "loss": 1.1382, "step": 6100000 }, { "epoch": 3.66, "eval_loss": 1.120174527168274, "eval_runtime": 1109.1773, "eval_samples_per_second": 474.874, "eval_steps_per_second": 79.146, "step": 6100000 }, { "epoch": 3.66, "learning_rate": 3.0429920549302994e-05, "loss": 1.1469, "step": 6100500 }, { "epoch": 3.66, "learning_rate": 3.0427820583742427e-05, "loss": 1.1104, "step": 6101000 }, { "epoch": 3.66, "learning_rate": 3.0425724818112984e-05, "loss": 1.1603, "step": 6101500 }, { "epoch": 3.66, "learning_rate": 3.0423624852552417e-05, "loss": 1.1347, "step": 6102000 }, { "epoch": 3.66, "learning_rate": 3.0421524886991854e-05, "loss": 1.1423, "step": 6102500 }, { "epoch": 3.66, "learning_rate": 3.0419429121362408e-05, "loss": 1.156, "step": 6103000 }, { "epoch": 3.66, "learning_rate": 3.0417329155801845e-05, "loss": 1.1341, "step": 6103500 }, { "epoch": 3.66, "learning_rate": 3.041522919024128e-05, "loss": 1.1442, "step": 6104000 }, { "epoch": 3.66, "learning_rate": 3.0413129224680715e-05, "loss": 1.1476, "step": 6104500 }, { "epoch": 3.66, "learning_rate": 3.0411029259120152e-05, "loss": 1.1441, "step": 6105000 }, { "epoch": 3.66, "learning_rate": 3.040892929355959e-05, "loss": 1.1622, "step": 6105500 }, { "epoch": 3.66, "learning_rate": 3.0406829327999022e-05, "loss": 1.1632, "step": 6106000 }, { "epoch": 3.66, "learning_rate": 3.0404729362438452e-05, "loss": 1.1527, "step": 6106500 }, { "epoch": 3.66, "learning_rate": 3.0402633596809013e-05, "loss": 1.1411, "step": 6107000 }, { "epoch": 3.66, "learning_rate": 3.040053363124845e-05, "loss": 1.1428, "step": 6107500 }, { "epoch": 3.66, "learning_rate": 3.0398433665687886e-05, "loss": 1.172, "step": 6108000 }, { "epoch": 3.66, "learning_rate": 3.039633370012732e-05, "loss": 1.1856, "step": 6108500 }, { "epoch": 3.66, "learning_rate": 3.039423373456675e-05, "loss": 1.1281, "step": 6109000 }, { "epoch": 3.66, "learning_rate": 3.039213796893731e-05, "loss": 1.1458, "step": 6109500 }, { "epoch": 3.66, "learning_rate": 3.0390038003376747e-05, "loss": 1.1556, "step": 6110000 }, { "epoch": 3.66, "learning_rate": 3.038793803781618e-05, "loss": 1.1614, "step": 6110500 }, { "epoch": 3.66, "learning_rate": 3.0385838072255617e-05, "loss": 1.1637, "step": 6111000 }, { "epoch": 3.66, "learning_rate": 3.038374230662617e-05, "loss": 1.142, "step": 6111500 }, { "epoch": 3.66, "learning_rate": 3.0381642341065608e-05, "loss": 1.1465, "step": 6112000 }, { "epoch": 3.66, "learning_rate": 3.0379542375505045e-05, "loss": 1.1742, "step": 6112500 }, { "epoch": 3.66, "learning_rate": 3.0377442409944478e-05, "loss": 1.1331, "step": 6113000 }, { "epoch": 3.67, "learning_rate": 3.0375342444383908e-05, "loss": 1.1521, "step": 6113500 }, { "epoch": 3.67, "learning_rate": 3.037324667875447e-05, "loss": 1.1347, "step": 6114000 }, { "epoch": 3.67, "learning_rate": 3.0371146713193905e-05, "loss": 1.1877, "step": 6114500 }, { "epoch": 3.67, "learning_rate": 3.0369046747633342e-05, "loss": 1.1685, "step": 6115000 }, { "epoch": 3.67, "learning_rate": 3.0366946782072776e-05, "loss": 1.1454, "step": 6115500 }, { "epoch": 3.67, "learning_rate": 3.0364846816512206e-05, "loss": 1.1869, "step": 6116000 }, { "epoch": 3.67, "learning_rate": 3.0362755250813886e-05, "loss": 1.1657, "step": 6116500 }, { "epoch": 3.67, "learning_rate": 3.036065528525332e-05, "loss": 1.1685, "step": 6117000 }, { "epoch": 3.67, "learning_rate": 3.0358555319692756e-05, "loss": 1.1543, "step": 6117500 }, { "epoch": 3.67, "learning_rate": 3.035645955406331e-05, "loss": 1.1836, "step": 6118000 }, { "epoch": 3.67, "learning_rate": 3.0354359588502747e-05, "loss": 1.1189, "step": 6118500 }, { "epoch": 3.67, "learning_rate": 3.035225962294218e-05, "loss": 1.1723, "step": 6119000 }, { "epoch": 3.67, "learning_rate": 3.0350159657381617e-05, "loss": 1.1785, "step": 6119500 }, { "epoch": 3.67, "learning_rate": 3.0348059691821054e-05, "loss": 1.1609, "step": 6120000 }, { "epoch": 3.67, "learning_rate": 3.034595972626049e-05, "loss": 1.1392, "step": 6120500 }, { "epoch": 3.67, "learning_rate": 3.0343859760699924e-05, "loss": 1.1431, "step": 6121000 }, { "epoch": 3.67, "learning_rate": 3.034175979513936e-05, "loss": 1.1535, "step": 6121500 }, { "epoch": 3.67, "learning_rate": 3.0339659829578798e-05, "loss": 1.1606, "step": 6122000 }, { "epoch": 3.67, "learning_rate": 3.033755986401823e-05, "loss": 1.1676, "step": 6122500 }, { "epoch": 3.67, "learning_rate": 3.033545989845766e-05, "loss": 1.1375, "step": 6123000 }, { "epoch": 3.67, "learning_rate": 3.03333599328971e-05, "loss": 1.1511, "step": 6123500 }, { "epoch": 3.67, "learning_rate": 3.0331259967336532e-05, "loss": 1.1453, "step": 6124000 }, { "epoch": 3.67, "learning_rate": 3.032916000177597e-05, "loss": 1.1658, "step": 6124500 }, { "epoch": 3.67, "learning_rate": 3.0327060036215406e-05, "loss": 1.1464, "step": 6125000 }, { "epoch": 3.67, "learning_rate": 3.032496427058596e-05, "loss": 1.1363, "step": 6125500 }, { "epoch": 3.67, "learning_rate": 3.0322864305025396e-05, "loss": 1.1365, "step": 6126000 }, { "epoch": 3.67, "learning_rate": 3.032076433946483e-05, "loss": 1.1587, "step": 6126500 }, { "epoch": 3.67, "learning_rate": 3.0318664373904266e-05, "loss": 1.1528, "step": 6127000 }, { "epoch": 3.67, "learning_rate": 3.0316564408343703e-05, "loss": 1.1654, "step": 6127500 }, { "epoch": 3.67, "learning_rate": 3.0314464442783137e-05, "loss": 1.1701, "step": 6128000 }, { "epoch": 3.67, "learning_rate": 3.0312364477222573e-05, "loss": 1.1637, "step": 6128500 }, { "epoch": 3.67, "learning_rate": 3.031026451166201e-05, "loss": 1.1302, "step": 6129000 }, { "epoch": 3.67, "learning_rate": 3.0308168746032564e-05, "loss": 1.171, "step": 6129500 }, { "epoch": 3.68, "learning_rate": 3.0306068780472e-05, "loss": 1.1277, "step": 6130000 }, { "epoch": 3.68, "learning_rate": 3.0303968814911434e-05, "loss": 1.1384, "step": 6130500 }, { "epoch": 3.68, "learning_rate": 3.030186884935087e-05, "loss": 1.1523, "step": 6131000 }, { "epoch": 3.68, "learning_rate": 3.0299768883790308e-05, "loss": 1.1489, "step": 6131500 }, { "epoch": 3.68, "learning_rate": 3.029766891822974e-05, "loss": 1.1221, "step": 6132000 }, { "epoch": 3.68, "learning_rate": 3.0295568952669178e-05, "loss": 1.179, "step": 6132500 }, { "epoch": 3.68, "learning_rate": 3.0293473187039732e-05, "loss": 1.141, "step": 6133000 }, { "epoch": 3.68, "learning_rate": 3.0291377421410285e-05, "loss": 1.1342, "step": 6133500 }, { "epoch": 3.68, "learning_rate": 3.0289277455849722e-05, "loss": 1.1506, "step": 6134000 }, { "epoch": 3.68, "learning_rate": 3.028717749028916e-05, "loss": 1.15, "step": 6134500 }, { "epoch": 3.68, "learning_rate": 3.0285077524728592e-05, "loss": 1.1467, "step": 6135000 }, { "epoch": 3.68, "learning_rate": 3.028297755916803e-05, "loss": 1.1683, "step": 6135500 }, { "epoch": 3.68, "learning_rate": 3.0280877593607466e-05, "loss": 1.1494, "step": 6136000 }, { "epoch": 3.68, "learning_rate": 3.02787776280469e-05, "loss": 1.1451, "step": 6136500 }, { "epoch": 3.68, "learning_rate": 3.0276677662486336e-05, "loss": 1.1338, "step": 6137000 }, { "epoch": 3.68, "learning_rate": 3.0274577696925773e-05, "loss": 1.1574, "step": 6137500 }, { "epoch": 3.68, "learning_rate": 3.0272477731365207e-05, "loss": 1.1702, "step": 6138000 }, { "epoch": 3.68, "learning_rate": 3.0270377765804637e-05, "loss": 1.1666, "step": 6138500 }, { "epoch": 3.68, "learning_rate": 3.0268277800244074e-05, "loss": 1.131, "step": 6139000 }, { "epoch": 3.68, "learning_rate": 3.0266182034614634e-05, "loss": 1.142, "step": 6139500 }, { "epoch": 3.68, "learning_rate": 3.0264086268985188e-05, "loss": 1.151, "step": 6140000 }, { "epoch": 3.68, "learning_rate": 3.0261986303424624e-05, "loss": 1.1583, "step": 6140500 }, { "epoch": 3.68, "learning_rate": 3.025988633786406e-05, "loss": 1.155, "step": 6141000 }, { "epoch": 3.68, "learning_rate": 3.0257786372303495e-05, "loss": 1.1406, "step": 6141500 }, { "epoch": 3.68, "learning_rate": 3.025568640674293e-05, "loss": 1.1571, "step": 6142000 }, { "epoch": 3.68, "learning_rate": 3.025358644118237e-05, "loss": 1.154, "step": 6142500 }, { "epoch": 3.68, "learning_rate": 3.0251486475621795e-05, "loss": 1.1897, "step": 6143000 }, { "epoch": 3.68, "learning_rate": 3.0249386510061232e-05, "loss": 1.1374, "step": 6143500 }, { "epoch": 3.68, "learning_rate": 3.0247290744431792e-05, "loss": 1.1547, "step": 6144000 }, { "epoch": 3.68, "learning_rate": 3.0245194978802346e-05, "loss": 1.1428, "step": 6144500 }, { "epoch": 3.68, "learning_rate": 3.0243095013241783e-05, "loss": 1.1508, "step": 6145000 }, { "epoch": 3.68, "learning_rate": 3.024099504768122e-05, "loss": 1.1623, "step": 6145500 }, { "epoch": 3.68, "learning_rate": 3.0238895082120653e-05, "loss": 1.1682, "step": 6146000 }, { "epoch": 3.69, "learning_rate": 3.023679511656009e-05, "loss": 1.1247, "step": 6146500 }, { "epoch": 3.69, "learning_rate": 3.0234699350930644e-05, "loss": 1.1689, "step": 6147000 }, { "epoch": 3.69, "learning_rate": 3.023259938537008e-05, "loss": 1.1537, "step": 6147500 }, { "epoch": 3.69, "learning_rate": 3.0230499419809517e-05, "loss": 1.1619, "step": 6148000 }, { "epoch": 3.69, "learning_rate": 3.022839945424895e-05, "loss": 1.1738, "step": 6148500 }, { "epoch": 3.69, "learning_rate": 3.0226299488688388e-05, "loss": 1.1228, "step": 6149000 }, { "epoch": 3.69, "learning_rate": 3.0224199523127824e-05, "loss": 1.1653, "step": 6149500 }, { "epoch": 3.69, "learning_rate": 3.0222103757498378e-05, "loss": 1.1271, "step": 6150000 }, { "epoch": 3.69, "learning_rate": 3.022000379193781e-05, "loss": 1.1697, "step": 6150500 }, { "epoch": 3.69, "learning_rate": 3.0217903826377248e-05, "loss": 1.156, "step": 6151000 }, { "epoch": 3.69, "learning_rate": 3.0215803860816685e-05, "loss": 1.1462, "step": 6151500 }, { "epoch": 3.69, "learning_rate": 3.021370389525612e-05, "loss": 1.1442, "step": 6152000 }, { "epoch": 3.69, "learning_rate": 3.021160392969555e-05, "loss": 1.1822, "step": 6152500 }, { "epoch": 3.69, "learning_rate": 3.020950816406611e-05, "loss": 1.1674, "step": 6153000 }, { "epoch": 3.69, "learning_rate": 3.0207408198505546e-05, "loss": 1.1494, "step": 6153500 }, { "epoch": 3.69, "learning_rate": 3.0205308232944983e-05, "loss": 1.1846, "step": 6154000 }, { "epoch": 3.69, "learning_rate": 3.0203208267384416e-05, "loss": 1.1832, "step": 6154500 }, { "epoch": 3.69, "learning_rate": 3.0201108301823846e-05, "loss": 1.1234, "step": 6155000 }, { "epoch": 3.69, "learning_rate": 3.0199008336263283e-05, "loss": 1.1712, "step": 6155500 }, { "epoch": 3.69, "learning_rate": 3.0196912570633843e-05, "loss": 1.1637, "step": 6156000 }, { "epoch": 3.69, "learning_rate": 3.019481260507328e-05, "loss": 1.1471, "step": 6156500 }, { "epoch": 3.69, "learning_rate": 3.0192712639512714e-05, "loss": 1.1457, "step": 6157000 }, { "epoch": 3.69, "learning_rate": 3.0190612673952144e-05, "loss": 1.1688, "step": 6157500 }, { "epoch": 3.69, "learning_rate": 3.018851270839158e-05, "loss": 1.1388, "step": 6158000 }, { "epoch": 3.69, "learning_rate": 3.0186412742831014e-05, "loss": 1.1892, "step": 6158500 }, { "epoch": 3.69, "learning_rate": 3.018431277727045e-05, "loss": 1.1355, "step": 6159000 }, { "epoch": 3.69, "learning_rate": 3.0182212811709888e-05, "loss": 1.1735, "step": 6159500 }, { "epoch": 3.69, "learning_rate": 3.018011704608044e-05, "loss": 1.1391, "step": 6160000 }, { "epoch": 3.69, "learning_rate": 3.0178017080519878e-05, "loss": 1.1477, "step": 6160500 }, { "epoch": 3.69, "learning_rate": 3.017591711495931e-05, "loss": 1.1407, "step": 6161000 }, { "epoch": 3.69, "learning_rate": 3.017381714939875e-05, "loss": 1.1555, "step": 6161500 }, { "epoch": 3.69, "learning_rate": 3.0171721383769302e-05, "loss": 1.1495, "step": 6162000 }, { "epoch": 3.69, "learning_rate": 3.016962141820874e-05, "loss": 1.1324, "step": 6162500 }, { "epoch": 3.69, "learning_rate": 3.0167521452648176e-05, "loss": 1.1659, "step": 6163000 }, { "epoch": 3.7, "learning_rate": 3.016542148708761e-05, "loss": 1.1679, "step": 6163500 }, { "epoch": 3.7, "learning_rate": 3.0163321521527046e-05, "loss": 1.1372, "step": 6164000 }, { "epoch": 3.7, "learning_rate": 3.0161229955828723e-05, "loss": 1.1401, "step": 6164500 }, { "epoch": 3.7, "learning_rate": 3.015913419019928e-05, "loss": 1.148, "step": 6165000 }, { "epoch": 3.7, "learning_rate": 3.0157034224638714e-05, "loss": 1.1725, "step": 6165500 }, { "epoch": 3.7, "learning_rate": 3.015493425907815e-05, "loss": 1.1322, "step": 6166000 }, { "epoch": 3.7, "learning_rate": 3.0152834293517587e-05, "loss": 1.1643, "step": 6166500 }, { "epoch": 3.7, "learning_rate": 3.015073432795702e-05, "loss": 1.1411, "step": 6167000 }, { "epoch": 3.7, "learning_rate": 3.0148634362396458e-05, "loss": 1.1757, "step": 6167500 }, { "epoch": 3.7, "learning_rate": 3.0146534396835894e-05, "loss": 1.1603, "step": 6168000 }, { "epoch": 3.7, "learning_rate": 3.0144434431275328e-05, "loss": 1.1595, "step": 6168500 }, { "epoch": 3.7, "learning_rate": 3.0142334465714765e-05, "loss": 1.1181, "step": 6169000 }, { "epoch": 3.7, "learning_rate": 3.0140238700085318e-05, "loss": 1.1606, "step": 6169500 }, { "epoch": 3.7, "learning_rate": 3.0138138734524755e-05, "loss": 1.1676, "step": 6170000 }, { "epoch": 3.7, "learning_rate": 3.0136038768964192e-05, "loss": 1.1244, "step": 6170500 }, { "epoch": 3.7, "learning_rate": 3.0133938803403625e-05, "loss": 1.1441, "step": 6171000 }, { "epoch": 3.7, "learning_rate": 3.013184303777418e-05, "loss": 1.1508, "step": 6171500 }, { "epoch": 3.7, "learning_rate": 3.0129743072213616e-05, "loss": 1.1545, "step": 6172000 }, { "epoch": 3.7, "learning_rate": 3.0127643106653053e-05, "loss": 1.139, "step": 6172500 }, { "epoch": 3.7, "learning_rate": 3.0125543141092486e-05, "loss": 1.172, "step": 6173000 }, { "epoch": 3.7, "learning_rate": 3.0123443175531923e-05, "loss": 1.1656, "step": 6173500 }, { "epoch": 3.7, "learning_rate": 3.0121343209971353e-05, "loss": 1.1637, "step": 6174000 }, { "epoch": 3.7, "learning_rate": 3.011924324441079e-05, "loss": 1.1281, "step": 6174500 }, { "epoch": 3.7, "learning_rate": 3.0117143278850223e-05, "loss": 1.1649, "step": 6175000 }, { "epoch": 3.7, "learning_rate": 3.011504331328966e-05, "loss": 1.1623, "step": 6175500 }, { "epoch": 3.7, "learning_rate": 3.011294754766022e-05, "loss": 1.1399, "step": 6176000 }, { "epoch": 3.7, "learning_rate": 3.011084758209965e-05, "loss": 1.1642, "step": 6176500 }, { "epoch": 3.7, "learning_rate": 3.010875181647021e-05, "loss": 1.1463, "step": 6177000 }, { "epoch": 3.7, "learning_rate": 3.0106651850909648e-05, "loss": 1.1625, "step": 6177500 }, { "epoch": 3.7, "learning_rate": 3.010455188534908e-05, "loss": 1.1537, "step": 6178000 }, { "epoch": 3.7, "learning_rate": 3.0102451919788518e-05, "loss": 1.1777, "step": 6178500 }, { "epoch": 3.7, "learning_rate": 3.0100351954227948e-05, "loss": 1.1529, "step": 6179000 }, { "epoch": 3.7, "learning_rate": 3.0098251988667382e-05, "loss": 1.1825, "step": 6179500 }, { "epoch": 3.71, "learning_rate": 3.009615202310682e-05, "loss": 1.1534, "step": 6180000 }, { "epoch": 3.71, "learning_rate": 3.0094052057546255e-05, "loss": 1.161, "step": 6180500 }, { "epoch": 3.71, "learning_rate": 3.0091952091985692e-05, "loss": 1.1582, "step": 6181000 }, { "epoch": 3.71, "learning_rate": 3.0089852126425126e-05, "loss": 1.1355, "step": 6181500 }, { "epoch": 3.71, "learning_rate": 3.0087752160864563e-05, "loss": 1.1742, "step": 6182000 }, { "epoch": 3.71, "learning_rate": 3.0085656395235116e-05, "loss": 1.1637, "step": 6182500 }, { "epoch": 3.71, "learning_rate": 3.0083556429674553e-05, "loss": 1.1849, "step": 6183000 }, { "epoch": 3.71, "learning_rate": 3.0081460664045107e-05, "loss": 1.1681, "step": 6183500 }, { "epoch": 3.71, "learning_rate": 3.0079360698484543e-05, "loss": 1.1403, "step": 6184000 }, { "epoch": 3.71, "learning_rate": 3.0077260732923977e-05, "loss": 1.1678, "step": 6184500 }, { "epoch": 3.71, "learning_rate": 3.0075160767363414e-05, "loss": 1.1464, "step": 6185000 }, { "epoch": 3.71, "learning_rate": 3.007306080180285e-05, "loss": 1.1458, "step": 6185500 }, { "epoch": 3.71, "learning_rate": 3.0070960836242284e-05, "loss": 1.1497, "step": 6186000 }, { "epoch": 3.71, "learning_rate": 3.006886087068172e-05, "loss": 1.1336, "step": 6186500 }, { "epoch": 3.71, "learning_rate": 3.0066760905121158e-05, "loss": 1.1482, "step": 6187000 }, { "epoch": 3.71, "learning_rate": 3.006466093956059e-05, "loss": 1.157, "step": 6187500 }, { "epoch": 3.71, "learning_rate": 3.0062560974000028e-05, "loss": 1.1463, "step": 6188000 }, { "epoch": 3.71, "learning_rate": 3.006046520837058e-05, "loss": 1.1655, "step": 6188500 }, { "epoch": 3.71, "learning_rate": 3.005836524281002e-05, "loss": 1.1561, "step": 6189000 }, { "epoch": 3.71, "learning_rate": 3.0056265277249455e-05, "loss": 1.1351, "step": 6189500 }, { "epoch": 3.71, "learning_rate": 3.005416531168889e-05, "loss": 1.1453, "step": 6190000 }, { "epoch": 3.71, "learning_rate": 3.0052065346128326e-05, "loss": 1.1432, "step": 6190500 }, { "epoch": 3.71, "learning_rate": 3.0049965380567762e-05, "loss": 1.1202, "step": 6191000 }, { "epoch": 3.71, "learning_rate": 3.004786541500719e-05, "loss": 1.1381, "step": 6191500 }, { "epoch": 3.71, "learning_rate": 3.0045765449446626e-05, "loss": 1.1366, "step": 6192000 }, { "epoch": 3.71, "learning_rate": 3.0043673883748306e-05, "loss": 1.1712, "step": 6192500 }, { "epoch": 3.71, "learning_rate": 3.004157391818774e-05, "loss": 1.1298, "step": 6193000 }, { "epoch": 3.71, "learning_rate": 3.0039473952627177e-05, "loss": 1.1207, "step": 6193500 }, { "epoch": 3.71, "learning_rate": 3.0037373987066614e-05, "loss": 1.1794, "step": 6194000 }, { "epoch": 3.71, "learning_rate": 3.0035274021506047e-05, "loss": 1.1426, "step": 6194500 }, { "epoch": 3.71, "learning_rate": 3.0033174055945484e-05, "loss": 1.133, "step": 6195000 }, { "epoch": 3.71, "learning_rate": 3.003107409038492e-05, "loss": 1.1499, "step": 6195500 }, { "epoch": 3.71, "learning_rate": 3.0028978324755474e-05, "loss": 1.1531, "step": 6196000 }, { "epoch": 3.72, "learning_rate": 3.002687835919491e-05, "loss": 1.155, "step": 6196500 }, { "epoch": 3.72, "learning_rate": 3.0024778393634345e-05, "loss": 1.1642, "step": 6197000 }, { "epoch": 3.72, "learning_rate": 3.002267842807378e-05, "loss": 1.1266, "step": 6197500 }, { "epoch": 3.72, "learning_rate": 3.002057846251322e-05, "loss": 1.1724, "step": 6198000 }, { "epoch": 3.72, "learning_rate": 3.0018478496952645e-05, "loss": 1.1978, "step": 6198500 }, { "epoch": 3.72, "learning_rate": 3.0016378531392082e-05, "loss": 1.1399, "step": 6199000 }, { "epoch": 3.72, "learning_rate": 3.001427856583152e-05, "loss": 1.164, "step": 6199500 }, { "epoch": 3.72, "learning_rate": 3.001218280020208e-05, "loss": 1.1524, "step": 6200000 }, { "epoch": 3.72, "eval_loss": 1.116134762763977, "eval_runtime": 1099.3849, "eval_samples_per_second": 479.104, "eval_steps_per_second": 79.851, "step": 6200000 }, { "epoch": 3.72, "learning_rate": 3.0010082834641516e-05, "loss": 1.1378, "step": 6200500 }, { "epoch": 3.72, "learning_rate": 3.0007982869080943e-05, "loss": 1.1335, "step": 6201000 }, { "epoch": 3.72, "learning_rate": 3.000588290352038e-05, "loss": 1.142, "step": 6201500 }, { "epoch": 3.72, "learning_rate": 3.000378713789094e-05, "loss": 1.1316, "step": 6202000 }, { "epoch": 3.72, "learning_rate": 3.0001691372261493e-05, "loss": 1.1503, "step": 6202500 }, { "epoch": 3.72, "learning_rate": 2.999959140670093e-05, "loss": 1.1535, "step": 6203000 }, { "epoch": 3.72, "learning_rate": 2.9997491441140367e-05, "loss": 1.1509, "step": 6203500 }, { "epoch": 3.72, "learning_rate": 2.99953914755798e-05, "loss": 1.1635, "step": 6204000 }, { "epoch": 3.72, "learning_rate": 2.9993291510019237e-05, "loss": 1.1405, "step": 6204500 }, { "epoch": 3.72, "learning_rate": 2.9991191544458674e-05, "loss": 1.1681, "step": 6205000 }, { "epoch": 3.72, "learning_rate": 2.9989095778829228e-05, "loss": 1.1532, "step": 6205500 }, { "epoch": 3.72, "learning_rate": 2.9986995813268665e-05, "loss": 1.1368, "step": 6206000 }, { "epoch": 3.72, "learning_rate": 2.9984895847708098e-05, "loss": 1.139, "step": 6206500 }, { "epoch": 3.72, "learning_rate": 2.9982795882147535e-05, "loss": 1.143, "step": 6207000 }, { "epoch": 3.72, "learning_rate": 2.9980695916586972e-05, "loss": 1.1573, "step": 6207500 }, { "epoch": 3.72, "learning_rate": 2.99785959510264e-05, "loss": 1.134, "step": 6208000 }, { "epoch": 3.72, "learning_rate": 2.997650018539696e-05, "loss": 1.1822, "step": 6208500 }, { "epoch": 3.72, "learning_rate": 2.9974400219836396e-05, "loss": 1.1359, "step": 6209000 }, { "epoch": 3.72, "learning_rate": 2.9972300254275832e-05, "loss": 1.1359, "step": 6209500 }, { "epoch": 3.72, "learning_rate": 2.9970204488646386e-05, "loss": 1.1415, "step": 6210000 }, { "epoch": 3.72, "learning_rate": 2.9968104523085823e-05, "loss": 1.1546, "step": 6210500 }, { "epoch": 3.72, "learning_rate": 2.9966004557525256e-05, "loss": 1.1757, "step": 6211000 }, { "epoch": 3.72, "learning_rate": 2.9963904591964693e-05, "loss": 1.1412, "step": 6211500 }, { "epoch": 3.72, "learning_rate": 2.996180462640413e-05, "loss": 1.1388, "step": 6212000 }, { "epoch": 3.72, "learning_rate": 2.9959704660843564e-05, "loss": 1.1408, "step": 6212500 }, { "epoch": 3.72, "learning_rate": 2.9957604695282994e-05, "loss": 1.1606, "step": 6213000 }, { "epoch": 3.73, "learning_rate": 2.995550472972243e-05, "loss": 1.154, "step": 6213500 }, { "epoch": 3.73, "learning_rate": 2.9953404764161867e-05, "loss": 1.1552, "step": 6214000 }, { "epoch": 3.73, "learning_rate": 2.99513047986013e-05, "loss": 1.1552, "step": 6214500 }, { "epoch": 3.73, "learning_rate": 2.9949204833040738e-05, "loss": 1.125, "step": 6215000 }, { "epoch": 3.73, "learning_rate": 2.9947104867480174e-05, "loss": 1.1474, "step": 6215500 }, { "epoch": 3.73, "learning_rate": 2.9945009101850728e-05, "loss": 1.1463, "step": 6216000 }, { "epoch": 3.73, "learning_rate": 2.994290913629016e-05, "loss": 1.1347, "step": 6216500 }, { "epoch": 3.73, "learning_rate": 2.99408091707296e-05, "loss": 1.1755, "step": 6217000 }, { "epoch": 3.73, "learning_rate": 2.9938709205169035e-05, "loss": 1.1816, "step": 6217500 }, { "epoch": 3.73, "learning_rate": 2.993661343953959e-05, "loss": 1.1187, "step": 6218000 }, { "epoch": 3.73, "learning_rate": 2.9934513473979026e-05, "loss": 1.1528, "step": 6218500 }, { "epoch": 3.73, "learning_rate": 2.993241350841846e-05, "loss": 1.163, "step": 6219000 }, { "epoch": 3.73, "learning_rate": 2.9930313542857896e-05, "loss": 1.1477, "step": 6219500 }, { "epoch": 3.73, "learning_rate": 2.9928213577297333e-05, "loss": 1.1582, "step": 6220000 }, { "epoch": 3.73, "learning_rate": 2.9926113611736766e-05, "loss": 1.1518, "step": 6220500 }, { "epoch": 3.73, "learning_rate": 2.9924017846107323e-05, "loss": 1.171, "step": 6221000 }, { "epoch": 3.73, "learning_rate": 2.9921917880546757e-05, "loss": 1.1644, "step": 6221500 }, { "epoch": 3.73, "learning_rate": 2.9919817914986193e-05, "loss": 1.1546, "step": 6222000 }, { "epoch": 3.73, "learning_rate": 2.991771794942563e-05, "loss": 1.1343, "step": 6222500 }, { "epoch": 3.73, "learning_rate": 2.9915617983865064e-05, "loss": 1.1176, "step": 6223000 }, { "epoch": 3.73, "learning_rate": 2.9913522218235617e-05, "loss": 1.1206, "step": 6223500 }, { "epoch": 3.73, "learning_rate": 2.9911422252675054e-05, "loss": 1.1348, "step": 6224000 }, { "epoch": 3.73, "learning_rate": 2.990932228711449e-05, "loss": 1.1622, "step": 6224500 }, { "epoch": 3.73, "learning_rate": 2.9907222321553925e-05, "loss": 1.1402, "step": 6225000 }, { "epoch": 3.73, "learning_rate": 2.990512235599336e-05, "loss": 1.1905, "step": 6225500 }, { "epoch": 3.73, "learning_rate": 2.9903022390432798e-05, "loss": 1.1585, "step": 6226000 }, { "epoch": 3.73, "learning_rate": 2.9900922424872235e-05, "loss": 1.1717, "step": 6226500 }, { "epoch": 3.73, "learning_rate": 2.989882245931167e-05, "loss": 1.1645, "step": 6227000 }, { "epoch": 3.73, "learning_rate": 2.9896726693682222e-05, "loss": 1.1655, "step": 6227500 }, { "epoch": 3.73, "learning_rate": 2.989463092805278e-05, "loss": 1.1356, "step": 6228000 }, { "epoch": 3.73, "learning_rate": 2.9892530962492213e-05, "loss": 1.1626, "step": 6228500 }, { "epoch": 3.73, "learning_rate": 2.989043099693165e-05, "loss": 1.1559, "step": 6229000 }, { "epoch": 3.73, "learning_rate": 2.9888331031371086e-05, "loss": 1.1745, "step": 6229500 }, { "epoch": 3.74, "learning_rate": 2.988623106581052e-05, "loss": 1.1542, "step": 6230000 }, { "epoch": 3.74, "learning_rate": 2.9884131100249957e-05, "loss": 1.1397, "step": 6230500 }, { "epoch": 3.74, "learning_rate": 2.9882031134689393e-05, "loss": 1.1583, "step": 6231000 }, { "epoch": 3.74, "learning_rate": 2.9879931169128827e-05, "loss": 1.1385, "step": 6231500 }, { "epoch": 3.74, "learning_rate": 2.987783540349938e-05, "loss": 1.1636, "step": 6232000 }, { "epoch": 3.74, "learning_rate": 2.9875735437938817e-05, "loss": 1.1114, "step": 6232500 }, { "epoch": 3.74, "learning_rate": 2.9873635472378254e-05, "loss": 1.1735, "step": 6233000 }, { "epoch": 3.74, "learning_rate": 2.987153550681769e-05, "loss": 1.1443, "step": 6233500 }, { "epoch": 3.74, "learning_rate": 2.9869439741188245e-05, "loss": 1.1686, "step": 6234000 }, { "epoch": 3.74, "learning_rate": 2.9867339775627678e-05, "loss": 1.1631, "step": 6234500 }, { "epoch": 3.74, "learning_rate": 2.9865244009998235e-05, "loss": 1.161, "step": 6235000 }, { "epoch": 3.74, "learning_rate": 2.986314404443767e-05, "loss": 1.1653, "step": 6235500 }, { "epoch": 3.74, "learning_rate": 2.9861044078877105e-05, "loss": 1.1406, "step": 6236000 }, { "epoch": 3.74, "learning_rate": 2.9858944113316542e-05, "loss": 1.1427, "step": 6236500 }, { "epoch": 3.74, "learning_rate": 2.9856844147755976e-05, "loss": 1.1699, "step": 6237000 }, { "epoch": 3.74, "learning_rate": 2.9854744182195412e-05, "loss": 1.1711, "step": 6237500 }, { "epoch": 3.74, "learning_rate": 2.985264421663485e-05, "loss": 1.1351, "step": 6238000 }, { "epoch": 3.74, "learning_rate": 2.9850544251074283e-05, "loss": 1.1303, "step": 6238500 }, { "epoch": 3.74, "learning_rate": 2.984844848544484e-05, "loss": 1.1704, "step": 6239000 }, { "epoch": 3.74, "learning_rate": 2.9846348519884273e-05, "loss": 1.1441, "step": 6239500 }, { "epoch": 3.74, "learning_rate": 2.984424855432371e-05, "loss": 1.1517, "step": 6240000 }, { "epoch": 3.74, "learning_rate": 2.9842152788694264e-05, "loss": 1.1693, "step": 6240500 }, { "epoch": 3.74, "learning_rate": 2.98400528231337e-05, "loss": 1.1604, "step": 6241000 }, { "epoch": 3.74, "learning_rate": 2.9837952857573134e-05, "loss": 1.1735, "step": 6241500 }, { "epoch": 3.74, "learning_rate": 2.983585289201257e-05, "loss": 1.1373, "step": 6242000 }, { "epoch": 3.74, "learning_rate": 2.9833757126383124e-05, "loss": 1.1246, "step": 6242500 }, { "epoch": 3.74, "learning_rate": 2.983165716082256e-05, "loss": 1.1595, "step": 6243000 }, { "epoch": 3.74, "learning_rate": 2.9829557195261998e-05, "loss": 1.1617, "step": 6243500 }, { "epoch": 3.74, "learning_rate": 2.982745722970143e-05, "loss": 1.1339, "step": 6244000 }, { "epoch": 3.74, "learning_rate": 2.9825357264140868e-05, "loss": 1.1773, "step": 6244500 }, { "epoch": 3.74, "learning_rate": 2.9823257298580305e-05, "loss": 1.1555, "step": 6245000 }, { "epoch": 3.74, "learning_rate": 2.982116153295086e-05, "loss": 1.1475, "step": 6245500 }, { "epoch": 3.74, "learning_rate": 2.9819061567390296e-05, "loss": 1.1174, "step": 6246000 }, { "epoch": 3.75, "learning_rate": 2.981696160182973e-05, "loss": 1.1641, "step": 6246500 }, { "epoch": 3.75, "learning_rate": 2.9814861636269166e-05, "loss": 1.1374, "step": 6247000 }, { "epoch": 3.75, "learning_rate": 2.9812761670708603e-05, "loss": 1.1506, "step": 6247500 }, { "epoch": 3.75, "learning_rate": 2.9810661705148036e-05, "loss": 1.143, "step": 6248000 }, { "epoch": 3.75, "learning_rate": 2.9808561739587473e-05, "loss": 1.1523, "step": 6248500 }, { "epoch": 3.75, "learning_rate": 2.980646177402691e-05, "loss": 1.1386, "step": 6249000 }, { "epoch": 3.75, "learning_rate": 2.9804366008397463e-05, "loss": 1.1433, "step": 6249500 }, { "epoch": 3.75, "learning_rate": 2.9802266042836897e-05, "loss": 1.1422, "step": 6250000 }, { "epoch": 3.75, "learning_rate": 2.9800170277207454e-05, "loss": 1.1313, "step": 6250500 }, { "epoch": 3.75, "learning_rate": 2.9798070311646887e-05, "loss": 1.1299, "step": 6251000 }, { "epoch": 3.75, "learning_rate": 2.9795970346086324e-05, "loss": 1.1819, "step": 6251500 }, { "epoch": 3.75, "learning_rate": 2.979387038052576e-05, "loss": 1.1488, "step": 6252000 }, { "epoch": 3.75, "learning_rate": 2.9791770414965194e-05, "loss": 1.1652, "step": 6252500 }, { "epoch": 3.75, "learning_rate": 2.978967464933575e-05, "loss": 1.1318, "step": 6253000 }, { "epoch": 3.75, "learning_rate": 2.9787574683775185e-05, "loss": 1.1702, "step": 6253500 }, { "epoch": 3.75, "learning_rate": 2.9785474718214622e-05, "loss": 1.1657, "step": 6254000 }, { "epoch": 3.75, "learning_rate": 2.978337475265406e-05, "loss": 1.1684, "step": 6254500 }, { "epoch": 3.75, "learning_rate": 2.9781274787093492e-05, "loss": 1.1595, "step": 6255000 }, { "epoch": 3.75, "learning_rate": 2.9779179021464046e-05, "loss": 1.1631, "step": 6255500 }, { "epoch": 3.75, "learning_rate": 2.9777079055903482e-05, "loss": 1.1437, "step": 6256000 }, { "epoch": 3.75, "learning_rate": 2.977497909034292e-05, "loss": 1.1439, "step": 6256500 }, { "epoch": 3.75, "learning_rate": 2.9772879124782353e-05, "loss": 1.1488, "step": 6257000 }, { "epoch": 3.75, "learning_rate": 2.977077915922179e-05, "loss": 1.1564, "step": 6257500 }, { "epoch": 3.75, "learning_rate": 2.9768679193661226e-05, "loss": 1.1542, "step": 6258000 }, { "epoch": 3.75, "learning_rate": 2.976657922810066e-05, "loss": 1.15, "step": 6258500 }, { "epoch": 3.75, "learning_rate": 2.976447926254009e-05, "loss": 1.1665, "step": 6259000 }, { "epoch": 3.75, "learning_rate": 2.976238349691065e-05, "loss": 1.1457, "step": 6259500 }, { "epoch": 3.75, "learning_rate": 2.9760287731281207e-05, "loss": 1.1529, "step": 6260000 }, { "epoch": 3.75, "learning_rate": 2.975818776572064e-05, "loss": 1.1935, "step": 6260500 }, { "epoch": 3.75, "learning_rate": 2.9756087800160078e-05, "loss": 1.1577, "step": 6261000 }, { "epoch": 3.75, "learning_rate": 2.9753987834599514e-05, "loss": 1.1538, "step": 6261500 }, { "epoch": 3.75, "learning_rate": 2.9751887869038948e-05, "loss": 1.1364, "step": 6262000 }, { "epoch": 3.75, "learning_rate": 2.9749787903478385e-05, "loss": 1.166, "step": 6262500 }, { "epoch": 3.75, "learning_rate": 2.974768793791782e-05, "loss": 1.1569, "step": 6263000 }, { "epoch": 3.76, "learning_rate": 2.9745592172288375e-05, "loss": 1.158, "step": 6263500 }, { "epoch": 3.76, "learning_rate": 2.974349220672781e-05, "loss": 1.15, "step": 6264000 }, { "epoch": 3.76, "learning_rate": 2.9741392241167245e-05, "loss": 1.1489, "step": 6264500 }, { "epoch": 3.76, "learning_rate": 2.9739292275606682e-05, "loss": 1.1415, "step": 6265000 }, { "epoch": 3.76, "learning_rate": 2.9737196509977236e-05, "loss": 1.1564, "step": 6265500 }, { "epoch": 3.76, "learning_rate": 2.9735096544416673e-05, "loss": 1.1131, "step": 6266000 }, { "epoch": 3.76, "learning_rate": 2.9732996578856106e-05, "loss": 1.1563, "step": 6266500 }, { "epoch": 3.76, "learning_rate": 2.9730900813226663e-05, "loss": 1.1357, "step": 6267000 }, { "epoch": 3.76, "learning_rate": 2.9728800847666097e-05, "loss": 1.1619, "step": 6267500 }, { "epoch": 3.76, "learning_rate": 2.9726700882105533e-05, "loss": 1.1224, "step": 6268000 }, { "epoch": 3.76, "learning_rate": 2.972460091654497e-05, "loss": 1.1513, "step": 6268500 }, { "epoch": 3.76, "learning_rate": 2.9722500950984404e-05, "loss": 1.143, "step": 6269000 }, { "epoch": 3.76, "learning_rate": 2.972040098542384e-05, "loss": 1.152, "step": 6269500 }, { "epoch": 3.76, "learning_rate": 2.9718301019863277e-05, "loss": 1.1715, "step": 6270000 }, { "epoch": 3.76, "learning_rate": 2.971620105430271e-05, "loss": 1.1505, "step": 6270500 }, { "epoch": 3.76, "learning_rate": 2.971410108874214e-05, "loss": 1.1466, "step": 6271000 }, { "epoch": 3.76, "learning_rate": 2.9712001123181578e-05, "loss": 1.1332, "step": 6271500 }, { "epoch": 3.76, "learning_rate": 2.9709901157621015e-05, "loss": 1.1262, "step": 6272000 }, { "epoch": 3.76, "learning_rate": 2.9707805391991575e-05, "loss": 1.1396, "step": 6272500 }, { "epoch": 3.76, "learning_rate": 2.9705705426431002e-05, "loss": 1.1239, "step": 6273000 }, { "epoch": 3.76, "learning_rate": 2.970360546087044e-05, "loss": 1.184, "step": 6273500 }, { "epoch": 3.76, "learning_rate": 2.9701505495309875e-05, "loss": 1.1437, "step": 6274000 }, { "epoch": 3.76, "learning_rate": 2.969940552974931e-05, "loss": 1.1741, "step": 6274500 }, { "epoch": 3.76, "learning_rate": 2.9697305564188746e-05, "loss": 1.1421, "step": 6275000 }, { "epoch": 3.76, "learning_rate": 2.96952097985593e-05, "loss": 1.1659, "step": 6275500 }, { "epoch": 3.76, "learning_rate": 2.9693109832998736e-05, "loss": 1.1747, "step": 6276000 }, { "epoch": 3.76, "learning_rate": 2.9691009867438173e-05, "loss": 1.1387, "step": 6276500 }, { "epoch": 3.76, "learning_rate": 2.9688909901877606e-05, "loss": 1.1428, "step": 6277000 }, { "epoch": 3.76, "learning_rate": 2.9686809936317043e-05, "loss": 1.1533, "step": 6277500 }, { "epoch": 3.76, "learning_rate": 2.968470997075648e-05, "loss": 1.1676, "step": 6278000 }, { "epoch": 3.76, "learning_rate": 2.9682610005195914e-05, "loss": 1.1697, "step": 6278500 }, { "epoch": 3.76, "learning_rate": 2.968051003963535e-05, "loss": 1.1545, "step": 6279000 }, { "epoch": 3.76, "learning_rate": 2.9678414274005904e-05, "loss": 1.14, "step": 6279500 }, { "epoch": 3.77, "learning_rate": 2.967631430844534e-05, "loss": 1.1575, "step": 6280000 }, { "epoch": 3.77, "learning_rate": 2.9674218542815894e-05, "loss": 1.1682, "step": 6280500 }, { "epoch": 3.77, "learning_rate": 2.967211857725533e-05, "loss": 1.1556, "step": 6281000 }, { "epoch": 3.77, "learning_rate": 2.9670018611694765e-05, "loss": 1.1523, "step": 6281500 }, { "epoch": 3.77, "learning_rate": 2.96679186461342e-05, "loss": 1.1714, "step": 6282000 }, { "epoch": 3.77, "learning_rate": 2.966581868057364e-05, "loss": 1.1294, "step": 6282500 }, { "epoch": 3.77, "learning_rate": 2.9663718715013072e-05, "loss": 1.1608, "step": 6283000 }, { "epoch": 3.77, "learning_rate": 2.966162294938363e-05, "loss": 1.1272, "step": 6283500 }, { "epoch": 3.77, "learning_rate": 2.9659522983823062e-05, "loss": 1.1666, "step": 6284000 }, { "epoch": 3.77, "learning_rate": 2.96574230182625e-05, "loss": 1.1339, "step": 6284500 }, { "epoch": 3.77, "learning_rate": 2.9655323052701936e-05, "loss": 1.1549, "step": 6285000 }, { "epoch": 3.77, "learning_rate": 2.965322308714137e-05, "loss": 1.1629, "step": 6285500 }, { "epoch": 3.77, "learning_rate": 2.9651123121580806e-05, "loss": 1.149, "step": 6286000 }, { "epoch": 3.77, "learning_rate": 2.964902735595136e-05, "loss": 1.1445, "step": 6286500 }, { "epoch": 3.77, "learning_rate": 2.9646927390390797e-05, "loss": 1.1671, "step": 6287000 }, { "epoch": 3.77, "learning_rate": 2.9644827424830234e-05, "loss": 1.1227, "step": 6287500 }, { "epoch": 3.77, "learning_rate": 2.9642727459269667e-05, "loss": 1.1383, "step": 6288000 }, { "epoch": 3.77, "learning_rate": 2.9640627493709104e-05, "loss": 1.1429, "step": 6288500 }, { "epoch": 3.77, "learning_rate": 2.9638531728079657e-05, "loss": 1.1414, "step": 6289000 }, { "epoch": 3.77, "learning_rate": 2.9636431762519094e-05, "loss": 1.176, "step": 6289500 }, { "epoch": 3.77, "learning_rate": 2.9634331796958528e-05, "loss": 1.1837, "step": 6290000 }, { "epoch": 3.77, "learning_rate": 2.9632231831397965e-05, "loss": 1.1506, "step": 6290500 }, { "epoch": 3.77, "learning_rate": 2.96301318658374e-05, "loss": 1.13, "step": 6291000 }, { "epoch": 3.77, "learning_rate": 2.9628036100207955e-05, "loss": 1.1653, "step": 6291500 }, { "epoch": 3.77, "learning_rate": 2.9625936134647392e-05, "loss": 1.163, "step": 6292000 }, { "epoch": 3.77, "learning_rate": 2.9623836169086825e-05, "loss": 1.1654, "step": 6292500 }, { "epoch": 3.77, "learning_rate": 2.9621736203526262e-05, "loss": 1.1521, "step": 6293000 }, { "epoch": 3.77, "learning_rate": 2.96196362379657e-05, "loss": 1.1532, "step": 6293500 }, { "epoch": 3.77, "learning_rate": 2.9617536272405133e-05, "loss": 1.1584, "step": 6294000 }, { "epoch": 3.77, "learning_rate": 2.961543630684457e-05, "loss": 1.1674, "step": 6294500 }, { "epoch": 3.77, "learning_rate": 2.9613336341284006e-05, "loss": 1.1768, "step": 6295000 }, { "epoch": 3.77, "learning_rate": 2.961124057565456e-05, "loss": 1.1619, "step": 6295500 }, { "epoch": 3.77, "learning_rate": 2.9609140610093997e-05, "loss": 1.1394, "step": 6296000 }, { "epoch": 3.78, "learning_rate": 2.960704064453343e-05, "loss": 1.173, "step": 6296500 }, { "epoch": 3.78, "learning_rate": 2.9604940678972867e-05, "loss": 1.1725, "step": 6297000 }, { "epoch": 3.78, "learning_rate": 2.9602840713412304e-05, "loss": 1.1771, "step": 6297500 }, { "epoch": 3.78, "learning_rate": 2.960074074785173e-05, "loss": 1.1591, "step": 6298000 }, { "epoch": 3.78, "learning_rate": 2.9598644982222294e-05, "loss": 1.1779, "step": 6298500 }, { "epoch": 3.78, "learning_rate": 2.9596545016661728e-05, "loss": 1.1493, "step": 6299000 }, { "epoch": 3.78, "learning_rate": 2.959444925103228e-05, "loss": 1.1416, "step": 6299500 }, { "epoch": 3.78, "learning_rate": 2.9592349285471718e-05, "loss": 1.1302, "step": 6300000 }, { "epoch": 3.78, "eval_loss": 1.1142629384994507, "eval_runtime": 1100.8622, "eval_samples_per_second": 478.461, "eval_steps_per_second": 79.744, "step": 6300000 }, { "epoch": 3.78, "learning_rate": 2.9590249319911155e-05, "loss": 1.1701, "step": 6300500 }, { "epoch": 3.78, "learning_rate": 2.958814935435059e-05, "loss": 1.1607, "step": 6301000 }, { "epoch": 3.78, "learning_rate": 2.9586049388790025e-05, "loss": 1.1362, "step": 6301500 }, { "epoch": 3.78, "learning_rate": 2.9583949423229462e-05, "loss": 1.1545, "step": 6302000 }, { "epoch": 3.78, "learning_rate": 2.9581849457668892e-05, "loss": 1.1501, "step": 6302500 }, { "epoch": 3.78, "learning_rate": 2.9579749492108326e-05, "loss": 1.1484, "step": 6303000 }, { "epoch": 3.78, "learning_rate": 2.9577649526547762e-05, "loss": 1.1433, "step": 6303500 }, { "epoch": 3.78, "learning_rate": 2.95755495609872e-05, "loss": 1.1189, "step": 6304000 }, { "epoch": 3.78, "learning_rate": 2.957345379535776e-05, "loss": 1.1372, "step": 6304500 }, { "epoch": 3.78, "learning_rate": 2.957135382979719e-05, "loss": 1.1482, "step": 6305000 }, { "epoch": 3.78, "learning_rate": 2.9569253864236623e-05, "loss": 1.1552, "step": 6305500 }, { "epoch": 3.78, "learning_rate": 2.956715389867606e-05, "loss": 1.145, "step": 6306000 }, { "epoch": 3.78, "learning_rate": 2.9565053933115497e-05, "loss": 1.1358, "step": 6306500 }, { "epoch": 3.78, "learning_rate": 2.956295396755493e-05, "loss": 1.1507, "step": 6307000 }, { "epoch": 3.78, "learning_rate": 2.9560854001994367e-05, "loss": 1.1342, "step": 6307500 }, { "epoch": 3.78, "learning_rate": 2.9558754036433804e-05, "loss": 1.1372, "step": 6308000 }, { "epoch": 3.78, "learning_rate": 2.9556658270804358e-05, "loss": 1.133, "step": 6308500 }, { "epoch": 3.78, "learning_rate": 2.955455830524379e-05, "loss": 1.1505, "step": 6309000 }, { "epoch": 3.78, "learning_rate": 2.9552458339683228e-05, "loss": 1.1621, "step": 6309500 }, { "epoch": 3.78, "learning_rate": 2.9550358374122665e-05, "loss": 1.1193, "step": 6310000 }, { "epoch": 3.78, "learning_rate": 2.954826260849322e-05, "loss": 1.1537, "step": 6310500 }, { "epoch": 3.78, "learning_rate": 2.9546162642932655e-05, "loss": 1.1231, "step": 6311000 }, { "epoch": 3.78, "learning_rate": 2.954406267737209e-05, "loss": 1.171, "step": 6311500 }, { "epoch": 3.78, "learning_rate": 2.9541966911742646e-05, "loss": 1.1666, "step": 6312000 }, { "epoch": 3.78, "learning_rate": 2.953986694618208e-05, "loss": 1.1388, "step": 6312500 }, { "epoch": 3.78, "learning_rate": 2.953777118055264e-05, "loss": 1.1517, "step": 6313000 }, { "epoch": 3.79, "learning_rate": 2.9535671214992076e-05, "loss": 1.1569, "step": 6313500 }, { "epoch": 3.79, "learning_rate": 2.9533571249431513e-05, "loss": 1.1477, "step": 6314000 }, { "epoch": 3.79, "learning_rate": 2.953147128387094e-05, "loss": 1.1786, "step": 6314500 }, { "epoch": 3.79, "learning_rate": 2.9529371318310377e-05, "loss": 1.1513, "step": 6315000 }, { "epoch": 3.79, "learning_rate": 2.9527271352749813e-05, "loss": 1.1475, "step": 6315500 }, { "epoch": 3.79, "learning_rate": 2.9525171387189247e-05, "loss": 1.154, "step": 6316000 }, { "epoch": 3.79, "learning_rate": 2.9523071421628684e-05, "loss": 1.1686, "step": 6316500 }, { "epoch": 3.79, "learning_rate": 2.952097145606812e-05, "loss": 1.1663, "step": 6317000 }, { "epoch": 3.79, "learning_rate": 2.9518871490507554e-05, "loss": 1.1444, "step": 6317500 }, { "epoch": 3.79, "learning_rate": 2.951677152494699e-05, "loss": 1.1384, "step": 6318000 }, { "epoch": 3.79, "learning_rate": 2.9514671559386428e-05, "loss": 1.1615, "step": 6318500 }, { "epoch": 3.79, "learning_rate": 2.951257579375698e-05, "loss": 1.1554, "step": 6319000 }, { "epoch": 3.79, "learning_rate": 2.9510475828196418e-05, "loss": 1.1294, "step": 6319500 }, { "epoch": 3.79, "learning_rate": 2.950837586263585e-05, "loss": 1.1821, "step": 6320000 }, { "epoch": 3.79, "learning_rate": 2.950628009700641e-05, "loss": 1.1441, "step": 6320500 }, { "epoch": 3.79, "learning_rate": 2.9504180131445842e-05, "loss": 1.169, "step": 6321000 }, { "epoch": 3.79, "learning_rate": 2.950208016588528e-05, "loss": 1.1573, "step": 6321500 }, { "epoch": 3.79, "learning_rate": 2.9499980200324716e-05, "loss": 1.115, "step": 6322000 }, { "epoch": 3.79, "learning_rate": 2.949788023476415e-05, "loss": 1.1358, "step": 6322500 }, { "epoch": 3.79, "learning_rate": 2.9495780269203586e-05, "loss": 1.1601, "step": 6323000 }, { "epoch": 3.79, "learning_rate": 2.9493680303643023e-05, "loss": 1.1571, "step": 6323500 }, { "epoch": 3.79, "learning_rate": 2.9491580338082456e-05, "loss": 1.1412, "step": 6324000 }, { "epoch": 3.79, "learning_rate": 2.9489484572453013e-05, "loss": 1.1825, "step": 6324500 }, { "epoch": 3.79, "learning_rate": 2.9487384606892447e-05, "loss": 1.1538, "step": 6325000 }, { "epoch": 3.79, "learning_rate": 2.9485284641331884e-05, "loss": 1.1491, "step": 6325500 }, { "epoch": 3.79, "learning_rate": 2.9483188875702437e-05, "loss": 1.1397, "step": 6326000 }, { "epoch": 3.79, "learning_rate": 2.9481088910141874e-05, "loss": 1.1431, "step": 6326500 }, { "epoch": 3.79, "learning_rate": 2.9478988944581308e-05, "loss": 1.1394, "step": 6327000 }, { "epoch": 3.79, "learning_rate": 2.9476888979020744e-05, "loss": 1.1483, "step": 6327500 }, { "epoch": 3.79, "learning_rate": 2.9474793213391298e-05, "loss": 1.1308, "step": 6328000 }, { "epoch": 3.79, "learning_rate": 2.9472693247830735e-05, "loss": 1.1452, "step": 6328500 }, { "epoch": 3.79, "learning_rate": 2.947059328227017e-05, "loss": 1.1507, "step": 6329000 }, { "epoch": 3.79, "learning_rate": 2.9468493316709605e-05, "loss": 1.1404, "step": 6329500 }, { "epoch": 3.8, "learning_rate": 2.9466393351149042e-05, "loss": 1.1502, "step": 6330000 }, { "epoch": 3.8, "learning_rate": 2.946429338558848e-05, "loss": 1.1437, "step": 6330500 }, { "epoch": 3.8, "learning_rate": 2.9462197619959032e-05, "loss": 1.1592, "step": 6331000 }, { "epoch": 3.8, "learning_rate": 2.946009765439847e-05, "loss": 1.1501, "step": 6331500 }, { "epoch": 3.8, "learning_rate": 2.9457997688837903e-05, "loss": 1.1403, "step": 6332000 }, { "epoch": 3.8, "learning_rate": 2.945589772327734e-05, "loss": 1.1749, "step": 6332500 }, { "epoch": 3.8, "learning_rate": 2.9453801957647893e-05, "loss": 1.1462, "step": 6333000 }, { "epoch": 3.8, "learning_rate": 2.945170199208733e-05, "loss": 1.1396, "step": 6333500 }, { "epoch": 3.8, "learning_rate": 2.9449602026526763e-05, "loss": 1.1589, "step": 6334000 }, { "epoch": 3.8, "learning_rate": 2.94475020609662e-05, "loss": 1.1418, "step": 6334500 }, { "epoch": 3.8, "learning_rate": 2.9445402095405637e-05, "loss": 1.1617, "step": 6335000 }, { "epoch": 3.8, "learning_rate": 2.944330212984507e-05, "loss": 1.1423, "step": 6335500 }, { "epoch": 3.8, "learning_rate": 2.9441202164284507e-05, "loss": 1.1528, "step": 6336000 }, { "epoch": 3.8, "learning_rate": 2.9439102198723938e-05, "loss": 1.1143, "step": 6336500 }, { "epoch": 3.8, "learning_rate": 2.9437010633025615e-05, "loss": 1.1624, "step": 6337000 }, { "epoch": 3.8, "learning_rate": 2.943491066746505e-05, "loss": 1.1583, "step": 6337500 }, { "epoch": 3.8, "learning_rate": 2.9432810701904488e-05, "loss": 1.146, "step": 6338000 }, { "epoch": 3.8, "learning_rate": 2.9430710736343925e-05, "loss": 1.1578, "step": 6338500 }, { "epoch": 3.8, "learning_rate": 2.942861077078336e-05, "loss": 1.1176, "step": 6339000 }, { "epoch": 3.8, "learning_rate": 2.9426510805222795e-05, "loss": 1.1487, "step": 6339500 }, { "epoch": 3.8, "learning_rate": 2.942441503959335e-05, "loss": 1.127, "step": 6340000 }, { "epoch": 3.8, "learning_rate": 2.9422315074032786e-05, "loss": 1.1658, "step": 6340500 }, { "epoch": 3.8, "learning_rate": 2.942021510847222e-05, "loss": 1.1271, "step": 6341000 }, { "epoch": 3.8, "learning_rate": 2.9418115142911656e-05, "loss": 1.1245, "step": 6341500 }, { "epoch": 3.8, "learning_rate": 2.9416015177351093e-05, "loss": 1.1522, "step": 6342000 }, { "epoch": 3.8, "learning_rate": 2.9413919411721647e-05, "loss": 1.1556, "step": 6342500 }, { "epoch": 3.8, "learning_rate": 2.9411819446161083e-05, "loss": 1.1405, "step": 6343000 }, { "epoch": 3.8, "learning_rate": 2.9409719480600517e-05, "loss": 1.1715, "step": 6343500 }, { "epoch": 3.8, "learning_rate": 2.9407619515039954e-05, "loss": 1.1773, "step": 6344000 }, { "epoch": 3.8, "learning_rate": 2.940551954947939e-05, "loss": 1.171, "step": 6344500 }, { "epoch": 3.8, "learning_rate": 2.9403419583918824e-05, "loss": 1.1588, "step": 6345000 }, { "epoch": 3.8, "learning_rate": 2.940132381828938e-05, "loss": 1.166, "step": 6345500 }, { "epoch": 3.8, "learning_rate": 2.9399223852728814e-05, "loss": 1.1576, "step": 6346000 }, { "epoch": 3.8, "learning_rate": 2.939712388716825e-05, "loss": 1.1573, "step": 6346500 }, { "epoch": 3.81, "learning_rate": 2.9395023921607688e-05, "loss": 1.163, "step": 6347000 }, { "epoch": 3.81, "learning_rate": 2.939292395604712e-05, "loss": 1.1324, "step": 6347500 }, { "epoch": 3.81, "learning_rate": 2.939082399048656e-05, "loss": 1.1718, "step": 6348000 }, { "epoch": 3.81, "learning_rate": 2.938872402492599e-05, "loss": 1.1377, "step": 6348500 }, { "epoch": 3.81, "learning_rate": 2.9386624059365422e-05, "loss": 1.1313, "step": 6349000 }, { "epoch": 3.81, "learning_rate": 2.9384532493667102e-05, "loss": 1.139, "step": 6349500 }, { "epoch": 3.81, "learning_rate": 2.938243252810654e-05, "loss": 1.1574, "step": 6350000 }, { "epoch": 3.81, "learning_rate": 2.9380332562545973e-05, "loss": 1.153, "step": 6350500 }, { "epoch": 3.81, "learning_rate": 2.937823259698541e-05, "loss": 1.1503, "step": 6351000 }, { "epoch": 3.81, "learning_rate": 2.9376132631424846e-05, "loss": 1.1715, "step": 6351500 }, { "epoch": 3.81, "learning_rate": 2.93740368657954e-05, "loss": 1.1512, "step": 6352000 }, { "epoch": 3.81, "learning_rate": 2.9371936900234837e-05, "loss": 1.1293, "step": 6352500 }, { "epoch": 3.81, "learning_rate": 2.936983693467427e-05, "loss": 1.1209, "step": 6353000 }, { "epoch": 3.81, "learning_rate": 2.9367736969113707e-05, "loss": 1.1816, "step": 6353500 }, { "epoch": 3.81, "learning_rate": 2.936564120348426e-05, "loss": 1.1411, "step": 6354000 }, { "epoch": 3.81, "learning_rate": 2.9363541237923698e-05, "loss": 1.1558, "step": 6354500 }, { "epoch": 3.81, "learning_rate": 2.936144127236313e-05, "loss": 1.1356, "step": 6355000 }, { "epoch": 3.81, "learning_rate": 2.9359341306802568e-05, "loss": 1.156, "step": 6355500 }, { "epoch": 3.81, "learning_rate": 2.935724554117312e-05, "loss": 1.1202, "step": 6356000 }, { "epoch": 3.81, "learning_rate": 2.935514557561256e-05, "loss": 1.1772, "step": 6356500 }, { "epoch": 3.81, "learning_rate": 2.9353045610051995e-05, "loss": 1.1753, "step": 6357000 }, { "epoch": 3.81, "learning_rate": 2.935094564449143e-05, "loss": 1.1371, "step": 6357500 }, { "epoch": 3.81, "learning_rate": 2.9348845678930865e-05, "loss": 1.1471, "step": 6358000 }, { "epoch": 3.81, "learning_rate": 2.9346745713370302e-05, "loss": 1.1319, "step": 6358500 }, { "epoch": 3.81, "learning_rate": 2.9344645747809736e-05, "loss": 1.1418, "step": 6359000 }, { "epoch": 3.81, "learning_rate": 2.9342545782249173e-05, "loss": 1.1395, "step": 6359500 }, { "epoch": 3.81, "learning_rate": 2.9340454216550846e-05, "loss": 1.1293, "step": 6360000 }, { "epoch": 3.81, "learning_rate": 2.933835425099028e-05, "loss": 1.1455, "step": 6360500 }, { "epoch": 3.81, "learning_rate": 2.9336254285429717e-05, "loss": 1.1404, "step": 6361000 }, { "epoch": 3.81, "learning_rate": 2.9334154319869153e-05, "loss": 1.1588, "step": 6361500 }, { "epoch": 3.81, "learning_rate": 2.9332054354308587e-05, "loss": 1.1373, "step": 6362000 }, { "epoch": 3.81, "learning_rate": 2.9329954388748024e-05, "loss": 1.1468, "step": 6362500 }, { "epoch": 3.81, "learning_rate": 2.932785442318746e-05, "loss": 1.1653, "step": 6363000 }, { "epoch": 3.82, "learning_rate": 2.9325754457626897e-05, "loss": 1.1602, "step": 6363500 }, { "epoch": 3.82, "learning_rate": 2.932365449206633e-05, "loss": 1.1418, "step": 6364000 }, { "epoch": 3.82, "learning_rate": 2.9321554526505768e-05, "loss": 1.158, "step": 6364500 }, { "epoch": 3.82, "learning_rate": 2.9319454560945205e-05, "loss": 1.121, "step": 6365000 }, { "epoch": 3.82, "learning_rate": 2.931735459538463e-05, "loss": 1.1797, "step": 6365500 }, { "epoch": 3.82, "learning_rate": 2.931525882975519e-05, "loss": 1.1629, "step": 6366000 }, { "epoch": 3.82, "learning_rate": 2.931316306412575e-05, "loss": 1.1188, "step": 6366500 }, { "epoch": 3.82, "learning_rate": 2.9311063098565182e-05, "loss": 1.1251, "step": 6367000 }, { "epoch": 3.82, "learning_rate": 2.930896313300462e-05, "loss": 1.141, "step": 6367500 }, { "epoch": 3.82, "learning_rate": 2.9306863167444056e-05, "loss": 1.1275, "step": 6368000 }, { "epoch": 3.82, "learning_rate": 2.930476320188349e-05, "loss": 1.1674, "step": 6368500 }, { "epoch": 3.82, "learning_rate": 2.9302663236322926e-05, "loss": 1.1019, "step": 6369000 }, { "epoch": 3.82, "learning_rate": 2.93005716706246e-05, "loss": 1.1742, "step": 6369500 }, { "epoch": 3.82, "learning_rate": 2.9298471705064033e-05, "loss": 1.1239, "step": 6370000 }, { "epoch": 3.82, "learning_rate": 2.929637173950347e-05, "loss": 1.1452, "step": 6370500 }, { "epoch": 3.82, "learning_rate": 2.9294271773942907e-05, "loss": 1.1468, "step": 6371000 }, { "epoch": 3.82, "learning_rate": 2.929217180838234e-05, "loss": 1.1493, "step": 6371500 }, { "epoch": 3.82, "learning_rate": 2.9290071842821777e-05, "loss": 1.1628, "step": 6372000 }, { "epoch": 3.82, "learning_rate": 2.9287971877261214e-05, "loss": 1.1375, "step": 6372500 }, { "epoch": 3.82, "learning_rate": 2.9285871911700648e-05, "loss": 1.1646, "step": 6373000 }, { "epoch": 3.82, "learning_rate": 2.9283771946140084e-05, "loss": 1.1526, "step": 6373500 }, { "epoch": 3.82, "learning_rate": 2.928167198057952e-05, "loss": 1.1391, "step": 6374000 }, { "epoch": 3.82, "learning_rate": 2.9279576214950075e-05, "loss": 1.1551, "step": 6374500 }, { "epoch": 3.82, "learning_rate": 2.927747624938951e-05, "loss": 1.158, "step": 6375000 }, { "epoch": 3.82, "learning_rate": 2.9275376283828945e-05, "loss": 1.1802, "step": 6375500 }, { "epoch": 3.82, "learning_rate": 2.9273276318268382e-05, "loss": 1.1535, "step": 6376000 }, { "epoch": 3.82, "learning_rate": 2.927117635270782e-05, "loss": 1.1703, "step": 6376500 }, { "epoch": 3.82, "learning_rate": 2.926907638714725e-05, "loss": 1.137, "step": 6377000 }, { "epoch": 3.82, "learning_rate": 2.926698062151781e-05, "loss": 1.1448, "step": 6377500 }, { "epoch": 3.82, "learning_rate": 2.9264880655957243e-05, "loss": 1.1348, "step": 6378000 }, { "epoch": 3.82, "learning_rate": 2.926278069039668e-05, "loss": 1.1508, "step": 6378500 }, { "epoch": 3.82, "learning_rate": 2.9260680724836116e-05, "loss": 1.1369, "step": 6379000 }, { "epoch": 3.82, "learning_rate": 2.9258580759275543e-05, "loss": 1.1485, "step": 6379500 }, { "epoch": 3.83, "learning_rate": 2.925648079371498e-05, "loss": 1.1537, "step": 6380000 }, { "epoch": 3.83, "learning_rate": 2.9254380828154417e-05, "loss": 1.1395, "step": 6380500 }, { "epoch": 3.83, "learning_rate": 2.925228086259385e-05, "loss": 1.1564, "step": 6381000 }, { "epoch": 3.83, "learning_rate": 2.925018509696441e-05, "loss": 1.1556, "step": 6381500 }, { "epoch": 3.83, "learning_rate": 2.924808513140384e-05, "loss": 1.1532, "step": 6382000 }, { "epoch": 3.83, "learning_rate": 2.9245985165843278e-05, "loss": 1.1614, "step": 6382500 }, { "epoch": 3.83, "learning_rate": 2.9243893600144958e-05, "loss": 1.145, "step": 6383000 }, { "epoch": 3.83, "learning_rate": 2.924179363458439e-05, "loss": 1.1421, "step": 6383500 }, { "epoch": 3.83, "learning_rate": 2.9239693669023828e-05, "loss": 1.1744, "step": 6384000 }, { "epoch": 3.83, "learning_rate": 2.9237593703463265e-05, "loss": 1.1265, "step": 6384500 }, { "epoch": 3.83, "learning_rate": 2.92354937379027e-05, "loss": 1.1366, "step": 6385000 }, { "epoch": 3.83, "learning_rate": 2.9233393772342135e-05, "loss": 1.1851, "step": 6385500 }, { "epoch": 3.83, "learning_rate": 2.9231293806781572e-05, "loss": 1.1504, "step": 6386000 }, { "epoch": 3.83, "learning_rate": 2.9229193841221006e-05, "loss": 1.1536, "step": 6386500 }, { "epoch": 3.83, "learning_rate": 2.9227093875660436e-05, "loss": 1.1324, "step": 6387000 }, { "epoch": 3.83, "learning_rate": 2.9224993910099873e-05, "loss": 1.1473, "step": 6387500 }, { "epoch": 3.83, "learning_rate": 2.9222898144470433e-05, "loss": 1.1204, "step": 6388000 }, { "epoch": 3.83, "learning_rate": 2.9220798178909866e-05, "loss": 1.1727, "step": 6388500 }, { "epoch": 3.83, "learning_rate": 2.9218698213349297e-05, "loss": 1.137, "step": 6389000 }, { "epoch": 3.83, "learning_rate": 2.9216598247788733e-05, "loss": 1.1289, "step": 6389500 }, { "epoch": 3.83, "learning_rate": 2.921449828222817e-05, "loss": 1.1758, "step": 6390000 }, { "epoch": 3.83, "learning_rate": 2.921240251659873e-05, "loss": 1.1721, "step": 6390500 }, { "epoch": 3.83, "learning_rate": 2.9210302551038164e-05, "loss": 1.1607, "step": 6391000 }, { "epoch": 3.83, "learning_rate": 2.9208202585477594e-05, "loss": 1.1545, "step": 6391500 }, { "epoch": 3.83, "learning_rate": 2.920610261991703e-05, "loss": 1.1675, "step": 6392000 }, { "epoch": 3.83, "learning_rate": 2.9204002654356468e-05, "loss": 1.1299, "step": 6392500 }, { "epoch": 3.83, "learning_rate": 2.9201906888727028e-05, "loss": 1.1492, "step": 6393000 }, { "epoch": 3.83, "learning_rate": 2.919980692316646e-05, "loss": 1.1301, "step": 6393500 }, { "epoch": 3.83, "learning_rate": 2.919770695760589e-05, "loss": 1.1461, "step": 6394000 }, { "epoch": 3.83, "learning_rate": 2.919560699204533e-05, "loss": 1.1367, "step": 6394500 }, { "epoch": 3.83, "learning_rate": 2.9193507026484762e-05, "loss": 1.1604, "step": 6395000 }, { "epoch": 3.83, "learning_rate": 2.91914070609242e-05, "loss": 1.1456, "step": 6395500 }, { "epoch": 3.83, "learning_rate": 2.918931549522588e-05, "loss": 1.1273, "step": 6396000 }, { "epoch": 3.83, "learning_rate": 2.9187215529665313e-05, "loss": 1.1729, "step": 6396500 }, { "epoch": 3.84, "learning_rate": 2.918511556410475e-05, "loss": 1.1879, "step": 6397000 }, { "epoch": 3.84, "learning_rate": 2.9183015598544186e-05, "loss": 1.1485, "step": 6397500 }, { "epoch": 3.84, "learning_rate": 2.918091563298362e-05, "loss": 1.1499, "step": 6398000 }, { "epoch": 3.84, "learning_rate": 2.917881566742305e-05, "loss": 1.1123, "step": 6398500 }, { "epoch": 3.84, "learning_rate": 2.9176715701862487e-05, "loss": 1.1355, "step": 6399000 }, { "epoch": 3.84, "learning_rate": 2.9174615736301924e-05, "loss": 1.1486, "step": 6399500 }, { "epoch": 3.84, "learning_rate": 2.9172515770741357e-05, "loss": 1.1244, "step": 6400000 }, { "epoch": 3.84, "eval_loss": 1.1107254028320312, "eval_runtime": 1100.5378, "eval_samples_per_second": 478.602, "eval_steps_per_second": 79.767, "step": 6400000 }, { "epoch": 3.84, "learning_rate": 2.9170420005111918e-05, "loss": 1.1573, "step": 6400500 }, { "epoch": 3.84, "learning_rate": 2.9168320039551348e-05, "loss": 1.1234, "step": 6401000 }, { "epoch": 3.84, "learning_rate": 2.9166220073990784e-05, "loss": 1.144, "step": 6401500 }, { "epoch": 3.84, "learning_rate": 2.9164124308361345e-05, "loss": 1.1565, "step": 6402000 }, { "epoch": 3.84, "learning_rate": 2.916202434280078e-05, "loss": 1.152, "step": 6402500 }, { "epoch": 3.84, "learning_rate": 2.9159924377240215e-05, "loss": 1.1448, "step": 6403000 }, { "epoch": 3.84, "learning_rate": 2.9157824411679645e-05, "loss": 1.1413, "step": 6403500 }, { "epoch": 3.84, "learning_rate": 2.9155724446119082e-05, "loss": 1.1792, "step": 6404000 }, { "epoch": 3.84, "learning_rate": 2.9153624480558515e-05, "loss": 1.1571, "step": 6404500 }, { "epoch": 3.84, "learning_rate": 2.9151524514997952e-05, "loss": 1.1721, "step": 6405000 }, { "epoch": 3.84, "learning_rate": 2.914942454943739e-05, "loss": 1.1198, "step": 6405500 }, { "epoch": 3.84, "learning_rate": 2.9147324583876823e-05, "loss": 1.1264, "step": 6406000 }, { "epoch": 3.84, "learning_rate": 2.914522461831626e-05, "loss": 1.1476, "step": 6406500 }, { "epoch": 3.84, "learning_rate": 2.9143124652755696e-05, "loss": 1.1388, "step": 6407000 }, { "epoch": 3.84, "learning_rate": 2.914102888712625e-05, "loss": 1.1306, "step": 6407500 }, { "epoch": 3.84, "learning_rate": 2.9138928921565687e-05, "loss": 1.1588, "step": 6408000 }, { "epoch": 3.84, "learning_rate": 2.913682895600512e-05, "loss": 1.1406, "step": 6408500 }, { "epoch": 3.84, "learning_rate": 2.9134728990444557e-05, "loss": 1.1738, "step": 6409000 }, { "epoch": 3.84, "learning_rate": 2.9132629024883994e-05, "loss": 1.1782, "step": 6409500 }, { "epoch": 3.84, "learning_rate": 2.9130529059323427e-05, "loss": 1.119, "step": 6410000 }, { "epoch": 3.84, "learning_rate": 2.9128429093762864e-05, "loss": 1.1334, "step": 6410500 }, { "epoch": 3.84, "learning_rate": 2.91263291282023e-05, "loss": 1.1608, "step": 6411000 }, { "epoch": 3.84, "learning_rate": 2.9124233362572855e-05, "loss": 1.1497, "step": 6411500 }, { "epoch": 3.84, "learning_rate": 2.912213339701229e-05, "loss": 1.1336, "step": 6412000 }, { "epoch": 3.84, "learning_rate": 2.9120033431451725e-05, "loss": 1.1536, "step": 6412500 }, { "epoch": 3.84, "learning_rate": 2.911793766582228e-05, "loss": 1.1429, "step": 6413000 }, { "epoch": 3.85, "learning_rate": 2.9115837700261715e-05, "loss": 1.1292, "step": 6413500 }, { "epoch": 3.85, "learning_rate": 2.9113737734701152e-05, "loss": 1.1498, "step": 6414000 }, { "epoch": 3.85, "learning_rate": 2.9111637769140586e-05, "loss": 1.1494, "step": 6414500 }, { "epoch": 3.85, "learning_rate": 2.9109537803580022e-05, "loss": 1.1556, "step": 6415000 }, { "epoch": 3.85, "learning_rate": 2.9107442037950576e-05, "loss": 1.1526, "step": 6415500 }, { "epoch": 3.85, "learning_rate": 2.9105342072390013e-05, "loss": 1.151, "step": 6416000 }, { "epoch": 3.85, "learning_rate": 2.910324210682945e-05, "loss": 1.135, "step": 6416500 }, { "epoch": 3.85, "learning_rate": 2.9101142141268883e-05, "loss": 1.1464, "step": 6417000 }, { "epoch": 3.85, "learning_rate": 2.909904217570832e-05, "loss": 1.1554, "step": 6417500 }, { "epoch": 3.85, "learning_rate": 2.9096942210147757e-05, "loss": 1.1383, "step": 6418000 }, { "epoch": 3.85, "learning_rate": 2.9094842244587187e-05, "loss": 1.1781, "step": 6418500 }, { "epoch": 3.85, "learning_rate": 2.909274227902662e-05, "loss": 1.1394, "step": 6419000 }, { "epoch": 3.85, "learning_rate": 2.909064651339718e-05, "loss": 1.1346, "step": 6419500 }, { "epoch": 3.85, "learning_rate": 2.9088546547836618e-05, "loss": 1.153, "step": 6420000 }, { "epoch": 3.85, "learning_rate": 2.9086446582276054e-05, "loss": 1.1331, "step": 6420500 }, { "epoch": 3.85, "learning_rate": 2.908434661671548e-05, "loss": 1.1259, "step": 6421000 }, { "epoch": 3.85, "learning_rate": 2.908225085108604e-05, "loss": 1.1542, "step": 6421500 }, { "epoch": 3.85, "learning_rate": 2.908015088552548e-05, "loss": 1.136, "step": 6422000 }, { "epoch": 3.85, "learning_rate": 2.9078050919964915e-05, "loss": 1.1283, "step": 6422500 }, { "epoch": 3.85, "learning_rate": 2.9075950954404352e-05, "loss": 1.1524, "step": 6423000 }, { "epoch": 3.85, "learning_rate": 2.907385098884378e-05, "loss": 1.1465, "step": 6423500 }, { "epoch": 3.85, "learning_rate": 2.9071751023283216e-05, "loss": 1.1425, "step": 6424000 }, { "epoch": 3.85, "learning_rate": 2.9069651057722652e-05, "loss": 1.1341, "step": 6424500 }, { "epoch": 3.85, "learning_rate": 2.9067555292093213e-05, "loss": 1.1486, "step": 6425000 }, { "epoch": 3.85, "learning_rate": 2.9065455326532643e-05, "loss": 1.1451, "step": 6425500 }, { "epoch": 3.85, "learning_rate": 2.9063355360972076e-05, "loss": 1.1687, "step": 6426000 }, { "epoch": 3.85, "learning_rate": 2.9061255395411513e-05, "loss": 1.1163, "step": 6426500 }, { "epoch": 3.85, "learning_rate": 2.905915542985095e-05, "loss": 1.1413, "step": 6427000 }, { "epoch": 3.85, "learning_rate": 2.9057055464290383e-05, "loss": 1.1364, "step": 6427500 }, { "epoch": 3.85, "learning_rate": 2.905495549872982e-05, "loss": 1.1197, "step": 6428000 }, { "epoch": 3.85, "learning_rate": 2.9052859733100374e-05, "loss": 1.1473, "step": 6428500 }, { "epoch": 3.85, "learning_rate": 2.905075976753981e-05, "loss": 1.1556, "step": 6429000 }, { "epoch": 3.85, "learning_rate": 2.9048659801979248e-05, "loss": 1.1462, "step": 6429500 }, { "epoch": 3.86, "learning_rate": 2.904655983641868e-05, "loss": 1.1499, "step": 6430000 }, { "epoch": 3.86, "learning_rate": 2.9044464070789235e-05, "loss": 1.1586, "step": 6430500 }, { "epoch": 3.86, "learning_rate": 2.904236410522867e-05, "loss": 1.147, "step": 6431000 }, { "epoch": 3.86, "learning_rate": 2.904026413966811e-05, "loss": 1.1532, "step": 6431500 }, { "epoch": 3.86, "learning_rate": 2.9038164174107542e-05, "loss": 1.1314, "step": 6432000 }, { "epoch": 3.86, "learning_rate": 2.903606420854698e-05, "loss": 1.1111, "step": 6432500 }, { "epoch": 3.86, "learning_rate": 2.9033968442917532e-05, "loss": 1.1443, "step": 6433000 }, { "epoch": 3.86, "learning_rate": 2.903186847735697e-05, "loss": 1.1481, "step": 6433500 }, { "epoch": 3.86, "learning_rate": 2.9029768511796406e-05, "loss": 1.1732, "step": 6434000 }, { "epoch": 3.86, "learning_rate": 2.902766854623584e-05, "loss": 1.1428, "step": 6434500 }, { "epoch": 3.86, "learning_rate": 2.9025572780606393e-05, "loss": 1.1729, "step": 6435000 }, { "epoch": 3.86, "learning_rate": 2.902347281504583e-05, "loss": 1.165, "step": 6435500 }, { "epoch": 3.86, "learning_rate": 2.9021372849485267e-05, "loss": 1.1532, "step": 6436000 }, { "epoch": 3.86, "learning_rate": 2.9019272883924703e-05, "loss": 1.137, "step": 6436500 }, { "epoch": 3.86, "learning_rate": 2.9017177118295264e-05, "loss": 1.1305, "step": 6437000 }, { "epoch": 3.86, "learning_rate": 2.901507715273469e-05, "loss": 1.178, "step": 6437500 }, { "epoch": 3.86, "learning_rate": 2.9012977187174127e-05, "loss": 1.1525, "step": 6438000 }, { "epoch": 3.86, "learning_rate": 2.9010877221613564e-05, "loss": 1.1507, "step": 6438500 }, { "epoch": 3.86, "learning_rate": 2.9008777256052998e-05, "loss": 1.143, "step": 6439000 }, { "epoch": 3.86, "learning_rate": 2.9006677290492434e-05, "loss": 1.1121, "step": 6439500 }, { "epoch": 3.86, "learning_rate": 2.900457732493187e-05, "loss": 1.1336, "step": 6440000 }, { "epoch": 3.86, "learning_rate": 2.9002477359371305e-05, "loss": 1.1587, "step": 6440500 }, { "epoch": 3.86, "learning_rate": 2.9000385793672985e-05, "loss": 1.1389, "step": 6441000 }, { "epoch": 3.86, "learning_rate": 2.8998285828112422e-05, "loss": 1.1539, "step": 6441500 }, { "epoch": 3.86, "learning_rate": 2.8996185862551856e-05, "loss": 1.1405, "step": 6442000 }, { "epoch": 3.86, "learning_rate": 2.8994085896991286e-05, "loss": 1.1906, "step": 6442500 }, { "epoch": 3.86, "learning_rate": 2.8991985931430722e-05, "loss": 1.1849, "step": 6443000 }, { "epoch": 3.86, "learning_rate": 2.8989890165801283e-05, "loss": 1.1908, "step": 6443500 }, { "epoch": 3.86, "learning_rate": 2.898779020024072e-05, "loss": 1.1568, "step": 6444000 }, { "epoch": 3.86, "learning_rate": 2.8985690234680146e-05, "loss": 1.1257, "step": 6444500 }, { "epoch": 3.86, "learning_rate": 2.8983590269119583e-05, "loss": 1.1728, "step": 6445000 }, { "epoch": 3.86, "learning_rate": 2.8981494503490144e-05, "loss": 1.1386, "step": 6445500 }, { "epoch": 3.86, "learning_rate": 2.897939453792958e-05, "loss": 1.1422, "step": 6446000 }, { "epoch": 3.86, "learning_rate": 2.8977294572369014e-05, "loss": 1.1376, "step": 6446500 }, { "epoch": 3.87, "learning_rate": 2.8975194606808444e-05, "loss": 1.1537, "step": 6447000 }, { "epoch": 3.87, "learning_rate": 2.897309464124788e-05, "loss": 1.1788, "step": 6447500 }, { "epoch": 3.87, "learning_rate": 2.897099887561844e-05, "loss": 1.1385, "step": 6448000 }, { "epoch": 3.87, "learning_rate": 2.8968903109988995e-05, "loss": 1.1402, "step": 6448500 }, { "epoch": 3.87, "learning_rate": 2.896680314442843e-05, "loss": 1.1382, "step": 6449000 }, { "epoch": 3.87, "learning_rate": 2.896470317886787e-05, "loss": 1.1366, "step": 6449500 }, { "epoch": 3.87, "learning_rate": 2.8962603213307302e-05, "loss": 1.1474, "step": 6450000 }, { "epoch": 3.87, "learning_rate": 2.8960507447677855e-05, "loss": 1.1267, "step": 6450500 }, { "epoch": 3.87, "learning_rate": 2.8958407482117292e-05, "loss": 1.1365, "step": 6451000 }, { "epoch": 3.87, "learning_rate": 2.895630751655673e-05, "loss": 1.155, "step": 6451500 }, { "epoch": 3.87, "learning_rate": 2.8954207550996163e-05, "loss": 1.1758, "step": 6452000 }, { "epoch": 3.87, "learning_rate": 2.89521075854356e-05, "loss": 1.1655, "step": 6452500 }, { "epoch": 3.87, "learning_rate": 2.8950007619875036e-05, "loss": 1.1277, "step": 6453000 }, { "epoch": 3.87, "learning_rate": 2.894790765431447e-05, "loss": 1.139, "step": 6453500 }, { "epoch": 3.87, "learning_rate": 2.8945807688753907e-05, "loss": 1.1529, "step": 6454000 }, { "epoch": 3.87, "learning_rate": 2.8943707723193337e-05, "loss": 1.1426, "step": 6454500 }, { "epoch": 3.87, "learning_rate": 2.8941616157495017e-05, "loss": 1.157, "step": 6455000 }, { "epoch": 3.87, "learning_rate": 2.893951619193445e-05, "loss": 1.159, "step": 6455500 }, { "epoch": 3.87, "learning_rate": 2.8937416226373887e-05, "loss": 1.1571, "step": 6456000 }, { "epoch": 3.87, "learning_rate": 2.8935316260813324e-05, "loss": 1.1744, "step": 6456500 }, { "epoch": 3.87, "learning_rate": 2.8933216295252758e-05, "loss": 1.1638, "step": 6457000 }, { "epoch": 3.87, "learning_rate": 2.8931116329692195e-05, "loss": 1.1291, "step": 6457500 }, { "epoch": 3.87, "learning_rate": 2.892901636413163e-05, "loss": 1.1318, "step": 6458000 }, { "epoch": 3.87, "learning_rate": 2.8926916398571065e-05, "loss": 1.1379, "step": 6458500 }, { "epoch": 3.87, "learning_rate": 2.8924816433010495e-05, "loss": 1.1387, "step": 6459000 }, { "epoch": 3.87, "learning_rate": 2.8922720667381055e-05, "loss": 1.1413, "step": 6459500 }, { "epoch": 3.87, "learning_rate": 2.8920620701820492e-05, "loss": 1.127, "step": 6460000 }, { "epoch": 3.87, "learning_rate": 2.8918520736259926e-05, "loss": 1.167, "step": 6460500 }, { "epoch": 3.87, "learning_rate": 2.8916420770699362e-05, "loss": 1.1246, "step": 6461000 }, { "epoch": 3.87, "learning_rate": 2.8914320805138793e-05, "loss": 1.1271, "step": 6461500 }, { "epoch": 3.87, "learning_rate": 2.891222083957823e-05, "loss": 1.1196, "step": 6462000 }, { "epoch": 3.87, "learning_rate": 2.891012507394879e-05, "loss": 1.1589, "step": 6462500 }, { "epoch": 3.87, "learning_rate": 2.8908025108388223e-05, "loss": 1.1359, "step": 6463000 }, { "epoch": 3.88, "learning_rate": 2.890592514282766e-05, "loss": 1.1473, "step": 6463500 }, { "epoch": 3.88, "learning_rate": 2.890382517726709e-05, "loss": 1.16, "step": 6464000 }, { "epoch": 3.88, "learning_rate": 2.8901725211706527e-05, "loss": 1.1472, "step": 6464500 }, { "epoch": 3.88, "learning_rate": 2.8899629446077087e-05, "loss": 1.1498, "step": 6465000 }, { "epoch": 3.88, "learning_rate": 2.889752948051652e-05, "loss": 1.1576, "step": 6465500 }, { "epoch": 3.88, "learning_rate": 2.889542951495595e-05, "loss": 1.1406, "step": 6466000 }, { "epoch": 3.88, "learning_rate": 2.8893329549395388e-05, "loss": 1.1619, "step": 6466500 }, { "epoch": 3.88, "learning_rate": 2.889122958383482e-05, "loss": 1.1479, "step": 6467000 }, { "epoch": 3.88, "learning_rate": 2.8889129618274258e-05, "loss": 1.1235, "step": 6467500 }, { "epoch": 3.88, "learning_rate": 2.8887029652713695e-05, "loss": 1.1605, "step": 6468000 }, { "epoch": 3.88, "learning_rate": 2.8884929687153132e-05, "loss": 1.1235, "step": 6468500 }, { "epoch": 3.88, "learning_rate": 2.8882833921523685e-05, "loss": 1.1486, "step": 6469000 }, { "epoch": 3.88, "learning_rate": 2.888073395596312e-05, "loss": 1.1016, "step": 6469500 }, { "epoch": 3.88, "learning_rate": 2.887863819033368e-05, "loss": 1.1495, "step": 6470000 }, { "epoch": 3.88, "learning_rate": 2.8876538224773116e-05, "loss": 1.1418, "step": 6470500 }, { "epoch": 3.88, "learning_rate": 2.8874438259212546e-05, "loss": 1.1416, "step": 6471000 }, { "epoch": 3.88, "learning_rate": 2.8872338293651983e-05, "loss": 1.1443, "step": 6471500 }, { "epoch": 3.88, "learning_rate": 2.8870238328091416e-05, "loss": 1.1518, "step": 6472000 }, { "epoch": 3.88, "learning_rate": 2.8868138362530853e-05, "loss": 1.1398, "step": 6472500 }, { "epoch": 3.88, "learning_rate": 2.886603839697029e-05, "loss": 1.1661, "step": 6473000 }, { "epoch": 3.88, "learning_rate": 2.8863938431409723e-05, "loss": 1.1479, "step": 6473500 }, { "epoch": 3.88, "learning_rate": 2.8861842665780277e-05, "loss": 1.1499, "step": 6474000 }, { "epoch": 3.88, "learning_rate": 2.885974690015084e-05, "loss": 1.1365, "step": 6474500 }, { "epoch": 3.88, "learning_rate": 2.8857646934590274e-05, "loss": 1.1503, "step": 6475000 }, { "epoch": 3.88, "learning_rate": 2.8855546969029704e-05, "loss": 1.1549, "step": 6475500 }, { "epoch": 3.88, "learning_rate": 2.885344700346914e-05, "loss": 1.1305, "step": 6476000 }, { "epoch": 3.88, "learning_rate": 2.8851347037908575e-05, "loss": 1.1436, "step": 6476500 }, { "epoch": 3.88, "learning_rate": 2.8849251272279135e-05, "loss": 1.1596, "step": 6477000 }, { "epoch": 3.88, "learning_rate": 2.8847151306718572e-05, "loss": 1.1337, "step": 6477500 }, { "epoch": 3.88, "learning_rate": 2.8845051341158002e-05, "loss": 1.1407, "step": 6478000 }, { "epoch": 3.88, "learning_rate": 2.884295137559744e-05, "loss": 1.1245, "step": 6478500 }, { "epoch": 3.88, "learning_rate": 2.8840851410036872e-05, "loss": 1.146, "step": 6479000 }, { "epoch": 3.88, "learning_rate": 2.8838755644407433e-05, "loss": 1.1464, "step": 6479500 }, { "epoch": 3.89, "learning_rate": 2.883665567884687e-05, "loss": 1.1535, "step": 6480000 }, { "epoch": 3.89, "learning_rate": 2.88345557132863e-05, "loss": 1.1589, "step": 6480500 }, { "epoch": 3.89, "learning_rate": 2.8832455747725736e-05, "loss": 1.1364, "step": 6481000 }, { "epoch": 3.89, "learning_rate": 2.883035578216517e-05, "loss": 1.1205, "step": 6481500 }, { "epoch": 3.89, "learning_rate": 2.882826001653573e-05, "loss": 1.1261, "step": 6482000 }, { "epoch": 3.89, "learning_rate": 2.8826160050975167e-05, "loss": 1.1695, "step": 6482500 }, { "epoch": 3.89, "learning_rate": 2.8824060085414597e-05, "loss": 1.1242, "step": 6483000 }, { "epoch": 3.89, "learning_rate": 2.882196011985403e-05, "loss": 1.143, "step": 6483500 }, { "epoch": 3.89, "learning_rate": 2.8819860154293467e-05, "loss": 1.1313, "step": 6484000 }, { "epoch": 3.89, "learning_rate": 2.8817764388664028e-05, "loss": 1.1538, "step": 6484500 }, { "epoch": 3.89, "learning_rate": 2.8815664423103465e-05, "loss": 1.1352, "step": 6485000 }, { "epoch": 3.89, "learning_rate": 2.8813564457542895e-05, "loss": 1.1497, "step": 6485500 }, { "epoch": 3.89, "learning_rate": 2.8811464491982328e-05, "loss": 1.1356, "step": 6486000 }, { "epoch": 3.89, "learning_rate": 2.880936872635289e-05, "loss": 1.1486, "step": 6486500 }, { "epoch": 3.89, "learning_rate": 2.8807268760792325e-05, "loss": 1.118, "step": 6487000 }, { "epoch": 3.89, "learning_rate": 2.8805168795231755e-05, "loss": 1.1366, "step": 6487500 }, { "epoch": 3.89, "learning_rate": 2.8803068829671192e-05, "loss": 1.1601, "step": 6488000 }, { "epoch": 3.89, "learning_rate": 2.8800968864110626e-05, "loss": 1.1001, "step": 6488500 }, { "epoch": 3.89, "learning_rate": 2.8798868898550062e-05, "loss": 1.1464, "step": 6489000 }, { "epoch": 3.89, "learning_rate": 2.8796773132920623e-05, "loss": 1.1466, "step": 6489500 }, { "epoch": 3.89, "learning_rate": 2.8794673167360053e-05, "loss": 1.1205, "step": 6490000 }, { "epoch": 3.89, "learning_rate": 2.8792573201799486e-05, "loss": 1.1337, "step": 6490500 }, { "epoch": 3.89, "learning_rate": 2.8790473236238923e-05, "loss": 1.1529, "step": 6491000 }, { "epoch": 3.89, "learning_rate": 2.878837327067836e-05, "loss": 1.1405, "step": 6491500 }, { "epoch": 3.89, "learning_rate": 2.878627750504892e-05, "loss": 1.1446, "step": 6492000 }, { "epoch": 3.89, "learning_rate": 2.878417753948835e-05, "loss": 1.1416, "step": 6492500 }, { "epoch": 3.89, "learning_rate": 2.8782077573927784e-05, "loss": 1.1228, "step": 6493000 }, { "epoch": 3.89, "learning_rate": 2.877997760836722e-05, "loss": 1.1209, "step": 6493500 }, { "epoch": 3.89, "learning_rate": 2.8777877642806658e-05, "loss": 1.169, "step": 6494000 }, { "epoch": 3.89, "learning_rate": 2.8775781877177218e-05, "loss": 1.1356, "step": 6494500 }, { "epoch": 3.89, "learning_rate": 2.877368611154777e-05, "loss": 1.1532, "step": 6495000 }, { "epoch": 3.89, "learning_rate": 2.877158614598721e-05, "loss": 1.147, "step": 6495500 }, { "epoch": 3.89, "learning_rate": 2.8769486180426642e-05, "loss": 1.1345, "step": 6496000 }, { "epoch": 3.89, "learning_rate": 2.876738621486608e-05, "loss": 1.1453, "step": 6496500 }, { "epoch": 3.9, "learning_rate": 2.876528624930551e-05, "loss": 1.1289, "step": 6497000 }, { "epoch": 3.9, "learning_rate": 2.8763186283744942e-05, "loss": 1.1727, "step": 6497500 }, { "epoch": 3.9, "learning_rate": 2.876108631818438e-05, "loss": 1.1603, "step": 6498000 }, { "epoch": 3.9, "learning_rate": 2.8758986352623816e-05, "loss": 1.1164, "step": 6498500 }, { "epoch": 3.9, "learning_rate": 2.875688638706325e-05, "loss": 1.1503, "step": 6499000 }, { "epoch": 3.9, "learning_rate": 2.8754790621433806e-05, "loss": 1.1488, "step": 6499500 }, { "epoch": 3.9, "learning_rate": 2.875269065587324e-05, "loss": 1.1639, "step": 6500000 }, { "epoch": 3.9, "eval_loss": 1.1070631742477417, "eval_runtime": 1102.123, "eval_samples_per_second": 477.914, "eval_steps_per_second": 79.653, "step": 6500000 }, { "epoch": 3.9, "learning_rate": 2.8750590690312677e-05, "loss": 1.1265, "step": 6500500 }, { "epoch": 3.9, "learning_rate": 2.8748490724752114e-05, "loss": 1.1418, "step": 6501000 }, { "epoch": 3.9, "learning_rate": 2.8746390759191547e-05, "loss": 1.1576, "step": 6501500 }, { "epoch": 3.9, "learning_rate": 2.8744294993562104e-05, "loss": 1.1346, "step": 6502000 }, { "epoch": 3.9, "learning_rate": 2.8742195028001537e-05, "loss": 1.1479, "step": 6502500 }, { "epoch": 3.9, "learning_rate": 2.8740095062440974e-05, "loss": 1.127, "step": 6503000 }, { "epoch": 3.9, "learning_rate": 2.873799509688041e-05, "loss": 1.179, "step": 6503500 }, { "epoch": 3.9, "learning_rate": 2.8735895131319845e-05, "loss": 1.1507, "step": 6504000 }, { "epoch": 3.9, "learning_rate": 2.8733799365690398e-05, "loss": 1.1546, "step": 6504500 }, { "epoch": 3.9, "learning_rate": 2.8731699400129835e-05, "loss": 1.1458, "step": 6505000 }, { "epoch": 3.9, "learning_rate": 2.8729599434569272e-05, "loss": 1.1623, "step": 6505500 }, { "epoch": 3.9, "learning_rate": 2.8727499469008705e-05, "loss": 1.1579, "step": 6506000 }, { "epoch": 3.9, "learning_rate": 2.8725399503448142e-05, "loss": 1.1621, "step": 6506500 }, { "epoch": 3.9, "learning_rate": 2.8723303737818696e-05, "loss": 1.1139, "step": 6507000 }, { "epoch": 3.9, "learning_rate": 2.8721203772258133e-05, "loss": 1.1145, "step": 6507500 }, { "epoch": 3.9, "learning_rate": 2.871910380669757e-05, "loss": 1.1221, "step": 6508000 }, { "epoch": 3.9, "learning_rate": 2.8717003841137003e-05, "loss": 1.1303, "step": 6508500 }, { "epoch": 3.9, "learning_rate": 2.871490387557644e-05, "loss": 1.1757, "step": 6509000 }, { "epoch": 3.9, "learning_rate": 2.8712803910015877e-05, "loss": 1.1302, "step": 6509500 }, { "epoch": 3.9, "learning_rate": 2.871070394445531e-05, "loss": 1.1413, "step": 6510000 }, { "epoch": 3.9, "learning_rate": 2.8708608178825867e-05, "loss": 1.1361, "step": 6510500 }, { "epoch": 3.9, "learning_rate": 2.87065082132653e-05, "loss": 1.135, "step": 6511000 }, { "epoch": 3.9, "learning_rate": 2.8704408247704737e-05, "loss": 1.1599, "step": 6511500 }, { "epoch": 3.9, "learning_rate": 2.8702308282144174e-05, "loss": 1.1331, "step": 6512000 }, { "epoch": 3.9, "learning_rate": 2.8700208316583608e-05, "loss": 1.1425, "step": 6512500 }, { "epoch": 3.9, "learning_rate": 2.869811255095416e-05, "loss": 1.1182, "step": 6513000 }, { "epoch": 3.91, "learning_rate": 2.8696012585393598e-05, "loss": 1.1179, "step": 6513500 }, { "epoch": 3.91, "learning_rate": 2.8693912619833035e-05, "loss": 1.1591, "step": 6514000 }, { "epoch": 3.91, "learning_rate": 2.8691812654272472e-05, "loss": 1.1661, "step": 6514500 }, { "epoch": 3.91, "learning_rate": 2.8689712688711905e-05, "loss": 1.1541, "step": 6515000 }, { "epoch": 3.91, "learning_rate": 2.8687612723151342e-05, "loss": 1.1357, "step": 6515500 }, { "epoch": 3.91, "learning_rate": 2.868551275759078e-05, "loss": 1.1656, "step": 6516000 }, { "epoch": 3.91, "learning_rate": 2.8683416991961332e-05, "loss": 1.1305, "step": 6516500 }, { "epoch": 3.91, "learning_rate": 2.8681317026400766e-05, "loss": 1.1531, "step": 6517000 }, { "epoch": 3.91, "learning_rate": 2.8679217060840203e-05, "loss": 1.1646, "step": 6517500 }, { "epoch": 3.91, "learning_rate": 2.867711709527964e-05, "loss": 1.119, "step": 6518000 }, { "epoch": 3.91, "learning_rate": 2.8675017129719073e-05, "loss": 1.1151, "step": 6518500 }, { "epoch": 3.91, "learning_rate": 2.867291716415851e-05, "loss": 1.1619, "step": 6519000 }, { "epoch": 3.91, "learning_rate": 2.867081719859794e-05, "loss": 1.1333, "step": 6519500 }, { "epoch": 3.91, "learning_rate": 2.8668717233037377e-05, "loss": 1.1357, "step": 6520000 }, { "epoch": 3.91, "learning_rate": 2.8666621467407937e-05, "loss": 1.1619, "step": 6520500 }, { "epoch": 3.91, "learning_rate": 2.866452150184737e-05, "loss": 1.1516, "step": 6521000 }, { "epoch": 3.91, "learning_rate": 2.8662425736217928e-05, "loss": 1.1531, "step": 6521500 }, { "epoch": 3.91, "learning_rate": 2.866032577065736e-05, "loss": 1.155, "step": 6522000 }, { "epoch": 3.91, "learning_rate": 2.8658225805096798e-05, "loss": 1.1458, "step": 6522500 }, { "epoch": 3.91, "learning_rate": 2.8656125839536235e-05, "loss": 1.1442, "step": 6523000 }, { "epoch": 3.91, "learning_rate": 2.8654025873975668e-05, "loss": 1.1605, "step": 6523500 }, { "epoch": 3.91, "learning_rate": 2.8651925908415098e-05, "loss": 1.1747, "step": 6524000 }, { "epoch": 3.91, "learning_rate": 2.864983014278566e-05, "loss": 1.1515, "step": 6524500 }, { "epoch": 3.91, "learning_rate": 2.8647730177225095e-05, "loss": 1.1339, "step": 6525000 }, { "epoch": 3.91, "learning_rate": 2.864563441159565e-05, "loss": 1.1529, "step": 6525500 }, { "epoch": 3.91, "learning_rate": 2.8643534446035086e-05, "loss": 1.1247, "step": 6526000 }, { "epoch": 3.91, "learning_rate": 2.864143448047452e-05, "loss": 1.1317, "step": 6526500 }, { "epoch": 3.91, "learning_rate": 2.8639334514913956e-05, "loss": 1.1487, "step": 6527000 }, { "epoch": 3.91, "learning_rate": 2.8637234549353393e-05, "loss": 1.1228, "step": 6527500 }, { "epoch": 3.91, "learning_rate": 2.8635134583792826e-05, "loss": 1.1575, "step": 6528000 }, { "epoch": 3.91, "learning_rate": 2.8633034618232263e-05, "loss": 1.1244, "step": 6528500 }, { "epoch": 3.91, "learning_rate": 2.8630934652671693e-05, "loss": 1.1857, "step": 6529000 }, { "epoch": 3.91, "learning_rate": 2.8628838887042254e-05, "loss": 1.1311, "step": 6529500 }, { "epoch": 3.91, "learning_rate": 2.862673892148169e-05, "loss": 1.1563, "step": 6530000 }, { "epoch": 3.92, "learning_rate": 2.8624643155852244e-05, "loss": 1.1278, "step": 6530500 }, { "epoch": 3.92, "learning_rate": 2.8622543190291678e-05, "loss": 1.1403, "step": 6531000 }, { "epoch": 3.92, "learning_rate": 2.8620443224731114e-05, "loss": 1.1502, "step": 6531500 }, { "epoch": 3.92, "learning_rate": 2.861834325917055e-05, "loss": 1.0933, "step": 6532000 }, { "epoch": 3.92, "learning_rate": 2.8616243293609985e-05, "loss": 1.1212, "step": 6532500 }, { "epoch": 3.92, "learning_rate": 2.861414332804942e-05, "loss": 1.1327, "step": 6533000 }, { "epoch": 3.92, "learning_rate": 2.8612043362488852e-05, "loss": 1.1203, "step": 6533500 }, { "epoch": 3.92, "learning_rate": 2.860994339692829e-05, "loss": 1.1624, "step": 6534000 }, { "epoch": 3.92, "learning_rate": 2.8607843431367722e-05, "loss": 1.1682, "step": 6534500 }, { "epoch": 3.92, "learning_rate": 2.860574346580716e-05, "loss": 1.1483, "step": 6535000 }, { "epoch": 3.92, "learning_rate": 2.8603643500246596e-05, "loss": 1.1605, "step": 6535500 }, { "epoch": 3.92, "learning_rate": 2.860154353468603e-05, "loss": 1.1192, "step": 6536000 }, { "epoch": 3.92, "learning_rate": 2.8599447769056586e-05, "loss": 1.1167, "step": 6536500 }, { "epoch": 3.92, "learning_rate": 2.859734780349602e-05, "loss": 1.1704, "step": 6537000 }, { "epoch": 3.92, "learning_rate": 2.8595247837935456e-05, "loss": 1.1449, "step": 6537500 }, { "epoch": 3.92, "learning_rate": 2.8593147872374893e-05, "loss": 1.1307, "step": 6538000 }, { "epoch": 3.92, "learning_rate": 2.8591047906814327e-05, "loss": 1.1684, "step": 6538500 }, { "epoch": 3.92, "learning_rate": 2.8588947941253764e-05, "loss": 1.1343, "step": 6539000 }, { "epoch": 3.92, "learning_rate": 2.85868479756932e-05, "loss": 1.1515, "step": 6539500 }, { "epoch": 3.92, "learning_rate": 2.8584748010132634e-05, "loss": 1.1379, "step": 6540000 }, { "epoch": 3.92, "learning_rate": 2.858265224450319e-05, "loss": 1.1502, "step": 6540500 }, { "epoch": 3.92, "learning_rate": 2.8580556478873744e-05, "loss": 1.1715, "step": 6541000 }, { "epoch": 3.92, "learning_rate": 2.8578456513313178e-05, "loss": 1.1301, "step": 6541500 }, { "epoch": 3.92, "learning_rate": 2.8576356547752615e-05, "loss": 1.1212, "step": 6542000 }, { "epoch": 3.92, "learning_rate": 2.857425658219205e-05, "loss": 1.1383, "step": 6542500 }, { "epoch": 3.92, "learning_rate": 2.8572156616631485e-05, "loss": 1.1494, "step": 6543000 }, { "epoch": 3.92, "learning_rate": 2.8570056651070922e-05, "loss": 1.0922, "step": 6543500 }, { "epoch": 3.92, "learning_rate": 2.856795668551036e-05, "loss": 1.1376, "step": 6544000 }, { "epoch": 3.92, "learning_rate": 2.8565856719949792e-05, "loss": 1.1256, "step": 6544500 }, { "epoch": 3.92, "learning_rate": 2.856375675438923e-05, "loss": 1.1655, "step": 6545000 }, { "epoch": 3.92, "learning_rate": 2.8561660988759783e-05, "loss": 1.1351, "step": 6545500 }, { "epoch": 3.92, "learning_rate": 2.855956102319922e-05, "loss": 1.1422, "step": 6546000 }, { "epoch": 3.92, "learning_rate": 2.8557461057638656e-05, "loss": 1.1466, "step": 6546500 }, { "epoch": 3.93, "learning_rate": 2.855536109207809e-05, "loss": 1.1292, "step": 6547000 }, { "epoch": 3.93, "learning_rate": 2.8553261126517527e-05, "loss": 1.1679, "step": 6547500 }, { "epoch": 3.93, "learning_rate": 2.8551161160956963e-05, "loss": 1.1309, "step": 6548000 }, { "epoch": 3.93, "learning_rate": 2.8549065395327517e-05, "loss": 1.1794, "step": 6548500 }, { "epoch": 3.93, "learning_rate": 2.8546965429766954e-05, "loss": 1.1424, "step": 6549000 }, { "epoch": 3.93, "learning_rate": 2.8544865464206387e-05, "loss": 1.1279, "step": 6549500 }, { "epoch": 3.93, "learning_rate": 2.8542765498645824e-05, "loss": 1.1205, "step": 6550000 }, { "epoch": 3.93, "learning_rate": 2.854066553308526e-05, "loss": 1.1501, "step": 6550500 }, { "epoch": 3.93, "learning_rate": 2.8538569767455815e-05, "loss": 1.1525, "step": 6551000 }, { "epoch": 3.93, "learning_rate": 2.8536469801895248e-05, "loss": 1.1488, "step": 6551500 }, { "epoch": 3.93, "learning_rate": 2.8534369836334685e-05, "loss": 1.1394, "step": 6552000 }, { "epoch": 3.93, "learning_rate": 2.8532269870774122e-05, "loss": 1.1221, "step": 6552500 }, { "epoch": 3.93, "learning_rate": 2.8530169905213555e-05, "loss": 1.1207, "step": 6553000 }, { "epoch": 3.93, "learning_rate": 2.8528069939652985e-05, "loss": 1.129, "step": 6553500 }, { "epoch": 3.93, "learning_rate": 2.8525974174023546e-05, "loss": 1.1655, "step": 6554000 }, { "epoch": 3.93, "learning_rate": 2.8523874208462983e-05, "loss": 1.145, "step": 6554500 }, { "epoch": 3.93, "learning_rate": 2.852177424290242e-05, "loss": 1.1447, "step": 6555000 }, { "epoch": 3.93, "learning_rate": 2.8519674277341853e-05, "loss": 1.1401, "step": 6555500 }, { "epoch": 3.93, "learning_rate": 2.8517574311781283e-05, "loss": 1.1545, "step": 6556000 }, { "epoch": 3.93, "learning_rate": 2.851547434622072e-05, "loss": 1.1027, "step": 6556500 }, { "epoch": 3.93, "learning_rate": 2.8513374380660157e-05, "loss": 1.1132, "step": 6557000 }, { "epoch": 3.93, "learning_rate": 2.851127441509959e-05, "loss": 1.1295, "step": 6557500 }, { "epoch": 3.93, "learning_rate": 2.850917864947015e-05, "loss": 1.1303, "step": 6558000 }, { "epoch": 3.93, "learning_rate": 2.8507082883840704e-05, "loss": 1.1643, "step": 6558500 }, { "epoch": 3.93, "learning_rate": 2.850498291828014e-05, "loss": 1.1344, "step": 6559000 }, { "epoch": 3.93, "learning_rate": 2.8502882952719578e-05, "loss": 1.1642, "step": 6559500 }, { "epoch": 3.93, "learning_rate": 2.8500782987159015e-05, "loss": 1.1263, "step": 6560000 }, { "epoch": 3.93, "learning_rate": 2.849868302159844e-05, "loss": 1.1592, "step": 6560500 }, { "epoch": 3.93, "learning_rate": 2.8496583056037878e-05, "loss": 1.1624, "step": 6561000 }, { "epoch": 3.93, "learning_rate": 2.8494483090477315e-05, "loss": 1.1383, "step": 6561500 }, { "epoch": 3.93, "learning_rate": 2.8492387324847875e-05, "loss": 1.1196, "step": 6562000 }, { "epoch": 3.93, "learning_rate": 2.849028735928731e-05, "loss": 1.1474, "step": 6562500 }, { "epoch": 3.93, "learning_rate": 2.848818739372674e-05, "loss": 1.1447, "step": 6563000 }, { "epoch": 3.94, "learning_rate": 2.8486087428166176e-05, "loss": 1.1722, "step": 6563500 }, { "epoch": 3.94, "learning_rate": 2.8483987462605612e-05, "loss": 1.1221, "step": 6564000 }, { "epoch": 3.94, "learning_rate": 2.8481887497045046e-05, "loss": 1.1389, "step": 6564500 }, { "epoch": 3.94, "learning_rate": 2.8479791731415606e-05, "loss": 1.1421, "step": 6565000 }, { "epoch": 3.94, "learning_rate": 2.8477691765855036e-05, "loss": 1.1445, "step": 6565500 }, { "epoch": 3.94, "learning_rate": 2.8475591800294473e-05, "loss": 1.1419, "step": 6566000 }, { "epoch": 3.94, "learning_rate": 2.847349183473391e-05, "loss": 1.1388, "step": 6566500 }, { "epoch": 3.94, "learning_rate": 2.847139606910447e-05, "loss": 1.1422, "step": 6567000 }, { "epoch": 3.94, "learning_rate": 2.8469296103543904e-05, "loss": 1.1452, "step": 6567500 }, { "epoch": 3.94, "learning_rate": 2.8467196137983334e-05, "loss": 1.136, "step": 6568000 }, { "epoch": 3.94, "learning_rate": 2.846509617242277e-05, "loss": 1.1411, "step": 6568500 }, { "epoch": 3.94, "learning_rate": 2.8462996206862204e-05, "loss": 1.1512, "step": 6569000 }, { "epoch": 3.94, "learning_rate": 2.846089624130164e-05, "loss": 1.1579, "step": 6569500 }, { "epoch": 3.94, "learning_rate": 2.8458796275741078e-05, "loss": 1.1598, "step": 6570000 }, { "epoch": 3.94, "learning_rate": 2.845669631018051e-05, "loss": 1.1416, "step": 6570500 }, { "epoch": 3.94, "learning_rate": 2.845460054455107e-05, "loss": 1.1398, "step": 6571000 }, { "epoch": 3.94, "learning_rate": 2.8452500578990502e-05, "loss": 1.1411, "step": 6571500 }, { "epoch": 3.94, "learning_rate": 2.845040061342994e-05, "loss": 1.1808, "step": 6572000 }, { "epoch": 3.94, "learning_rate": 2.8448300647869375e-05, "loss": 1.1606, "step": 6572500 }, { "epoch": 3.94, "learning_rate": 2.844620488223993e-05, "loss": 1.1672, "step": 6573000 }, { "epoch": 3.94, "learning_rate": 2.8444104916679366e-05, "loss": 1.1308, "step": 6573500 }, { "epoch": 3.94, "learning_rate": 2.8442009151049926e-05, "loss": 1.1324, "step": 6574000 }, { "epoch": 3.94, "learning_rate": 2.843990918548936e-05, "loss": 1.1399, "step": 6574500 }, { "epoch": 3.94, "learning_rate": 2.843780921992879e-05, "loss": 1.1661, "step": 6575000 }, { "epoch": 3.94, "learning_rate": 2.8435709254368227e-05, "loss": 1.1364, "step": 6575500 }, { "epoch": 3.94, "learning_rate": 2.843360928880766e-05, "loss": 1.1257, "step": 6576000 }, { "epoch": 3.94, "learning_rate": 2.843151352317822e-05, "loss": 1.136, "step": 6576500 }, { "epoch": 3.94, "learning_rate": 2.8429413557617657e-05, "loss": 1.1531, "step": 6577000 }, { "epoch": 3.94, "learning_rate": 2.8427313592057087e-05, "loss": 1.1784, "step": 6577500 }, { "epoch": 3.94, "learning_rate": 2.8425213626496524e-05, "loss": 1.1312, "step": 6578000 }, { "epoch": 3.94, "learning_rate": 2.8423117860867085e-05, "loss": 1.1468, "step": 6578500 }, { "epoch": 3.94, "learning_rate": 2.8421017895306518e-05, "loss": 1.1303, "step": 6579000 }, { "epoch": 3.94, "learning_rate": 2.8418917929745948e-05, "loss": 1.1404, "step": 6579500 }, { "epoch": 3.94, "learning_rate": 2.8416817964185385e-05, "loss": 1.1437, "step": 6580000 }, { "epoch": 3.95, "learning_rate": 2.8414717998624822e-05, "loss": 1.1471, "step": 6580500 }, { "epoch": 3.95, "learning_rate": 2.8412618033064255e-05, "loss": 1.1351, "step": 6581000 }, { "epoch": 3.95, "learning_rate": 2.8410518067503692e-05, "loss": 1.1305, "step": 6581500 }, { "epoch": 3.95, "learning_rate": 2.840841810194313e-05, "loss": 1.1592, "step": 6582000 }, { "epoch": 3.95, "learning_rate": 2.8406322336313683e-05, "loss": 1.1355, "step": 6582500 }, { "epoch": 3.95, "learning_rate": 2.8404222370753116e-05, "loss": 1.1395, "step": 6583000 }, { "epoch": 3.95, "learning_rate": 2.8402122405192553e-05, "loss": 1.1338, "step": 6583500 }, { "epoch": 3.95, "learning_rate": 2.8400026639563113e-05, "loss": 1.1247, "step": 6584000 }, { "epoch": 3.95, "learning_rate": 2.8397926674002543e-05, "loss": 1.1598, "step": 6584500 }, { "epoch": 3.95, "learning_rate": 2.8395830908373104e-05, "loss": 1.1406, "step": 6585000 }, { "epoch": 3.95, "learning_rate": 2.839373094281254e-05, "loss": 1.1325, "step": 6585500 }, { "epoch": 3.95, "learning_rate": 2.8391630977251974e-05, "loss": 1.1666, "step": 6586000 }, { "epoch": 3.95, "learning_rate": 2.838953101169141e-05, "loss": 1.0994, "step": 6586500 }, { "epoch": 3.95, "learning_rate": 2.838743104613084e-05, "loss": 1.1249, "step": 6587000 }, { "epoch": 3.95, "learning_rate": 2.8385331080570278e-05, "loss": 1.1261, "step": 6587500 }, { "epoch": 3.95, "learning_rate": 2.838323111500971e-05, "loss": 1.134, "step": 6588000 }, { "epoch": 3.95, "learning_rate": 2.8381131149449148e-05, "loss": 1.1502, "step": 6588500 }, { "epoch": 3.95, "learning_rate": 2.8379031183888585e-05, "loss": 1.1713, "step": 6589000 }, { "epoch": 3.95, "learning_rate": 2.8376931218328018e-05, "loss": 1.149, "step": 6589500 }, { "epoch": 3.95, "learning_rate": 2.8374831252767455e-05, "loss": 1.1434, "step": 6590000 }, { "epoch": 3.95, "learning_rate": 2.8372731287206892e-05, "loss": 1.1035, "step": 6590500 }, { "epoch": 3.95, "learning_rate": 2.8370635521577446e-05, "loss": 1.1335, "step": 6591000 }, { "epoch": 3.95, "learning_rate": 2.836853555601688e-05, "loss": 1.1326, "step": 6591500 }, { "epoch": 3.95, "learning_rate": 2.8366439790387436e-05, "loss": 1.1416, "step": 6592000 }, { "epoch": 3.95, "learning_rate": 2.836433982482687e-05, "loss": 1.1375, "step": 6592500 }, { "epoch": 3.95, "learning_rate": 2.8362239859266306e-05, "loss": 1.1277, "step": 6593000 }, { "epoch": 3.95, "learning_rate": 2.8360139893705743e-05, "loss": 1.1489, "step": 6593500 }, { "epoch": 3.95, "learning_rate": 2.8358044128076297e-05, "loss": 1.1477, "step": 6594000 }, { "epoch": 3.95, "learning_rate": 2.8355944162515734e-05, "loss": 1.129, "step": 6594500 }, { "epoch": 3.95, "learning_rate": 2.8353844196955167e-05, "loss": 1.1445, "step": 6595000 }, { "epoch": 3.95, "learning_rate": 2.8351744231394604e-05, "loss": 1.1469, "step": 6595500 }, { "epoch": 3.95, "learning_rate": 2.8349648465765164e-05, "loss": 1.1493, "step": 6596000 }, { "epoch": 3.95, "learning_rate": 2.8347548500204594e-05, "loss": 1.1268, "step": 6596500 }, { "epoch": 3.96, "learning_rate": 2.8345448534644028e-05, "loss": 1.1436, "step": 6597000 }, { "epoch": 3.96, "learning_rate": 2.8343348569083465e-05, "loss": 1.1636, "step": 6597500 }, { "epoch": 3.96, "learning_rate": 2.83412486035229e-05, "loss": 1.1557, "step": 6598000 }, { "epoch": 3.96, "learning_rate": 2.8339148637962335e-05, "loss": 1.1582, "step": 6598500 }, { "epoch": 3.96, "learning_rate": 2.8337052872332892e-05, "loss": 1.1278, "step": 6599000 }, { "epoch": 3.96, "learning_rate": 2.8334952906772325e-05, "loss": 1.1254, "step": 6599500 }, { "epoch": 3.96, "learning_rate": 2.8332852941211762e-05, "loss": 1.1299, "step": 6600000 }, { "epoch": 3.96, "eval_loss": 1.1053975820541382, "eval_runtime": 1136.7471, "eval_samples_per_second": 463.357, "eval_steps_per_second": 77.227, "step": 6600000 }, { "epoch": 3.96, "learning_rate": 2.83307529756512e-05, "loss": 1.1166, "step": 6600500 }, { "epoch": 3.96, "learning_rate": 2.8328657210021753e-05, "loss": 1.1778, "step": 6601000 }, { "epoch": 3.96, "learning_rate": 2.832655724446119e-05, "loss": 1.1451, "step": 6601500 }, { "epoch": 3.96, "learning_rate": 2.832446147883175e-05, "loss": 1.132, "step": 6602000 }, { "epoch": 3.96, "learning_rate": 2.8322361513271183e-05, "loss": 1.1175, "step": 6602500 }, { "epoch": 3.96, "learning_rate": 2.832026154771062e-05, "loss": 1.1149, "step": 6603000 }, { "epoch": 3.96, "learning_rate": 2.831816158215005e-05, "loss": 1.1919, "step": 6603500 }, { "epoch": 3.96, "learning_rate": 2.8316061616589484e-05, "loss": 1.1429, "step": 6604000 }, { "epoch": 3.96, "learning_rate": 2.831396165102892e-05, "loss": 1.1324, "step": 6604500 }, { "epoch": 3.96, "learning_rate": 2.8311861685468357e-05, "loss": 1.1427, "step": 6605000 }, { "epoch": 3.96, "learning_rate": 2.8309761719907794e-05, "loss": 1.1462, "step": 6605500 }, { "epoch": 3.96, "learning_rate": 2.8307661754347228e-05, "loss": 1.1253, "step": 6606000 }, { "epoch": 3.96, "learning_rate": 2.8305561788786664e-05, "loss": 1.1617, "step": 6606500 }, { "epoch": 3.96, "learning_rate": 2.8303466023157218e-05, "loss": 1.113, "step": 6607000 }, { "epoch": 3.96, "learning_rate": 2.8301366057596655e-05, "loss": 1.1298, "step": 6607500 }, { "epoch": 3.96, "learning_rate": 2.829926609203609e-05, "loss": 1.1618, "step": 6608000 }, { "epoch": 3.96, "learning_rate": 2.8297166126475525e-05, "loss": 1.1417, "step": 6608500 }, { "epoch": 3.96, "learning_rate": 2.8295066160914962e-05, "loss": 1.1328, "step": 6609000 }, { "epoch": 3.96, "learning_rate": 2.8292966195354395e-05, "loss": 1.1584, "step": 6609500 }, { "epoch": 3.96, "learning_rate": 2.8290866229793832e-05, "loss": 1.1354, "step": 6610000 }, { "epoch": 3.96, "learning_rate": 2.8288770464164386e-05, "loss": 1.1325, "step": 6610500 }, { "epoch": 3.96, "learning_rate": 2.8286670498603823e-05, "loss": 1.1364, "step": 6611000 }, { "epoch": 3.96, "learning_rate": 2.828457053304326e-05, "loss": 1.1489, "step": 6611500 }, { "epoch": 3.96, "learning_rate": 2.8282470567482693e-05, "loss": 1.1213, "step": 6612000 }, { "epoch": 3.96, "learning_rate": 2.828037060192213e-05, "loss": 1.1831, "step": 6612500 }, { "epoch": 3.96, "learning_rate": 2.8278274836292683e-05, "loss": 1.1451, "step": 6613000 }, { "epoch": 3.97, "learning_rate": 2.827617487073212e-05, "loss": 1.1673, "step": 6613500 }, { "epoch": 3.97, "learning_rate": 2.8274074905171557e-05, "loss": 1.1529, "step": 6614000 }, { "epoch": 3.97, "learning_rate": 2.827197493961099e-05, "loss": 1.1351, "step": 6614500 }, { "epoch": 3.97, "learning_rate": 2.8269874974050427e-05, "loss": 1.1315, "step": 6615000 }, { "epoch": 3.97, "learning_rate": 2.826777920842098e-05, "loss": 1.1342, "step": 6615500 }, { "epoch": 3.97, "learning_rate": 2.8265679242860418e-05, "loss": 1.1648, "step": 6616000 }, { "epoch": 3.97, "learning_rate": 2.826357927729985e-05, "loss": 1.1257, "step": 6616500 }, { "epoch": 3.97, "learning_rate": 2.8261479311739288e-05, "loss": 1.1424, "step": 6617000 }, { "epoch": 3.97, "learning_rate": 2.8259383546109842e-05, "loss": 1.1509, "step": 6617500 }, { "epoch": 3.97, "learning_rate": 2.825728358054928e-05, "loss": 1.1564, "step": 6618000 }, { "epoch": 3.97, "learning_rate": 2.8255183614988715e-05, "loss": 1.1612, "step": 6618500 }, { "epoch": 3.97, "learning_rate": 2.825308364942815e-05, "loss": 1.1354, "step": 6619000 }, { "epoch": 3.97, "learning_rate": 2.8250983683867586e-05, "loss": 1.1359, "step": 6619500 }, { "epoch": 3.97, "learning_rate": 2.824889211816926e-05, "loss": 1.1657, "step": 6620000 }, { "epoch": 3.97, "learning_rate": 2.8246792152608693e-05, "loss": 1.1439, "step": 6620500 }, { "epoch": 3.97, "learning_rate": 2.824469218704813e-05, "loss": 1.1338, "step": 6621000 }, { "epoch": 3.97, "learning_rate": 2.8242592221487567e-05, "loss": 1.1486, "step": 6621500 }, { "epoch": 3.97, "learning_rate": 2.8240492255927e-05, "loss": 1.1352, "step": 6622000 }, { "epoch": 3.97, "learning_rate": 2.8238392290366437e-05, "loss": 1.1408, "step": 6622500 }, { "epoch": 3.97, "learning_rate": 2.8236292324805874e-05, "loss": 1.1077, "step": 6623000 }, { "epoch": 3.97, "learning_rate": 2.8234192359245307e-05, "loss": 1.1398, "step": 6623500 }, { "epoch": 3.97, "learning_rate": 2.8232092393684744e-05, "loss": 1.1576, "step": 6624000 }, { "epoch": 3.97, "learning_rate": 2.8229996628055298e-05, "loss": 1.1296, "step": 6624500 }, { "epoch": 3.97, "learning_rate": 2.8227896662494735e-05, "loss": 1.1579, "step": 6625000 }, { "epoch": 3.97, "learning_rate": 2.822579669693417e-05, "loss": 1.1228, "step": 6625500 }, { "epoch": 3.97, "learning_rate": 2.8223696731373605e-05, "loss": 1.1247, "step": 6626000 }, { "epoch": 3.97, "learning_rate": 2.8221600965744162e-05, "loss": 1.1206, "step": 6626500 }, { "epoch": 3.97, "learning_rate": 2.8219501000183595e-05, "loss": 1.1666, "step": 6627000 }, { "epoch": 3.97, "learning_rate": 2.8217401034623032e-05, "loss": 1.1292, "step": 6627500 }, { "epoch": 3.97, "learning_rate": 2.821530106906247e-05, "loss": 1.1444, "step": 6628000 }, { "epoch": 3.97, "learning_rate": 2.8213201103501902e-05, "loss": 1.1176, "step": 6628500 }, { "epoch": 3.97, "learning_rate": 2.821110113794134e-05, "loss": 1.1175, "step": 6629000 }, { "epoch": 3.97, "learning_rate": 2.8209005372311893e-05, "loss": 1.155, "step": 6629500 }, { "epoch": 3.97, "learning_rate": 2.820690540675133e-05, "loss": 1.1742, "step": 6630000 }, { "epoch": 3.98, "learning_rate": 2.8204809641121883e-05, "loss": 1.1485, "step": 6630500 }, { "epoch": 3.98, "learning_rate": 2.820270967556132e-05, "loss": 1.1496, "step": 6631000 }, { "epoch": 3.98, "learning_rate": 2.8200609710000754e-05, "loss": 1.1309, "step": 6631500 }, { "epoch": 3.98, "learning_rate": 2.819850974444019e-05, "loss": 1.1193, "step": 6632000 }, { "epoch": 3.98, "learning_rate": 2.8196409778879627e-05, "loss": 1.1329, "step": 6632500 }, { "epoch": 3.98, "learning_rate": 2.819430981331906e-05, "loss": 1.1419, "step": 6633000 }, { "epoch": 3.98, "learning_rate": 2.8192209847758498e-05, "loss": 1.1545, "step": 6633500 }, { "epoch": 3.98, "learning_rate": 2.8190109882197934e-05, "loss": 1.1379, "step": 6634000 }, { "epoch": 3.98, "learning_rate": 2.8188009916637368e-05, "loss": 1.1372, "step": 6634500 }, { "epoch": 3.98, "learning_rate": 2.8185914151007925e-05, "loss": 1.1271, "step": 6635000 }, { "epoch": 3.98, "learning_rate": 2.8183814185447358e-05, "loss": 1.1563, "step": 6635500 }, { "epoch": 3.98, "learning_rate": 2.8181714219886795e-05, "loss": 1.1735, "step": 6636000 }, { "epoch": 3.98, "learning_rate": 2.8179614254326232e-05, "loss": 1.1352, "step": 6636500 }, { "epoch": 3.98, "learning_rate": 2.8177522688627902e-05, "loss": 1.1551, "step": 6637000 }, { "epoch": 3.98, "learning_rate": 2.817542272306734e-05, "loss": 1.1317, "step": 6637500 }, { "epoch": 3.98, "learning_rate": 2.8173322757506776e-05, "loss": 1.118, "step": 6638000 }, { "epoch": 3.98, "learning_rate": 2.817122279194621e-05, "loss": 1.1318, "step": 6638500 }, { "epoch": 3.98, "learning_rate": 2.8169122826385646e-05, "loss": 1.146, "step": 6639000 }, { "epoch": 3.98, "learning_rate": 2.8167022860825083e-05, "loss": 1.1461, "step": 6639500 }, { "epoch": 3.98, "learning_rate": 2.8164922895264517e-05, "loss": 1.1404, "step": 6640000 }, { "epoch": 3.98, "learning_rate": 2.8162822929703953e-05, "loss": 1.1597, "step": 6640500 }, { "epoch": 3.98, "learning_rate": 2.816072296414339e-05, "loss": 1.1558, "step": 6641000 }, { "epoch": 3.98, "learning_rate": 2.8158622998582824e-05, "loss": 1.1395, "step": 6641500 }, { "epoch": 3.98, "learning_rate": 2.815652303302226e-05, "loss": 1.1533, "step": 6642000 }, { "epoch": 3.98, "learning_rate": 2.815442306746169e-05, "loss": 1.1513, "step": 6642500 }, { "epoch": 3.98, "learning_rate": 2.8152323101901127e-05, "loss": 1.1391, "step": 6643000 }, { "epoch": 3.98, "learning_rate": 2.8150227336271688e-05, "loss": 1.1492, "step": 6643500 }, { "epoch": 3.98, "learning_rate": 2.814812737071112e-05, "loss": 1.1401, "step": 6644000 }, { "epoch": 3.98, "learning_rate": 2.8146027405150558e-05, "loss": 1.1278, "step": 6644500 }, { "epoch": 3.98, "learning_rate": 2.8143927439589988e-05, "loss": 1.1623, "step": 6645000 }, { "epoch": 3.98, "learning_rate": 2.8141827474029425e-05, "loss": 1.1539, "step": 6645500 }, { "epoch": 3.98, "learning_rate": 2.813972750846886e-05, "loss": 1.1516, "step": 6646000 }, { "epoch": 3.98, "learning_rate": 2.8137627542908295e-05, "loss": 1.1657, "step": 6646500 }, { "epoch": 3.99, "learning_rate": 2.813553177727885e-05, "loss": 1.1493, "step": 6647000 }, { "epoch": 3.99, "learning_rate": 2.8133431811718286e-05, "loss": 1.1608, "step": 6647500 }, { "epoch": 3.99, "learning_rate": 2.813133184615772e-05, "loss": 1.1212, "step": 6648000 }, { "epoch": 3.99, "learning_rate": 2.812923608052828e-05, "loss": 1.1432, "step": 6648500 }, { "epoch": 3.99, "learning_rate": 2.8127136114967716e-05, "loss": 1.1756, "step": 6649000 }, { "epoch": 3.99, "learning_rate": 2.8125036149407147e-05, "loss": 1.1559, "step": 6649500 }, { "epoch": 3.99, "learning_rate": 2.8122936183846583e-05, "loss": 1.175, "step": 6650000 }, { "epoch": 3.99, "learning_rate": 2.8120836218286017e-05, "loss": 1.1538, "step": 6650500 }, { "epoch": 3.99, "learning_rate": 2.8118736252725454e-05, "loss": 1.1343, "step": 6651000 }, { "epoch": 3.99, "learning_rate": 2.811663628716489e-05, "loss": 1.1303, "step": 6651500 }, { "epoch": 3.99, "learning_rate": 2.8114536321604324e-05, "loss": 1.1432, "step": 6652000 }, { "epoch": 3.99, "learning_rate": 2.811243635604376e-05, "loss": 1.1276, "step": 6652500 }, { "epoch": 3.99, "learning_rate": 2.8110340590414314e-05, "loss": 1.1386, "step": 6653000 }, { "epoch": 3.99, "learning_rate": 2.810824062485375e-05, "loss": 1.1416, "step": 6653500 }, { "epoch": 3.99, "learning_rate": 2.8106140659293188e-05, "loss": 1.1282, "step": 6654000 }, { "epoch": 3.99, "learning_rate": 2.810404069373262e-05, "loss": 1.1437, "step": 6654500 }, { "epoch": 3.99, "learning_rate": 2.8101944928103175e-05, "loss": 1.1523, "step": 6655000 }, { "epoch": 3.99, "learning_rate": 2.8099844962542612e-05, "loss": 1.1614, "step": 6655500 }, { "epoch": 3.99, "learning_rate": 2.809774499698205e-05, "loss": 1.1564, "step": 6656000 }, { "epoch": 3.99, "learning_rate": 2.8095645031421482e-05, "loss": 1.1576, "step": 6656500 }, { "epoch": 3.99, "learning_rate": 2.809354926579204e-05, "loss": 1.1441, "step": 6657000 }, { "epoch": 3.99, "learning_rate": 2.8091449300231473e-05, "loss": 1.1578, "step": 6657500 }, { "epoch": 3.99, "learning_rate": 2.808934933467091e-05, "loss": 1.143, "step": 6658000 }, { "epoch": 3.99, "learning_rate": 2.8087249369110346e-05, "loss": 1.1553, "step": 6658500 }, { "epoch": 3.99, "learning_rate": 2.808514940354978e-05, "loss": 1.138, "step": 6659000 }, { "epoch": 3.99, "learning_rate": 2.8083049437989217e-05, "loss": 1.1453, "step": 6659500 }, { "epoch": 3.99, "learning_rate": 2.8080949472428654e-05, "loss": 1.1214, "step": 6660000 }, { "epoch": 3.99, "learning_rate": 2.8078849506868087e-05, "loss": 1.1574, "step": 6660500 }, { "epoch": 3.99, "learning_rate": 2.8076753741238644e-05, "loss": 1.1698, "step": 6661000 }, { "epoch": 3.99, "learning_rate": 2.8074657975609198e-05, "loss": 1.1365, "step": 6661500 }, { "epoch": 3.99, "learning_rate": 2.807255801004863e-05, "loss": 1.1467, "step": 6662000 }, { "epoch": 3.99, "learning_rate": 2.8070458044488068e-05, "loss": 1.1199, "step": 6662500 }, { "epoch": 3.99, "learning_rate": 2.8068358078927505e-05, "loss": 1.1397, "step": 6663000 }, { "epoch": 4.0, "learning_rate": 2.8066258113366938e-05, "loss": 1.1153, "step": 6663500 }, { "epoch": 4.0, "learning_rate": 2.8064158147806375e-05, "loss": 1.1376, "step": 6664000 }, { "epoch": 4.0, "learning_rate": 2.806206238217693e-05, "loss": 1.1499, "step": 6664500 }, { "epoch": 4.0, "learning_rate": 2.8059962416616365e-05, "loss": 1.1314, "step": 6665000 }, { "epoch": 4.0, "learning_rate": 2.8057862451055802e-05, "loss": 1.1478, "step": 6665500 }, { "epoch": 4.0, "learning_rate": 2.8055762485495236e-05, "loss": 1.1365, "step": 6666000 }, { "epoch": 4.0, "learning_rate": 2.8053666719865793e-05, "loss": 1.1097, "step": 6666500 }, { "epoch": 4.0, "learning_rate": 2.8051566754305226e-05, "loss": 1.1546, "step": 6667000 }, { "epoch": 4.0, "learning_rate": 2.8049466788744663e-05, "loss": 1.1474, "step": 6667500 }, { "epoch": 4.0, "learning_rate": 2.80473668231841e-05, "loss": 1.1392, "step": 6668000 }, { "epoch": 4.0, "learning_rate": 2.8045271057554653e-05, "loss": 1.1135, "step": 6668500 }, { "epoch": 4.0, "learning_rate": 2.8043175291925214e-05, "loss": 1.1623, "step": 6669000 }, { "epoch": 4.0, "learning_rate": 2.8041075326364647e-05, "loss": 1.166, "step": 6669500 }, { "epoch": 4.0, "learning_rate": 2.8038975360804084e-05, "loss": 1.1476, "step": 6670000 }, { "epoch": 4.0, "learning_rate": 2.803687539524352e-05, "loss": 1.1547, "step": 6670500 }, { "epoch": 4.0, "learning_rate": 2.803477542968295e-05, "loss": 1.1494, "step": 6671000 }, { "epoch": 4.0, "learning_rate": 2.8032675464122384e-05, "loss": 1.1527, "step": 6671500 }, { "epoch": 4.0, "learning_rate": 2.803057549856182e-05, "loss": 1.1182, "step": 6672000 }, { "epoch": 4.0, "learning_rate": 2.8028475533001258e-05, "loss": 1.0951, "step": 6672500 }, { "epoch": 4.0, "learning_rate": 2.802637976737182e-05, "loss": 1.123, "step": 6673000 }, { "epoch": 4.0, "learning_rate": 2.802427980181125e-05, "loss": 1.0784, "step": 6673500 }, { "epoch": 4.0, "learning_rate": 2.8022179836250682e-05, "loss": 1.0887, "step": 6674000 }, { "epoch": 4.0, "learning_rate": 2.802007987069012e-05, "loss": 1.0797, "step": 6674500 }, { "epoch": 4.0, "learning_rate": 2.8017979905129556e-05, "loss": 1.0999, "step": 6675000 }, { "epoch": 4.0, "learning_rate": 2.801587993956899e-05, "loss": 1.0957, "step": 6675500 }, { "epoch": 4.0, "learning_rate": 2.8013784173939543e-05, "loss": 1.102, "step": 6676000 }, { "epoch": 4.0, "learning_rate": 2.801168420837898e-05, "loss": 1.0994, "step": 6676500 }, { "epoch": 4.0, "learning_rate": 2.8009584242818416e-05, "loss": 1.1061, "step": 6677000 }, { "epoch": 4.0, "learning_rate": 2.8007484277257853e-05, "loss": 1.0902, "step": 6677500 }, { "epoch": 4.0, "learning_rate": 2.8005384311697287e-05, "loss": 1.0907, "step": 6678000 }, { "epoch": 4.0, "learning_rate": 2.800328854606784e-05, "loss": 1.1171, "step": 6678500 }, { "epoch": 4.0, "learning_rate": 2.8001188580507277e-05, "loss": 1.1177, "step": 6679000 }, { "epoch": 4.0, "learning_rate": 2.7999088614946714e-05, "loss": 1.0836, "step": 6679500 }, { "epoch": 4.0, "learning_rate": 2.7996988649386147e-05, "loss": 1.1335, "step": 6680000 }, { "epoch": 4.01, "learning_rate": 2.7994888683825584e-05, "loss": 1.1234, "step": 6680500 }, { "epoch": 4.01, "learning_rate": 2.799278871826502e-05, "loss": 1.1053, "step": 6681000 }, { "epoch": 4.01, "learning_rate": 2.7990692952635578e-05, "loss": 1.1265, "step": 6681500 }, { "epoch": 4.01, "learning_rate": 2.798859298707501e-05, "loss": 1.0843, "step": 6682000 }, { "epoch": 4.01, "learning_rate": 2.7986493021514445e-05, "loss": 1.1397, "step": 6682500 }, { "epoch": 4.01, "learning_rate": 2.7984393055953882e-05, "loss": 1.1023, "step": 6683000 }, { "epoch": 4.01, "learning_rate": 2.798229729032444e-05, "loss": 1.0721, "step": 6683500 }, { "epoch": 4.01, "learning_rate": 2.7980197324763876e-05, "loss": 1.1035, "step": 6684000 }, { "epoch": 4.01, "learning_rate": 2.797809735920331e-05, "loss": 1.1101, "step": 6684500 }, { "epoch": 4.01, "learning_rate": 2.7975997393642743e-05, "loss": 1.0797, "step": 6685000 }, { "epoch": 4.01, "learning_rate": 2.797389742808218e-05, "loss": 1.115, "step": 6685500 }, { "epoch": 4.01, "learning_rate": 2.7971797462521616e-05, "loss": 1.08, "step": 6686000 }, { "epoch": 4.01, "learning_rate": 2.796969749696105e-05, "loss": 1.081, "step": 6686500 }, { "epoch": 4.01, "learning_rate": 2.7967597531400483e-05, "loss": 1.1315, "step": 6687000 }, { "epoch": 4.01, "learning_rate": 2.796550176577104e-05, "loss": 1.105, "step": 6687500 }, { "epoch": 4.01, "learning_rate": 2.7963406000141597e-05, "loss": 1.1179, "step": 6688000 }, { "epoch": 4.01, "learning_rate": 2.7961306034581034e-05, "loss": 1.107, "step": 6688500 }, { "epoch": 4.01, "learning_rate": 2.7959206069020467e-05, "loss": 1.0812, "step": 6689000 }, { "epoch": 4.01, "learning_rate": 2.79571061034599e-05, "loss": 1.0981, "step": 6689500 }, { "epoch": 4.01, "learning_rate": 2.7955006137899338e-05, "loss": 1.1077, "step": 6690000 }, { "epoch": 4.01, "learning_rate": 2.7952910372269895e-05, "loss": 1.116, "step": 6690500 }, { "epoch": 4.01, "learning_rate": 2.795081040670933e-05, "loss": 1.0949, "step": 6691000 }, { "epoch": 4.01, "learning_rate": 2.7948710441148765e-05, "loss": 1.0958, "step": 6691500 }, { "epoch": 4.01, "learning_rate": 2.79466104755882e-05, "loss": 1.0903, "step": 6692000 }, { "epoch": 4.01, "learning_rate": 2.7944510510027635e-05, "loss": 1.0907, "step": 6692500 }, { "epoch": 4.01, "learning_rate": 2.7942410544467072e-05, "loss": 1.1169, "step": 6693000 }, { "epoch": 4.01, "learning_rate": 2.794031477883763e-05, "loss": 1.0921, "step": 6693500 }, { "epoch": 4.01, "learning_rate": 2.793821481327706e-05, "loss": 1.0921, "step": 6694000 }, { "epoch": 4.01, "learning_rate": 2.7936114847716496e-05, "loss": 1.0842, "step": 6694500 }, { "epoch": 4.01, "learning_rate": 2.7934014882155933e-05, "loss": 1.0857, "step": 6695000 }, { "epoch": 4.01, "learning_rate": 2.793191911652649e-05, "loss": 1.0997, "step": 6695500 }, { "epoch": 4.01, "learning_rate": 2.7929823350897043e-05, "loss": 1.0893, "step": 6696000 }, { "epoch": 4.01, "learning_rate": 2.792772338533648e-05, "loss": 1.1014, "step": 6696500 }, { "epoch": 4.02, "learning_rate": 2.7925623419775914e-05, "loss": 1.0807, "step": 6697000 }, { "epoch": 4.02, "learning_rate": 2.792352345421535e-05, "loss": 1.0693, "step": 6697500 }, { "epoch": 4.02, "learning_rate": 2.7921423488654787e-05, "loss": 1.0768, "step": 6698000 }, { "epoch": 4.02, "learning_rate": 2.791932352309422e-05, "loss": 1.0837, "step": 6698500 }, { "epoch": 4.02, "learning_rate": 2.7917223557533654e-05, "loss": 1.089, "step": 6699000 }, { "epoch": 4.02, "learning_rate": 2.791512359197309e-05, "loss": 1.1103, "step": 6699500 }, { "epoch": 4.02, "learning_rate": 2.7913023626412528e-05, "loss": 1.0976, "step": 6700000 }, { "epoch": 4.02, "eval_loss": 1.1042001247406006, "eval_runtime": 1105.5887, "eval_samples_per_second": 476.416, "eval_steps_per_second": 79.403, "step": 6700000 }, { "epoch": 4.02, "learning_rate": 2.791092366085196e-05, "loss": 1.0699, "step": 6700500 }, { "epoch": 4.02, "learning_rate": 2.7908827895222515e-05, "loss": 1.1071, "step": 6701000 }, { "epoch": 4.02, "learning_rate": 2.7906727929661952e-05, "loss": 1.1017, "step": 6701500 }, { "epoch": 4.02, "learning_rate": 2.790462796410139e-05, "loss": 1.0971, "step": 6702000 }, { "epoch": 4.02, "learning_rate": 2.7902532198471946e-05, "loss": 1.1278, "step": 6702500 }, { "epoch": 4.02, "learning_rate": 2.7900432232911383e-05, "loss": 1.0875, "step": 6703000 }, { "epoch": 4.02, "learning_rate": 2.7898332267350813e-05, "loss": 1.077, "step": 6703500 }, { "epoch": 4.02, "learning_rate": 2.789623230179025e-05, "loss": 1.1148, "step": 6704000 }, { "epoch": 4.02, "learning_rate": 2.7894132336229686e-05, "loss": 1.1277, "step": 6704500 }, { "epoch": 4.02, "learning_rate": 2.789203237066912e-05, "loss": 1.0689, "step": 6705000 }, { "epoch": 4.02, "learning_rate": 2.7889932405108557e-05, "loss": 1.1066, "step": 6705500 }, { "epoch": 4.02, "learning_rate": 2.788783243954799e-05, "loss": 1.1246, "step": 6706000 }, { "epoch": 4.02, "learning_rate": 2.7885736673918547e-05, "loss": 1.0982, "step": 6706500 }, { "epoch": 4.02, "learning_rate": 2.7883636708357984e-05, "loss": 1.1093, "step": 6707000 }, { "epoch": 4.02, "learning_rate": 2.7881536742797417e-05, "loss": 1.1031, "step": 6707500 }, { "epoch": 4.02, "learning_rate": 2.7879436777236854e-05, "loss": 1.1149, "step": 6708000 }, { "epoch": 4.02, "learning_rate": 2.7877336811676288e-05, "loss": 1.0958, "step": 6708500 }, { "epoch": 4.02, "learning_rate": 2.787523684611572e-05, "loss": 1.1406, "step": 6709000 }, { "epoch": 4.02, "learning_rate": 2.7873136880555158e-05, "loss": 1.0926, "step": 6709500 }, { "epoch": 4.02, "learning_rate": 2.7871041114925715e-05, "loss": 1.1037, "step": 6710000 }, { "epoch": 4.02, "learning_rate": 2.7868941149365152e-05, "loss": 1.0921, "step": 6710500 }, { "epoch": 4.02, "learning_rate": 2.7866841183804585e-05, "loss": 1.0839, "step": 6711000 }, { "epoch": 4.02, "learning_rate": 2.786474121824402e-05, "loss": 1.0922, "step": 6711500 }, { "epoch": 4.02, "learning_rate": 2.7862641252683456e-05, "loss": 1.0899, "step": 6712000 }, { "epoch": 4.02, "learning_rate": 2.7860541287122892e-05, "loss": 1.1145, "step": 6712500 }, { "epoch": 4.02, "learning_rate": 2.785844552149345e-05, "loss": 1.1082, "step": 6713000 }, { "epoch": 4.03, "learning_rate": 2.7856349755864003e-05, "loss": 1.1, "step": 6713500 }, { "epoch": 4.03, "learning_rate": 2.785424979030344e-05, "loss": 1.1247, "step": 6714000 }, { "epoch": 4.03, "learning_rate": 2.7852149824742873e-05, "loss": 1.1087, "step": 6714500 }, { "epoch": 4.03, "learning_rate": 2.785004985918231e-05, "loss": 1.1108, "step": 6715000 }, { "epoch": 4.03, "learning_rate": 2.7847949893621744e-05, "loss": 1.1321, "step": 6715500 }, { "epoch": 4.03, "learning_rate": 2.7845849928061177e-05, "loss": 1.0895, "step": 6716000 }, { "epoch": 4.03, "learning_rate": 2.7843754162431737e-05, "loss": 1.0997, "step": 6716500 }, { "epoch": 4.03, "learning_rate": 2.784165419687117e-05, "loss": 1.1038, "step": 6717000 }, { "epoch": 4.03, "learning_rate": 2.7839554231310608e-05, "loss": 1.0815, "step": 6717500 }, { "epoch": 4.03, "learning_rate": 2.783745426575004e-05, "loss": 1.1015, "step": 6718000 }, { "epoch": 4.03, "learning_rate": 2.7835354300189475e-05, "loss": 1.088, "step": 6718500 }, { "epoch": 4.03, "learning_rate": 2.783325433462891e-05, "loss": 1.1224, "step": 6719000 }, { "epoch": 4.03, "learning_rate": 2.783115436906835e-05, "loss": 1.1163, "step": 6719500 }, { "epoch": 4.03, "learning_rate": 2.7829054403507782e-05, "loss": 1.1105, "step": 6720000 }, { "epoch": 4.03, "learning_rate": 2.782695443794722e-05, "loss": 1.1135, "step": 6720500 }, { "epoch": 4.03, "learning_rate": 2.7824854472386652e-05, "loss": 1.0769, "step": 6721000 }, { "epoch": 4.03, "learning_rate": 2.782275870675721e-05, "loss": 1.1109, "step": 6721500 }, { "epoch": 4.03, "learning_rate": 2.7820658741196646e-05, "loss": 1.0998, "step": 6722000 }, { "epoch": 4.03, "learning_rate": 2.781855877563608e-05, "loss": 1.136, "step": 6722500 }, { "epoch": 4.03, "learning_rate": 2.7816458810075513e-05, "loss": 1.1141, "step": 6723000 }, { "epoch": 4.03, "learning_rate": 2.781435884451495e-05, "loss": 1.1206, "step": 6723500 }, { "epoch": 4.03, "learning_rate": 2.7812263078885507e-05, "loss": 1.1066, "step": 6724000 }, { "epoch": 4.03, "learning_rate": 2.781016311332494e-05, "loss": 1.098, "step": 6724500 }, { "epoch": 4.03, "learning_rate": 2.7808063147764377e-05, "loss": 1.1055, "step": 6725000 }, { "epoch": 4.03, "learning_rate": 2.780596738213493e-05, "loss": 1.0986, "step": 6725500 }, { "epoch": 4.03, "learning_rate": 2.7803867416574367e-05, "loss": 1.1165, "step": 6726000 }, { "epoch": 4.03, "learning_rate": 2.7801767451013804e-05, "loss": 1.1311, "step": 6726500 }, { "epoch": 4.03, "learning_rate": 2.7799667485453238e-05, "loss": 1.0955, "step": 6727000 }, { "epoch": 4.03, "learning_rate": 2.7797567519892675e-05, "loss": 1.0762, "step": 6727500 }, { "epoch": 4.03, "learning_rate": 2.7795467554332108e-05, "loss": 1.0958, "step": 6728000 }, { "epoch": 4.03, "learning_rate": 2.779336758877154e-05, "loss": 1.1248, "step": 6728500 }, { "epoch": 4.03, "learning_rate": 2.7791267623210978e-05, "loss": 1.1025, "step": 6729000 }, { "epoch": 4.03, "learning_rate": 2.7789171857581535e-05, "loss": 1.1038, "step": 6729500 }, { "epoch": 4.03, "learning_rate": 2.7787071892020972e-05, "loss": 1.1074, "step": 6730000 }, { "epoch": 4.04, "learning_rate": 2.7784971926460406e-05, "loss": 1.1046, "step": 6730500 }, { "epoch": 4.04, "learning_rate": 2.778287196089984e-05, "loss": 1.1062, "step": 6731000 }, { "epoch": 4.04, "learning_rate": 2.7780771995339276e-05, "loss": 1.1035, "step": 6731500 }, { "epoch": 4.04, "learning_rate": 2.7778672029778713e-05, "loss": 1.0792, "step": 6732000 }, { "epoch": 4.04, "learning_rate": 2.777657626414927e-05, "loss": 1.0923, "step": 6732500 }, { "epoch": 4.04, "learning_rate": 2.7774476298588703e-05, "loss": 1.0809, "step": 6733000 }, { "epoch": 4.04, "learning_rate": 2.777238053295926e-05, "loss": 1.1036, "step": 6733500 }, { "epoch": 4.04, "learning_rate": 2.7770280567398694e-05, "loss": 1.1067, "step": 6734000 }, { "epoch": 4.04, "learning_rate": 2.776818060183813e-05, "loss": 1.1032, "step": 6734500 }, { "epoch": 4.04, "learning_rate": 2.7766080636277564e-05, "loss": 1.1346, "step": 6735000 }, { "epoch": 4.04, "learning_rate": 2.7763980670716997e-05, "loss": 1.1225, "step": 6735500 }, { "epoch": 4.04, "learning_rate": 2.7761880705156434e-05, "loss": 1.1248, "step": 6736000 }, { "epoch": 4.04, "learning_rate": 2.775978073959587e-05, "loss": 1.0935, "step": 6736500 }, { "epoch": 4.04, "learning_rate": 2.7757680774035308e-05, "loss": 1.1185, "step": 6737000 }, { "epoch": 4.04, "learning_rate": 2.775558500840586e-05, "loss": 1.1204, "step": 6737500 }, { "epoch": 4.04, "learning_rate": 2.7753485042845295e-05, "loss": 1.0994, "step": 6738000 }, { "epoch": 4.04, "learning_rate": 2.7751385077284732e-05, "loss": 1.1057, "step": 6738500 }, { "epoch": 4.04, "learning_rate": 2.774928511172417e-05, "loss": 1.081, "step": 6739000 }, { "epoch": 4.04, "learning_rate": 2.7747185146163602e-05, "loss": 1.0747, "step": 6739500 }, { "epoch": 4.04, "learning_rate": 2.774508938053416e-05, "loss": 1.106, "step": 6740000 }, { "epoch": 4.04, "learning_rate": 2.7742989414973592e-05, "loss": 1.1109, "step": 6740500 }, { "epoch": 4.04, "learning_rate": 2.774088944941303e-05, "loss": 1.0931, "step": 6741000 }, { "epoch": 4.04, "learning_rate": 2.7738789483852466e-05, "loss": 1.0992, "step": 6741500 }, { "epoch": 4.04, "learning_rate": 2.77366895182919e-05, "loss": 1.1114, "step": 6742000 }, { "epoch": 4.04, "learning_rate": 2.7734589552731333e-05, "loss": 1.0996, "step": 6742500 }, { "epoch": 4.04, "learning_rate": 2.773248958717077e-05, "loss": 1.1265, "step": 6743000 }, { "epoch": 4.04, "learning_rate": 2.7730393821541327e-05, "loss": 1.1023, "step": 6743500 }, { "epoch": 4.04, "learning_rate": 2.7728293855980764e-05, "loss": 1.1232, "step": 6744000 }, { "epoch": 4.04, "learning_rate": 2.7726198090351317e-05, "loss": 1.1105, "step": 6744500 }, { "epoch": 4.04, "learning_rate": 2.772409812479075e-05, "loss": 1.0796, "step": 6745000 }, { "epoch": 4.04, "learning_rate": 2.7721998159230188e-05, "loss": 1.0974, "step": 6745500 }, { "epoch": 4.04, "learning_rate": 2.7719898193669624e-05, "loss": 1.1317, "step": 6746000 }, { "epoch": 4.04, "learning_rate": 2.7717798228109058e-05, "loss": 1.0828, "step": 6746500 }, { "epoch": 4.05, "learning_rate": 2.7715698262548495e-05, "loss": 1.1073, "step": 6747000 }, { "epoch": 4.05, "learning_rate": 2.7713598296987928e-05, "loss": 1.1082, "step": 6747500 }, { "epoch": 4.05, "learning_rate": 2.7711498331427365e-05, "loss": 1.1336, "step": 6748000 }, { "epoch": 4.05, "learning_rate": 2.7709402565797922e-05, "loss": 1.1136, "step": 6748500 }, { "epoch": 4.05, "learning_rate": 2.7707302600237356e-05, "loss": 1.0775, "step": 6749000 }, { "epoch": 4.05, "learning_rate": 2.7705202634676792e-05, "loss": 1.0865, "step": 6749500 }, { "epoch": 4.05, "learning_rate": 2.7703102669116226e-05, "loss": 1.0845, "step": 6750000 }, { "epoch": 4.05, "learning_rate": 2.770100270355566e-05, "loss": 1.1213, "step": 6750500 }, { "epoch": 4.05, "learning_rate": 2.7698902737995096e-05, "loss": 1.1024, "step": 6751000 }, { "epoch": 4.05, "learning_rate": 2.7696802772434533e-05, "loss": 1.0939, "step": 6751500 }, { "epoch": 4.05, "learning_rate": 2.7694702806873966e-05, "loss": 1.0909, "step": 6752000 }, { "epoch": 4.05, "learning_rate": 2.7692607041244523e-05, "loss": 1.101, "step": 6752500 }, { "epoch": 4.05, "learning_rate": 2.7690507075683957e-05, "loss": 1.1097, "step": 6753000 }, { "epoch": 4.05, "learning_rate": 2.7688411310054514e-05, "loss": 1.1101, "step": 6753500 }, { "epoch": 4.05, "learning_rate": 2.768631554442507e-05, "loss": 1.1193, "step": 6754000 }, { "epoch": 4.05, "learning_rate": 2.7684215578864504e-05, "loss": 1.1049, "step": 6754500 }, { "epoch": 4.05, "learning_rate": 2.768211561330394e-05, "loss": 1.1058, "step": 6755000 }, { "epoch": 4.05, "learning_rate": 2.7680015647743378e-05, "loss": 1.0939, "step": 6755500 }, { "epoch": 4.05, "learning_rate": 2.767791568218281e-05, "loss": 1.1267, "step": 6756000 }, { "epoch": 4.05, "learning_rate": 2.7675815716622248e-05, "loss": 1.1316, "step": 6756500 }, { "epoch": 4.05, "learning_rate": 2.7673715751061682e-05, "loss": 1.1128, "step": 6757000 }, { "epoch": 4.05, "learning_rate": 2.7671615785501115e-05, "loss": 1.1058, "step": 6757500 }, { "epoch": 4.05, "learning_rate": 2.7669515819940552e-05, "loss": 1.1003, "step": 6758000 }, { "epoch": 4.05, "learning_rate": 2.766742005431111e-05, "loss": 1.1204, "step": 6758500 }, { "epoch": 4.05, "learning_rate": 2.7665320088750546e-05, "loss": 1.1024, "step": 6759000 }, { "epoch": 4.05, "learning_rate": 2.766322012318998e-05, "loss": 1.1095, "step": 6759500 }, { "epoch": 4.05, "learning_rate": 2.7661120157629413e-05, "loss": 1.1191, "step": 6760000 }, { "epoch": 4.05, "learning_rate": 2.765902019206885e-05, "loss": 1.109, "step": 6760500 }, { "epoch": 4.05, "learning_rate": 2.7656924426439407e-05, "loss": 1.1154, "step": 6761000 }, { "epoch": 4.05, "learning_rate": 2.765482446087884e-05, "loss": 1.0895, "step": 6761500 }, { "epoch": 4.05, "learning_rate": 2.7652724495318277e-05, "loss": 1.1, "step": 6762000 }, { "epoch": 4.05, "learning_rate": 2.765062452975771e-05, "loss": 1.1222, "step": 6762500 }, { "epoch": 4.05, "learning_rate": 2.7648528764128267e-05, "loss": 1.0936, "step": 6763000 }, { "epoch": 4.05, "learning_rate": 2.7646428798567704e-05, "loss": 1.0933, "step": 6763500 }, { "epoch": 4.06, "learning_rate": 2.7644328833007138e-05, "loss": 1.1006, "step": 6764000 }, { "epoch": 4.06, "learning_rate": 2.764222886744657e-05, "loss": 1.1088, "step": 6764500 }, { "epoch": 4.06, "learning_rate": 2.7640128901886008e-05, "loss": 1.1005, "step": 6765000 }, { "epoch": 4.06, "learning_rate": 2.7638028936325445e-05, "loss": 1.1131, "step": 6765500 }, { "epoch": 4.06, "learning_rate": 2.763592897076488e-05, "loss": 1.0527, "step": 6766000 }, { "epoch": 4.06, "learning_rate": 2.7633833205135435e-05, "loss": 1.1177, "step": 6766500 }, { "epoch": 4.06, "learning_rate": 2.763173323957487e-05, "loss": 1.0913, "step": 6767000 }, { "epoch": 4.06, "learning_rate": 2.7629633274014305e-05, "loss": 1.0798, "step": 6767500 }, { "epoch": 4.06, "learning_rate": 2.7627533308453742e-05, "loss": 1.0966, "step": 6768000 }, { "epoch": 4.06, "learning_rate": 2.76254375428243e-05, "loss": 1.1047, "step": 6768500 }, { "epoch": 4.06, "learning_rate": 2.7623337577263733e-05, "loss": 1.1143, "step": 6769000 }, { "epoch": 4.06, "learning_rate": 2.7621237611703166e-05, "loss": 1.1149, "step": 6769500 }, { "epoch": 4.06, "learning_rate": 2.7619137646142603e-05, "loss": 1.0979, "step": 6770000 }, { "epoch": 4.06, "learning_rate": 2.761704608044428e-05, "loss": 1.0826, "step": 6770500 }, { "epoch": 4.06, "learning_rate": 2.7614946114883714e-05, "loss": 1.12, "step": 6771000 }, { "epoch": 4.06, "learning_rate": 2.761284614932315e-05, "loss": 1.1176, "step": 6771500 }, { "epoch": 4.06, "learning_rate": 2.7610746183762587e-05, "loss": 1.1035, "step": 6772000 }, { "epoch": 4.06, "learning_rate": 2.760864621820202e-05, "loss": 1.1009, "step": 6772500 }, { "epoch": 4.06, "learning_rate": 2.7606546252641458e-05, "loss": 1.0949, "step": 6773000 }, { "epoch": 4.06, "learning_rate": 2.760444628708089e-05, "loss": 1.1017, "step": 6773500 }, { "epoch": 4.06, "learning_rate": 2.7602346321520324e-05, "loss": 1.106, "step": 6774000 }, { "epoch": 4.06, "learning_rate": 2.760025055589088e-05, "loss": 1.1099, "step": 6774500 }, { "epoch": 4.06, "learning_rate": 2.7598150590330318e-05, "loss": 1.1, "step": 6775000 }, { "epoch": 4.06, "learning_rate": 2.7596050624769755e-05, "loss": 1.0991, "step": 6775500 }, { "epoch": 4.06, "learning_rate": 2.759395065920919e-05, "loss": 1.1099, "step": 6776000 }, { "epoch": 4.06, "learning_rate": 2.7591850693648622e-05, "loss": 1.1094, "step": 6776500 }, { "epoch": 4.06, "learning_rate": 2.758975072808806e-05, "loss": 1.1087, "step": 6777000 }, { "epoch": 4.06, "learning_rate": 2.7587650762527496e-05, "loss": 1.1071, "step": 6777500 }, { "epoch": 4.06, "learning_rate": 2.758555079696693e-05, "loss": 1.1125, "step": 6778000 }, { "epoch": 4.06, "learning_rate": 2.7583455031337483e-05, "loss": 1.1117, "step": 6778500 }, { "epoch": 4.06, "learning_rate": 2.7581359265708043e-05, "loss": 1.1064, "step": 6779000 }, { "epoch": 4.06, "learning_rate": 2.7579259300147477e-05, "loss": 1.0645, "step": 6779500 }, { "epoch": 4.06, "learning_rate": 2.7577159334586913e-05, "loss": 1.0815, "step": 6780000 }, { "epoch": 4.07, "learning_rate": 2.757505936902635e-05, "loss": 1.1059, "step": 6780500 }, { "epoch": 4.07, "learning_rate": 2.7572963603396904e-05, "loss": 1.1156, "step": 6781000 }, { "epoch": 4.07, "learning_rate": 2.7570863637836337e-05, "loss": 1.1028, "step": 6781500 }, { "epoch": 4.07, "learning_rate": 2.7568763672275774e-05, "loss": 1.1111, "step": 6782000 }, { "epoch": 4.07, "learning_rate": 2.756666370671521e-05, "loss": 1.1037, "step": 6782500 }, { "epoch": 4.07, "learning_rate": 2.7564563741154644e-05, "loss": 1.0984, "step": 6783000 }, { "epoch": 4.07, "learning_rate": 2.7562463775594078e-05, "loss": 1.0954, "step": 6783500 }, { "epoch": 4.07, "learning_rate": 2.7560363810033515e-05, "loss": 1.1251, "step": 6784000 }, { "epoch": 4.07, "learning_rate": 2.755826384447295e-05, "loss": 1.1067, "step": 6784500 }, { "epoch": 4.07, "learning_rate": 2.755616807884351e-05, "loss": 1.1245, "step": 6785000 }, { "epoch": 4.07, "learning_rate": 2.7554072313214062e-05, "loss": 1.1046, "step": 6785500 }, { "epoch": 4.07, "learning_rate": 2.75519723476535e-05, "loss": 1.0868, "step": 6786000 }, { "epoch": 4.07, "learning_rate": 2.7549872382092932e-05, "loss": 1.1265, "step": 6786500 }, { "epoch": 4.07, "learning_rate": 2.754777241653237e-05, "loss": 1.1063, "step": 6787000 }, { "epoch": 4.07, "learning_rate": 2.7545672450971806e-05, "loss": 1.1009, "step": 6787500 }, { "epoch": 4.07, "learning_rate": 2.7543572485411236e-05, "loss": 1.1147, "step": 6788000 }, { "epoch": 4.07, "learning_rate": 2.7541472519850673e-05, "loss": 1.0998, "step": 6788500 }, { "epoch": 4.07, "learning_rate": 2.753937255429011e-05, "loss": 1.0933, "step": 6789000 }, { "epoch": 4.07, "learning_rate": 2.7537272588729543e-05, "loss": 1.0982, "step": 6789500 }, { "epoch": 4.07, "learning_rate": 2.7535176823100104e-05, "loss": 1.1222, "step": 6790000 }, { "epoch": 4.07, "learning_rate": 2.7533076857539534e-05, "loss": 1.0852, "step": 6790500 }, { "epoch": 4.07, "learning_rate": 2.753097689197897e-05, "loss": 1.0952, "step": 6791000 }, { "epoch": 4.07, "learning_rate": 2.7528876926418408e-05, "loss": 1.108, "step": 6791500 }, { "epoch": 4.07, "learning_rate": 2.752677696085784e-05, "loss": 1.0951, "step": 6792000 }, { "epoch": 4.07, "learning_rate": 2.7524681195228398e-05, "loss": 1.1052, "step": 6792500 }, { "epoch": 4.07, "learning_rate": 2.752258122966783e-05, "loss": 1.0918, "step": 6793000 }, { "epoch": 4.07, "learning_rate": 2.7520481264107268e-05, "loss": 1.1251, "step": 6793500 }, { "epoch": 4.07, "learning_rate": 2.7518381298546705e-05, "loss": 1.0912, "step": 6794000 }, { "epoch": 4.07, "learning_rate": 2.7516285532917262e-05, "loss": 1.1171, "step": 6794500 }, { "epoch": 4.07, "learning_rate": 2.7514189767287816e-05, "loss": 1.1125, "step": 6795000 }, { "epoch": 4.07, "learning_rate": 2.7512089801727252e-05, "loss": 1.1045, "step": 6795500 }, { "epoch": 4.07, "learning_rate": 2.7509989836166686e-05, "loss": 1.1078, "step": 6796000 }, { "epoch": 4.07, "learning_rate": 2.7507889870606123e-05, "loss": 1.1118, "step": 6796500 }, { "epoch": 4.08, "learning_rate": 2.750578990504556e-05, "loss": 1.1053, "step": 6797000 }, { "epoch": 4.08, "learning_rate": 2.750368993948499e-05, "loss": 1.1427, "step": 6797500 }, { "epoch": 4.08, "learning_rate": 2.7501589973924427e-05, "loss": 1.1063, "step": 6798000 }, { "epoch": 4.08, "learning_rate": 2.7499490008363863e-05, "loss": 1.1371, "step": 6798500 }, { "epoch": 4.08, "learning_rate": 2.749739424273442e-05, "loss": 1.0789, "step": 6799000 }, { "epoch": 4.08, "learning_rate": 2.7495294277173854e-05, "loss": 1.1126, "step": 6799500 }, { "epoch": 4.08, "learning_rate": 2.7493194311613287e-05, "loss": 1.0887, "step": 6800000 }, { "epoch": 4.08, "eval_loss": 1.1015839576721191, "eval_runtime": 1110.8058, "eval_samples_per_second": 474.178, "eval_steps_per_second": 79.03, "step": 6800000 }, { "epoch": 4.08, "learning_rate": 2.7491094346052724e-05, "loss": 1.1064, "step": 6800500 }, { "epoch": 4.08, "learning_rate": 2.748899858042328e-05, "loss": 1.1013, "step": 6801000 }, { "epoch": 4.08, "learning_rate": 2.7486898614862718e-05, "loss": 1.1268, "step": 6801500 }, { "epoch": 4.08, "learning_rate": 2.748479864930215e-05, "loss": 1.0941, "step": 6802000 }, { "epoch": 4.08, "learning_rate": 2.748270288367271e-05, "loss": 1.124, "step": 6802500 }, { "epoch": 4.08, "learning_rate": 2.7480602918112142e-05, "loss": 1.117, "step": 6803000 }, { "epoch": 4.08, "learning_rate": 2.747850295255158e-05, "loss": 1.0988, "step": 6803500 }, { "epoch": 4.08, "learning_rate": 2.7476402986991016e-05, "loss": 1.1171, "step": 6804000 }, { "epoch": 4.08, "learning_rate": 2.7474303021430446e-05, "loss": 1.103, "step": 6804500 }, { "epoch": 4.08, "learning_rate": 2.7472203055869882e-05, "loss": 1.113, "step": 6805000 }, { "epoch": 4.08, "learning_rate": 2.747010309030932e-05, "loss": 1.1066, "step": 6805500 }, { "epoch": 4.08, "learning_rate": 2.7468003124748753e-05, "loss": 1.0861, "step": 6806000 }, { "epoch": 4.08, "learning_rate": 2.746590315918819e-05, "loss": 1.0916, "step": 6806500 }, { "epoch": 4.08, "learning_rate": 2.7463807393558743e-05, "loss": 1.0984, "step": 6807000 }, { "epoch": 4.08, "learning_rate": 2.746170742799818e-05, "loss": 1.1021, "step": 6807500 }, { "epoch": 4.08, "learning_rate": 2.7459607462437617e-05, "loss": 1.1295, "step": 6808000 }, { "epoch": 4.08, "learning_rate": 2.745750749687705e-05, "loss": 1.092, "step": 6808500 }, { "epoch": 4.08, "learning_rate": 2.7455407531316487e-05, "loss": 1.1074, "step": 6809000 }, { "epoch": 4.08, "learning_rate": 2.7453307565755924e-05, "loss": 1.0971, "step": 6809500 }, { "epoch": 4.08, "learning_rate": 2.7451211800126478e-05, "loss": 1.119, "step": 6810000 }, { "epoch": 4.08, "learning_rate": 2.744911183456591e-05, "loss": 1.0841, "step": 6810500 }, { "epoch": 4.08, "learning_rate": 2.7447011869005348e-05, "loss": 1.0983, "step": 6811000 }, { "epoch": 4.08, "learning_rate": 2.7444911903444785e-05, "loss": 1.1035, "step": 6811500 }, { "epoch": 4.08, "learning_rate": 2.7442811937884218e-05, "loss": 1.119, "step": 6812000 }, { "epoch": 4.08, "learning_rate": 2.7440716172254775e-05, "loss": 1.0848, "step": 6812500 }, { "epoch": 4.08, "learning_rate": 2.7438620406625332e-05, "loss": 1.1094, "step": 6813000 }, { "epoch": 4.08, "learning_rate": 2.7436520441064766e-05, "loss": 1.1133, "step": 6813500 }, { "epoch": 4.09, "learning_rate": 2.74344204755042e-05, "loss": 1.1134, "step": 6814000 }, { "epoch": 4.09, "learning_rate": 2.7432320509943636e-05, "loss": 1.1203, "step": 6814500 }, { "epoch": 4.09, "learning_rate": 2.7430224744314193e-05, "loss": 1.1206, "step": 6815000 }, { "epoch": 4.09, "learning_rate": 2.742812477875363e-05, "loss": 1.0886, "step": 6815500 }, { "epoch": 4.09, "learning_rate": 2.7426024813193063e-05, "loss": 1.1356, "step": 6816000 }, { "epoch": 4.09, "learning_rate": 2.7423924847632497e-05, "loss": 1.1121, "step": 6816500 }, { "epoch": 4.09, "learning_rate": 2.7421824882071933e-05, "loss": 1.1172, "step": 6817000 }, { "epoch": 4.09, "learning_rate": 2.7419724916511367e-05, "loss": 1.1113, "step": 6817500 }, { "epoch": 4.09, "learning_rate": 2.7417624950950804e-05, "loss": 1.1084, "step": 6818000 }, { "epoch": 4.09, "learning_rate": 2.741552498539024e-05, "loss": 1.1143, "step": 6818500 }, { "epoch": 4.09, "learning_rate": 2.7413429219760794e-05, "loss": 1.1158, "step": 6819000 }, { "epoch": 4.09, "learning_rate": 2.741132925420023e-05, "loss": 1.0951, "step": 6819500 }, { "epoch": 4.09, "learning_rate": 2.7409229288639664e-05, "loss": 1.1055, "step": 6820000 }, { "epoch": 4.09, "learning_rate": 2.74071293230791e-05, "loss": 1.1292, "step": 6820500 }, { "epoch": 4.09, "learning_rate": 2.7405029357518538e-05, "loss": 1.1109, "step": 6821000 }, { "epoch": 4.09, "learning_rate": 2.740292939195797e-05, "loss": 1.1227, "step": 6821500 }, { "epoch": 4.09, "learning_rate": 2.7400829426397405e-05, "loss": 1.0926, "step": 6822000 }, { "epoch": 4.09, "learning_rate": 2.7398729460836842e-05, "loss": 1.1248, "step": 6822500 }, { "epoch": 4.09, "learning_rate": 2.739662949527628e-05, "loss": 1.0952, "step": 6823000 }, { "epoch": 4.09, "learning_rate": 2.7394529529715712e-05, "loss": 1.129, "step": 6823500 }, { "epoch": 4.09, "learning_rate": 2.7392433764086266e-05, "loss": 1.1085, "step": 6824000 }, { "epoch": 4.09, "learning_rate": 2.7390333798525703e-05, "loss": 1.1205, "step": 6824500 }, { "epoch": 4.09, "learning_rate": 2.738823383296514e-05, "loss": 1.0958, "step": 6825000 }, { "epoch": 4.09, "learning_rate": 2.7386133867404573e-05, "loss": 1.1103, "step": 6825500 }, { "epoch": 4.09, "learning_rate": 2.738403390184401e-05, "loss": 1.0888, "step": 6826000 }, { "epoch": 4.09, "learning_rate": 2.7381938136214563e-05, "loss": 1.1221, "step": 6826500 }, { "epoch": 4.09, "learning_rate": 2.7379838170654e-05, "loss": 1.092, "step": 6827000 }, { "epoch": 4.09, "learning_rate": 2.7377738205093437e-05, "loss": 1.114, "step": 6827500 }, { "epoch": 4.09, "learning_rate": 2.737563823953287e-05, "loss": 1.0996, "step": 6828000 }, { "epoch": 4.09, "learning_rate": 2.7373538273972307e-05, "loss": 1.1398, "step": 6828500 }, { "epoch": 4.09, "learning_rate": 2.737144250834286e-05, "loss": 1.1142, "step": 6829000 }, { "epoch": 4.09, "learning_rate": 2.7369342542782298e-05, "loss": 1.1082, "step": 6829500 }, { "epoch": 4.09, "learning_rate": 2.7367242577221735e-05, "loss": 1.1167, "step": 6830000 }, { "epoch": 4.1, "learning_rate": 2.7365142611661168e-05, "loss": 1.1084, "step": 6830500 }, { "epoch": 4.1, "learning_rate": 2.7363042646100605e-05, "loss": 1.1295, "step": 6831000 }, { "epoch": 4.1, "learning_rate": 2.736094268054004e-05, "loss": 1.1329, "step": 6831500 }, { "epoch": 4.1, "learning_rate": 2.7358846914910595e-05, "loss": 1.0933, "step": 6832000 }, { "epoch": 4.1, "learning_rate": 2.735674694935003e-05, "loss": 1.1253, "step": 6832500 }, { "epoch": 4.1, "learning_rate": 2.7354646983789466e-05, "loss": 1.1145, "step": 6833000 }, { "epoch": 4.1, "learning_rate": 2.7352547018228903e-05, "loss": 1.0771, "step": 6833500 }, { "epoch": 4.1, "learning_rate": 2.7350447052668336e-05, "loss": 1.1167, "step": 6834000 }, { "epoch": 4.1, "learning_rate": 2.7348351287038893e-05, "loss": 1.1228, "step": 6834500 }, { "epoch": 4.1, "learning_rate": 2.7346251321478326e-05, "loss": 1.0675, "step": 6835000 }, { "epoch": 4.1, "learning_rate": 2.7344151355917763e-05, "loss": 1.1444, "step": 6835500 }, { "epoch": 4.1, "learning_rate": 2.73420513903572e-05, "loss": 1.1145, "step": 6836000 }, { "epoch": 4.1, "learning_rate": 2.733995142479663e-05, "loss": 1.1516, "step": 6836500 }, { "epoch": 4.1, "learning_rate": 2.7337851459236067e-05, "loss": 1.1141, "step": 6837000 }, { "epoch": 4.1, "learning_rate": 2.7335751493675504e-05, "loss": 1.1055, "step": 6837500 }, { "epoch": 4.1, "learning_rate": 2.733365572804606e-05, "loss": 1.1067, "step": 6838000 }, { "epoch": 4.1, "learning_rate": 2.7331555762485498e-05, "loss": 1.1488, "step": 6838500 }, { "epoch": 4.1, "learning_rate": 2.7329455796924928e-05, "loss": 1.1261, "step": 6839000 }, { "epoch": 4.1, "learning_rate": 2.7327355831364365e-05, "loss": 1.1072, "step": 6839500 }, { "epoch": 4.1, "learning_rate": 2.73252558658038e-05, "loss": 1.1064, "step": 6840000 }, { "epoch": 4.1, "learning_rate": 2.7323155900243235e-05, "loss": 1.1369, "step": 6840500 }, { "epoch": 4.1, "learning_rate": 2.7321060134613792e-05, "loss": 1.1436, "step": 6841000 }, { "epoch": 4.1, "learning_rate": 2.7318960169053225e-05, "loss": 1.0874, "step": 6841500 }, { "epoch": 4.1, "learning_rate": 2.7316864403423782e-05, "loss": 1.0803, "step": 6842000 }, { "epoch": 4.1, "learning_rate": 2.731476443786322e-05, "loss": 1.1165, "step": 6842500 }, { "epoch": 4.1, "learning_rate": 2.7312664472302656e-05, "loss": 1.1123, "step": 6843000 }, { "epoch": 4.1, "learning_rate": 2.7310564506742086e-05, "loss": 1.1061, "step": 6843500 }, { "epoch": 4.1, "learning_rate": 2.7308464541181523e-05, "loss": 1.1123, "step": 6844000 }, { "epoch": 4.1, "learning_rate": 2.730636457562096e-05, "loss": 1.1017, "step": 6844500 }, { "epoch": 4.1, "learning_rate": 2.7304264610060397e-05, "loss": 1.1115, "step": 6845000 }, { "epoch": 4.1, "learning_rate": 2.730216464449983e-05, "loss": 1.126, "step": 6845500 }, { "epoch": 4.1, "learning_rate": 2.7300068878870384e-05, "loss": 1.1314, "step": 6846000 }, { "epoch": 4.1, "learning_rate": 2.729796891330982e-05, "loss": 1.1182, "step": 6846500 }, { "epoch": 4.11, "learning_rate": 2.7295868947749257e-05, "loss": 1.111, "step": 6847000 }, { "epoch": 4.11, "learning_rate": 2.729376898218869e-05, "loss": 1.1493, "step": 6847500 }, { "epoch": 4.11, "learning_rate": 2.729167321655925e-05, "loss": 1.118, "step": 6848000 }, { "epoch": 4.11, "learning_rate": 2.728957325099868e-05, "loss": 1.1342, "step": 6848500 }, { "epoch": 4.11, "learning_rate": 2.7287473285438118e-05, "loss": 1.1174, "step": 6849000 }, { "epoch": 4.11, "learning_rate": 2.7285373319877555e-05, "loss": 1.0999, "step": 6849500 }, { "epoch": 4.11, "learning_rate": 2.728327335431699e-05, "loss": 1.1207, "step": 6850000 }, { "epoch": 4.11, "learning_rate": 2.7281177588687545e-05, "loss": 1.1054, "step": 6850500 }, { "epoch": 4.11, "learning_rate": 2.727907762312698e-05, "loss": 1.1054, "step": 6851000 }, { "epoch": 4.11, "learning_rate": 2.7276977657566416e-05, "loss": 1.1014, "step": 6851500 }, { "epoch": 4.11, "learning_rate": 2.7274877692005852e-05, "loss": 1.1035, "step": 6852000 }, { "epoch": 4.11, "learning_rate": 2.7272777726445286e-05, "loss": 1.1178, "step": 6852500 }, { "epoch": 4.11, "learning_rate": 2.7270677760884723e-05, "loss": 1.114, "step": 6853000 }, { "epoch": 4.11, "learning_rate": 2.7268581995255276e-05, "loss": 1.0994, "step": 6853500 }, { "epoch": 4.11, "learning_rate": 2.7266482029694713e-05, "loss": 1.0971, "step": 6854000 }, { "epoch": 4.11, "learning_rate": 2.7264382064134147e-05, "loss": 1.1231, "step": 6854500 }, { "epoch": 4.11, "learning_rate": 2.7262286298504707e-05, "loss": 1.0951, "step": 6855000 }, { "epoch": 4.11, "learning_rate": 2.7260186332944137e-05, "loss": 1.1121, "step": 6855500 }, { "epoch": 4.11, "learning_rate": 2.7258086367383574e-05, "loss": 1.1483, "step": 6856000 }, { "epoch": 4.11, "learning_rate": 2.725598640182301e-05, "loss": 1.1194, "step": 6856500 }, { "epoch": 4.11, "learning_rate": 2.7253886436262444e-05, "loss": 1.1017, "step": 6857000 }, { "epoch": 4.11, "learning_rate": 2.725178647070188e-05, "loss": 1.0907, "step": 6857500 }, { "epoch": 4.11, "learning_rate": 2.7249686505141315e-05, "loss": 1.1076, "step": 6858000 }, { "epoch": 4.11, "learning_rate": 2.7247586539580748e-05, "loss": 1.1086, "step": 6858500 }, { "epoch": 4.11, "learning_rate": 2.7245486574020185e-05, "loss": 1.1159, "step": 6859000 }, { "epoch": 4.11, "learning_rate": 2.7243386608459622e-05, "loss": 1.1063, "step": 6859500 }, { "epoch": 4.11, "learning_rate": 2.724129084283018e-05, "loss": 1.1143, "step": 6860000 }, { "epoch": 4.11, "learning_rate": 2.7239190877269612e-05, "loss": 1.1094, "step": 6860500 }, { "epoch": 4.11, "learning_rate": 2.7237090911709046e-05, "loss": 1.0927, "step": 6861000 }, { "epoch": 4.11, "learning_rate": 2.7234990946148482e-05, "loss": 1.1082, "step": 6861500 }, { "epoch": 4.11, "learning_rate": 2.723289518051904e-05, "loss": 1.0989, "step": 6862000 }, { "epoch": 4.11, "learning_rate": 2.7230795214958476e-05, "loss": 1.0945, "step": 6862500 }, { "epoch": 4.11, "learning_rate": 2.722869524939791e-05, "loss": 1.1073, "step": 6863000 }, { "epoch": 4.11, "learning_rate": 2.7226595283837343e-05, "loss": 1.1123, "step": 6863500 }, { "epoch": 4.12, "learning_rate": 2.722449531827678e-05, "loss": 1.103, "step": 6864000 }, { "epoch": 4.12, "learning_rate": 2.7222399552647337e-05, "loss": 1.0981, "step": 6864500 }, { "epoch": 4.12, "learning_rate": 2.7220299587086774e-05, "loss": 1.1409, "step": 6865000 }, { "epoch": 4.12, "learning_rate": 2.7218199621526204e-05, "loss": 1.1019, "step": 6865500 }, { "epoch": 4.12, "learning_rate": 2.721609965596564e-05, "loss": 1.1166, "step": 6866000 }, { "epoch": 4.12, "learning_rate": 2.7213999690405078e-05, "loss": 1.1385, "step": 6866500 }, { "epoch": 4.12, "learning_rate": 2.721189972484451e-05, "loss": 1.1397, "step": 6867000 }, { "epoch": 4.12, "learning_rate": 2.7209803959215068e-05, "loss": 1.104, "step": 6867500 }, { "epoch": 4.12, "learning_rate": 2.72077039936545e-05, "loss": 1.1103, "step": 6868000 }, { "epoch": 4.12, "learning_rate": 2.720560402809394e-05, "loss": 1.1064, "step": 6868500 }, { "epoch": 4.12, "learning_rate": 2.7203504062533375e-05, "loss": 1.1136, "step": 6869000 }, { "epoch": 4.12, "learning_rate": 2.7201408296903932e-05, "loss": 1.11, "step": 6869500 }, { "epoch": 4.12, "learning_rate": 2.7199308331343366e-05, "loss": 1.0982, "step": 6870000 }, { "epoch": 4.12, "learning_rate": 2.71972083657828e-05, "loss": 1.1084, "step": 6870500 }, { "epoch": 4.12, "learning_rate": 2.7195108400222236e-05, "loss": 1.0734, "step": 6871000 }, { "epoch": 4.12, "learning_rate": 2.7193008434661673e-05, "loss": 1.1411, "step": 6871500 }, { "epoch": 4.12, "learning_rate": 2.719091266903223e-05, "loss": 1.1074, "step": 6872000 }, { "epoch": 4.12, "learning_rate": 2.718881270347166e-05, "loss": 1.1156, "step": 6872500 }, { "epoch": 4.12, "learning_rate": 2.7186712737911097e-05, "loss": 1.1208, "step": 6873000 }, { "epoch": 4.12, "learning_rate": 2.7184612772350533e-05, "loss": 1.1012, "step": 6873500 }, { "epoch": 4.12, "learning_rate": 2.718251700672109e-05, "loss": 1.1078, "step": 6874000 }, { "epoch": 4.12, "learning_rate": 2.7180417041160527e-05, "loss": 1.1246, "step": 6874500 }, { "epoch": 4.12, "learning_rate": 2.7178317075599957e-05, "loss": 1.0907, "step": 6875000 }, { "epoch": 4.12, "learning_rate": 2.7176217110039394e-05, "loss": 1.1062, "step": 6875500 }, { "epoch": 4.12, "learning_rate": 2.717411714447883e-05, "loss": 1.1102, "step": 6876000 }, { "epoch": 4.12, "learning_rate": 2.7172017178918265e-05, "loss": 1.0909, "step": 6876500 }, { "epoch": 4.12, "learning_rate": 2.7169921413288825e-05, "loss": 1.1136, "step": 6877000 }, { "epoch": 4.12, "learning_rate": 2.7167821447728255e-05, "loss": 1.135, "step": 6877500 }, { "epoch": 4.12, "learning_rate": 2.7165721482167692e-05, "loss": 1.0908, "step": 6878000 }, { "epoch": 4.12, "learning_rate": 2.716362151660713e-05, "loss": 1.1129, "step": 6878500 }, { "epoch": 4.12, "learning_rate": 2.7161525750977686e-05, "loss": 1.116, "step": 6879000 }, { "epoch": 4.12, "learning_rate": 2.7159425785417116e-05, "loss": 1.1111, "step": 6879500 }, { "epoch": 4.12, "learning_rate": 2.7157325819856553e-05, "loss": 1.1365, "step": 6880000 }, { "epoch": 4.13, "learning_rate": 2.715522585429599e-05, "loss": 1.1188, "step": 6880500 }, { "epoch": 4.13, "learning_rate": 2.7153125888735426e-05, "loss": 1.0812, "step": 6881000 }, { "epoch": 4.13, "learning_rate": 2.715102592317486e-05, "loss": 1.1011, "step": 6881500 }, { "epoch": 4.13, "learning_rate": 2.7148925957614297e-05, "loss": 1.0899, "step": 6882000 }, { "epoch": 4.13, "learning_rate": 2.714682599205373e-05, "loss": 1.0958, "step": 6882500 }, { "epoch": 4.13, "learning_rate": 2.7144726026493163e-05, "loss": 1.1016, "step": 6883000 }, { "epoch": 4.13, "learning_rate": 2.714263026086372e-05, "loss": 1.1112, "step": 6883500 }, { "epoch": 4.13, "learning_rate": 2.7140530295303157e-05, "loss": 1.0992, "step": 6884000 }, { "epoch": 4.13, "learning_rate": 2.7138430329742594e-05, "loss": 1.1072, "step": 6884500 }, { "epoch": 4.13, "learning_rate": 2.7136330364182028e-05, "loss": 1.1256, "step": 6885000 }, { "epoch": 4.13, "learning_rate": 2.713423039862146e-05, "loss": 1.1247, "step": 6885500 }, { "epoch": 4.13, "learning_rate": 2.7132130433060898e-05, "loss": 1.1175, "step": 6886000 }, { "epoch": 4.13, "learning_rate": 2.7130034667431455e-05, "loss": 1.1009, "step": 6886500 }, { "epoch": 4.13, "learning_rate": 2.7127934701870888e-05, "loss": 1.0995, "step": 6887000 }, { "epoch": 4.13, "learning_rate": 2.7125834736310322e-05, "loss": 1.1152, "step": 6887500 }, { "epoch": 4.13, "learning_rate": 2.712373477074976e-05, "loss": 1.0998, "step": 6888000 }, { "epoch": 4.13, "learning_rate": 2.7121639005120316e-05, "loss": 1.1194, "step": 6888500 }, { "epoch": 4.13, "learning_rate": 2.7119539039559752e-05, "loss": 1.0983, "step": 6889000 }, { "epoch": 4.13, "learning_rate": 2.7117439073999186e-05, "loss": 1.1041, "step": 6889500 }, { "epoch": 4.13, "learning_rate": 2.711533910843862e-05, "loss": 1.0946, "step": 6890000 }, { "epoch": 4.13, "learning_rate": 2.7113239142878056e-05, "loss": 1.1038, "step": 6890500 }, { "epoch": 4.13, "learning_rate": 2.7111147577179737e-05, "loss": 1.101, "step": 6891000 }, { "epoch": 4.13, "learning_rate": 2.7109047611619167e-05, "loss": 1.1217, "step": 6891500 }, { "epoch": 4.13, "learning_rate": 2.7106947646058604e-05, "loss": 1.1252, "step": 6892000 }, { "epoch": 4.13, "learning_rate": 2.710484768049804e-05, "loss": 1.1203, "step": 6892500 }, { "epoch": 4.13, "learning_rate": 2.7102747714937474e-05, "loss": 1.1031, "step": 6893000 }, { "epoch": 4.13, "learning_rate": 2.710064774937691e-05, "loss": 1.0958, "step": 6893500 }, { "epoch": 4.13, "learning_rate": 2.7098547783816348e-05, "loss": 1.1095, "step": 6894000 }, { "epoch": 4.13, "learning_rate": 2.7096447818255778e-05, "loss": 1.1263, "step": 6894500 }, { "epoch": 4.13, "learning_rate": 2.7094347852695214e-05, "loss": 1.1408, "step": 6895000 }, { "epoch": 4.13, "learning_rate": 2.709224788713465e-05, "loss": 1.1096, "step": 6895500 }, { "epoch": 4.13, "learning_rate": 2.7090152121505208e-05, "loss": 1.1094, "step": 6896000 }, { "epoch": 4.13, "learning_rate": 2.7088052155944642e-05, "loss": 1.1285, "step": 6896500 }, { "epoch": 4.14, "learning_rate": 2.7085952190384075e-05, "loss": 1.1069, "step": 6897000 }, { "epoch": 4.14, "learning_rate": 2.7083852224823512e-05, "loss": 1.0934, "step": 6897500 }, { "epoch": 4.14, "learning_rate": 2.708175225926295e-05, "loss": 1.1032, "step": 6898000 }, { "epoch": 4.14, "learning_rate": 2.7079652293702382e-05, "loss": 1.1052, "step": 6898500 }, { "epoch": 4.14, "learning_rate": 2.707755652807294e-05, "loss": 1.0881, "step": 6899000 }, { "epoch": 4.14, "learning_rate": 2.7075456562512373e-05, "loss": 1.1221, "step": 6899500 }, { "epoch": 4.14, "learning_rate": 2.707335659695181e-05, "loss": 1.0958, "step": 6900000 }, { "epoch": 4.14, "eval_loss": 1.0961161851882935, "eval_runtime": 1104.919, "eval_samples_per_second": 476.705, "eval_steps_per_second": 79.451, "step": 6900000 }, { "epoch": 4.14, "learning_rate": 2.7071256631391246e-05, "loss": 1.1103, "step": 6900500 }, { "epoch": 4.14, "learning_rate": 2.706915666583068e-05, "loss": 1.1092, "step": 6901000 }, { "epoch": 4.14, "learning_rate": 2.7067056700270117e-05, "loss": 1.1185, "step": 6901500 }, { "epoch": 4.14, "learning_rate": 2.706495673470955e-05, "loss": 1.1211, "step": 6902000 }, { "epoch": 4.14, "learning_rate": 2.7062856769148984e-05, "loss": 1.1344, "step": 6902500 }, { "epoch": 4.14, "learning_rate": 2.706076100351954e-05, "loss": 1.1008, "step": 6903000 }, { "epoch": 4.14, "learning_rate": 2.7058661037958977e-05, "loss": 1.1185, "step": 6903500 }, { "epoch": 4.14, "learning_rate": 2.705656107239841e-05, "loss": 1.1351, "step": 6904000 }, { "epoch": 4.14, "learning_rate": 2.7054461106837848e-05, "loss": 1.1097, "step": 6904500 }, { "epoch": 4.14, "learning_rate": 2.705236114127728e-05, "loss": 1.1174, "step": 6905000 }, { "epoch": 4.14, "learning_rate": 2.7050265375647838e-05, "loss": 1.1179, "step": 6905500 }, { "epoch": 4.14, "learning_rate": 2.7048165410087275e-05, "loss": 1.1133, "step": 6906000 }, { "epoch": 4.14, "learning_rate": 2.704606544452671e-05, "loss": 1.0956, "step": 6906500 }, { "epoch": 4.14, "learning_rate": 2.7043965478966145e-05, "loss": 1.0875, "step": 6907000 }, { "epoch": 4.14, "learning_rate": 2.704186551340558e-05, "loss": 1.1104, "step": 6907500 }, { "epoch": 4.14, "learning_rate": 2.7039769747776136e-05, "loss": 1.0812, "step": 6908000 }, { "epoch": 4.14, "learning_rate": 2.7037669782215573e-05, "loss": 1.1348, "step": 6908500 }, { "epoch": 4.14, "learning_rate": 2.7035569816655006e-05, "loss": 1.0865, "step": 6909000 }, { "epoch": 4.14, "learning_rate": 2.703346985109444e-05, "loss": 1.1028, "step": 6909500 }, { "epoch": 4.14, "learning_rate": 2.7031374085465e-05, "loss": 1.1179, "step": 6910000 }, { "epoch": 4.14, "learning_rate": 2.7029274119904433e-05, "loss": 1.1212, "step": 6910500 }, { "epoch": 4.14, "learning_rate": 2.702717415434387e-05, "loss": 1.1247, "step": 6911000 }, { "epoch": 4.14, "learning_rate": 2.7025074188783304e-05, "loss": 1.1272, "step": 6911500 }, { "epoch": 4.14, "learning_rate": 2.7022974223222737e-05, "loss": 1.1097, "step": 6912000 }, { "epoch": 4.14, "learning_rate": 2.7020878457593294e-05, "loss": 1.1299, "step": 6912500 }, { "epoch": 4.14, "learning_rate": 2.701877849203273e-05, "loss": 1.0996, "step": 6913000 }, { "epoch": 4.14, "learning_rate": 2.7016678526472168e-05, "loss": 1.1152, "step": 6913500 }, { "epoch": 4.15, "learning_rate": 2.70145785609116e-05, "loss": 1.1205, "step": 6914000 }, { "epoch": 4.15, "learning_rate": 2.7012482795282158e-05, "loss": 1.1038, "step": 6914500 }, { "epoch": 4.15, "learning_rate": 2.701038282972159e-05, "loss": 1.1285, "step": 6915000 }, { "epoch": 4.15, "learning_rate": 2.700828286416103e-05, "loss": 1.1126, "step": 6915500 }, { "epoch": 4.15, "learning_rate": 2.7006182898600462e-05, "loss": 1.1208, "step": 6916000 }, { "epoch": 4.15, "learning_rate": 2.7004082933039895e-05, "loss": 1.1091, "step": 6916500 }, { "epoch": 4.15, "learning_rate": 2.7001982967479332e-05, "loss": 1.0976, "step": 6917000 }, { "epoch": 4.15, "learning_rate": 2.699988300191877e-05, "loss": 1.0749, "step": 6917500 }, { "epoch": 4.15, "learning_rate": 2.6997783036358203e-05, "loss": 1.1015, "step": 6918000 }, { "epoch": 4.15, "learning_rate": 2.699568727072876e-05, "loss": 1.1121, "step": 6918500 }, { "epoch": 4.15, "learning_rate": 2.6993587305168193e-05, "loss": 1.0831, "step": 6919000 }, { "epoch": 4.15, "learning_rate": 2.699148733960763e-05, "loss": 1.1387, "step": 6919500 }, { "epoch": 4.15, "learning_rate": 2.6989387374047067e-05, "loss": 1.0743, "step": 6920000 }, { "epoch": 4.15, "learning_rate": 2.69872874084865e-05, "loss": 1.1261, "step": 6920500 }, { "epoch": 4.15, "learning_rate": 2.6985191642857057e-05, "loss": 1.1023, "step": 6921000 }, { "epoch": 4.15, "learning_rate": 2.698309167729649e-05, "loss": 1.0984, "step": 6921500 }, { "epoch": 4.15, "learning_rate": 2.6980991711735927e-05, "loss": 1.1264, "step": 6922000 }, { "epoch": 4.15, "learning_rate": 2.6978891746175364e-05, "loss": 1.096, "step": 6922500 }, { "epoch": 4.15, "learning_rate": 2.6976791780614798e-05, "loss": 1.0982, "step": 6923000 }, { "epoch": 4.15, "learning_rate": 2.697469181505423e-05, "loss": 1.1053, "step": 6923500 }, { "epoch": 4.15, "learning_rate": 2.6972591849493668e-05, "loss": 1.0959, "step": 6924000 }, { "epoch": 4.15, "learning_rate": 2.6970496083864225e-05, "loss": 1.0996, "step": 6924500 }, { "epoch": 4.15, "learning_rate": 2.696839611830366e-05, "loss": 1.1039, "step": 6925000 }, { "epoch": 4.15, "learning_rate": 2.6966296152743095e-05, "loss": 1.1087, "step": 6925500 }, { "epoch": 4.15, "learning_rate": 2.696419618718253e-05, "loss": 1.1099, "step": 6926000 }, { "epoch": 4.15, "learning_rate": 2.6962100421553086e-05, "loss": 1.1337, "step": 6926500 }, { "epoch": 4.15, "learning_rate": 2.6960004655923643e-05, "loss": 1.1069, "step": 6927000 }, { "epoch": 4.15, "learning_rate": 2.695790469036308e-05, "loss": 1.1308, "step": 6927500 }, { "epoch": 4.15, "learning_rate": 2.6955804724802513e-05, "loss": 1.1099, "step": 6928000 }, { "epoch": 4.15, "learning_rate": 2.6953704759241946e-05, "loss": 1.1014, "step": 6928500 }, { "epoch": 4.15, "learning_rate": 2.6951604793681383e-05, "loss": 1.1114, "step": 6929000 }, { "epoch": 4.15, "learning_rate": 2.694950482812082e-05, "loss": 1.1025, "step": 6929500 }, { "epoch": 4.15, "learning_rate": 2.6947404862560254e-05, "loss": 1.1079, "step": 6930000 }, { "epoch": 4.16, "learning_rate": 2.694530489699969e-05, "loss": 1.1161, "step": 6930500 }, { "epoch": 4.16, "learning_rate": 2.6943204931439124e-05, "loss": 1.0865, "step": 6931000 }, { "epoch": 4.16, "learning_rate": 2.6941104965878557e-05, "loss": 1.1052, "step": 6931500 }, { "epoch": 4.16, "learning_rate": 2.6939005000317994e-05, "loss": 1.1014, "step": 6932000 }, { "epoch": 4.16, "learning_rate": 2.693690503475743e-05, "loss": 1.1237, "step": 6932500 }, { "epoch": 4.16, "learning_rate": 2.6934809269127985e-05, "loss": 1.1129, "step": 6933000 }, { "epoch": 4.16, "learning_rate": 2.693271350349854e-05, "loss": 1.1313, "step": 6933500 }, { "epoch": 4.16, "learning_rate": 2.693061353793798e-05, "loss": 1.1039, "step": 6934000 }, { "epoch": 4.16, "learning_rate": 2.6928513572377412e-05, "loss": 1.1039, "step": 6934500 }, { "epoch": 4.16, "learning_rate": 2.692641360681685e-05, "loss": 1.106, "step": 6935000 }, { "epoch": 4.16, "learning_rate": 2.6924313641256282e-05, "loss": 1.0976, "step": 6935500 }, { "epoch": 4.16, "learning_rate": 2.6922213675695716e-05, "loss": 1.0889, "step": 6936000 }, { "epoch": 4.16, "learning_rate": 2.6920113710135153e-05, "loss": 1.0729, "step": 6936500 }, { "epoch": 4.16, "learning_rate": 2.691801374457459e-05, "loss": 1.1357, "step": 6937000 }, { "epoch": 4.16, "learning_rate": 2.6915917978945146e-05, "loss": 1.1413, "step": 6937500 }, { "epoch": 4.16, "learning_rate": 2.691381801338458e-05, "loss": 1.1438, "step": 6938000 }, { "epoch": 4.16, "learning_rate": 2.6911718047824013e-05, "loss": 1.1087, "step": 6938500 }, { "epoch": 4.16, "learning_rate": 2.690961808226345e-05, "loss": 1.0966, "step": 6939000 }, { "epoch": 4.16, "learning_rate": 2.6907522316634007e-05, "loss": 1.0965, "step": 6939500 }, { "epoch": 4.16, "learning_rate": 2.6905422351073444e-05, "loss": 1.092, "step": 6940000 }, { "epoch": 4.16, "learning_rate": 2.6903322385512877e-05, "loss": 1.1321, "step": 6940500 }, { "epoch": 4.16, "learning_rate": 2.690122241995231e-05, "loss": 1.0838, "step": 6941000 }, { "epoch": 4.16, "learning_rate": 2.6899122454391748e-05, "loss": 1.114, "step": 6941500 }, { "epoch": 4.16, "learning_rate": 2.6897026688762305e-05, "loss": 1.1222, "step": 6942000 }, { "epoch": 4.16, "learning_rate": 2.6894926723201738e-05, "loss": 1.1192, "step": 6942500 }, { "epoch": 4.16, "learning_rate": 2.6892826757641175e-05, "loss": 1.091, "step": 6943000 }, { "epoch": 4.16, "learning_rate": 2.689072679208061e-05, "loss": 1.1177, "step": 6943500 }, { "epoch": 4.16, "learning_rate": 2.6888631026451165e-05, "loss": 1.1127, "step": 6944000 }, { "epoch": 4.16, "learning_rate": 2.6886531060890602e-05, "loss": 1.1064, "step": 6944500 }, { "epoch": 4.16, "learning_rate": 2.6884431095330036e-05, "loss": 1.1155, "step": 6945000 }, { "epoch": 4.16, "learning_rate": 2.688233112976947e-05, "loss": 1.0787, "step": 6945500 }, { "epoch": 4.16, "learning_rate": 2.6880231164208906e-05, "loss": 1.1253, "step": 6946000 }, { "epoch": 4.16, "learning_rate": 2.6878135398579463e-05, "loss": 1.1048, "step": 6946500 }, { "epoch": 4.17, "learning_rate": 2.68760354330189e-05, "loss": 1.1152, "step": 6947000 }, { "epoch": 4.17, "learning_rate": 2.6873935467458333e-05, "loss": 1.1106, "step": 6947500 }, { "epoch": 4.17, "learning_rate": 2.6871835501897767e-05, "loss": 1.1225, "step": 6948000 }, { "epoch": 4.17, "learning_rate": 2.6869735536337204e-05, "loss": 1.1242, "step": 6948500 }, { "epoch": 4.17, "learning_rate": 2.686763557077664e-05, "loss": 1.0918, "step": 6949000 }, { "epoch": 4.17, "learning_rate": 2.6865535605216074e-05, "loss": 1.077, "step": 6949500 }, { "epoch": 4.17, "learning_rate": 2.686343983958663e-05, "loss": 1.1159, "step": 6950000 }, { "epoch": 4.17, "learning_rate": 2.6861339874026064e-05, "loss": 1.1412, "step": 6950500 }, { "epoch": 4.17, "learning_rate": 2.68592399084655e-05, "loss": 1.1384, "step": 6951000 }, { "epoch": 4.17, "learning_rate": 2.6857139942904938e-05, "loss": 1.0889, "step": 6951500 }, { "epoch": 4.17, "learning_rate": 2.6855044177275495e-05, "loss": 1.133, "step": 6952000 }, { "epoch": 4.17, "learning_rate": 2.685294841164605e-05, "loss": 1.0989, "step": 6952500 }, { "epoch": 4.17, "learning_rate": 2.6850848446085485e-05, "loss": 1.1525, "step": 6953000 }, { "epoch": 4.17, "learning_rate": 2.684874848052492e-05, "loss": 1.0835, "step": 6953500 }, { "epoch": 4.17, "learning_rate": 2.6846648514964356e-05, "loss": 1.1168, "step": 6954000 }, { "epoch": 4.17, "learning_rate": 2.684454854940379e-05, "loss": 1.1041, "step": 6954500 }, { "epoch": 4.17, "learning_rate": 2.6842448583843223e-05, "loss": 1.1204, "step": 6955000 }, { "epoch": 4.17, "learning_rate": 2.684034861828266e-05, "loss": 1.1064, "step": 6955500 }, { "epoch": 4.17, "learning_rate": 2.6838248652722096e-05, "loss": 1.1138, "step": 6956000 }, { "epoch": 4.17, "learning_rate": 2.683614868716153e-05, "loss": 1.1074, "step": 6956500 }, { "epoch": 4.17, "learning_rate": 2.6834048721600967e-05, "loss": 1.1012, "step": 6957000 }, { "epoch": 4.17, "learning_rate": 2.683195295597152e-05, "loss": 1.1006, "step": 6957500 }, { "epoch": 4.17, "learning_rate": 2.6829852990410957e-05, "loss": 1.133, "step": 6958000 }, { "epoch": 4.17, "learning_rate": 2.6827753024850394e-05, "loss": 1.1041, "step": 6958500 }, { "epoch": 4.17, "learning_rate": 2.6825653059289827e-05, "loss": 1.1361, "step": 6959000 }, { "epoch": 4.17, "learning_rate": 2.6823553093729264e-05, "loss": 1.0986, "step": 6959500 }, { "epoch": 4.17, "learning_rate": 2.6821453128168698e-05, "loss": 1.1062, "step": 6960000 }, { "epoch": 4.17, "learning_rate": 2.6819361562470375e-05, "loss": 1.1284, "step": 6960500 }, { "epoch": 4.17, "learning_rate": 2.681726159690981e-05, "loss": 1.1295, "step": 6961000 }, { "epoch": 4.17, "learning_rate": 2.681516163134925e-05, "loss": 1.0945, "step": 6961500 }, { "epoch": 4.17, "learning_rate": 2.681306166578868e-05, "loss": 1.1237, "step": 6962000 }, { "epoch": 4.17, "learning_rate": 2.6810961700228115e-05, "loss": 1.1375, "step": 6962500 }, { "epoch": 4.17, "learning_rate": 2.6808861734667552e-05, "loss": 1.1245, "step": 6963000 }, { "epoch": 4.17, "learning_rate": 2.6806761769106986e-05, "loss": 1.1076, "step": 6963500 }, { "epoch": 4.18, "learning_rate": 2.6804661803546422e-05, "loss": 1.1001, "step": 6964000 }, { "epoch": 4.18, "learning_rate": 2.6802566037916976e-05, "loss": 1.0939, "step": 6964500 }, { "epoch": 4.18, "learning_rate": 2.6800466072356413e-05, "loss": 1.1451, "step": 6965000 }, { "epoch": 4.18, "learning_rate": 2.679837030672697e-05, "loss": 1.1219, "step": 6965500 }, { "epoch": 4.18, "learning_rate": 2.6796270341166407e-05, "loss": 1.0967, "step": 6966000 }, { "epoch": 4.18, "learning_rate": 2.6794170375605837e-05, "loss": 1.116, "step": 6966500 }, { "epoch": 4.18, "learning_rate": 2.6792070410045274e-05, "loss": 1.1049, "step": 6967000 }, { "epoch": 4.18, "learning_rate": 2.678997044448471e-05, "loss": 1.1038, "step": 6967500 }, { "epoch": 4.18, "learning_rate": 2.6787870478924144e-05, "loss": 1.1135, "step": 6968000 }, { "epoch": 4.18, "learning_rate": 2.678577051336358e-05, "loss": 1.0843, "step": 6968500 }, { "epoch": 4.18, "learning_rate": 2.6783670547803018e-05, "loss": 1.1373, "step": 6969000 }, { "epoch": 4.18, "learning_rate": 2.678157478217357e-05, "loss": 1.1125, "step": 6969500 }, { "epoch": 4.18, "learning_rate": 2.6779474816613008e-05, "loss": 1.1101, "step": 6970000 }, { "epoch": 4.18, "learning_rate": 2.677737485105244e-05, "loss": 1.0993, "step": 6970500 }, { "epoch": 4.18, "learning_rate": 2.677527488549188e-05, "loss": 1.077, "step": 6971000 }, { "epoch": 4.18, "learning_rate": 2.6773174919931312e-05, "loss": 1.1182, "step": 6971500 }, { "epoch": 4.18, "learning_rate": 2.677107495437075e-05, "loss": 1.0932, "step": 6972000 }, { "epoch": 4.18, "learning_rate": 2.6768979188741306e-05, "loss": 1.1211, "step": 6972500 }, { "epoch": 4.18, "learning_rate": 2.676687922318074e-05, "loss": 1.1, "step": 6973000 }, { "epoch": 4.18, "learning_rate": 2.6764783457551293e-05, "loss": 1.1017, "step": 6973500 }, { "epoch": 4.18, "learning_rate": 2.676268349199073e-05, "loss": 1.1147, "step": 6974000 }, { "epoch": 4.18, "learning_rate": 2.6760583526430166e-05, "loss": 1.1049, "step": 6974500 }, { "epoch": 4.18, "learning_rate": 2.6758483560869603e-05, "loss": 1.1317, "step": 6975000 }, { "epoch": 4.18, "learning_rate": 2.6756383595309037e-05, "loss": 1.1029, "step": 6975500 }, { "epoch": 4.18, "learning_rate": 2.6754283629748473e-05, "loss": 1.1064, "step": 6976000 }, { "epoch": 4.18, "learning_rate": 2.6752183664187907e-05, "loss": 1.0892, "step": 6976500 }, { "epoch": 4.18, "learning_rate": 2.675008369862734e-05, "loss": 1.1043, "step": 6977000 }, { "epoch": 4.18, "learning_rate": 2.6747987932997897e-05, "loss": 1.1199, "step": 6977500 }, { "epoch": 4.18, "learning_rate": 2.6745887967437334e-05, "loss": 1.1132, "step": 6978000 }, { "epoch": 4.18, "learning_rate": 2.674378800187677e-05, "loss": 1.0926, "step": 6978500 }, { "epoch": 4.18, "learning_rate": 2.6741688036316205e-05, "loss": 1.1303, "step": 6979000 }, { "epoch": 4.18, "learning_rate": 2.6739588070755638e-05, "loss": 1.1084, "step": 6979500 }, { "epoch": 4.18, "learning_rate": 2.6737488105195075e-05, "loss": 1.1373, "step": 6980000 }, { "epoch": 4.19, "learning_rate": 2.673538813963451e-05, "loss": 1.1211, "step": 6980500 }, { "epoch": 4.19, "learning_rate": 2.6733288174073945e-05, "loss": 1.1337, "step": 6981000 }, { "epoch": 4.19, "learning_rate": 2.67311924084445e-05, "loss": 1.1146, "step": 6981500 }, { "epoch": 4.19, "learning_rate": 2.6729092442883936e-05, "loss": 1.1014, "step": 6982000 }, { "epoch": 4.19, "learning_rate": 2.6726992477323372e-05, "loss": 1.1166, "step": 6982500 }, { "epoch": 4.19, "learning_rate": 2.6724892511762806e-05, "loss": 1.121, "step": 6983000 }, { "epoch": 4.19, "learning_rate": 2.6722796746133363e-05, "loss": 1.1255, "step": 6983500 }, { "epoch": 4.19, "learning_rate": 2.6720696780572796e-05, "loss": 1.0823, "step": 6984000 }, { "epoch": 4.19, "learning_rate": 2.6718596815012233e-05, "loss": 1.1042, "step": 6984500 }, { "epoch": 4.19, "learning_rate": 2.671649684945167e-05, "loss": 1.1647, "step": 6985000 }, { "epoch": 4.19, "learning_rate": 2.6714401083822227e-05, "loss": 1.1135, "step": 6985500 }, { "epoch": 4.19, "learning_rate": 2.671230111826166e-05, "loss": 1.1113, "step": 6986000 }, { "epoch": 4.19, "learning_rate": 2.6710201152701094e-05, "loss": 1.1195, "step": 6986500 }, { "epoch": 4.19, "learning_rate": 2.670810118714053e-05, "loss": 1.1024, "step": 6987000 }, { "epoch": 4.19, "learning_rate": 2.6706001221579968e-05, "loss": 1.0875, "step": 6987500 }, { "epoch": 4.19, "learning_rate": 2.6703905455950525e-05, "loss": 1.114, "step": 6988000 }, { "epoch": 4.19, "learning_rate": 2.6701809690321078e-05, "loss": 1.1141, "step": 6988500 }, { "epoch": 4.19, "learning_rate": 2.6699709724760515e-05, "loss": 1.1075, "step": 6989000 }, { "epoch": 4.19, "learning_rate": 2.669760975919995e-05, "loss": 1.1169, "step": 6989500 }, { "epoch": 4.19, "learning_rate": 2.6695509793639385e-05, "loss": 1.1158, "step": 6990000 }, { "epoch": 4.19, "learning_rate": 2.6693409828078822e-05, "loss": 1.1042, "step": 6990500 }, { "epoch": 4.19, "learning_rate": 2.6691309862518252e-05, "loss": 1.1042, "step": 6991000 }, { "epoch": 4.19, "learning_rate": 2.668920989695769e-05, "loss": 1.1256, "step": 6991500 }, { "epoch": 4.19, "learning_rate": 2.6687109931397126e-05, "loss": 1.0965, "step": 6992000 }, { "epoch": 4.19, "learning_rate": 2.6685014165767683e-05, "loss": 1.1359, "step": 6992500 }, { "epoch": 4.19, "learning_rate": 2.6682914200207116e-05, "loss": 1.1192, "step": 6993000 }, { "epoch": 4.19, "learning_rate": 2.668081423464655e-05, "loss": 1.0981, "step": 6993500 }, { "epoch": 4.19, "learning_rate": 2.6678714269085987e-05, "loss": 1.1194, "step": 6994000 }, { "epoch": 4.19, "learning_rate": 2.6676618503456544e-05, "loss": 1.0847, "step": 6994500 }, { "epoch": 4.19, "learning_rate": 2.667451853789598e-05, "loss": 1.0929, "step": 6995000 }, { "epoch": 4.19, "learning_rate": 2.667241857233541e-05, "loss": 1.13, "step": 6995500 }, { "epoch": 4.19, "learning_rate": 2.667032280670597e-05, "loss": 1.104, "step": 6996000 }, { "epoch": 4.19, "learning_rate": 2.6668222841145404e-05, "loss": 1.1254, "step": 6996500 }, { "epoch": 4.19, "learning_rate": 2.666612287558484e-05, "loss": 1.0985, "step": 6997000 }, { "epoch": 4.2, "learning_rate": 2.6664022910024278e-05, "loss": 1.1023, "step": 6997500 }, { "epoch": 4.2, "learning_rate": 2.6661922944463708e-05, "loss": 1.0989, "step": 6998000 }, { "epoch": 4.2, "learning_rate": 2.6659822978903145e-05, "loss": 1.1212, "step": 6998500 }, { "epoch": 4.2, "learning_rate": 2.6657723013342582e-05, "loss": 1.1182, "step": 6999000 }, { "epoch": 4.2, "learning_rate": 2.6655623047782015e-05, "loss": 1.1241, "step": 6999500 }, { "epoch": 4.2, "learning_rate": 2.6653523082221452e-05, "loss": 1.13, "step": 7000000 }, { "epoch": 4.2, "eval_loss": 1.0938860177993774, "eval_runtime": 1101.4526, "eval_samples_per_second": 478.205, "eval_steps_per_second": 79.701, "step": 7000000 }, { "epoch": 4.2, "learning_rate": 2.6651427316592006e-05, "loss": 1.101, "step": 7000500 }, { "epoch": 4.2, "learning_rate": 2.6649327351031442e-05, "loss": 1.1237, "step": 7001000 }, { "epoch": 4.2, "learning_rate": 2.664722738547088e-05, "loss": 1.1115, "step": 7001500 }, { "epoch": 4.2, "learning_rate": 2.6645127419910313e-05, "loss": 1.0917, "step": 7002000 }, { "epoch": 4.2, "learning_rate": 2.664302745434975e-05, "loss": 1.086, "step": 7002500 }, { "epoch": 4.2, "learning_rate": 2.6640927488789183e-05, "loss": 1.1106, "step": 7003000 }, { "epoch": 4.2, "learning_rate": 2.6638827523228617e-05, "loss": 1.0988, "step": 7003500 }, { "epoch": 4.2, "learning_rate": 2.6636731757599173e-05, "loss": 1.0975, "step": 7004000 }, { "epoch": 4.2, "learning_rate": 2.663463179203861e-05, "loss": 1.1179, "step": 7004500 }, { "epoch": 4.2, "learning_rate": 2.6632531826478047e-05, "loss": 1.1007, "step": 7005000 }, { "epoch": 4.2, "learning_rate": 2.663043186091748e-05, "loss": 1.0985, "step": 7005500 }, { "epoch": 4.2, "learning_rate": 2.6628331895356914e-05, "loss": 1.1329, "step": 7006000 }, { "epoch": 4.2, "learning_rate": 2.662623192979635e-05, "loss": 1.157, "step": 7006500 }, { "epoch": 4.2, "learning_rate": 2.6624136164166908e-05, "loss": 1.1197, "step": 7007000 }, { "epoch": 4.2, "learning_rate": 2.6622036198606345e-05, "loss": 1.1024, "step": 7007500 }, { "epoch": 4.2, "learning_rate": 2.6619936233045778e-05, "loss": 1.1162, "step": 7008000 }, { "epoch": 4.2, "learning_rate": 2.661783626748521e-05, "loss": 1.0849, "step": 7008500 }, { "epoch": 4.2, "learning_rate": 2.661574050185577e-05, "loss": 1.0909, "step": 7009000 }, { "epoch": 4.2, "learning_rate": 2.6613640536295205e-05, "loss": 1.1323, "step": 7009500 }, { "epoch": 4.2, "learning_rate": 2.661154057073464e-05, "loss": 1.1141, "step": 7010000 }, { "epoch": 4.2, "learning_rate": 2.6609440605174072e-05, "loss": 1.1436, "step": 7010500 }, { "epoch": 4.2, "learning_rate": 2.660734063961351e-05, "loss": 1.1168, "step": 7011000 }, { "epoch": 4.2, "learning_rate": 2.6605244873984066e-05, "loss": 1.0884, "step": 7011500 }, { "epoch": 4.2, "learning_rate": 2.6603144908423503e-05, "loss": 1.1199, "step": 7012000 }, { "epoch": 4.2, "learning_rate": 2.6601044942862937e-05, "loss": 1.1099, "step": 7012500 }, { "epoch": 4.2, "learning_rate": 2.659894497730237e-05, "loss": 1.1009, "step": 7013000 }, { "epoch": 4.2, "learning_rate": 2.6596845011741807e-05, "loss": 1.1321, "step": 7013500 }, { "epoch": 4.21, "learning_rate": 2.6594749246112364e-05, "loss": 1.1133, "step": 7014000 }, { "epoch": 4.21, "learning_rate": 2.65926492805518e-05, "loss": 1.1185, "step": 7014500 }, { "epoch": 4.21, "learning_rate": 2.6590549314991234e-05, "loss": 1.1212, "step": 7015000 }, { "epoch": 4.21, "learning_rate": 2.6588449349430668e-05, "loss": 1.1139, "step": 7015500 }, { "epoch": 4.21, "learning_rate": 2.6586349383870104e-05, "loss": 1.0749, "step": 7016000 }, { "epoch": 4.21, "learning_rate": 2.658425361824066e-05, "loss": 1.112, "step": 7016500 }, { "epoch": 4.21, "learning_rate": 2.6582153652680098e-05, "loss": 1.1256, "step": 7017000 }, { "epoch": 4.21, "learning_rate": 2.6580053687119528e-05, "loss": 1.1163, "step": 7017500 }, { "epoch": 4.21, "learning_rate": 2.6577953721558965e-05, "loss": 1.1055, "step": 7018000 }, { "epoch": 4.21, "learning_rate": 2.6575862155860646e-05, "loss": 1.1125, "step": 7018500 }, { "epoch": 4.21, "learning_rate": 2.657376219030008e-05, "loss": 1.1142, "step": 7019000 }, { "epoch": 4.21, "learning_rate": 2.6571662224739513e-05, "loss": 1.1012, "step": 7019500 }, { "epoch": 4.21, "learning_rate": 2.656956225917895e-05, "loss": 1.1292, "step": 7020000 }, { "epoch": 4.21, "learning_rate": 2.6567462293618383e-05, "loss": 1.1048, "step": 7020500 }, { "epoch": 4.21, "learning_rate": 2.656536232805782e-05, "loss": 1.1029, "step": 7021000 }, { "epoch": 4.21, "learning_rate": 2.6563262362497257e-05, "loss": 1.0997, "step": 7021500 }, { "epoch": 4.21, "learning_rate": 2.656116239693669e-05, "loss": 1.1123, "step": 7022000 }, { "epoch": 4.21, "learning_rate": 2.6559062431376123e-05, "loss": 1.1295, "step": 7022500 }, { "epoch": 4.21, "learning_rate": 2.655696246581556e-05, "loss": 1.093, "step": 7023000 }, { "epoch": 4.21, "learning_rate": 2.6554862500254997e-05, "loss": 1.1142, "step": 7023500 }, { "epoch": 4.21, "learning_rate": 2.655276253469443e-05, "loss": 1.1111, "step": 7024000 }, { "epoch": 4.21, "learning_rate": 2.6550666769064984e-05, "loss": 1.0865, "step": 7024500 }, { "epoch": 4.21, "learning_rate": 2.6548571003435545e-05, "loss": 1.1096, "step": 7025000 }, { "epoch": 4.21, "learning_rate": 2.6546471037874978e-05, "loss": 1.1359, "step": 7025500 }, { "epoch": 4.21, "learning_rate": 2.6544371072314415e-05, "loss": 1.1082, "step": 7026000 }, { "epoch": 4.21, "learning_rate": 2.654227110675385e-05, "loss": 1.117, "step": 7026500 }, { "epoch": 4.21, "learning_rate": 2.6540171141193282e-05, "loss": 1.0906, "step": 7027000 }, { "epoch": 4.21, "learning_rate": 2.653807117563272e-05, "loss": 1.0989, "step": 7027500 }, { "epoch": 4.21, "learning_rate": 2.6535971210072155e-05, "loss": 1.1188, "step": 7028000 }, { "epoch": 4.21, "learning_rate": 2.6533875444442712e-05, "loss": 1.1074, "step": 7028500 }, { "epoch": 4.21, "learning_rate": 2.6531775478882146e-05, "loss": 1.1005, "step": 7029000 }, { "epoch": 4.21, "learning_rate": 2.652967551332158e-05, "loss": 1.1237, "step": 7029500 }, { "epoch": 4.21, "learning_rate": 2.6527575547761016e-05, "loss": 1.1104, "step": 7030000 }, { "epoch": 4.22, "learning_rate": 2.6525475582200453e-05, "loss": 1.1383, "step": 7030500 }, { "epoch": 4.22, "learning_rate": 2.6523375616639886e-05, "loss": 1.0952, "step": 7031000 }, { "epoch": 4.22, "learning_rate": 2.652127985101044e-05, "loss": 1.0915, "step": 7031500 }, { "epoch": 4.22, "learning_rate": 2.6519179885449877e-05, "loss": 1.1039, "step": 7032000 }, { "epoch": 4.22, "learning_rate": 2.6517079919889314e-05, "loss": 1.0953, "step": 7032500 }, { "epoch": 4.22, "learning_rate": 2.6514979954328747e-05, "loss": 1.0999, "step": 7033000 }, { "epoch": 4.22, "learning_rate": 2.6512879988768184e-05, "loss": 1.1367, "step": 7033500 }, { "epoch": 4.22, "learning_rate": 2.6510784223138738e-05, "loss": 1.137, "step": 7034000 }, { "epoch": 4.22, "learning_rate": 2.6508684257578174e-05, "loss": 1.0915, "step": 7034500 }, { "epoch": 4.22, "learning_rate": 2.650658429201761e-05, "loss": 1.1547, "step": 7035000 }, { "epoch": 4.22, "learning_rate": 2.6504484326457045e-05, "loss": 1.0978, "step": 7035500 }, { "epoch": 4.22, "learning_rate": 2.650238436089648e-05, "loss": 1.1061, "step": 7036000 }, { "epoch": 4.22, "learning_rate": 2.6500288595267035e-05, "loss": 1.106, "step": 7036500 }, { "epoch": 4.22, "learning_rate": 2.6498188629706472e-05, "loss": 1.1006, "step": 7037000 }, { "epoch": 4.22, "learning_rate": 2.649608866414591e-05, "loss": 1.1028, "step": 7037500 }, { "epoch": 4.22, "learning_rate": 2.6493988698585342e-05, "loss": 1.1134, "step": 7038000 }, { "epoch": 4.22, "learning_rate": 2.649188873302478e-05, "loss": 1.1447, "step": 7038500 }, { "epoch": 4.22, "learning_rate": 2.6489792967395333e-05, "loss": 1.1097, "step": 7039000 }, { "epoch": 4.22, "learning_rate": 2.648769300183477e-05, "loss": 1.1143, "step": 7039500 }, { "epoch": 4.22, "learning_rate": 2.6485593036274203e-05, "loss": 1.1055, "step": 7040000 }, { "epoch": 4.22, "learning_rate": 2.648349307071364e-05, "loss": 1.1337, "step": 7040500 }, { "epoch": 4.22, "learning_rate": 2.6481393105153077e-05, "loss": 1.1181, "step": 7041000 }, { "epoch": 4.22, "learning_rate": 2.647929313959251e-05, "loss": 1.1208, "step": 7041500 }, { "epoch": 4.22, "learning_rate": 2.6477197373963067e-05, "loss": 1.1011, "step": 7042000 }, { "epoch": 4.22, "learning_rate": 2.64750974084025e-05, "loss": 1.1149, "step": 7042500 }, { "epoch": 4.22, "learning_rate": 2.6472997442841938e-05, "loss": 1.1013, "step": 7043000 }, { "epoch": 4.22, "learning_rate": 2.6470897477281374e-05, "loss": 1.1273, "step": 7043500 }, { "epoch": 4.22, "learning_rate": 2.6468797511720808e-05, "loss": 1.0871, "step": 7044000 }, { "epoch": 4.22, "learning_rate": 2.6466701746091365e-05, "loss": 1.1071, "step": 7044500 }, { "epoch": 4.22, "learning_rate": 2.6464601780530798e-05, "loss": 1.1004, "step": 7045000 }, { "epoch": 4.22, "learning_rate": 2.6462501814970235e-05, "loss": 1.1006, "step": 7045500 }, { "epoch": 4.22, "learning_rate": 2.6460401849409672e-05, "loss": 1.1462, "step": 7046000 }, { "epoch": 4.22, "learning_rate": 2.6458301883849102e-05, "loss": 1.1046, "step": 7046500 }, { "epoch": 4.22, "learning_rate": 2.645620191828854e-05, "loss": 1.088, "step": 7047000 }, { "epoch": 4.23, "learning_rate": 2.6454101952727976e-05, "loss": 1.1157, "step": 7047500 }, { "epoch": 4.23, "learning_rate": 2.645200198716741e-05, "loss": 1.1236, "step": 7048000 }, { "epoch": 4.23, "learning_rate": 2.644990622153797e-05, "loss": 1.1064, "step": 7048500 }, { "epoch": 4.23, "learning_rate": 2.6447810455908523e-05, "loss": 1.1195, "step": 7049000 }, { "epoch": 4.23, "learning_rate": 2.6445710490347957e-05, "loss": 1.1082, "step": 7049500 }, { "epoch": 4.23, "learning_rate": 2.6443610524787393e-05, "loss": 1.108, "step": 7050000 }, { "epoch": 4.23, "learning_rate": 2.644151055922683e-05, "loss": 1.1198, "step": 7050500 }, { "epoch": 4.23, "learning_rate": 2.6439410593666264e-05, "loss": 1.1003, "step": 7051000 }, { "epoch": 4.23, "learning_rate": 2.6437310628105697e-05, "loss": 1.1257, "step": 7051500 }, { "epoch": 4.23, "learning_rate": 2.6435210662545134e-05, "loss": 1.1128, "step": 7052000 }, { "epoch": 4.23, "learning_rate": 2.643311489691569e-05, "loss": 1.1, "step": 7052500 }, { "epoch": 4.23, "learning_rate": 2.6431014931355128e-05, "loss": 1.1066, "step": 7053000 }, { "epoch": 4.23, "learning_rate": 2.6428914965794558e-05, "loss": 1.1095, "step": 7053500 }, { "epoch": 4.23, "learning_rate": 2.6426815000233995e-05, "loss": 1.1109, "step": 7054000 }, { "epoch": 4.23, "learning_rate": 2.642471503467343e-05, "loss": 1.1325, "step": 7054500 }, { "epoch": 4.23, "learning_rate": 2.642261926904399e-05, "loss": 1.1038, "step": 7055000 }, { "epoch": 4.23, "learning_rate": 2.6420519303483425e-05, "loss": 1.1486, "step": 7055500 }, { "epoch": 4.23, "learning_rate": 2.6418419337922855e-05, "loss": 1.1164, "step": 7056000 }, { "epoch": 4.23, "learning_rate": 2.6416319372362292e-05, "loss": 1.117, "step": 7056500 }, { "epoch": 4.23, "learning_rate": 2.641421940680173e-05, "loss": 1.107, "step": 7057000 }, { "epoch": 4.23, "learning_rate": 2.6412119441241163e-05, "loss": 1.1392, "step": 7057500 }, { "epoch": 4.23, "learning_rate": 2.64100194756806e-05, "loss": 1.13, "step": 7058000 }, { "epoch": 4.23, "learning_rate": 2.6407919510120033e-05, "loss": 1.0785, "step": 7058500 }, { "epoch": 4.23, "learning_rate": 2.640582374449059e-05, "loss": 1.0909, "step": 7059000 }, { "epoch": 4.23, "learning_rate": 2.6403727978861147e-05, "loss": 1.1188, "step": 7059500 }, { "epoch": 4.23, "learning_rate": 2.6401628013300584e-05, "loss": 1.1243, "step": 7060000 }, { "epoch": 4.23, "learning_rate": 2.6399528047740014e-05, "loss": 1.1184, "step": 7060500 }, { "epoch": 4.23, "learning_rate": 2.639742808217945e-05, "loss": 1.1242, "step": 7061000 }, { "epoch": 4.23, "learning_rate": 2.6395328116618887e-05, "loss": 1.0821, "step": 7061500 }, { "epoch": 4.23, "learning_rate": 2.639322815105832e-05, "loss": 1.1058, "step": 7062000 }, { "epoch": 4.23, "learning_rate": 2.6391128185497758e-05, "loss": 1.1045, "step": 7062500 }, { "epoch": 4.23, "learning_rate": 2.6389028219937195e-05, "loss": 1.1053, "step": 7063000 }, { "epoch": 4.23, "learning_rate": 2.6386932454307748e-05, "loss": 1.1296, "step": 7063500 }, { "epoch": 4.24, "learning_rate": 2.6384832488747185e-05, "loss": 1.1119, "step": 7064000 }, { "epoch": 4.24, "learning_rate": 2.6382736723117742e-05, "loss": 1.1264, "step": 7064500 }, { "epoch": 4.24, "learning_rate": 2.6380636757557175e-05, "loss": 1.1321, "step": 7065000 }, { "epoch": 4.24, "learning_rate": 2.637853679199661e-05, "loss": 1.1282, "step": 7065500 }, { "epoch": 4.24, "learning_rate": 2.6376436826436046e-05, "loss": 1.095, "step": 7066000 }, { "epoch": 4.24, "learning_rate": 2.6374336860875483e-05, "loss": 1.1067, "step": 7066500 }, { "epoch": 4.24, "learning_rate": 2.6372236895314916e-05, "loss": 1.126, "step": 7067000 }, { "epoch": 4.24, "learning_rate": 2.6370136929754353e-05, "loss": 1.1246, "step": 7067500 }, { "epoch": 4.24, "learning_rate": 2.6368036964193786e-05, "loss": 1.0811, "step": 7068000 }, { "epoch": 4.24, "learning_rate": 2.636593699863322e-05, "loss": 1.129, "step": 7068500 }, { "epoch": 4.24, "learning_rate": 2.6363837033072657e-05, "loss": 1.1047, "step": 7069000 }, { "epoch": 4.24, "learning_rate": 2.6361737067512094e-05, "loss": 1.1224, "step": 7069500 }, { "epoch": 4.24, "learning_rate": 2.635964130188265e-05, "loss": 1.1175, "step": 7070000 }, { "epoch": 4.24, "learning_rate": 2.6357541336322084e-05, "loss": 1.0896, "step": 7070500 }, { "epoch": 4.24, "learning_rate": 2.6355441370761517e-05, "loss": 1.1021, "step": 7071000 }, { "epoch": 4.24, "learning_rate": 2.6353341405200954e-05, "loss": 1.111, "step": 7071500 }, { "epoch": 4.24, "learning_rate": 2.635124143964039e-05, "loss": 1.1155, "step": 7072000 }, { "epoch": 4.24, "learning_rate": 2.6349145674010948e-05, "loss": 1.1367, "step": 7072500 }, { "epoch": 4.24, "learning_rate": 2.6347045708450378e-05, "loss": 1.109, "step": 7073000 }, { "epoch": 4.24, "learning_rate": 2.6344945742889815e-05, "loss": 1.1194, "step": 7073500 }, { "epoch": 4.24, "learning_rate": 2.6342845777329252e-05, "loss": 1.1214, "step": 7074000 }, { "epoch": 4.24, "learning_rate": 2.634074581176869e-05, "loss": 1.1346, "step": 7074500 }, { "epoch": 4.24, "learning_rate": 2.6338645846208122e-05, "loss": 1.1052, "step": 7075000 }, { "epoch": 4.24, "learning_rate": 2.6336550080578676e-05, "loss": 1.1135, "step": 7075500 }, { "epoch": 4.24, "learning_rate": 2.6334450115018113e-05, "loss": 1.1425, "step": 7076000 }, { "epoch": 4.24, "learning_rate": 2.633235014945755e-05, "loss": 1.1029, "step": 7076500 }, { "epoch": 4.24, "learning_rate": 2.6330250183896983e-05, "loss": 1.0937, "step": 7077000 }, { "epoch": 4.24, "learning_rate": 2.632815441826754e-05, "loss": 1.1109, "step": 7077500 }, { "epoch": 4.24, "learning_rate": 2.6326054452706973e-05, "loss": 1.1003, "step": 7078000 }, { "epoch": 4.24, "learning_rate": 2.632395448714641e-05, "loss": 1.1171, "step": 7078500 }, { "epoch": 4.24, "learning_rate": 2.6321854521585847e-05, "loss": 1.1365, "step": 7079000 }, { "epoch": 4.24, "learning_rate": 2.631975455602528e-05, "loss": 1.1311, "step": 7079500 }, { "epoch": 4.24, "learning_rate": 2.6317658790395837e-05, "loss": 1.1228, "step": 7080000 }, { "epoch": 4.25, "learning_rate": 2.631555882483527e-05, "loss": 1.1068, "step": 7080500 }, { "epoch": 4.25, "learning_rate": 2.6313458859274708e-05, "loss": 1.1118, "step": 7081000 }, { "epoch": 4.25, "learning_rate": 2.6311358893714145e-05, "loss": 1.1303, "step": 7081500 }, { "epoch": 4.25, "learning_rate": 2.6309258928153578e-05, "loss": 1.1148, "step": 7082000 }, { "epoch": 4.25, "learning_rate": 2.630716316252413e-05, "loss": 1.1257, "step": 7082500 }, { "epoch": 4.25, "learning_rate": 2.630506319696357e-05, "loss": 1.0981, "step": 7083000 }, { "epoch": 4.25, "learning_rate": 2.6302963231403005e-05, "loss": 1.1329, "step": 7083500 }, { "epoch": 4.25, "learning_rate": 2.630086326584244e-05, "loss": 1.0933, "step": 7084000 }, { "epoch": 4.25, "learning_rate": 2.6298767500213e-05, "loss": 1.104, "step": 7084500 }, { "epoch": 4.25, "learning_rate": 2.629666753465243e-05, "loss": 1.1413, "step": 7085000 }, { "epoch": 4.25, "learning_rate": 2.6294567569091866e-05, "loss": 1.1227, "step": 7085500 }, { "epoch": 4.25, "learning_rate": 2.6292467603531303e-05, "loss": 1.0863, "step": 7086000 }, { "epoch": 4.25, "learning_rate": 2.6290367637970736e-05, "loss": 1.1186, "step": 7086500 }, { "epoch": 4.25, "learning_rate": 2.6288271872341293e-05, "loss": 1.1185, "step": 7087000 }, { "epoch": 4.25, "learning_rate": 2.6286171906780727e-05, "loss": 1.1399, "step": 7087500 }, { "epoch": 4.25, "learning_rate": 2.6284071941220164e-05, "loss": 1.0998, "step": 7088000 }, { "epoch": 4.25, "learning_rate": 2.628197617559072e-05, "loss": 1.1086, "step": 7088500 }, { "epoch": 4.25, "learning_rate": 2.6279876210030157e-05, "loss": 1.1031, "step": 7089000 }, { "epoch": 4.25, "learning_rate": 2.6277776244469587e-05, "loss": 1.1038, "step": 7089500 }, { "epoch": 4.25, "learning_rate": 2.6275676278909024e-05, "loss": 1.1304, "step": 7090000 }, { "epoch": 4.25, "learning_rate": 2.627358051327958e-05, "loss": 1.0953, "step": 7090500 }, { "epoch": 4.25, "learning_rate": 2.6271480547719018e-05, "loss": 1.1455, "step": 7091000 }, { "epoch": 4.25, "learning_rate": 2.6269380582158455e-05, "loss": 1.091, "step": 7091500 }, { "epoch": 4.25, "learning_rate": 2.6267280616597885e-05, "loss": 1.0988, "step": 7092000 }, { "epoch": 4.25, "learning_rate": 2.6265180651037322e-05, "loss": 1.0949, "step": 7092500 }, { "epoch": 4.25, "learning_rate": 2.626308068547676e-05, "loss": 1.1101, "step": 7093000 }, { "epoch": 4.25, "learning_rate": 2.6260980719916192e-05, "loss": 1.1452, "step": 7093500 }, { "epoch": 4.25, "learning_rate": 2.625888075435563e-05, "loss": 1.0996, "step": 7094000 }, { "epoch": 4.25, "learning_rate": 2.6256780788795066e-05, "loss": 1.1038, "step": 7094500 }, { "epoch": 4.25, "learning_rate": 2.6254680823234496e-05, "loss": 1.1048, "step": 7095000 }, { "epoch": 4.25, "learning_rate": 2.6252580857673933e-05, "loss": 1.1012, "step": 7095500 }, { "epoch": 4.25, "learning_rate": 2.625048509204449e-05, "loss": 1.0886, "step": 7096000 }, { "epoch": 4.25, "learning_rate": 2.6248385126483927e-05, "loss": 1.1351, "step": 7096500 }, { "epoch": 4.25, "learning_rate": 2.624628516092336e-05, "loss": 1.142, "step": 7097000 }, { "epoch": 4.26, "learning_rate": 2.6244185195362794e-05, "loss": 1.1158, "step": 7097500 }, { "epoch": 4.26, "learning_rate": 2.624208522980223e-05, "loss": 1.134, "step": 7098000 }, { "epoch": 4.26, "learning_rate": 2.6239989464172787e-05, "loss": 1.1476, "step": 7098500 }, { "epoch": 4.26, "learning_rate": 2.6237889498612224e-05, "loss": 1.1154, "step": 7099000 }, { "epoch": 4.26, "learning_rate": 2.6235789533051658e-05, "loss": 1.1034, "step": 7099500 }, { "epoch": 4.26, "learning_rate": 2.623368956749109e-05, "loss": 1.0942, "step": 7100000 }, { "epoch": 4.26, "eval_loss": 1.094136357307434, "eval_runtime": 1103.2512, "eval_samples_per_second": 477.425, "eval_steps_per_second": 79.571, "step": 7100000 }, { "epoch": 4.26, "learning_rate": 2.6231589601930528e-05, "loss": 1.1401, "step": 7100500 }, { "epoch": 4.26, "learning_rate": 2.6229493836301085e-05, "loss": 1.1107, "step": 7101000 }, { "epoch": 4.26, "learning_rate": 2.6227393870740522e-05, "loss": 1.1045, "step": 7101500 }, { "epoch": 4.26, "learning_rate": 2.6225293905179952e-05, "loss": 1.1181, "step": 7102000 }, { "epoch": 4.26, "learning_rate": 2.6223198139550512e-05, "loss": 1.1187, "step": 7102500 }, { "epoch": 4.26, "learning_rate": 2.6221098173989946e-05, "loss": 1.129, "step": 7103000 }, { "epoch": 4.26, "learning_rate": 2.6218998208429382e-05, "loss": 1.1183, "step": 7103500 }, { "epoch": 4.26, "learning_rate": 2.621689824286882e-05, "loss": 1.1201, "step": 7104000 }, { "epoch": 4.26, "learning_rate": 2.621479827730825e-05, "loss": 1.1255, "step": 7104500 }, { "epoch": 4.26, "learning_rate": 2.6212698311747686e-05, "loss": 1.1056, "step": 7105000 }, { "epoch": 4.26, "learning_rate": 2.6210598346187123e-05, "loss": 1.1132, "step": 7105500 }, { "epoch": 4.26, "learning_rate": 2.6208498380626557e-05, "loss": 1.1331, "step": 7106000 }, { "epoch": 4.26, "learning_rate": 2.6206398415065993e-05, "loss": 1.133, "step": 7106500 }, { "epoch": 4.26, "learning_rate": 2.6204302649436547e-05, "loss": 1.1054, "step": 7107000 }, { "epoch": 4.26, "learning_rate": 2.6202202683875984e-05, "loss": 1.1208, "step": 7107500 }, { "epoch": 4.26, "learning_rate": 2.620010271831542e-05, "loss": 1.1063, "step": 7108000 }, { "epoch": 4.26, "learning_rate": 2.6198002752754854e-05, "loss": 1.0986, "step": 7108500 }, { "epoch": 4.26, "learning_rate": 2.619591118705653e-05, "loss": 1.1417, "step": 7109000 }, { "epoch": 4.26, "learning_rate": 2.6193811221495968e-05, "loss": 1.0861, "step": 7109500 }, { "epoch": 4.26, "learning_rate": 2.61917112559354e-05, "loss": 1.101, "step": 7110000 }, { "epoch": 4.26, "learning_rate": 2.618961129037484e-05, "loss": 1.1249, "step": 7110500 }, { "epoch": 4.26, "learning_rate": 2.6187511324814275e-05, "loss": 1.0863, "step": 7111000 }, { "epoch": 4.26, "learning_rate": 2.6185411359253705e-05, "loss": 1.1143, "step": 7111500 }, { "epoch": 4.26, "learning_rate": 2.6183311393693142e-05, "loss": 1.0801, "step": 7112000 }, { "epoch": 4.26, "learning_rate": 2.618121142813258e-05, "loss": 1.1378, "step": 7112500 }, { "epoch": 4.26, "learning_rate": 2.6179111462572012e-05, "loss": 1.1244, "step": 7113000 }, { "epoch": 4.26, "learning_rate": 2.617701149701145e-05, "loss": 1.1158, "step": 7113500 }, { "epoch": 4.27, "learning_rate": 2.6174911531450883e-05, "loss": 1.1029, "step": 7114000 }, { "epoch": 4.27, "learning_rate": 2.617281156589032e-05, "loss": 1.1233, "step": 7114500 }, { "epoch": 4.27, "learning_rate": 2.6170715800260877e-05, "loss": 1.0978, "step": 7115000 }, { "epoch": 4.27, "learning_rate": 2.6168620034631434e-05, "loss": 1.0864, "step": 7115500 }, { "epoch": 4.27, "learning_rate": 2.6166520069070867e-05, "loss": 1.1017, "step": 7116000 }, { "epoch": 4.27, "learning_rate": 2.61644201035103e-05, "loss": 1.1097, "step": 7116500 }, { "epoch": 4.27, "learning_rate": 2.6162320137949737e-05, "loss": 1.1009, "step": 7117000 }, { "epoch": 4.27, "learning_rate": 2.6160220172389174e-05, "loss": 1.1077, "step": 7117500 }, { "epoch": 4.27, "learning_rate": 2.615812440675973e-05, "loss": 1.1103, "step": 7118000 }, { "epoch": 4.27, "learning_rate": 2.615602444119916e-05, "loss": 1.084, "step": 7118500 }, { "epoch": 4.27, "learning_rate": 2.6153924475638598e-05, "loss": 1.1226, "step": 7119000 }, { "epoch": 4.27, "learning_rate": 2.6151824510078035e-05, "loss": 1.1173, "step": 7119500 }, { "epoch": 4.27, "learning_rate": 2.614972454451747e-05, "loss": 1.1262, "step": 7120000 }, { "epoch": 4.27, "learning_rate": 2.6147624578956905e-05, "loss": 1.112, "step": 7120500 }, { "epoch": 4.27, "learning_rate": 2.614552881332746e-05, "loss": 1.1112, "step": 7121000 }, { "epoch": 4.27, "learning_rate": 2.6143428847766896e-05, "loss": 1.1291, "step": 7121500 }, { "epoch": 4.27, "learning_rate": 2.6141328882206332e-05, "loss": 1.1086, "step": 7122000 }, { "epoch": 4.27, "learning_rate": 2.6139228916645766e-05, "loss": 1.1182, "step": 7122500 }, { "epoch": 4.27, "learning_rate": 2.6137128951085203e-05, "loss": 1.1111, "step": 7123000 }, { "epoch": 4.27, "learning_rate": 2.613502898552464e-05, "loss": 1.1239, "step": 7123500 }, { "epoch": 4.27, "learning_rate": 2.6132933219895193e-05, "loss": 1.1135, "step": 7124000 }, { "epoch": 4.27, "learning_rate": 2.613083325433463e-05, "loss": 1.1116, "step": 7124500 }, { "epoch": 4.27, "learning_rate": 2.6128733288774063e-05, "loss": 1.1286, "step": 7125000 }, { "epoch": 4.27, "learning_rate": 2.61266333232135e-05, "loss": 1.1, "step": 7125500 }, { "epoch": 4.27, "learning_rate": 2.6124537557584054e-05, "loss": 1.1007, "step": 7126000 }, { "epoch": 4.27, "learning_rate": 2.612243759202349e-05, "loss": 1.1421, "step": 7126500 }, { "epoch": 4.27, "learning_rate": 2.6120337626462924e-05, "loss": 1.121, "step": 7127000 }, { "epoch": 4.27, "learning_rate": 2.611823766090236e-05, "loss": 1.0977, "step": 7127500 }, { "epoch": 4.27, "learning_rate": 2.6116137695341798e-05, "loss": 1.1336, "step": 7128000 }, { "epoch": 4.27, "learning_rate": 2.611404192971235e-05, "loss": 1.1251, "step": 7128500 }, { "epoch": 4.27, "learning_rate": 2.6111941964151788e-05, "loss": 1.1242, "step": 7129000 }, { "epoch": 4.27, "learning_rate": 2.6109841998591222e-05, "loss": 1.1024, "step": 7129500 }, { "epoch": 4.27, "learning_rate": 2.610774203303066e-05, "loss": 1.0996, "step": 7130000 }, { "epoch": 4.28, "learning_rate": 2.6105642067470095e-05, "loss": 1.0866, "step": 7130500 }, { "epoch": 4.28, "learning_rate": 2.610354630184065e-05, "loss": 1.1058, "step": 7131000 }, { "epoch": 4.28, "learning_rate": 2.6101446336280086e-05, "loss": 1.1442, "step": 7131500 }, { "epoch": 4.28, "learning_rate": 2.609934637071952e-05, "loss": 1.1203, "step": 7132000 }, { "epoch": 4.28, "learning_rate": 2.6097246405158956e-05, "loss": 1.1005, "step": 7132500 }, { "epoch": 4.28, "learning_rate": 2.609515063952951e-05, "loss": 1.1182, "step": 7133000 }, { "epoch": 4.28, "learning_rate": 2.6093050673968947e-05, "loss": 1.1318, "step": 7133500 }, { "epoch": 4.28, "learning_rate": 2.609095070840838e-05, "loss": 1.0991, "step": 7134000 }, { "epoch": 4.28, "learning_rate": 2.6088850742847817e-05, "loss": 1.1225, "step": 7134500 }, { "epoch": 4.28, "learning_rate": 2.6086750777287254e-05, "loss": 1.1256, "step": 7135000 }, { "epoch": 4.28, "learning_rate": 2.6084659211588927e-05, "loss": 1.0988, "step": 7135500 }, { "epoch": 4.28, "learning_rate": 2.6082559246028364e-05, "loss": 1.121, "step": 7136000 }, { "epoch": 4.28, "learning_rate": 2.60804592804678e-05, "loss": 1.1223, "step": 7136500 }, { "epoch": 4.28, "learning_rate": 2.6078359314907235e-05, "loss": 1.0989, "step": 7137000 }, { "epoch": 4.28, "learning_rate": 2.6076259349346668e-05, "loss": 1.1025, "step": 7137500 }, { "epoch": 4.28, "learning_rate": 2.6074159383786105e-05, "loss": 1.0884, "step": 7138000 }, { "epoch": 4.28, "learning_rate": 2.6072059418225542e-05, "loss": 1.1295, "step": 7138500 }, { "epoch": 4.28, "learning_rate": 2.6069959452664975e-05, "loss": 1.1365, "step": 7139000 }, { "epoch": 4.28, "learning_rate": 2.6067859487104412e-05, "loss": 1.1073, "step": 7139500 }, { "epoch": 4.28, "learning_rate": 2.6065763721474966e-05, "loss": 1.1087, "step": 7140000 }, { "epoch": 4.28, "learning_rate": 2.6063663755914402e-05, "loss": 1.1231, "step": 7140500 }, { "epoch": 4.28, "learning_rate": 2.6061563790353836e-05, "loss": 1.1097, "step": 7141000 }, { "epoch": 4.28, "learning_rate": 2.6059463824793273e-05, "loss": 1.1146, "step": 7141500 }, { "epoch": 4.28, "learning_rate": 2.605736385923271e-05, "loss": 1.1213, "step": 7142000 }, { "epoch": 4.28, "learning_rate": 2.6055268093603263e-05, "loss": 1.1343, "step": 7142500 }, { "epoch": 4.28, "learning_rate": 2.60531681280427e-05, "loss": 1.0961, "step": 7143000 }, { "epoch": 4.28, "learning_rate": 2.6051068162482134e-05, "loss": 1.0908, "step": 7143500 }, { "epoch": 4.28, "learning_rate": 2.604896819692157e-05, "loss": 1.1058, "step": 7144000 }, { "epoch": 4.28, "learning_rate": 2.6046868231361007e-05, "loss": 1.1137, "step": 7144500 }, { "epoch": 4.28, "learning_rate": 2.604477246573156e-05, "loss": 1.104, "step": 7145000 }, { "epoch": 4.28, "learning_rate": 2.6042672500170998e-05, "loss": 1.1273, "step": 7145500 }, { "epoch": 4.28, "learning_rate": 2.604057253461043e-05, "loss": 1.0898, "step": 7146000 }, { "epoch": 4.28, "learning_rate": 2.6038472569049868e-05, "loss": 1.1389, "step": 7146500 }, { "epoch": 4.28, "learning_rate": 2.603637680342042e-05, "loss": 1.0843, "step": 7147000 }, { "epoch": 4.29, "learning_rate": 2.603427683785986e-05, "loss": 1.1115, "step": 7147500 }, { "epoch": 4.29, "learning_rate": 2.6032176872299292e-05, "loss": 1.1248, "step": 7148000 }, { "epoch": 4.29, "learning_rate": 2.603007690673873e-05, "loss": 1.1257, "step": 7148500 }, { "epoch": 4.29, "learning_rate": 2.6027976941178166e-05, "loss": 1.0989, "step": 7149000 }, { "epoch": 4.29, "learning_rate": 2.602588117554872e-05, "loss": 1.1205, "step": 7149500 }, { "epoch": 4.29, "learning_rate": 2.6023781209988156e-05, "loss": 1.1033, "step": 7150000 }, { "epoch": 4.29, "learning_rate": 2.6021685444358713e-05, "loss": 1.1167, "step": 7150500 }, { "epoch": 4.29, "learning_rate": 2.6019585478798146e-05, "loss": 1.131, "step": 7151000 }, { "epoch": 4.29, "learning_rate": 2.6017485513237583e-05, "loss": 1.1109, "step": 7151500 }, { "epoch": 4.29, "learning_rate": 2.6015385547677017e-05, "loss": 1.1322, "step": 7152000 }, { "epoch": 4.29, "learning_rate": 2.6013285582116454e-05, "loss": 1.0956, "step": 7152500 }, { "epoch": 4.29, "learning_rate": 2.6011185616555887e-05, "loss": 1.1021, "step": 7153000 }, { "epoch": 4.29, "learning_rate": 2.6009085650995324e-05, "loss": 1.1128, "step": 7153500 }, { "epoch": 4.29, "learning_rate": 2.600698568543476e-05, "loss": 1.1478, "step": 7154000 }, { "epoch": 4.29, "learning_rate": 2.6004894119736434e-05, "loss": 1.1263, "step": 7154500 }, { "epoch": 4.29, "learning_rate": 2.600279415417587e-05, "loss": 1.0941, "step": 7155000 }, { "epoch": 4.29, "learning_rate": 2.6000694188615308e-05, "loss": 1.1104, "step": 7155500 }, { "epoch": 4.29, "learning_rate": 2.599859422305474e-05, "loss": 1.0944, "step": 7156000 }, { "epoch": 4.29, "learning_rate": 2.599649425749418e-05, "loss": 1.127, "step": 7156500 }, { "epoch": 4.29, "learning_rate": 2.5994394291933612e-05, "loss": 1.1318, "step": 7157000 }, { "epoch": 4.29, "learning_rate": 2.5992294326373045e-05, "loss": 1.1257, "step": 7157500 }, { "epoch": 4.29, "learning_rate": 2.5990194360812482e-05, "loss": 1.0964, "step": 7158000 }, { "epoch": 4.29, "learning_rate": 2.598809439525192e-05, "loss": 1.1121, "step": 7158500 }, { "epoch": 4.29, "learning_rate": 2.5985994429691352e-05, "loss": 1.1153, "step": 7159000 }, { "epoch": 4.29, "learning_rate": 2.598389866406191e-05, "loss": 1.1282, "step": 7159500 }, { "epoch": 4.29, "learning_rate": 2.5981798698501343e-05, "loss": 1.1259, "step": 7160000 }, { "epoch": 4.29, "learning_rate": 2.597969873294078e-05, "loss": 1.1077, "step": 7160500 }, { "epoch": 4.29, "learning_rate": 2.5977598767380217e-05, "loss": 1.1383, "step": 7161000 }, { "epoch": 4.29, "learning_rate": 2.597549880181965e-05, "loss": 1.1084, "step": 7161500 }, { "epoch": 4.29, "learning_rate": 2.5973398836259083e-05, "loss": 1.1256, "step": 7162000 }, { "epoch": 4.29, "learning_rate": 2.597129887069852e-05, "loss": 1.1252, "step": 7162500 }, { "epoch": 4.29, "learning_rate": 2.5969203105069077e-05, "loss": 1.1227, "step": 7163000 }, { "epoch": 4.29, "learning_rate": 2.5967103139508514e-05, "loss": 1.1211, "step": 7163500 }, { "epoch": 4.3, "learning_rate": 2.5965003173947948e-05, "loss": 1.1389, "step": 7164000 }, { "epoch": 4.3, "learning_rate": 2.596290320838738e-05, "loss": 1.1343, "step": 7164500 }, { "epoch": 4.3, "learning_rate": 2.5960803242826818e-05, "loss": 1.0913, "step": 7165000 }, { "epoch": 4.3, "learning_rate": 2.5958707477197375e-05, "loss": 1.0989, "step": 7165500 }, { "epoch": 4.3, "learning_rate": 2.595660751163681e-05, "loss": 1.11, "step": 7166000 }, { "epoch": 4.3, "learning_rate": 2.5954507546076242e-05, "loss": 1.1088, "step": 7166500 }, { "epoch": 4.3, "learning_rate": 2.595240758051568e-05, "loss": 1.0806, "step": 7167000 }, { "epoch": 4.3, "learning_rate": 2.5950307614955115e-05, "loss": 1.1309, "step": 7167500 }, { "epoch": 4.3, "learning_rate": 2.594820764939455e-05, "loss": 1.0935, "step": 7168000 }, { "epoch": 4.3, "learning_rate": 2.5946107683833986e-05, "loss": 1.1224, "step": 7168500 }, { "epoch": 4.3, "learning_rate": 2.594401191820454e-05, "loss": 1.1231, "step": 7169000 }, { "epoch": 4.3, "learning_rate": 2.5941916152575096e-05, "loss": 1.1247, "step": 7169500 }, { "epoch": 4.3, "learning_rate": 2.5939816187014533e-05, "loss": 1.1049, "step": 7170000 }, { "epoch": 4.3, "learning_rate": 2.593771622145397e-05, "loss": 1.0785, "step": 7170500 }, { "epoch": 4.3, "learning_rate": 2.5935616255893403e-05, "loss": 1.1153, "step": 7171000 }, { "epoch": 4.3, "learning_rate": 2.5933516290332837e-05, "loss": 1.0975, "step": 7171500 }, { "epoch": 4.3, "learning_rate": 2.5931416324772274e-05, "loss": 1.1043, "step": 7172000 }, { "epoch": 4.3, "learning_rate": 2.5929316359211707e-05, "loss": 1.1045, "step": 7172500 }, { "epoch": 4.3, "learning_rate": 2.5927216393651144e-05, "loss": 1.1053, "step": 7173000 }, { "epoch": 4.3, "learning_rate": 2.592511642809058e-05, "loss": 1.1296, "step": 7173500 }, { "epoch": 4.3, "learning_rate": 2.592301646253001e-05, "loss": 1.1214, "step": 7174000 }, { "epoch": 4.3, "learning_rate": 2.592092069690057e-05, "loss": 1.1488, "step": 7174500 }, { "epoch": 4.3, "learning_rate": 2.5918820731340005e-05, "loss": 1.1132, "step": 7175000 }, { "epoch": 4.3, "learning_rate": 2.591672076577944e-05, "loss": 1.125, "step": 7175500 }, { "epoch": 4.3, "learning_rate": 2.591462080021888e-05, "loss": 1.1143, "step": 7176000 }, { "epoch": 4.3, "learning_rate": 2.5912525034589432e-05, "loss": 1.1227, "step": 7176500 }, { "epoch": 4.3, "learning_rate": 2.5910425069028866e-05, "loss": 1.0856, "step": 7177000 }, { "epoch": 4.3, "learning_rate": 2.5908325103468302e-05, "loss": 1.0883, "step": 7177500 }, { "epoch": 4.3, "learning_rate": 2.590622513790774e-05, "loss": 1.1035, "step": 7178000 }, { "epoch": 4.3, "learning_rate": 2.5904129372278293e-05, "loss": 1.1121, "step": 7178500 }, { "epoch": 4.3, "learning_rate": 2.590202940671773e-05, "loss": 1.1316, "step": 7179000 }, { "epoch": 4.3, "learning_rate": 2.5899929441157163e-05, "loss": 1.1198, "step": 7179500 }, { "epoch": 4.3, "learning_rate": 2.58978294755966e-05, "loss": 1.1203, "step": 7180000 }, { "epoch": 4.31, "learning_rate": 2.5895729510036037e-05, "loss": 1.1103, "step": 7180500 }, { "epoch": 4.31, "learning_rate": 2.589363374440659e-05, "loss": 1.1084, "step": 7181000 }, { "epoch": 4.31, "learning_rate": 2.5891533778846027e-05, "loss": 1.1404, "step": 7181500 }, { "epoch": 4.31, "learning_rate": 2.588943381328546e-05, "loss": 1.0821, "step": 7182000 }, { "epoch": 4.31, "learning_rate": 2.5887333847724898e-05, "loss": 1.1035, "step": 7182500 }, { "epoch": 4.31, "learning_rate": 2.5885233882164334e-05, "loss": 1.1023, "step": 7183000 }, { "epoch": 4.31, "learning_rate": 2.5883133916603764e-05, "loss": 1.1039, "step": 7183500 }, { "epoch": 4.31, "learning_rate": 2.58810339510432e-05, "loss": 1.1137, "step": 7184000 }, { "epoch": 4.31, "learning_rate": 2.5878938185413758e-05, "loss": 1.1212, "step": 7184500 }, { "epoch": 4.31, "learning_rate": 2.5876838219853195e-05, "loss": 1.0997, "step": 7185000 }, { "epoch": 4.31, "learning_rate": 2.5874738254292632e-05, "loss": 1.0874, "step": 7185500 }, { "epoch": 4.31, "learning_rate": 2.5872638288732062e-05, "loss": 1.139, "step": 7186000 }, { "epoch": 4.31, "learning_rate": 2.58705383231715e-05, "loss": 1.1018, "step": 7186500 }, { "epoch": 4.31, "learning_rate": 2.5868438357610936e-05, "loss": 1.1336, "step": 7187000 }, { "epoch": 4.31, "learning_rate": 2.586633839205037e-05, "loss": 1.1226, "step": 7187500 }, { "epoch": 4.31, "learning_rate": 2.5864242626420926e-05, "loss": 1.1334, "step": 7188000 }, { "epoch": 4.31, "learning_rate": 2.586214266086036e-05, "loss": 1.1223, "step": 7188500 }, { "epoch": 4.31, "learning_rate": 2.5860042695299796e-05, "loss": 1.1171, "step": 7189000 }, { "epoch": 4.31, "learning_rate": 2.5857942729739233e-05, "loss": 1.1174, "step": 7189500 }, { "epoch": 4.31, "learning_rate": 2.5855842764178667e-05, "loss": 1.1035, "step": 7190000 }, { "epoch": 4.31, "learning_rate": 2.5853746998549224e-05, "loss": 1.1172, "step": 7190500 }, { "epoch": 4.31, "learning_rate": 2.5851647032988657e-05, "loss": 1.1113, "step": 7191000 }, { "epoch": 4.31, "learning_rate": 2.5849547067428094e-05, "loss": 1.1217, "step": 7191500 }, { "epoch": 4.31, "learning_rate": 2.5847447101867527e-05, "loss": 1.1204, "step": 7192000 }, { "epoch": 4.31, "learning_rate": 2.5845347136306964e-05, "loss": 1.1265, "step": 7192500 }, { "epoch": 4.31, "learning_rate": 2.584325137067752e-05, "loss": 1.0782, "step": 7193000 }, { "epoch": 4.31, "learning_rate": 2.5841151405116955e-05, "loss": 1.1423, "step": 7193500 }, { "epoch": 4.31, "learning_rate": 2.5839055639487512e-05, "loss": 1.1023, "step": 7194000 }, { "epoch": 4.31, "learning_rate": 2.583695567392695e-05, "loss": 1.1203, "step": 7194500 }, { "epoch": 4.31, "learning_rate": 2.5834855708366382e-05, "loss": 1.0985, "step": 7195000 }, { "epoch": 4.31, "learning_rate": 2.5832755742805815e-05, "loss": 1.1226, "step": 7195500 }, { "epoch": 4.31, "learning_rate": 2.5830655777245252e-05, "loss": 1.1571, "step": 7196000 }, { "epoch": 4.31, "learning_rate": 2.582855581168469e-05, "loss": 1.1139, "step": 7196500 }, { "epoch": 4.31, "learning_rate": 2.5826455846124123e-05, "loss": 1.0913, "step": 7197000 }, { "epoch": 4.32, "learning_rate": 2.582435588056356e-05, "loss": 1.0955, "step": 7197500 }, { "epoch": 4.32, "learning_rate": 2.5822260114934113e-05, "loss": 1.0899, "step": 7198000 }, { "epoch": 4.32, "learning_rate": 2.582016014937355e-05, "loss": 1.1048, "step": 7198500 }, { "epoch": 4.32, "learning_rate": 2.5818060183812983e-05, "loss": 1.1176, "step": 7199000 }, { "epoch": 4.32, "learning_rate": 2.581596021825242e-05, "loss": 1.1354, "step": 7199500 }, { "epoch": 4.32, "learning_rate": 2.5813860252691857e-05, "loss": 1.1147, "step": 7200000 }, { "epoch": 4.32, "eval_loss": 1.08935546875, "eval_runtime": 1105.7679, "eval_samples_per_second": 476.339, "eval_steps_per_second": 79.39, "step": 7200000 }, { "epoch": 4.32, "learning_rate": 2.581176448706241e-05, "loss": 1.0939, "step": 7200500 }, { "epoch": 4.32, "learning_rate": 2.5809664521501847e-05, "loss": 1.0863, "step": 7201000 }, { "epoch": 4.32, "learning_rate": 2.580756455594128e-05, "loss": 1.1042, "step": 7201500 }, { "epoch": 4.32, "learning_rate": 2.5805464590380718e-05, "loss": 1.1076, "step": 7202000 }, { "epoch": 4.32, "learning_rate": 2.5803364624820155e-05, "loss": 1.1468, "step": 7202500 }, { "epoch": 4.32, "learning_rate": 2.5801268859190708e-05, "loss": 1.1102, "step": 7203000 }, { "epoch": 4.32, "learning_rate": 2.5799168893630145e-05, "loss": 1.1118, "step": 7203500 }, { "epoch": 4.32, "learning_rate": 2.579706892806958e-05, "loss": 1.0949, "step": 7204000 }, { "epoch": 4.32, "learning_rate": 2.5794968962509015e-05, "loss": 1.0806, "step": 7204500 }, { "epoch": 4.32, "learning_rate": 2.5792868996948452e-05, "loss": 1.1055, "step": 7205000 }, { "epoch": 4.32, "learning_rate": 2.5790769031387882e-05, "loss": 1.1324, "step": 7205500 }, { "epoch": 4.32, "learning_rate": 2.578867326575844e-05, "loss": 1.0903, "step": 7206000 }, { "epoch": 4.32, "learning_rate": 2.5786573300197876e-05, "loss": 1.0703, "step": 7206500 }, { "epoch": 4.32, "learning_rate": 2.5784473334637313e-05, "loss": 1.0941, "step": 7207000 }, { "epoch": 4.32, "learning_rate": 2.578237336907675e-05, "loss": 1.1246, "step": 7207500 }, { "epoch": 4.32, "learning_rate": 2.5780277603447303e-05, "loss": 1.1361, "step": 7208000 }, { "epoch": 4.32, "learning_rate": 2.5778177637886737e-05, "loss": 1.1168, "step": 7208500 }, { "epoch": 4.32, "learning_rate": 2.5776077672326174e-05, "loss": 1.1109, "step": 7209000 }, { "epoch": 4.32, "learning_rate": 2.577397770676561e-05, "loss": 1.1167, "step": 7209500 }, { "epoch": 4.32, "learning_rate": 2.5771881941136164e-05, "loss": 1.0936, "step": 7210000 }, { "epoch": 4.32, "learning_rate": 2.57697819755756e-05, "loss": 1.102, "step": 7210500 }, { "epoch": 4.32, "learning_rate": 2.5767686209946158e-05, "loss": 1.1357, "step": 7211000 }, { "epoch": 4.32, "learning_rate": 2.576558624438559e-05, "loss": 1.1216, "step": 7211500 }, { "epoch": 4.32, "learning_rate": 2.5763486278825028e-05, "loss": 1.1042, "step": 7212000 }, { "epoch": 4.32, "learning_rate": 2.576138631326446e-05, "loss": 1.104, "step": 7212500 }, { "epoch": 4.32, "learning_rate": 2.5759286347703895e-05, "loss": 1.1095, "step": 7213000 }, { "epoch": 4.32, "learning_rate": 2.5757186382143332e-05, "loss": 1.1179, "step": 7213500 }, { "epoch": 4.33, "learning_rate": 2.575508641658277e-05, "loss": 1.1132, "step": 7214000 }, { "epoch": 4.33, "learning_rate": 2.5752986451022206e-05, "loss": 1.1242, "step": 7214500 }, { "epoch": 4.33, "learning_rate": 2.575089068539276e-05, "loss": 1.0941, "step": 7215000 }, { "epoch": 4.33, "learning_rate": 2.5748790719832193e-05, "loss": 1.1283, "step": 7215500 }, { "epoch": 4.33, "learning_rate": 2.574669495420275e-05, "loss": 1.0878, "step": 7216000 }, { "epoch": 4.33, "learning_rate": 2.5744594988642186e-05, "loss": 1.0854, "step": 7216500 }, { "epoch": 4.33, "learning_rate": 2.574249502308162e-05, "loss": 1.1257, "step": 7217000 }, { "epoch": 4.33, "learning_rate": 2.5740395057521057e-05, "loss": 1.1298, "step": 7217500 }, { "epoch": 4.33, "learning_rate": 2.573829509196049e-05, "loss": 1.1046, "step": 7218000 }, { "epoch": 4.33, "learning_rate": 2.5736195126399927e-05, "loss": 1.1189, "step": 7218500 }, { "epoch": 4.33, "learning_rate": 2.5734095160839364e-05, "loss": 1.1321, "step": 7219000 }, { "epoch": 4.33, "learning_rate": 2.5731995195278797e-05, "loss": 1.1022, "step": 7219500 }, { "epoch": 4.33, "learning_rate": 2.572989522971823e-05, "loss": 1.0668, "step": 7220000 }, { "epoch": 4.33, "learning_rate": 2.5727795264157668e-05, "loss": 1.1225, "step": 7220500 }, { "epoch": 4.33, "learning_rate": 2.5725703698459345e-05, "loss": 1.121, "step": 7221000 }, { "epoch": 4.33, "learning_rate": 2.572360373289878e-05, "loss": 1.0907, "step": 7221500 }, { "epoch": 4.33, "learning_rate": 2.5721503767338215e-05, "loss": 1.129, "step": 7222000 }, { "epoch": 4.33, "learning_rate": 2.571940380177765e-05, "loss": 1.091, "step": 7222500 }, { "epoch": 4.33, "learning_rate": 2.5717308036148206e-05, "loss": 1.1178, "step": 7223000 }, { "epoch": 4.33, "learning_rate": 2.5715208070587642e-05, "loss": 1.099, "step": 7223500 }, { "epoch": 4.33, "learning_rate": 2.571310810502708e-05, "loss": 1.1031, "step": 7224000 }, { "epoch": 4.33, "learning_rate": 2.5711008139466513e-05, "loss": 1.0976, "step": 7224500 }, { "epoch": 4.33, "learning_rate": 2.5708908173905946e-05, "loss": 1.1221, "step": 7225000 }, { "epoch": 4.33, "learning_rate": 2.5706808208345383e-05, "loss": 1.1084, "step": 7225500 }, { "epoch": 4.33, "learning_rate": 2.570470824278482e-05, "loss": 1.1165, "step": 7226000 }, { "epoch": 4.33, "learning_rate": 2.5702608277224253e-05, "loss": 1.1199, "step": 7226500 }, { "epoch": 4.33, "learning_rate": 2.5700508311663687e-05, "loss": 1.1324, "step": 7227000 }, { "epoch": 4.33, "learning_rate": 2.5698412546034244e-05, "loss": 1.0912, "step": 7227500 }, { "epoch": 4.33, "learning_rate": 2.569631258047368e-05, "loss": 1.1159, "step": 7228000 }, { "epoch": 4.33, "learning_rate": 2.5694212614913117e-05, "loss": 1.1319, "step": 7228500 }, { "epoch": 4.33, "learning_rate": 2.569211264935255e-05, "loss": 1.1102, "step": 7229000 }, { "epoch": 4.33, "learning_rate": 2.5690016883723104e-05, "loss": 1.1082, "step": 7229500 }, { "epoch": 4.33, "learning_rate": 2.568791691816254e-05, "loss": 1.1164, "step": 7230000 }, { "epoch": 4.33, "learning_rate": 2.5685816952601978e-05, "loss": 1.0967, "step": 7230500 }, { "epoch": 4.34, "learning_rate": 2.568371698704141e-05, "loss": 1.1223, "step": 7231000 }, { "epoch": 4.34, "learning_rate": 2.568162122141197e-05, "loss": 1.1179, "step": 7231500 }, { "epoch": 4.34, "learning_rate": 2.5679521255851402e-05, "loss": 1.0998, "step": 7232000 }, { "epoch": 4.34, "learning_rate": 2.567742129029084e-05, "loss": 1.1066, "step": 7232500 }, { "epoch": 4.34, "learning_rate": 2.5675321324730276e-05, "loss": 1.1033, "step": 7233000 }, { "epoch": 4.34, "learning_rate": 2.567322135916971e-05, "loss": 1.1222, "step": 7233500 }, { "epoch": 4.34, "learning_rate": 2.5671125593540266e-05, "loss": 1.0864, "step": 7234000 }, { "epoch": 4.34, "learning_rate": 2.56690256279797e-05, "loss": 1.0872, "step": 7234500 }, { "epoch": 4.34, "learning_rate": 2.5666925662419136e-05, "loss": 1.1062, "step": 7235000 }, { "epoch": 4.34, "learning_rate": 2.5664825696858573e-05, "loss": 1.1076, "step": 7235500 }, { "epoch": 4.34, "learning_rate": 2.5662725731298007e-05, "loss": 1.1113, "step": 7236000 }, { "epoch": 4.34, "learning_rate": 2.566062576573744e-05, "loss": 1.1073, "step": 7236500 }, { "epoch": 4.34, "learning_rate": 2.5658525800176877e-05, "loss": 1.0825, "step": 7237000 }, { "epoch": 4.34, "learning_rate": 2.565642583461631e-05, "loss": 1.0923, "step": 7237500 }, { "epoch": 4.34, "learning_rate": 2.5654330068986867e-05, "loss": 1.0792, "step": 7238000 }, { "epoch": 4.34, "learning_rate": 2.5652230103426304e-05, "loss": 1.1145, "step": 7238500 }, { "epoch": 4.34, "learning_rate": 2.5650134337796858e-05, "loss": 1.0902, "step": 7239000 }, { "epoch": 4.34, "learning_rate": 2.5648034372236295e-05, "loss": 1.0608, "step": 7239500 }, { "epoch": 4.34, "learning_rate": 2.564593440667573e-05, "loss": 1.1357, "step": 7240000 }, { "epoch": 4.34, "learning_rate": 2.5643834441115165e-05, "loss": 1.0829, "step": 7240500 }, { "epoch": 4.34, "learning_rate": 2.5641734475554602e-05, "loss": 1.0832, "step": 7241000 }, { "epoch": 4.34, "learning_rate": 2.5639634509994035e-05, "loss": 1.1002, "step": 7241500 }, { "epoch": 4.34, "learning_rate": 2.563753454443347e-05, "loss": 1.121, "step": 7242000 }, { "epoch": 4.34, "learning_rate": 2.563543877880403e-05, "loss": 1.1202, "step": 7242500 }, { "epoch": 4.34, "learning_rate": 2.5633338813243463e-05, "loss": 1.1058, "step": 7243000 }, { "epoch": 4.34, "learning_rate": 2.5631238847682896e-05, "loss": 1.0994, "step": 7243500 }, { "epoch": 4.34, "learning_rate": 2.5629138882122333e-05, "loss": 1.1311, "step": 7244000 }, { "epoch": 4.34, "learning_rate": 2.5627038916561766e-05, "loss": 1.1013, "step": 7244500 }, { "epoch": 4.34, "learning_rate": 2.5624943150932323e-05, "loss": 1.0978, "step": 7245000 }, { "epoch": 4.34, "learning_rate": 2.562284318537176e-05, "loss": 1.0832, "step": 7245500 }, { "epoch": 4.34, "learning_rate": 2.5620743219811194e-05, "loss": 1.1127, "step": 7246000 }, { "epoch": 4.34, "learning_rate": 2.561864325425063e-05, "loss": 1.1001, "step": 7246500 }, { "epoch": 4.34, "learning_rate": 2.5616543288690064e-05, "loss": 1.1032, "step": 7247000 }, { "epoch": 4.35, "learning_rate": 2.56144433231295e-05, "loss": 1.1024, "step": 7247500 }, { "epoch": 4.35, "learning_rate": 2.5612343357568938e-05, "loss": 1.1298, "step": 7248000 }, { "epoch": 4.35, "learning_rate": 2.561024759193949e-05, "loss": 1.0967, "step": 7248500 }, { "epoch": 4.35, "learning_rate": 2.5608147626378925e-05, "loss": 1.0992, "step": 7249000 }, { "epoch": 4.35, "learning_rate": 2.560604766081836e-05, "loss": 1.114, "step": 7249500 }, { "epoch": 4.35, "learning_rate": 2.56039476952578e-05, "loss": 1.1092, "step": 7250000 }, { "epoch": 4.35, "learning_rate": 2.5601847729697235e-05, "loss": 1.1117, "step": 7250500 }, { "epoch": 4.35, "learning_rate": 2.559975196406779e-05, "loss": 1.1126, "step": 7251000 }, { "epoch": 4.35, "learning_rate": 2.5597651998507222e-05, "loss": 1.1178, "step": 7251500 }, { "epoch": 4.35, "learning_rate": 2.559555203294666e-05, "loss": 1.1109, "step": 7252000 }, { "epoch": 4.35, "learning_rate": 2.5593452067386096e-05, "loss": 1.1, "step": 7252500 }, { "epoch": 4.35, "learning_rate": 2.559135210182553e-05, "loss": 1.1209, "step": 7253000 }, { "epoch": 4.35, "learning_rate": 2.5589256336196086e-05, "loss": 1.1151, "step": 7253500 }, { "epoch": 4.35, "learning_rate": 2.558715637063552e-05, "loss": 1.108, "step": 7254000 }, { "epoch": 4.35, "learning_rate": 2.5585056405074957e-05, "loss": 1.1244, "step": 7254500 }, { "epoch": 4.35, "learning_rate": 2.5582960639445514e-05, "loss": 1.1013, "step": 7255000 }, { "epoch": 4.35, "learning_rate": 2.5580860673884947e-05, "loss": 1.1193, "step": 7255500 }, { "epoch": 4.35, "learning_rate": 2.557876070832438e-05, "loss": 1.1175, "step": 7256000 }, { "epoch": 4.35, "learning_rate": 2.5576660742763817e-05, "loss": 1.0846, "step": 7256500 }, { "epoch": 4.35, "learning_rate": 2.5574560777203254e-05, "loss": 1.1029, "step": 7257000 }, { "epoch": 4.35, "learning_rate": 2.557246081164269e-05, "loss": 1.1286, "step": 7257500 }, { "epoch": 4.35, "learning_rate": 2.5570360846082125e-05, "loss": 1.1204, "step": 7258000 }, { "epoch": 4.35, "learning_rate": 2.5568260880521558e-05, "loss": 1.1105, "step": 7258500 }, { "epoch": 4.35, "learning_rate": 2.5566160914960995e-05, "loss": 1.0979, "step": 7259000 }, { "epoch": 4.35, "learning_rate": 2.556406094940043e-05, "loss": 1.1041, "step": 7259500 }, { "epoch": 4.35, "learning_rate": 2.5561960983839865e-05, "loss": 1.1305, "step": 7260000 }, { "epoch": 4.35, "learning_rate": 2.5559861018279302e-05, "loss": 1.0881, "step": 7260500 }, { "epoch": 4.35, "learning_rate": 2.5557765252649856e-05, "loss": 1.0929, "step": 7261000 }, { "epoch": 4.35, "learning_rate": 2.5555669487020413e-05, "loss": 1.1151, "step": 7261500 }, { "epoch": 4.35, "learning_rate": 2.555356952145985e-05, "loss": 1.1284, "step": 7262000 }, { "epoch": 4.35, "learning_rate": 2.5551469555899283e-05, "loss": 1.1008, "step": 7262500 }, { "epoch": 4.35, "learning_rate": 2.5549369590338716e-05, "loss": 1.1324, "step": 7263000 }, { "epoch": 4.35, "learning_rate": 2.5547273824709273e-05, "loss": 1.1112, "step": 7263500 }, { "epoch": 4.36, "learning_rate": 2.554517385914871e-05, "loss": 1.0945, "step": 7264000 }, { "epoch": 4.36, "learning_rate": 2.5543073893588147e-05, "loss": 1.101, "step": 7264500 }, { "epoch": 4.36, "learning_rate": 2.554097392802758e-05, "loss": 1.1016, "step": 7265000 }, { "epoch": 4.36, "learning_rate": 2.5538878162398134e-05, "loss": 1.1243, "step": 7265500 }, { "epoch": 4.36, "learning_rate": 2.553677819683757e-05, "loss": 1.1311, "step": 7266000 }, { "epoch": 4.36, "learning_rate": 2.5534678231277008e-05, "loss": 1.0794, "step": 7266500 }, { "epoch": 4.36, "learning_rate": 2.553257826571644e-05, "loss": 1.144, "step": 7267000 }, { "epoch": 4.36, "learning_rate": 2.5530478300155878e-05, "loss": 1.1156, "step": 7267500 }, { "epoch": 4.36, "learning_rate": 2.552837833459531e-05, "loss": 1.1113, "step": 7268000 }, { "epoch": 4.36, "learning_rate": 2.552627836903475e-05, "loss": 1.1087, "step": 7268500 }, { "epoch": 4.36, "learning_rate": 2.5524182603405305e-05, "loss": 1.1068, "step": 7269000 }, { "epoch": 4.36, "learning_rate": 2.552208263784474e-05, "loss": 1.0995, "step": 7269500 }, { "epoch": 4.36, "learning_rate": 2.5519982672284176e-05, "loss": 1.1182, "step": 7270000 }, { "epoch": 4.36, "learning_rate": 2.551788270672361e-05, "loss": 1.112, "step": 7270500 }, { "epoch": 4.36, "learning_rate": 2.5515786941094166e-05, "loss": 1.1108, "step": 7271000 }, { "epoch": 4.36, "learning_rate": 2.5513686975533603e-05, "loss": 1.0922, "step": 7271500 }, { "epoch": 4.36, "learning_rate": 2.5511587009973036e-05, "loss": 1.126, "step": 7272000 }, { "epoch": 4.36, "learning_rate": 2.550948704441247e-05, "loss": 1.129, "step": 7272500 }, { "epoch": 4.36, "learning_rate": 2.5507387078851907e-05, "loss": 1.1068, "step": 7273000 }, { "epoch": 4.36, "learning_rate": 2.550528711329134e-05, "loss": 1.0905, "step": 7273500 }, { "epoch": 4.36, "learning_rate": 2.5503187147730777e-05, "loss": 1.0995, "step": 7274000 }, { "epoch": 4.36, "learning_rate": 2.5501087182170214e-05, "loss": 1.1117, "step": 7274500 }, { "epoch": 4.36, "learning_rate": 2.5498987216609647e-05, "loss": 1.0853, "step": 7275000 }, { "epoch": 4.36, "learning_rate": 2.5496891450980204e-05, "loss": 1.1162, "step": 7275500 }, { "epoch": 4.36, "learning_rate": 2.5494791485419638e-05, "loss": 1.0862, "step": 7276000 }, { "epoch": 4.36, "learning_rate": 2.5492691519859075e-05, "loss": 1.1542, "step": 7276500 }, { "epoch": 4.36, "learning_rate": 2.549059155429851e-05, "loss": 1.1012, "step": 7277000 }, { "epoch": 4.36, "learning_rate": 2.5488495788669065e-05, "loss": 1.1075, "step": 7277500 }, { "epoch": 4.36, "learning_rate": 2.54863958231085e-05, "loss": 1.1146, "step": 7278000 }, { "epoch": 4.36, "learning_rate": 2.5484295857547935e-05, "loss": 1.1371, "step": 7278500 }, { "epoch": 4.36, "learning_rate": 2.5482195891987372e-05, "loss": 1.1027, "step": 7279000 }, { "epoch": 4.36, "learning_rate": 2.548009592642681e-05, "loss": 1.1161, "step": 7279500 }, { "epoch": 4.36, "learning_rate": 2.5478000160797363e-05, "loss": 1.1137, "step": 7280000 }, { "epoch": 4.36, "learning_rate": 2.547590439516792e-05, "loss": 1.1091, "step": 7280500 }, { "epoch": 4.37, "learning_rate": 2.5473804429607353e-05, "loss": 1.1058, "step": 7281000 }, { "epoch": 4.37, "learning_rate": 2.547170446404679e-05, "loss": 1.0876, "step": 7281500 }, { "epoch": 4.37, "learning_rate": 2.5469604498486223e-05, "loss": 1.1165, "step": 7282000 }, { "epoch": 4.37, "learning_rate": 2.546750453292566e-05, "loss": 1.0986, "step": 7282500 }, { "epoch": 4.37, "learning_rate": 2.5465404567365094e-05, "loss": 1.102, "step": 7283000 }, { "epoch": 4.37, "learning_rate": 2.546330460180453e-05, "loss": 1.1318, "step": 7283500 }, { "epoch": 4.37, "learning_rate": 2.5461204636243967e-05, "loss": 1.1011, "step": 7284000 }, { "epoch": 4.37, "learning_rate": 2.545910887061452e-05, "loss": 1.1232, "step": 7284500 }, { "epoch": 4.37, "learning_rate": 2.5457008905053954e-05, "loss": 1.1069, "step": 7285000 }, { "epoch": 4.37, "learning_rate": 2.545490893949339e-05, "loss": 1.1214, "step": 7285500 }, { "epoch": 4.37, "learning_rate": 2.5452808973932828e-05, "loss": 1.0934, "step": 7286000 }, { "epoch": 4.37, "learning_rate": 2.5450713208303385e-05, "loss": 1.114, "step": 7286500 }, { "epoch": 4.37, "learning_rate": 2.544861324274282e-05, "loss": 1.0969, "step": 7287000 }, { "epoch": 4.37, "learning_rate": 2.5446513277182252e-05, "loss": 1.1192, "step": 7287500 }, { "epoch": 4.37, "learning_rate": 2.544441331162169e-05, "loss": 1.116, "step": 7288000 }, { "epoch": 4.37, "learning_rate": 2.5442313346061126e-05, "loss": 1.0839, "step": 7288500 }, { "epoch": 4.37, "learning_rate": 2.5440217580431683e-05, "loss": 1.1075, "step": 7289000 }, { "epoch": 4.37, "learning_rate": 2.5438117614871116e-05, "loss": 1.0826, "step": 7289500 }, { "epoch": 4.37, "learning_rate": 2.543601764931055e-05, "loss": 1.1168, "step": 7290000 }, { "epoch": 4.37, "learning_rate": 2.5433917683749986e-05, "loss": 1.0868, "step": 7290500 }, { "epoch": 4.37, "learning_rate": 2.5431817718189423e-05, "loss": 1.1098, "step": 7291000 }, { "epoch": 4.37, "learning_rate": 2.5429721952559977e-05, "loss": 1.118, "step": 7291500 }, { "epoch": 4.37, "learning_rate": 2.5427621986999414e-05, "loss": 1.1099, "step": 7292000 }, { "epoch": 4.37, "learning_rate": 2.5425522021438847e-05, "loss": 1.1078, "step": 7292500 }, { "epoch": 4.37, "learning_rate": 2.5423426255809404e-05, "loss": 1.1029, "step": 7293000 }, { "epoch": 4.37, "learning_rate": 2.542132629024884e-05, "loss": 1.1181, "step": 7293500 }, { "epoch": 4.37, "learning_rate": 2.5419226324688274e-05, "loss": 1.1287, "step": 7294000 }, { "epoch": 4.37, "learning_rate": 2.5417126359127708e-05, "loss": 1.0935, "step": 7294500 }, { "epoch": 4.37, "learning_rate": 2.5415026393567145e-05, "loss": 1.1032, "step": 7295000 }, { "epoch": 4.37, "learning_rate": 2.541292642800658e-05, "loss": 1.1156, "step": 7295500 }, { "epoch": 4.37, "learning_rate": 2.5410826462446015e-05, "loss": 1.0906, "step": 7296000 }, { "epoch": 4.37, "learning_rate": 2.5408726496885452e-05, "loss": 1.1007, "step": 7296500 }, { "epoch": 4.37, "learning_rate": 2.5406630731256005e-05, "loss": 1.1069, "step": 7297000 }, { "epoch": 4.38, "learning_rate": 2.5404530765695442e-05, "loss": 1.0979, "step": 7297500 }, { "epoch": 4.38, "learning_rate": 2.540243080013488e-05, "loss": 1.1156, "step": 7298000 }, { "epoch": 4.38, "learning_rate": 2.5400330834574312e-05, "loss": 1.0998, "step": 7298500 }, { "epoch": 4.38, "learning_rate": 2.539823086901375e-05, "loss": 1.0841, "step": 7299000 }, { "epoch": 4.38, "learning_rate": 2.5396135103384303e-05, "loss": 1.1056, "step": 7299500 }, { "epoch": 4.38, "learning_rate": 2.539403513782374e-05, "loss": 1.1086, "step": 7300000 }, { "epoch": 4.38, "eval_loss": 1.0867536067962646, "eval_runtime": 1112.0821, "eval_samples_per_second": 473.634, "eval_steps_per_second": 78.939, "step": 7300000 }, { "epoch": 4.38, "learning_rate": 2.5391935172263177e-05, "loss": 1.1027, "step": 7300500 }, { "epoch": 4.38, "learning_rate": 2.538983520670261e-05, "loss": 1.1292, "step": 7301000 }, { "epoch": 4.38, "learning_rate": 2.5387735241142043e-05, "loss": 1.0909, "step": 7301500 }, { "epoch": 4.38, "learning_rate": 2.53856394755126e-05, "loss": 1.1298, "step": 7302000 }, { "epoch": 4.38, "learning_rate": 2.5383539509952037e-05, "loss": 1.1205, "step": 7302500 }, { "epoch": 4.38, "learning_rate": 2.538143954439147e-05, "loss": 1.111, "step": 7303000 }, { "epoch": 4.38, "learning_rate": 2.5379339578830908e-05, "loss": 1.1242, "step": 7303500 }, { "epoch": 4.38, "learning_rate": 2.537724381320146e-05, "loss": 1.111, "step": 7304000 }, { "epoch": 4.38, "learning_rate": 2.5375143847640898e-05, "loss": 1.1297, "step": 7304500 }, { "epoch": 4.38, "learning_rate": 2.5373043882080335e-05, "loss": 1.1054, "step": 7305000 }, { "epoch": 4.38, "learning_rate": 2.537094391651977e-05, "loss": 1.0879, "step": 7305500 }, { "epoch": 4.38, "learning_rate": 2.5368843950959205e-05, "loss": 1.1185, "step": 7306000 }, { "epoch": 4.38, "learning_rate": 2.536674818532976e-05, "loss": 1.1193, "step": 7306500 }, { "epoch": 4.38, "learning_rate": 2.5364648219769196e-05, "loss": 1.1073, "step": 7307000 }, { "epoch": 4.38, "learning_rate": 2.5362548254208632e-05, "loss": 1.1517, "step": 7307500 }, { "epoch": 4.38, "learning_rate": 2.5360448288648066e-05, "loss": 1.1299, "step": 7308000 }, { "epoch": 4.38, "learning_rate": 2.5358348323087503e-05, "loss": 1.1319, "step": 7308500 }, { "epoch": 4.38, "learning_rate": 2.5356252557458056e-05, "loss": 1.1471, "step": 7309000 }, { "epoch": 4.38, "learning_rate": 2.5354152591897493e-05, "loss": 1.125, "step": 7309500 }, { "epoch": 4.38, "learning_rate": 2.5352052626336927e-05, "loss": 1.0912, "step": 7310000 }, { "epoch": 4.38, "learning_rate": 2.5349952660776363e-05, "loss": 1.127, "step": 7310500 }, { "epoch": 4.38, "learning_rate": 2.5347856895146917e-05, "loss": 1.1254, "step": 7311000 }, { "epoch": 4.38, "learning_rate": 2.5345756929586354e-05, "loss": 1.1055, "step": 7311500 }, { "epoch": 4.38, "learning_rate": 2.534366116395691e-05, "loss": 1.1177, "step": 7312000 }, { "epoch": 4.38, "learning_rate": 2.5341561198396348e-05, "loss": 1.109, "step": 7312500 }, { "epoch": 4.38, "learning_rate": 2.533946123283578e-05, "loss": 1.1251, "step": 7313000 }, { "epoch": 4.38, "learning_rate": 2.5337361267275215e-05, "loss": 1.1033, "step": 7313500 }, { "epoch": 4.39, "learning_rate": 2.533526130171465e-05, "loss": 1.1078, "step": 7314000 }, { "epoch": 4.39, "learning_rate": 2.533316133615409e-05, "loss": 1.0762, "step": 7314500 }, { "epoch": 4.39, "learning_rate": 2.5331061370593522e-05, "loss": 1.1059, "step": 7315000 }, { "epoch": 4.39, "learning_rate": 2.532896140503296e-05, "loss": 1.0803, "step": 7315500 }, { "epoch": 4.39, "learning_rate": 2.5326861439472392e-05, "loss": 1.109, "step": 7316000 }, { "epoch": 4.39, "learning_rate": 2.532476987377407e-05, "loss": 1.102, "step": 7316500 }, { "epoch": 4.39, "learning_rate": 2.5322669908213506e-05, "loss": 1.1054, "step": 7317000 }, { "epoch": 4.39, "learning_rate": 2.5320569942652943e-05, "loss": 1.1003, "step": 7317500 }, { "epoch": 4.39, "learning_rate": 2.5318469977092373e-05, "loss": 1.1035, "step": 7318000 }, { "epoch": 4.39, "learning_rate": 2.531637001153181e-05, "loss": 1.1, "step": 7318500 }, { "epoch": 4.39, "learning_rate": 2.5314270045971247e-05, "loss": 1.1141, "step": 7319000 }, { "epoch": 4.39, "learning_rate": 2.531217008041068e-05, "loss": 1.1169, "step": 7319500 }, { "epoch": 4.39, "learning_rate": 2.5310070114850117e-05, "loss": 1.1102, "step": 7320000 }, { "epoch": 4.39, "learning_rate": 2.530797434922067e-05, "loss": 1.1213, "step": 7320500 }, { "epoch": 4.39, "learning_rate": 2.5305874383660107e-05, "loss": 1.1191, "step": 7321000 }, { "epoch": 4.39, "learning_rate": 2.5303774418099544e-05, "loss": 1.1212, "step": 7321500 }, { "epoch": 4.39, "learning_rate": 2.5301674452538978e-05, "loss": 1.1067, "step": 7322000 }, { "epoch": 4.39, "learning_rate": 2.529957868690953e-05, "loss": 1.088, "step": 7322500 }, { "epoch": 4.39, "learning_rate": 2.5297478721348968e-05, "loss": 1.1512, "step": 7323000 }, { "epoch": 4.39, "learning_rate": 2.5295378755788405e-05, "loss": 1.1175, "step": 7323500 }, { "epoch": 4.39, "learning_rate": 2.529327879022784e-05, "loss": 1.1244, "step": 7324000 }, { "epoch": 4.39, "learning_rate": 2.5291178824667275e-05, "loss": 1.0932, "step": 7324500 }, { "epoch": 4.39, "learning_rate": 2.5289078859106712e-05, "loss": 1.076, "step": 7325000 }, { "epoch": 4.39, "learning_rate": 2.5286978893546146e-05, "loss": 1.119, "step": 7325500 }, { "epoch": 4.39, "learning_rate": 2.5284883127916703e-05, "loss": 1.1184, "step": 7326000 }, { "epoch": 4.39, "learning_rate": 2.5282783162356136e-05, "loss": 1.1155, "step": 7326500 }, { "epoch": 4.39, "learning_rate": 2.5280683196795573e-05, "loss": 1.112, "step": 7327000 }, { "epoch": 4.39, "learning_rate": 2.527858323123501e-05, "loss": 1.0981, "step": 7327500 }, { "epoch": 4.39, "learning_rate": 2.5276483265674443e-05, "loss": 1.1161, "step": 7328000 }, { "epoch": 4.39, "learning_rate": 2.5274383300113877e-05, "loss": 1.1013, "step": 7328500 }, { "epoch": 4.39, "learning_rate": 2.5272283334553313e-05, "loss": 1.0909, "step": 7329000 }, { "epoch": 4.39, "learning_rate": 2.527018336899275e-05, "loss": 1.0878, "step": 7329500 }, { "epoch": 4.39, "learning_rate": 2.5268083403432184e-05, "loss": 1.1267, "step": 7330000 }, { "epoch": 4.39, "learning_rate": 2.5265987637802737e-05, "loss": 1.094, "step": 7330500 }, { "epoch": 4.4, "learning_rate": 2.5263887672242174e-05, "loss": 1.1051, "step": 7331000 }, { "epoch": 4.4, "learning_rate": 2.526178770668161e-05, "loss": 1.1039, "step": 7331500 }, { "epoch": 4.4, "learning_rate": 2.5259687741121044e-05, "loss": 1.1003, "step": 7332000 }, { "epoch": 4.4, "learning_rate": 2.525758777556048e-05, "loss": 1.1103, "step": 7332500 }, { "epoch": 4.4, "learning_rate": 2.5255492009931035e-05, "loss": 1.1141, "step": 7333000 }, { "epoch": 4.4, "learning_rate": 2.5253392044370472e-05, "loss": 1.0989, "step": 7333500 }, { "epoch": 4.4, "learning_rate": 2.525129207880991e-05, "loss": 1.1163, "step": 7334000 }, { "epoch": 4.4, "learning_rate": 2.5249192113249342e-05, "loss": 1.1062, "step": 7334500 }, { "epoch": 4.4, "learning_rate": 2.52470963476199e-05, "loss": 1.1212, "step": 7335000 }, { "epoch": 4.4, "learning_rate": 2.5244996382059332e-05, "loss": 1.1168, "step": 7335500 }, { "epoch": 4.4, "learning_rate": 2.524289641649877e-05, "loss": 1.1292, "step": 7336000 }, { "epoch": 4.4, "learning_rate": 2.5240796450938206e-05, "loss": 1.1312, "step": 7336500 }, { "epoch": 4.4, "learning_rate": 2.5238700685308763e-05, "loss": 1.0961, "step": 7337000 }, { "epoch": 4.4, "learning_rate": 2.5236600719748193e-05, "loss": 1.1217, "step": 7337500 }, { "epoch": 4.4, "learning_rate": 2.523450075418763e-05, "loss": 1.1153, "step": 7338000 }, { "epoch": 4.4, "learning_rate": 2.5232400788627067e-05, "loss": 1.0897, "step": 7338500 }, { "epoch": 4.4, "learning_rate": 2.5230305022997624e-05, "loss": 1.113, "step": 7339000 }, { "epoch": 4.4, "learning_rate": 2.522820505743706e-05, "loss": 1.1044, "step": 7339500 }, { "epoch": 4.4, "learning_rate": 2.522610509187649e-05, "loss": 1.1138, "step": 7340000 }, { "epoch": 4.4, "learning_rate": 2.5224005126315928e-05, "loss": 1.1104, "step": 7340500 }, { "epoch": 4.4, "learning_rate": 2.5221905160755364e-05, "loss": 1.1072, "step": 7341000 }, { "epoch": 4.4, "learning_rate": 2.5219805195194798e-05, "loss": 1.103, "step": 7341500 }, { "epoch": 4.4, "learning_rate": 2.5217709429565355e-05, "loss": 1.0897, "step": 7342000 }, { "epoch": 4.4, "learning_rate": 2.521560946400479e-05, "loss": 1.1209, "step": 7342500 }, { "epoch": 4.4, "learning_rate": 2.5213509498444225e-05, "loss": 1.1061, "step": 7343000 }, { "epoch": 4.4, "learning_rate": 2.5211409532883662e-05, "loss": 1.0803, "step": 7343500 }, { "epoch": 4.4, "learning_rate": 2.520931376725422e-05, "loss": 1.1124, "step": 7344000 }, { "epoch": 4.4, "learning_rate": 2.520721380169365e-05, "loss": 1.1188, "step": 7344500 }, { "epoch": 4.4, "learning_rate": 2.5205113836133086e-05, "loss": 1.1097, "step": 7345000 }, { "epoch": 4.4, "learning_rate": 2.5203013870572523e-05, "loss": 1.1151, "step": 7345500 }, { "epoch": 4.4, "learning_rate": 2.5200913905011956e-05, "loss": 1.109, "step": 7346000 }, { "epoch": 4.4, "learning_rate": 2.5198818139382517e-05, "loss": 1.1158, "step": 7346500 }, { "epoch": 4.4, "learning_rate": 2.5196718173821947e-05, "loss": 1.0797, "step": 7347000 }, { "epoch": 4.41, "learning_rate": 2.5194618208261383e-05, "loss": 1.0864, "step": 7347500 }, { "epoch": 4.41, "learning_rate": 2.519251824270082e-05, "loss": 1.082, "step": 7348000 }, { "epoch": 4.41, "learning_rate": 2.5190418277140254e-05, "loss": 1.1029, "step": 7348500 }, { "epoch": 4.41, "learning_rate": 2.518832251151081e-05, "loss": 1.1087, "step": 7349000 }, { "epoch": 4.41, "learning_rate": 2.5186226745881368e-05, "loss": 1.1211, "step": 7349500 }, { "epoch": 4.41, "learning_rate": 2.51841267803208e-05, "loss": 1.0941, "step": 7350000 }, { "epoch": 4.41, "learning_rate": 2.5182026814760238e-05, "loss": 1.1183, "step": 7350500 }, { "epoch": 4.41, "learning_rate": 2.5179926849199675e-05, "loss": 1.1007, "step": 7351000 }, { "epoch": 4.41, "learning_rate": 2.5177826883639105e-05, "loss": 1.1182, "step": 7351500 }, { "epoch": 4.41, "learning_rate": 2.5175726918078542e-05, "loss": 1.1318, "step": 7352000 }, { "epoch": 4.41, "learning_rate": 2.517362695251798e-05, "loss": 1.0863, "step": 7352500 }, { "epoch": 4.41, "learning_rate": 2.5171526986957412e-05, "loss": 1.0963, "step": 7353000 }, { "epoch": 4.41, "learning_rate": 2.5169431221327972e-05, "loss": 1.0835, "step": 7353500 }, { "epoch": 4.41, "learning_rate": 2.5167331255767403e-05, "loss": 1.1111, "step": 7354000 }, { "epoch": 4.41, "learning_rate": 2.516523129020684e-05, "loss": 1.115, "step": 7354500 }, { "epoch": 4.41, "learning_rate": 2.5163131324646276e-05, "loss": 1.1063, "step": 7355000 }, { "epoch": 4.41, "learning_rate": 2.516103135908571e-05, "loss": 1.0978, "step": 7355500 }, { "epoch": 4.41, "learning_rate": 2.5158931393525147e-05, "loss": 1.0936, "step": 7356000 }, { "epoch": 4.41, "learning_rate": 2.51568356278957e-05, "loss": 1.103, "step": 7356500 }, { "epoch": 4.41, "learning_rate": 2.5154735662335137e-05, "loss": 1.1147, "step": 7357000 }, { "epoch": 4.41, "learning_rate": 2.5152635696774574e-05, "loss": 1.1123, "step": 7357500 }, { "epoch": 4.41, "learning_rate": 2.5150535731214007e-05, "loss": 1.0867, "step": 7358000 }, { "epoch": 4.41, "learning_rate": 2.5148435765653444e-05, "loss": 1.1195, "step": 7358500 }, { "epoch": 4.41, "learning_rate": 2.5146340000023998e-05, "loss": 1.1041, "step": 7359000 }, { "epoch": 4.41, "learning_rate": 2.5144240034463435e-05, "loss": 1.1151, "step": 7359500 }, { "epoch": 4.41, "learning_rate": 2.5142140068902868e-05, "loss": 1.0806, "step": 7360000 }, { "epoch": 4.41, "learning_rate": 2.5140040103342305e-05, "loss": 1.1394, "step": 7360500 }, { "epoch": 4.41, "learning_rate": 2.513794433771286e-05, "loss": 1.1145, "step": 7361000 }, { "epoch": 4.41, "learning_rate": 2.5135844372152295e-05, "loss": 1.0833, "step": 7361500 }, { "epoch": 4.41, "learning_rate": 2.5133744406591732e-05, "loss": 1.1202, "step": 7362000 }, { "epoch": 4.41, "learning_rate": 2.5131644441031166e-05, "loss": 1.125, "step": 7362500 }, { "epoch": 4.41, "learning_rate": 2.5129544475470602e-05, "loss": 1.1085, "step": 7363000 }, { "epoch": 4.41, "learning_rate": 2.5127448709841156e-05, "loss": 1.1054, "step": 7363500 }, { "epoch": 4.42, "learning_rate": 2.5125348744280593e-05, "loss": 1.1118, "step": 7364000 }, { "epoch": 4.42, "learning_rate": 2.512324877872003e-05, "loss": 1.1111, "step": 7364500 }, { "epoch": 4.42, "learning_rate": 2.5121148813159463e-05, "loss": 1.1166, "step": 7365000 }, { "epoch": 4.42, "learning_rate": 2.511905304753002e-05, "loss": 1.1017, "step": 7365500 }, { "epoch": 4.42, "learning_rate": 2.5116953081969454e-05, "loss": 1.112, "step": 7366000 }, { "epoch": 4.42, "learning_rate": 2.511485311640889e-05, "loss": 1.1192, "step": 7366500 }, { "epoch": 4.42, "learning_rate": 2.5112753150848327e-05, "loss": 1.0884, "step": 7367000 }, { "epoch": 4.42, "learning_rate": 2.511065318528776e-05, "loss": 1.1157, "step": 7367500 }, { "epoch": 4.42, "learning_rate": 2.5108553219727198e-05, "loss": 1.0887, "step": 7368000 }, { "epoch": 4.42, "learning_rate": 2.5106453254166634e-05, "loss": 1.0926, "step": 7368500 }, { "epoch": 4.42, "learning_rate": 2.5104357488537188e-05, "loss": 1.1522, "step": 7369000 }, { "epoch": 4.42, "learning_rate": 2.510225752297662e-05, "loss": 1.1342, "step": 7369500 }, { "epoch": 4.42, "learning_rate": 2.5100157557416058e-05, "loss": 1.1415, "step": 7370000 }, { "epoch": 4.42, "learning_rate": 2.5098057591855495e-05, "loss": 1.1161, "step": 7370500 }, { "epoch": 4.42, "learning_rate": 2.509596182622605e-05, "loss": 1.1189, "step": 7371000 }, { "epoch": 4.42, "learning_rate": 2.5093861860665486e-05, "loss": 1.1043, "step": 7371500 }, { "epoch": 4.42, "learning_rate": 2.509176189510492e-05, "loss": 1.1184, "step": 7372000 }, { "epoch": 4.42, "learning_rate": 2.5089661929544356e-05, "loss": 1.1045, "step": 7372500 }, { "epoch": 4.42, "learning_rate": 2.5087561963983793e-05, "loss": 1.0795, "step": 7373000 }, { "epoch": 4.42, "learning_rate": 2.5085466198354346e-05, "loss": 1.0863, "step": 7373500 }, { "epoch": 4.42, "learning_rate": 2.5083370432724903e-05, "loss": 1.1085, "step": 7374000 }, { "epoch": 4.42, "learning_rate": 2.508127046716434e-05, "loss": 1.0946, "step": 7374500 }, { "epoch": 4.42, "learning_rate": 2.5079170501603774e-05, "loss": 1.0934, "step": 7375000 }, { "epoch": 4.42, "learning_rate": 2.5077070536043207e-05, "loss": 1.0952, "step": 7375500 }, { "epoch": 4.42, "learning_rate": 2.5074970570482644e-05, "loss": 1.0987, "step": 7376000 }, { "epoch": 4.42, "learning_rate": 2.5072870604922077e-05, "loss": 1.1103, "step": 7376500 }, { "epoch": 4.42, "learning_rate": 2.5070770639361514e-05, "loss": 1.0898, "step": 7377000 }, { "epoch": 4.42, "learning_rate": 2.506867067380095e-05, "loss": 1.0979, "step": 7377500 }, { "epoch": 4.42, "learning_rate": 2.5066574908171505e-05, "loss": 1.1216, "step": 7378000 }, { "epoch": 4.42, "learning_rate": 2.506447494261094e-05, "loss": 1.093, "step": 7378500 }, { "epoch": 4.42, "learning_rate": 2.5062374977050375e-05, "loss": 1.1252, "step": 7379000 }, { "epoch": 4.42, "learning_rate": 2.5060275011489812e-05, "loss": 1.1201, "step": 7379500 }, { "epoch": 4.42, "learning_rate": 2.505817504592925e-05, "loss": 1.0984, "step": 7380000 }, { "epoch": 4.42, "learning_rate": 2.5056079280299802e-05, "loss": 1.1139, "step": 7380500 }, { "epoch": 4.43, "learning_rate": 2.505397931473924e-05, "loss": 1.1305, "step": 7381000 }, { "epoch": 4.43, "learning_rate": 2.5051879349178672e-05, "loss": 1.1204, "step": 7381500 }, { "epoch": 4.43, "learning_rate": 2.504978358354923e-05, "loss": 1.0999, "step": 7382000 }, { "epoch": 4.43, "learning_rate": 2.5047683617988663e-05, "loss": 1.1226, "step": 7382500 }, { "epoch": 4.43, "learning_rate": 2.50455836524281e-05, "loss": 1.0772, "step": 7383000 }, { "epoch": 4.43, "learning_rate": 2.5043483686867533e-05, "loss": 1.1295, "step": 7383500 }, { "epoch": 4.43, "learning_rate": 2.504138372130697e-05, "loss": 1.1331, "step": 7384000 }, { "epoch": 4.43, "learning_rate": 2.5039283755746407e-05, "loss": 1.1122, "step": 7384500 }, { "epoch": 4.43, "learning_rate": 2.503718379018584e-05, "loss": 1.1019, "step": 7385000 }, { "epoch": 4.43, "learning_rate": 2.5035083824625274e-05, "loss": 1.1132, "step": 7385500 }, { "epoch": 4.43, "learning_rate": 2.503298805899583e-05, "loss": 1.111, "step": 7386000 }, { "epoch": 4.43, "learning_rate": 2.5030888093435268e-05, "loss": 1.1012, "step": 7386500 }, { "epoch": 4.43, "learning_rate": 2.5028788127874704e-05, "loss": 1.1527, "step": 7387000 }, { "epoch": 4.43, "learning_rate": 2.5026688162314138e-05, "loss": 1.0956, "step": 7387500 }, { "epoch": 4.43, "learning_rate": 2.502458819675357e-05, "loss": 1.1088, "step": 7388000 }, { "epoch": 4.43, "learning_rate": 2.502249243112413e-05, "loss": 1.0897, "step": 7388500 }, { "epoch": 4.43, "learning_rate": 2.5020392465563565e-05, "loss": 1.1157, "step": 7389000 }, { "epoch": 4.43, "learning_rate": 2.5018292500003002e-05, "loss": 1.1173, "step": 7389500 }, { "epoch": 4.43, "learning_rate": 2.5016192534442432e-05, "loss": 1.097, "step": 7390000 }, { "epoch": 4.43, "learning_rate": 2.501409676881299e-05, "loss": 1.1103, "step": 7390500 }, { "epoch": 4.43, "learning_rate": 2.5011996803252426e-05, "loss": 1.101, "step": 7391000 }, { "epoch": 4.43, "learning_rate": 2.5009896837691863e-05, "loss": 1.1038, "step": 7391500 }, { "epoch": 4.43, "learning_rate": 2.5007796872131296e-05, "loss": 1.1001, "step": 7392000 }, { "epoch": 4.43, "learning_rate": 2.5005701106501853e-05, "loss": 1.1277, "step": 7392500 }, { "epoch": 4.43, "learning_rate": 2.5003601140941287e-05, "loss": 1.0846, "step": 7393000 }, { "epoch": 4.43, "learning_rate": 2.5001501175380723e-05, "loss": 1.0849, "step": 7393500 }, { "epoch": 4.43, "learning_rate": 2.499940120982016e-05, "loss": 1.0944, "step": 7394000 }, { "epoch": 4.43, "learning_rate": 2.4997301244259594e-05, "loss": 1.1146, "step": 7394500 }, { "epoch": 4.43, "learning_rate": 2.499520547863015e-05, "loss": 1.1269, "step": 7395000 }, { "epoch": 4.43, "learning_rate": 2.4993105513069584e-05, "loss": 1.1046, "step": 7395500 }, { "epoch": 4.43, "learning_rate": 2.499100554750902e-05, "loss": 1.1168, "step": 7396000 }, { "epoch": 4.43, "learning_rate": 2.4988905581948458e-05, "loss": 1.0968, "step": 7396500 }, { "epoch": 4.43, "learning_rate": 2.498680561638789e-05, "loss": 1.0907, "step": 7397000 }, { "epoch": 4.44, "learning_rate": 2.4984709850758445e-05, "loss": 1.1242, "step": 7397500 }, { "epoch": 4.44, "learning_rate": 2.4982609885197882e-05, "loss": 1.1166, "step": 7398000 }, { "epoch": 4.44, "learning_rate": 2.498050991963732e-05, "loss": 1.1002, "step": 7398500 }, { "epoch": 4.44, "learning_rate": 2.4978409954076752e-05, "loss": 1.1077, "step": 7399000 }, { "epoch": 4.44, "learning_rate": 2.497631418844731e-05, "loss": 1.1191, "step": 7399500 }, { "epoch": 4.44, "learning_rate": 2.4974214222886743e-05, "loss": 1.0956, "step": 7400000 }, { "epoch": 4.44, "eval_loss": 1.0819727182388306, "eval_runtime": 1101.2934, "eval_samples_per_second": 478.274, "eval_steps_per_second": 79.713, "step": 7400000 }, { "epoch": 4.44, "learning_rate": 2.497211425732618e-05, "loss": 1.1036, "step": 7400500 }, { "epoch": 4.44, "learning_rate": 2.4970014291765616e-05, "loss": 1.0922, "step": 7401000 }, { "epoch": 4.44, "learning_rate": 2.4967918526136173e-05, "loss": 1.1333, "step": 7401500 }, { "epoch": 4.44, "learning_rate": 2.4965818560575607e-05, "loss": 1.1308, "step": 7402000 }, { "epoch": 4.44, "learning_rate": 2.496371859501504e-05, "loss": 1.1089, "step": 7402500 }, { "epoch": 4.44, "learning_rate": 2.4961618629454477e-05, "loss": 1.0969, "step": 7403000 }, { "epoch": 4.44, "learning_rate": 2.4959518663893914e-05, "loss": 1.1031, "step": 7403500 }, { "epoch": 4.44, "learning_rate": 2.4957422898264467e-05, "loss": 1.115, "step": 7404000 }, { "epoch": 4.44, "learning_rate": 2.49553229327039e-05, "loss": 1.1123, "step": 7404500 }, { "epoch": 4.44, "learning_rate": 2.4953222967143338e-05, "loss": 1.1056, "step": 7405000 }, { "epoch": 4.44, "learning_rate": 2.4951123001582775e-05, "loss": 1.0957, "step": 7405500 }, { "epoch": 4.44, "learning_rate": 2.494902303602221e-05, "loss": 1.1064, "step": 7406000 }, { "epoch": 4.44, "learning_rate": 2.4946927270392765e-05, "loss": 1.1157, "step": 7406500 }, { "epoch": 4.44, "learning_rate": 2.49448273048322e-05, "loss": 1.1099, "step": 7407000 }, { "epoch": 4.44, "learning_rate": 2.4942727339271635e-05, "loss": 1.0938, "step": 7407500 }, { "epoch": 4.44, "learning_rate": 2.4940627373711072e-05, "loss": 1.0929, "step": 7408000 }, { "epoch": 4.44, "learning_rate": 2.4938527408150506e-05, "loss": 1.0948, "step": 7408500 }, { "epoch": 4.44, "learning_rate": 2.4936427442589942e-05, "loss": 1.1184, "step": 7409000 }, { "epoch": 4.44, "learning_rate": 2.4934331676960496e-05, "loss": 1.124, "step": 7409500 }, { "epoch": 4.44, "learning_rate": 2.4932231711399933e-05, "loss": 1.1297, "step": 7410000 }, { "epoch": 4.44, "learning_rate": 2.493013174583937e-05, "loss": 1.1004, "step": 7410500 }, { "epoch": 4.44, "learning_rate": 2.4928031780278803e-05, "loss": 1.0813, "step": 7411000 }, { "epoch": 4.44, "learning_rate": 2.4925936014649357e-05, "loss": 1.0981, "step": 7411500 }, { "epoch": 4.44, "learning_rate": 2.4923836049088794e-05, "loss": 1.0896, "step": 7412000 }, { "epoch": 4.44, "learning_rate": 2.492173608352823e-05, "loss": 1.1012, "step": 7412500 }, { "epoch": 4.44, "learning_rate": 2.4919636117967667e-05, "loss": 1.1075, "step": 7413000 }, { "epoch": 4.44, "learning_rate": 2.49175361524071e-05, "loss": 1.1104, "step": 7413500 }, { "epoch": 4.44, "learning_rate": 2.4915436186846534e-05, "loss": 1.1289, "step": 7414000 }, { "epoch": 4.45, "learning_rate": 2.491333622128597e-05, "loss": 1.1326, "step": 7414500 }, { "epoch": 4.45, "learning_rate": 2.4911240455656528e-05, "loss": 1.1019, "step": 7415000 }, { "epoch": 4.45, "learning_rate": 2.490914049009596e-05, "loss": 1.1398, "step": 7415500 }, { "epoch": 4.45, "learning_rate": 2.4907040524535398e-05, "loss": 1.1104, "step": 7416000 }, { "epoch": 4.45, "learning_rate": 2.4904940558974832e-05, "loss": 1.0763, "step": 7416500 }, { "epoch": 4.45, "learning_rate": 2.490284059341427e-05, "loss": 1.1, "step": 7417000 }, { "epoch": 4.45, "learning_rate": 2.4900744827784826e-05, "loss": 1.0805, "step": 7417500 }, { "epoch": 4.45, "learning_rate": 2.489864486222426e-05, "loss": 1.0974, "step": 7418000 }, { "epoch": 4.45, "learning_rate": 2.4896549096594813e-05, "loss": 1.067, "step": 7418500 }, { "epoch": 4.45, "learning_rate": 2.489444913103425e-05, "loss": 1.087, "step": 7419000 }, { "epoch": 4.45, "learning_rate": 2.4892349165473686e-05, "loss": 1.1075, "step": 7419500 }, { "epoch": 4.45, "learning_rate": 2.4890249199913123e-05, "loss": 1.1312, "step": 7420000 }, { "epoch": 4.45, "learning_rate": 2.4888149234352557e-05, "loss": 1.1226, "step": 7420500 }, { "epoch": 4.45, "learning_rate": 2.488604926879199e-05, "loss": 1.1121, "step": 7421000 }, { "epoch": 4.45, "learning_rate": 2.4883949303231427e-05, "loss": 1.0908, "step": 7421500 }, { "epoch": 4.45, "learning_rate": 2.488184933767086e-05, "loss": 1.1004, "step": 7422000 }, { "epoch": 4.45, "learning_rate": 2.4879749372110297e-05, "loss": 1.1191, "step": 7422500 }, { "epoch": 4.45, "learning_rate": 2.4877649406549734e-05, "loss": 1.1159, "step": 7423000 }, { "epoch": 4.45, "learning_rate": 2.4875549440989167e-05, "loss": 1.1039, "step": 7423500 }, { "epoch": 4.45, "learning_rate": 2.48734494754286e-05, "loss": 1.101, "step": 7424000 }, { "epoch": 4.45, "learning_rate": 2.487135790973028e-05, "loss": 1.1022, "step": 7424500 }, { "epoch": 4.45, "learning_rate": 2.4869257944169715e-05, "loss": 1.0754, "step": 7425000 }, { "epoch": 4.45, "learning_rate": 2.4867157978609152e-05, "loss": 1.1119, "step": 7425500 }, { "epoch": 4.45, "learning_rate": 2.4865058013048585e-05, "loss": 1.1031, "step": 7426000 }, { "epoch": 4.45, "learning_rate": 2.4862962247419142e-05, "loss": 1.123, "step": 7426500 }, { "epoch": 4.45, "learning_rate": 2.486086228185858e-05, "loss": 1.0838, "step": 7427000 }, { "epoch": 4.45, "learning_rate": 2.4858762316298012e-05, "loss": 1.1071, "step": 7427500 }, { "epoch": 4.45, "learning_rate": 2.485666235073745e-05, "loss": 1.103, "step": 7428000 }, { "epoch": 4.45, "learning_rate": 2.4854566585108003e-05, "loss": 1.1402, "step": 7428500 }, { "epoch": 4.45, "learning_rate": 2.485246661954744e-05, "loss": 1.123, "step": 7429000 }, { "epoch": 4.45, "learning_rate": 2.4850366653986873e-05, "loss": 1.1218, "step": 7429500 }, { "epoch": 4.45, "learning_rate": 2.484826668842631e-05, "loss": 1.1101, "step": 7430000 }, { "epoch": 4.45, "learning_rate": 2.4846170922796864e-05, "loss": 1.1123, "step": 7430500 }, { "epoch": 4.46, "learning_rate": 2.48440709572363e-05, "loss": 1.1118, "step": 7431000 }, { "epoch": 4.46, "learning_rate": 2.4841970991675737e-05, "loss": 1.1152, "step": 7431500 }, { "epoch": 4.46, "learning_rate": 2.483987102611517e-05, "loss": 1.1393, "step": 7432000 }, { "epoch": 4.46, "learning_rate": 2.4837771060554608e-05, "loss": 1.1377, "step": 7432500 }, { "epoch": 4.46, "learning_rate": 2.483567529492516e-05, "loss": 1.1108, "step": 7433000 }, { "epoch": 4.46, "learning_rate": 2.4833575329364598e-05, "loss": 1.0918, "step": 7433500 }, { "epoch": 4.46, "learning_rate": 2.4831475363804035e-05, "loss": 1.1175, "step": 7434000 }, { "epoch": 4.46, "learning_rate": 2.482937539824347e-05, "loss": 1.107, "step": 7434500 }, { "epoch": 4.46, "learning_rate": 2.4827275432682905e-05, "loss": 1.1054, "step": 7435000 }, { "epoch": 4.46, "learning_rate": 2.482517966705346e-05, "loss": 1.1446, "step": 7435500 }, { "epoch": 4.46, "learning_rate": 2.4823079701492896e-05, "loss": 1.0919, "step": 7436000 }, { "epoch": 4.46, "learning_rate": 2.482097973593233e-05, "loss": 1.0988, "step": 7436500 }, { "epoch": 4.46, "learning_rate": 2.4818879770371766e-05, "loss": 1.0911, "step": 7437000 }, { "epoch": 4.46, "learning_rate": 2.481678400474232e-05, "loss": 1.1106, "step": 7437500 }, { "epoch": 4.46, "learning_rate": 2.4814684039181756e-05, "loss": 1.0979, "step": 7438000 }, { "epoch": 4.46, "learning_rate": 2.4812584073621193e-05, "loss": 1.0853, "step": 7438500 }, { "epoch": 4.46, "learning_rate": 2.4810484108060627e-05, "loss": 1.082, "step": 7439000 }, { "epoch": 4.46, "learning_rate": 2.4808384142500063e-05, "loss": 1.1189, "step": 7439500 }, { "epoch": 4.46, "learning_rate": 2.4806288376870617e-05, "loss": 1.1123, "step": 7440000 }, { "epoch": 4.46, "learning_rate": 2.4804188411310054e-05, "loss": 1.114, "step": 7440500 }, { "epoch": 4.46, "learning_rate": 2.480208844574949e-05, "loss": 1.0859, "step": 7441000 }, { "epoch": 4.46, "learning_rate": 2.4799988480188924e-05, "loss": 1.1041, "step": 7441500 }, { "epoch": 4.46, "learning_rate": 2.479788851462836e-05, "loss": 1.1015, "step": 7442000 }, { "epoch": 4.46, "learning_rate": 2.4795792748998915e-05, "loss": 1.084, "step": 7442500 }, { "epoch": 4.46, "learning_rate": 2.479369278343835e-05, "loss": 1.0923, "step": 7443000 }, { "epoch": 4.46, "learning_rate": 2.4791592817877785e-05, "loss": 1.1248, "step": 7443500 }, { "epoch": 4.46, "learning_rate": 2.4789492852317222e-05, "loss": 1.0863, "step": 7444000 }, { "epoch": 4.46, "learning_rate": 2.4787397086687775e-05, "loss": 1.1038, "step": 7444500 }, { "epoch": 4.46, "learning_rate": 2.4785297121127212e-05, "loss": 1.095, "step": 7445000 }, { "epoch": 4.46, "learning_rate": 2.478319715556665e-05, "loss": 1.0976, "step": 7445500 }, { "epoch": 4.46, "learning_rate": 2.4781097190006083e-05, "loss": 1.1227, "step": 7446000 }, { "epoch": 4.46, "learning_rate": 2.477899722444552e-05, "loss": 1.0986, "step": 7446500 }, { "epoch": 4.46, "learning_rate": 2.4776897258884956e-05, "loss": 1.0902, "step": 7447000 }, { "epoch": 4.47, "learning_rate": 2.477480149325551e-05, "loss": 1.0904, "step": 7447500 }, { "epoch": 4.47, "learning_rate": 2.4772701527694947e-05, "loss": 1.0893, "step": 7448000 }, { "epoch": 4.47, "learning_rate": 2.477060156213438e-05, "loss": 1.1108, "step": 7448500 }, { "epoch": 4.47, "learning_rate": 2.4768501596573817e-05, "loss": 1.0984, "step": 7449000 }, { "epoch": 4.47, "learning_rate": 2.4766401631013254e-05, "loss": 1.1063, "step": 7449500 }, { "epoch": 4.47, "learning_rate": 2.4764305865383807e-05, "loss": 1.1217, "step": 7450000 }, { "epoch": 4.47, "learning_rate": 2.476220589982324e-05, "loss": 1.1006, "step": 7450500 }, { "epoch": 4.47, "learning_rate": 2.4760105934262678e-05, "loss": 1.0982, "step": 7451000 }, { "epoch": 4.47, "learning_rate": 2.4758005968702115e-05, "loss": 1.1141, "step": 7451500 }, { "epoch": 4.47, "learning_rate": 2.4755906003141548e-05, "loss": 1.1104, "step": 7452000 }, { "epoch": 4.47, "learning_rate": 2.4753810237512105e-05, "loss": 1.1056, "step": 7452500 }, { "epoch": 4.47, "learning_rate": 2.475171027195154e-05, "loss": 1.1154, "step": 7453000 }, { "epoch": 4.47, "learning_rate": 2.4749610306390975e-05, "loss": 1.1071, "step": 7453500 }, { "epoch": 4.47, "learning_rate": 2.4747510340830412e-05, "loss": 1.1272, "step": 7454000 }, { "epoch": 4.47, "learning_rate": 2.4745414575200966e-05, "loss": 1.1293, "step": 7454500 }, { "epoch": 4.47, "learning_rate": 2.4743314609640403e-05, "loss": 1.0934, "step": 7455000 }, { "epoch": 4.47, "learning_rate": 2.4741214644079836e-05, "loss": 1.135, "step": 7455500 }, { "epoch": 4.47, "learning_rate": 2.4739114678519273e-05, "loss": 1.0855, "step": 7456000 }, { "epoch": 4.47, "learning_rate": 2.473701471295871e-05, "loss": 1.1227, "step": 7456500 }, { "epoch": 4.47, "learning_rate": 2.473491474739814e-05, "loss": 1.1052, "step": 7457000 }, { "epoch": 4.47, "learning_rate": 2.4732814781837577e-05, "loss": 1.1244, "step": 7457500 }, { "epoch": 4.47, "learning_rate": 2.4730719016208134e-05, "loss": 1.1011, "step": 7458000 }, { "epoch": 4.47, "learning_rate": 2.472861905064757e-05, "loss": 1.1125, "step": 7458500 }, { "epoch": 4.47, "learning_rate": 2.4726519085087007e-05, "loss": 1.0989, "step": 7459000 }, { "epoch": 4.47, "learning_rate": 2.4724419119526437e-05, "loss": 1.1215, "step": 7459500 }, { "epoch": 4.47, "learning_rate": 2.4722319153965874e-05, "loss": 1.0873, "step": 7460000 }, { "epoch": 4.47, "learning_rate": 2.472022338833643e-05, "loss": 1.1106, "step": 7460500 }, { "epoch": 4.47, "learning_rate": 2.4718123422775868e-05, "loss": 1.096, "step": 7461000 }, { "epoch": 4.47, "learning_rate": 2.47160234572153e-05, "loss": 1.0873, "step": 7461500 }, { "epoch": 4.47, "learning_rate": 2.4713923491654735e-05, "loss": 1.1038, "step": 7462000 }, { "epoch": 4.47, "learning_rate": 2.4711823526094172e-05, "loss": 1.0957, "step": 7462500 }, { "epoch": 4.47, "learning_rate": 2.470972776046473e-05, "loss": 1.0892, "step": 7463000 }, { "epoch": 4.47, "learning_rate": 2.4707627794904166e-05, "loss": 1.1252, "step": 7463500 }, { "epoch": 4.47, "learning_rate": 2.4705527829343596e-05, "loss": 1.1212, "step": 7464000 }, { "epoch": 4.48, "learning_rate": 2.4703427863783032e-05, "loss": 1.1256, "step": 7464500 }, { "epoch": 4.48, "learning_rate": 2.470132789822247e-05, "loss": 1.1171, "step": 7465000 }, { "epoch": 4.48, "learning_rate": 2.4699227932661903e-05, "loss": 1.0859, "step": 7465500 }, { "epoch": 4.48, "learning_rate": 2.469712796710134e-05, "loss": 1.1185, "step": 7466000 }, { "epoch": 4.48, "learning_rate": 2.4695028001540776e-05, "loss": 1.1007, "step": 7466500 }, { "epoch": 4.48, "learning_rate": 2.469293223591133e-05, "loss": 1.0856, "step": 7467000 }, { "epoch": 4.48, "learning_rate": 2.4690832270350767e-05, "loss": 1.0976, "step": 7467500 }, { "epoch": 4.48, "learning_rate": 2.4688736504721324e-05, "loss": 1.1136, "step": 7468000 }, { "epoch": 4.48, "learning_rate": 2.4686636539160757e-05, "loss": 1.1122, "step": 7468500 }, { "epoch": 4.48, "learning_rate": 2.468453657360019e-05, "loss": 1.1314, "step": 7469000 }, { "epoch": 4.48, "learning_rate": 2.4682436608039628e-05, "loss": 1.1105, "step": 7469500 }, { "epoch": 4.48, "learning_rate": 2.4680336642479064e-05, "loss": 1.129, "step": 7470000 }, { "epoch": 4.48, "learning_rate": 2.4678236676918498e-05, "loss": 1.1043, "step": 7470500 }, { "epoch": 4.48, "learning_rate": 2.4676136711357935e-05, "loss": 1.0941, "step": 7471000 }, { "epoch": 4.48, "learning_rate": 2.4674036745797368e-05, "loss": 1.1017, "step": 7471500 }, { "epoch": 4.48, "learning_rate": 2.4671940980167925e-05, "loss": 1.11, "step": 7472000 }, { "epoch": 4.48, "learning_rate": 2.466984101460736e-05, "loss": 1.0979, "step": 7472500 }, { "epoch": 4.48, "learning_rate": 2.4667741049046795e-05, "loss": 1.1045, "step": 7473000 }, { "epoch": 4.48, "learning_rate": 2.4665641083486232e-05, "loss": 1.0886, "step": 7473500 }, { "epoch": 4.48, "learning_rate": 2.4663545317856786e-05, "loss": 1.0895, "step": 7474000 }, { "epoch": 4.48, "learning_rate": 2.4661445352296223e-05, "loss": 1.1256, "step": 7474500 }, { "epoch": 4.48, "learning_rate": 2.4659345386735656e-05, "loss": 1.0896, "step": 7475000 }, { "epoch": 4.48, "learning_rate": 2.4657245421175093e-05, "loss": 1.1118, "step": 7475500 }, { "epoch": 4.48, "learning_rate": 2.465514545561453e-05, "loss": 1.1279, "step": 7476000 }, { "epoch": 4.48, "learning_rate": 2.4653049689985083e-05, "loss": 1.0868, "step": 7476500 }, { "epoch": 4.48, "learning_rate": 2.465094972442452e-05, "loss": 1.1001, "step": 7477000 }, { "epoch": 4.48, "learning_rate": 2.4648849758863954e-05, "loss": 1.1387, "step": 7477500 }, { "epoch": 4.48, "learning_rate": 2.464674979330339e-05, "loss": 1.1008, "step": 7478000 }, { "epoch": 4.48, "learning_rate": 2.4644649827742827e-05, "loss": 1.1051, "step": 7478500 }, { "epoch": 4.48, "learning_rate": 2.46425582620445e-05, "loss": 1.1224, "step": 7479000 }, { "epoch": 4.48, "learning_rate": 2.4640458296483938e-05, "loss": 1.0942, "step": 7479500 }, { "epoch": 4.48, "learning_rate": 2.4638358330923375e-05, "loss": 1.1131, "step": 7480000 }, { "epoch": 4.48, "learning_rate": 2.463625836536281e-05, "loss": 1.106, "step": 7480500 }, { "epoch": 4.49, "learning_rate": 2.4634158399802242e-05, "loss": 1.1317, "step": 7481000 }, { "epoch": 4.49, "learning_rate": 2.463205843424168e-05, "loss": 1.1177, "step": 7481500 }, { "epoch": 4.49, "learning_rate": 2.4629958468681112e-05, "loss": 1.115, "step": 7482000 }, { "epoch": 4.49, "learning_rate": 2.462785850312055e-05, "loss": 1.1143, "step": 7482500 }, { "epoch": 4.49, "learning_rate": 2.4625758537559986e-05, "loss": 1.1109, "step": 7483000 }, { "epoch": 4.49, "learning_rate": 2.462366277193054e-05, "loss": 1.1177, "step": 7483500 }, { "epoch": 4.49, "learning_rate": 2.4621562806369976e-05, "loss": 1.115, "step": 7484000 }, { "epoch": 4.49, "learning_rate": 2.461946284080941e-05, "loss": 1.1244, "step": 7484500 }, { "epoch": 4.49, "learning_rate": 2.4617362875248847e-05, "loss": 1.1165, "step": 7485000 }, { "epoch": 4.49, "learning_rate": 2.46152671096194e-05, "loss": 1.1331, "step": 7485500 }, { "epoch": 4.49, "learning_rate": 2.4613167144058837e-05, "loss": 1.0722, "step": 7486000 }, { "epoch": 4.49, "learning_rate": 2.461106717849827e-05, "loss": 1.0999, "step": 7486500 }, { "epoch": 4.49, "learning_rate": 2.4608967212937707e-05, "loss": 1.1252, "step": 7487000 }, { "epoch": 4.49, "learning_rate": 2.4606867247377144e-05, "loss": 1.0954, "step": 7487500 }, { "epoch": 4.49, "learning_rate": 2.4604771481747698e-05, "loss": 1.1145, "step": 7488000 }, { "epoch": 4.49, "learning_rate": 2.4602671516187135e-05, "loss": 1.0745, "step": 7488500 }, { "epoch": 4.49, "learning_rate": 2.4600571550626568e-05, "loss": 1.12, "step": 7489000 }, { "epoch": 4.49, "learning_rate": 2.4598471585066005e-05, "loss": 1.0918, "step": 7489500 }, { "epoch": 4.49, "learning_rate": 2.459637161950544e-05, "loss": 1.0847, "step": 7490000 }, { "epoch": 4.49, "learning_rate": 2.4594275853875995e-05, "loss": 1.095, "step": 7490500 }, { "epoch": 4.49, "learning_rate": 2.4592175888315432e-05, "loss": 1.1301, "step": 7491000 }, { "epoch": 4.49, "learning_rate": 2.4590075922754866e-05, "loss": 1.1277, "step": 7491500 }, { "epoch": 4.49, "learning_rate": 2.4587975957194302e-05, "loss": 1.1111, "step": 7492000 }, { "epoch": 4.49, "learning_rate": 2.458587599163374e-05, "loss": 1.1133, "step": 7492500 }, { "epoch": 4.49, "learning_rate": 2.4583780226004293e-05, "loss": 1.091, "step": 7493000 }, { "epoch": 4.49, "learning_rate": 2.4581680260443726e-05, "loss": 1.114, "step": 7493500 }, { "epoch": 4.49, "learning_rate": 2.4579580294883163e-05, "loss": 1.1266, "step": 7494000 }, { "epoch": 4.49, "learning_rate": 2.45774803293226e-05, "loss": 1.093, "step": 7494500 }, { "epoch": 4.49, "learning_rate": 2.4575380363762037e-05, "loss": 1.1002, "step": 7495000 }, { "epoch": 4.49, "learning_rate": 2.457328459813259e-05, "loss": 1.1107, "step": 7495500 }, { "epoch": 4.49, "learning_rate": 2.4571184632572024e-05, "loss": 1.077, "step": 7496000 }, { "epoch": 4.49, "learning_rate": 2.456908466701146e-05, "loss": 1.0986, "step": 7496500 }, { "epoch": 4.49, "learning_rate": 2.4566984701450898e-05, "loss": 1.0944, "step": 7497000 }, { "epoch": 4.5, "learning_rate": 2.456488473589033e-05, "loss": 1.1146, "step": 7497500 }, { "epoch": 4.5, "learning_rate": 2.4562788970260888e-05, "loss": 1.1162, "step": 7498000 }, { "epoch": 4.5, "learning_rate": 2.456068900470032e-05, "loss": 1.0997, "step": 7498500 }, { "epoch": 4.5, "learning_rate": 2.4558589039139758e-05, "loss": 1.1123, "step": 7499000 }, { "epoch": 4.5, "learning_rate": 2.4556489073579195e-05, "loss": 1.1413, "step": 7499500 }, { "epoch": 4.5, "learning_rate": 2.4554389108018625e-05, "loss": 1.1256, "step": 7500000 }, { "epoch": 4.5, "eval_loss": 1.0793567895889282, "eval_runtime": 1105.9813, "eval_samples_per_second": 476.247, "eval_steps_per_second": 79.375, "step": 7500000 }, { "epoch": 4.5, "learning_rate": 2.4552293342389186e-05, "loss": 1.0892, "step": 7500500 }, { "epoch": 4.5, "learning_rate": 2.455019337682862e-05, "loss": 1.1003, "step": 7501000 }, { "epoch": 4.5, "learning_rate": 2.4548093411268056e-05, "loss": 1.0527, "step": 7501500 }, { "epoch": 4.5, "learning_rate": 2.4545993445707493e-05, "loss": 1.1102, "step": 7502000 }, { "epoch": 4.5, "learning_rate": 2.4543893480146923e-05, "loss": 1.1144, "step": 7502500 }, { "epoch": 4.5, "learning_rate": 2.454179351458636e-05, "loss": 1.0879, "step": 7503000 }, { "epoch": 4.5, "learning_rate": 2.4539693549025796e-05, "loss": 1.0898, "step": 7503500 }, { "epoch": 4.5, "learning_rate": 2.453759358346523e-05, "loss": 1.1116, "step": 7504000 }, { "epoch": 4.5, "learning_rate": 2.4535502017766907e-05, "loss": 1.1066, "step": 7504500 }, { "epoch": 4.5, "learning_rate": 2.4533402052206344e-05, "loss": 1.0779, "step": 7505000 }, { "epoch": 4.5, "learning_rate": 2.4531302086645777e-05, "loss": 1.0772, "step": 7505500 }, { "epoch": 4.5, "learning_rate": 2.4529202121085214e-05, "loss": 1.1103, "step": 7506000 }, { "epoch": 4.5, "learning_rate": 2.452710215552465e-05, "loss": 1.1026, "step": 7506500 }, { "epoch": 4.5, "learning_rate": 2.4525006389895205e-05, "loss": 1.1216, "step": 7507000 }, { "epoch": 4.5, "learning_rate": 2.452290642433464e-05, "loss": 1.083, "step": 7507500 }, { "epoch": 4.5, "learning_rate": 2.4520806458774075e-05, "loss": 1.1449, "step": 7508000 }, { "epoch": 4.5, "learning_rate": 2.4518706493213512e-05, "loss": 1.1183, "step": 7508500 }, { "epoch": 4.5, "learning_rate": 2.451660652765295e-05, "loss": 1.1037, "step": 7509000 }, { "epoch": 4.5, "learning_rate": 2.4514506562092382e-05, "loss": 1.102, "step": 7509500 }, { "epoch": 4.5, "learning_rate": 2.4512410796462936e-05, "loss": 1.0942, "step": 7510000 }, { "epoch": 4.5, "learning_rate": 2.4510310830902372e-05, "loss": 1.0821, "step": 7510500 }, { "epoch": 4.5, "learning_rate": 2.450821086534181e-05, "loss": 1.1434, "step": 7511000 }, { "epoch": 4.5, "learning_rate": 2.4506110899781243e-05, "loss": 1.1118, "step": 7511500 }, { "epoch": 4.5, "learning_rate": 2.45040151341518e-05, "loss": 1.1333, "step": 7512000 }, { "epoch": 4.5, "learning_rate": 2.4501915168591233e-05, "loss": 1.0913, "step": 7512500 }, { "epoch": 4.5, "learning_rate": 2.449981520303067e-05, "loss": 1.1104, "step": 7513000 }, { "epoch": 4.5, "learning_rate": 2.4497715237470107e-05, "loss": 1.1215, "step": 7513500 }, { "epoch": 4.5, "learning_rate": 2.449561527190954e-05, "loss": 1.1285, "step": 7514000 }, { "epoch": 4.51, "learning_rate": 2.4493519506280097e-05, "loss": 1.0964, "step": 7514500 }, { "epoch": 4.51, "learning_rate": 2.449141954071953e-05, "loss": 1.0949, "step": 7515000 }, { "epoch": 4.51, "learning_rate": 2.4489319575158968e-05, "loss": 1.0909, "step": 7515500 }, { "epoch": 4.51, "learning_rate": 2.4487219609598404e-05, "loss": 1.1189, "step": 7516000 }, { "epoch": 4.51, "learning_rate": 2.4485119644037838e-05, "loss": 1.1029, "step": 7516500 }, { "epoch": 4.51, "learning_rate": 2.448302387840839e-05, "loss": 1.1232, "step": 7517000 }, { "epoch": 4.51, "learning_rate": 2.448092391284783e-05, "loss": 1.1275, "step": 7517500 }, { "epoch": 4.51, "learning_rate": 2.4478823947287265e-05, "loss": 1.1189, "step": 7518000 }, { "epoch": 4.51, "learning_rate": 2.44767239817267e-05, "loss": 1.1089, "step": 7518500 }, { "epoch": 4.51, "learning_rate": 2.4474624016166135e-05, "loss": 1.0983, "step": 7519000 }, { "epoch": 4.51, "learning_rate": 2.447252825053669e-05, "loss": 1.1265, "step": 7519500 }, { "epoch": 4.51, "learning_rate": 2.4470428284976126e-05, "loss": 1.1083, "step": 7520000 }, { "epoch": 4.51, "learning_rate": 2.4468328319415563e-05, "loss": 1.1332, "step": 7520500 }, { "epoch": 4.51, "learning_rate": 2.4466228353854996e-05, "loss": 1.1438, "step": 7521000 }, { "epoch": 4.51, "learning_rate": 2.446412838829443e-05, "loss": 1.1111, "step": 7521500 }, { "epoch": 4.51, "learning_rate": 2.4462032622664987e-05, "loss": 1.086, "step": 7522000 }, { "epoch": 4.51, "learning_rate": 2.4459932657104423e-05, "loss": 1.0774, "step": 7522500 }, { "epoch": 4.51, "learning_rate": 2.445783269154386e-05, "loss": 1.0884, "step": 7523000 }, { "epoch": 4.51, "learning_rate": 2.4455732725983294e-05, "loss": 1.1285, "step": 7523500 }, { "epoch": 4.51, "learning_rate": 2.4453632760422727e-05, "loss": 1.1096, "step": 7524000 }, { "epoch": 4.51, "learning_rate": 2.4451532794862164e-05, "loss": 1.1442, "step": 7524500 }, { "epoch": 4.51, "learning_rate": 2.444943702923272e-05, "loss": 1.0941, "step": 7525000 }, { "epoch": 4.51, "learning_rate": 2.4447337063672155e-05, "loss": 1.0897, "step": 7525500 }, { "epoch": 4.51, "learning_rate": 2.444523709811159e-05, "loss": 1.107, "step": 7526000 }, { "epoch": 4.51, "learning_rate": 2.4443137132551025e-05, "loss": 1.1121, "step": 7526500 }, { "epoch": 4.51, "learning_rate": 2.444103716699046e-05, "loss": 1.1284, "step": 7527000 }, { "epoch": 4.51, "learning_rate": 2.443894140136102e-05, "loss": 1.1107, "step": 7527500 }, { "epoch": 4.51, "learning_rate": 2.4436841435800452e-05, "loss": 1.1365, "step": 7528000 }, { "epoch": 4.51, "learning_rate": 2.443474147023989e-05, "loss": 1.1035, "step": 7528500 }, { "epoch": 4.51, "learning_rate": 2.4432641504679322e-05, "loss": 1.0991, "step": 7529000 }, { "epoch": 4.51, "learning_rate": 2.443054573904988e-05, "loss": 1.1189, "step": 7529500 }, { "epoch": 4.51, "learning_rate": 2.4428445773489316e-05, "loss": 1.0918, "step": 7530000 }, { "epoch": 4.51, "learning_rate": 2.442634580792875e-05, "loss": 1.0939, "step": 7530500 }, { "epoch": 4.52, "learning_rate": 2.4424245842368183e-05, "loss": 1.1103, "step": 7531000 }, { "epoch": 4.52, "learning_rate": 2.442215007673874e-05, "loss": 1.1208, "step": 7531500 }, { "epoch": 4.52, "learning_rate": 2.4420050111178177e-05, "loss": 1.087, "step": 7532000 }, { "epoch": 4.52, "learning_rate": 2.441795014561761e-05, "loss": 1.1015, "step": 7532500 }, { "epoch": 4.52, "learning_rate": 2.4415850180057047e-05, "loss": 1.1084, "step": 7533000 }, { "epoch": 4.52, "learning_rate": 2.441375021449648e-05, "loss": 1.0994, "step": 7533500 }, { "epoch": 4.52, "learning_rate": 2.4411654448867038e-05, "loss": 1.0786, "step": 7534000 }, { "epoch": 4.52, "learning_rate": 2.4409554483306475e-05, "loss": 1.1432, "step": 7534500 }, { "epoch": 4.52, "learning_rate": 2.4407454517745908e-05, "loss": 1.0933, "step": 7535000 }, { "epoch": 4.52, "learning_rate": 2.4405354552185345e-05, "loss": 1.1178, "step": 7535500 }, { "epoch": 4.52, "learning_rate": 2.44032587865559e-05, "loss": 1.0849, "step": 7536000 }, { "epoch": 4.52, "learning_rate": 2.4401158820995335e-05, "loss": 1.0914, "step": 7536500 }, { "epoch": 4.52, "learning_rate": 2.4399058855434772e-05, "loss": 1.0789, "step": 7537000 }, { "epoch": 4.52, "learning_rate": 2.4396958889874206e-05, "loss": 1.1101, "step": 7537500 }, { "epoch": 4.52, "learning_rate": 2.439486312424476e-05, "loss": 1.0976, "step": 7538000 }, { "epoch": 4.52, "learning_rate": 2.4392763158684196e-05, "loss": 1.0849, "step": 7538500 }, { "epoch": 4.52, "learning_rate": 2.4390663193123633e-05, "loss": 1.1032, "step": 7539000 }, { "epoch": 4.52, "learning_rate": 2.438856322756307e-05, "loss": 1.0919, "step": 7539500 }, { "epoch": 4.52, "learning_rate": 2.4386463262002503e-05, "loss": 1.1009, "step": 7540000 }, { "epoch": 4.52, "learning_rate": 2.4384367496373057e-05, "loss": 1.1129, "step": 7540500 }, { "epoch": 4.52, "learning_rate": 2.4382267530812494e-05, "loss": 1.1247, "step": 7541000 }, { "epoch": 4.52, "learning_rate": 2.438016756525193e-05, "loss": 1.0723, "step": 7541500 }, { "epoch": 4.52, "learning_rate": 2.4378067599691364e-05, "loss": 1.1061, "step": 7542000 }, { "epoch": 4.52, "learning_rate": 2.43759676341308e-05, "loss": 1.0612, "step": 7542500 }, { "epoch": 4.52, "learning_rate": 2.4373871868501354e-05, "loss": 1.0764, "step": 7543000 }, { "epoch": 4.52, "learning_rate": 2.437177190294079e-05, "loss": 1.08, "step": 7543500 }, { "epoch": 4.52, "learning_rate": 2.4369671937380228e-05, "loss": 1.0818, "step": 7544000 }, { "epoch": 4.52, "learning_rate": 2.436757197181966e-05, "loss": 1.1193, "step": 7544500 }, { "epoch": 4.52, "learning_rate": 2.4365472006259098e-05, "loss": 1.1479, "step": 7545000 }, { "epoch": 4.52, "learning_rate": 2.4363376240629652e-05, "loss": 1.0995, "step": 7545500 }, { "epoch": 4.52, "learning_rate": 2.436127627506909e-05, "loss": 1.0784, "step": 7546000 }, { "epoch": 4.52, "learning_rate": 2.4359176309508526e-05, "loss": 1.1243, "step": 7546500 }, { "epoch": 4.52, "learning_rate": 2.435707634394796e-05, "loss": 1.1285, "step": 7547000 }, { "epoch": 4.53, "learning_rate": 2.4354980578318513e-05, "loss": 1.111, "step": 7547500 }, { "epoch": 4.53, "learning_rate": 2.435288061275795e-05, "loss": 1.1061, "step": 7548000 }, { "epoch": 4.53, "learning_rate": 2.4350780647197386e-05, "loss": 1.0756, "step": 7548500 }, { "epoch": 4.53, "learning_rate": 2.434868068163682e-05, "loss": 1.092, "step": 7549000 }, { "epoch": 4.53, "learning_rate": 2.4346580716076257e-05, "loss": 1.0855, "step": 7549500 }, { "epoch": 4.53, "learning_rate": 2.4344480750515693e-05, "loss": 1.0927, "step": 7550000 }, { "epoch": 4.53, "learning_rate": 2.4342384984886247e-05, "loss": 1.1195, "step": 7550500 }, { "epoch": 4.53, "learning_rate": 2.4340285019325684e-05, "loss": 1.0914, "step": 7551000 }, { "epoch": 4.53, "learning_rate": 2.4338185053765117e-05, "loss": 1.1037, "step": 7551500 }, { "epoch": 4.53, "learning_rate": 2.4336089288135674e-05, "loss": 1.1008, "step": 7552000 }, { "epoch": 4.53, "learning_rate": 2.4333989322575108e-05, "loss": 1.1019, "step": 7552500 }, { "epoch": 4.53, "learning_rate": 2.4331889357014545e-05, "loss": 1.0973, "step": 7553000 }, { "epoch": 4.53, "learning_rate": 2.432978939145398e-05, "loss": 1.1238, "step": 7553500 }, { "epoch": 4.53, "learning_rate": 2.4327689425893415e-05, "loss": 1.1075, "step": 7554000 }, { "epoch": 4.53, "learning_rate": 2.4325589460332852e-05, "loss": 1.0849, "step": 7554500 }, { "epoch": 4.53, "learning_rate": 2.4323489494772285e-05, "loss": 1.0997, "step": 7555000 }, { "epoch": 4.53, "learning_rate": 2.432138952921172e-05, "loss": 1.0869, "step": 7555500 }, { "epoch": 4.53, "learning_rate": 2.4319293763582276e-05, "loss": 1.1038, "step": 7556000 }, { "epoch": 4.53, "learning_rate": 2.4317193798021712e-05, "loss": 1.0906, "step": 7556500 }, { "epoch": 4.53, "learning_rate": 2.431509383246115e-05, "loss": 1.1084, "step": 7557000 }, { "epoch": 4.53, "learning_rate": 2.4312993866900583e-05, "loss": 1.0985, "step": 7557500 }, { "epoch": 4.53, "learning_rate": 2.4310893901340016e-05, "loss": 1.0999, "step": 7558000 }, { "epoch": 4.53, "learning_rate": 2.4308798135710573e-05, "loss": 1.0758, "step": 7558500 }, { "epoch": 4.53, "learning_rate": 2.430669817015001e-05, "loss": 1.097, "step": 7559000 }, { "epoch": 4.53, "learning_rate": 2.4304598204589447e-05, "loss": 1.1278, "step": 7559500 }, { "epoch": 4.53, "learning_rate": 2.4302498239028877e-05, "loss": 1.1105, "step": 7560000 }, { "epoch": 4.53, "learning_rate": 2.4300402473399437e-05, "loss": 1.1217, "step": 7560500 }, { "epoch": 4.53, "learning_rate": 2.429830250783887e-05, "loss": 1.0959, "step": 7561000 }, { "epoch": 4.53, "learning_rate": 2.4296202542278308e-05, "loss": 1.1122, "step": 7561500 }, { "epoch": 4.53, "learning_rate": 2.429410257671774e-05, "loss": 1.0971, "step": 7562000 }, { "epoch": 4.53, "learning_rate": 2.4292006811088298e-05, "loss": 1.085, "step": 7562500 }, { "epoch": 4.53, "learning_rate": 2.428990684552773e-05, "loss": 1.1138, "step": 7563000 }, { "epoch": 4.53, "learning_rate": 2.428780687996717e-05, "loss": 1.1101, "step": 7563500 }, { "epoch": 4.53, "learning_rate": 2.4285706914406605e-05, "loss": 1.1163, "step": 7564000 }, { "epoch": 4.54, "learning_rate": 2.428361114877716e-05, "loss": 1.1202, "step": 7564500 }, { "epoch": 4.54, "learning_rate": 2.4281511183216596e-05, "loss": 1.1014, "step": 7565000 }, { "epoch": 4.54, "learning_rate": 2.427941121765603e-05, "loss": 1.1144, "step": 7565500 }, { "epoch": 4.54, "learning_rate": 2.4277311252095466e-05, "loss": 1.1142, "step": 7566000 }, { "epoch": 4.54, "learning_rate": 2.4275211286534903e-05, "loss": 1.1179, "step": 7566500 }, { "epoch": 4.54, "learning_rate": 2.4273115520905456e-05, "loss": 1.1083, "step": 7567000 }, { "epoch": 4.54, "learning_rate": 2.4271015555344893e-05, "loss": 1.0817, "step": 7567500 }, { "epoch": 4.54, "learning_rate": 2.4268915589784327e-05, "loss": 1.0842, "step": 7568000 }, { "epoch": 4.54, "learning_rate": 2.4266815624223763e-05, "loss": 1.1119, "step": 7568500 }, { "epoch": 4.54, "learning_rate": 2.4264719858594317e-05, "loss": 1.0803, "step": 7569000 }, { "epoch": 4.54, "learning_rate": 2.4262619893033754e-05, "loss": 1.1045, "step": 7569500 }, { "epoch": 4.54, "learning_rate": 2.4260519927473187e-05, "loss": 1.1214, "step": 7570000 }, { "epoch": 4.54, "learning_rate": 2.4258419961912624e-05, "loss": 1.1063, "step": 7570500 }, { "epoch": 4.54, "learning_rate": 2.425632419628318e-05, "loss": 1.1017, "step": 7571000 }, { "epoch": 4.54, "learning_rate": 2.4254224230722615e-05, "loss": 1.0812, "step": 7571500 }, { "epoch": 4.54, "learning_rate": 2.425212426516205e-05, "loss": 1.105, "step": 7572000 }, { "epoch": 4.54, "learning_rate": 2.4250024299601485e-05, "loss": 1.0837, "step": 7572500 }, { "epoch": 4.54, "learning_rate": 2.4247924334040922e-05, "loss": 1.1381, "step": 7573000 }, { "epoch": 4.54, "learning_rate": 2.424582856841148e-05, "loss": 1.0667, "step": 7573500 }, { "epoch": 4.54, "learning_rate": 2.4243728602850912e-05, "loss": 1.0963, "step": 7574000 }, { "epoch": 4.54, "learning_rate": 2.424162863729035e-05, "loss": 1.083, "step": 7574500 }, { "epoch": 4.54, "learning_rate": 2.4239528671729783e-05, "loss": 1.1171, "step": 7575000 }, { "epoch": 4.54, "learning_rate": 2.423742870616922e-05, "loss": 1.1175, "step": 7575500 }, { "epoch": 4.54, "learning_rate": 2.4235332940539773e-05, "loss": 1.1183, "step": 7576000 }, { "epoch": 4.54, "learning_rate": 2.423323297497921e-05, "loss": 1.1109, "step": 7576500 }, { "epoch": 4.54, "learning_rate": 2.4231133009418643e-05, "loss": 1.112, "step": 7577000 }, { "epoch": 4.54, "learning_rate": 2.422903304385808e-05, "loss": 1.1086, "step": 7577500 }, { "epoch": 4.54, "learning_rate": 2.4226937278228637e-05, "loss": 1.1171, "step": 7578000 }, { "epoch": 4.54, "learning_rate": 2.422483731266807e-05, "loss": 1.1091, "step": 7578500 }, { "epoch": 4.54, "learning_rate": 2.4222737347107507e-05, "loss": 1.118, "step": 7579000 }, { "epoch": 4.54, "learning_rate": 2.422063738154694e-05, "loss": 1.1, "step": 7579500 }, { "epoch": 4.54, "learning_rate": 2.4218537415986378e-05, "loss": 1.1009, "step": 7580000 }, { "epoch": 4.54, "learning_rate": 2.4216441650356935e-05, "loss": 1.1003, "step": 7580500 }, { "epoch": 4.55, "learning_rate": 2.4214341684796368e-05, "loss": 1.107, "step": 7581000 }, { "epoch": 4.55, "learning_rate": 2.4212241719235805e-05, "loss": 1.1255, "step": 7581500 }, { "epoch": 4.55, "learning_rate": 2.421014175367524e-05, "loss": 1.1116, "step": 7582000 }, { "epoch": 4.55, "learning_rate": 2.4208041788114675e-05, "loss": 1.0843, "step": 7582500 }, { "epoch": 4.55, "learning_rate": 2.4205946022485232e-05, "loss": 1.0833, "step": 7583000 }, { "epoch": 4.55, "learning_rate": 2.4203846056924666e-05, "loss": 1.1285, "step": 7583500 }, { "epoch": 4.55, "learning_rate": 2.42017460913641e-05, "loss": 1.0883, "step": 7584000 }, { "epoch": 4.55, "learning_rate": 2.4199646125803536e-05, "loss": 1.1226, "step": 7584500 }, { "epoch": 4.55, "learning_rate": 2.4197550360174093e-05, "loss": 1.1064, "step": 7585000 }, { "epoch": 4.55, "learning_rate": 2.4195450394613526e-05, "loss": 1.0894, "step": 7585500 }, { "epoch": 4.55, "learning_rate": 2.4193350429052963e-05, "loss": 1.1134, "step": 7586000 }, { "epoch": 4.55, "learning_rate": 2.4191250463492397e-05, "loss": 1.0994, "step": 7586500 }, { "epoch": 4.55, "learning_rate": 2.4189154697862954e-05, "loss": 1.1104, "step": 7587000 }, { "epoch": 4.55, "learning_rate": 2.418705473230239e-05, "loss": 1.087, "step": 7587500 }, { "epoch": 4.55, "learning_rate": 2.4184954766741824e-05, "loss": 1.0944, "step": 7588000 }, { "epoch": 4.55, "learning_rate": 2.418285480118126e-05, "loss": 1.0971, "step": 7588500 }, { "epoch": 4.55, "learning_rate": 2.4180754835620694e-05, "loss": 1.1001, "step": 7589000 }, { "epoch": 4.55, "learning_rate": 2.417865906999125e-05, "loss": 1.1005, "step": 7589500 }, { "epoch": 4.55, "learning_rate": 2.4176559104430688e-05, "loss": 1.1126, "step": 7590000 }, { "epoch": 4.55, "learning_rate": 2.417445913887012e-05, "loss": 1.0998, "step": 7590500 }, { "epoch": 4.55, "learning_rate": 2.417235917330956e-05, "loss": 1.1247, "step": 7591000 }, { "epoch": 4.55, "learning_rate": 2.4170263407680115e-05, "loss": 1.111, "step": 7591500 }, { "epoch": 4.55, "learning_rate": 2.416816344211955e-05, "loss": 1.1059, "step": 7592000 }, { "epoch": 4.55, "learning_rate": 2.4166063476558986e-05, "loss": 1.1063, "step": 7592500 }, { "epoch": 4.55, "learning_rate": 2.416396351099842e-05, "loss": 1.1181, "step": 7593000 }, { "epoch": 4.55, "learning_rate": 2.4161867745368976e-05, "loss": 1.1112, "step": 7593500 }, { "epoch": 4.55, "learning_rate": 2.4159767779808413e-05, "loss": 1.095, "step": 7594000 }, { "epoch": 4.55, "learning_rate": 2.4157667814247846e-05, "loss": 1.0822, "step": 7594500 }, { "epoch": 4.55, "learning_rate": 2.415556784868728e-05, "loss": 1.1052, "step": 7595000 }, { "epoch": 4.55, "learning_rate": 2.4153467883126717e-05, "loss": 1.1003, "step": 7595500 }, { "epoch": 4.55, "learning_rate": 2.4151372117497274e-05, "loss": 1.112, "step": 7596000 }, { "epoch": 4.55, "learning_rate": 2.4149272151936707e-05, "loss": 1.1064, "step": 7596500 }, { "epoch": 4.55, "learning_rate": 2.4147172186376144e-05, "loss": 1.111, "step": 7597000 }, { "epoch": 4.56, "learning_rate": 2.4145072220815577e-05, "loss": 1.1167, "step": 7597500 }, { "epoch": 4.56, "learning_rate": 2.4142976455186134e-05, "loss": 1.09, "step": 7598000 }, { "epoch": 4.56, "learning_rate": 2.414087648962557e-05, "loss": 1.1465, "step": 7598500 }, { "epoch": 4.56, "learning_rate": 2.4138776524065005e-05, "loss": 1.1239, "step": 7599000 }, { "epoch": 4.56, "learning_rate": 2.413667655850444e-05, "loss": 1.1056, "step": 7599500 }, { "epoch": 4.56, "learning_rate": 2.4134580792874995e-05, "loss": 1.0662, "step": 7600000 }, { "epoch": 4.56, "eval_loss": 1.0789257287979126, "eval_runtime": 1114.5617, "eval_samples_per_second": 472.58, "eval_steps_per_second": 78.764, "step": 7600000 }, { "epoch": 4.56, "learning_rate": 2.4132480827314432e-05, "loss": 1.0794, "step": 7600500 }, { "epoch": 4.56, "learning_rate": 2.413038086175387e-05, "loss": 1.103, "step": 7601000 }, { "epoch": 4.56, "learning_rate": 2.4128280896193302e-05, "loss": 1.0822, "step": 7601500 }, { "epoch": 4.56, "learning_rate": 2.412618093063274e-05, "loss": 1.1107, "step": 7602000 }, { "epoch": 4.56, "learning_rate": 2.4124080965072173e-05, "loss": 1.0978, "step": 7602500 }, { "epoch": 4.56, "learning_rate": 2.412198519944273e-05, "loss": 1.1279, "step": 7603000 }, { "epoch": 4.56, "learning_rate": 2.4119885233882163e-05, "loss": 1.1161, "step": 7603500 }, { "epoch": 4.56, "learning_rate": 2.41177852683216e-05, "loss": 1.0895, "step": 7604000 }, { "epoch": 4.56, "learning_rate": 2.4115685302761033e-05, "loss": 1.1347, "step": 7604500 }, { "epoch": 4.56, "learning_rate": 2.411358533720047e-05, "loss": 1.0934, "step": 7605000 }, { "epoch": 4.56, "learning_rate": 2.4111489571571027e-05, "loss": 1.0992, "step": 7605500 }, { "epoch": 4.56, "learning_rate": 2.4109393805941584e-05, "loss": 1.1092, "step": 7606000 }, { "epoch": 4.56, "learning_rate": 2.4107293840381018e-05, "loss": 1.0997, "step": 7606500 }, { "epoch": 4.56, "learning_rate": 2.410519387482045e-05, "loss": 1.1397, "step": 7607000 }, { "epoch": 4.56, "learning_rate": 2.4103093909259888e-05, "loss": 1.1217, "step": 7607500 }, { "epoch": 4.56, "learning_rate": 2.4100993943699325e-05, "loss": 1.1463, "step": 7608000 }, { "epoch": 4.56, "learning_rate": 2.4098893978138758e-05, "loss": 1.103, "step": 7608500 }, { "epoch": 4.56, "learning_rate": 2.4096794012578195e-05, "loss": 1.0786, "step": 7609000 }, { "epoch": 4.56, "learning_rate": 2.409469404701763e-05, "loss": 1.1256, "step": 7609500 }, { "epoch": 4.56, "learning_rate": 2.4092598281388185e-05, "loss": 1.1128, "step": 7610000 }, { "epoch": 4.56, "learning_rate": 2.409049831582762e-05, "loss": 1.1193, "step": 7610500 }, { "epoch": 4.56, "learning_rate": 2.4088398350267056e-05, "loss": 1.1281, "step": 7611000 }, { "epoch": 4.56, "learning_rate": 2.4086298384706493e-05, "loss": 1.094, "step": 7611500 }, { "epoch": 4.56, "learning_rate": 2.4084198419145926e-05, "loss": 1.1079, "step": 7612000 }, { "epoch": 4.56, "learning_rate": 2.4082102653516483e-05, "loss": 1.1037, "step": 7612500 }, { "epoch": 4.56, "learning_rate": 2.4080002687955916e-05, "loss": 1.082, "step": 7613000 }, { "epoch": 4.56, "learning_rate": 2.4077902722395353e-05, "loss": 1.0814, "step": 7613500 }, { "epoch": 4.56, "learning_rate": 2.407580275683479e-05, "loss": 1.1052, "step": 7614000 }, { "epoch": 4.57, "learning_rate": 2.4073706991205344e-05, "loss": 1.1107, "step": 7614500 }, { "epoch": 4.57, "learning_rate": 2.407160702564478e-05, "loss": 1.0944, "step": 7615000 }, { "epoch": 4.57, "learning_rate": 2.4069507060084214e-05, "loss": 1.094, "step": 7615500 }, { "epoch": 4.57, "learning_rate": 2.406740709452365e-05, "loss": 1.1018, "step": 7616000 }, { "epoch": 4.57, "learning_rate": 2.4065307128963084e-05, "loss": 1.0919, "step": 7616500 }, { "epoch": 4.57, "learning_rate": 2.406321136333364e-05, "loss": 1.0749, "step": 7617000 }, { "epoch": 4.57, "learning_rate": 2.4061111397773075e-05, "loss": 1.1053, "step": 7617500 }, { "epoch": 4.57, "learning_rate": 2.405901143221251e-05, "loss": 1.1237, "step": 7618000 }, { "epoch": 4.57, "learning_rate": 2.405691146665195e-05, "loss": 1.1117, "step": 7618500 }, { "epoch": 4.57, "learning_rate": 2.4054815701022502e-05, "loss": 1.1017, "step": 7619000 }, { "epoch": 4.57, "learning_rate": 2.405271573546194e-05, "loss": 1.1321, "step": 7619500 }, { "epoch": 4.57, "learning_rate": 2.4050615769901372e-05, "loss": 1.1014, "step": 7620000 }, { "epoch": 4.57, "learning_rate": 2.404851580434081e-05, "loss": 1.1297, "step": 7620500 }, { "epoch": 4.57, "learning_rate": 2.4046415838780246e-05, "loss": 1.1029, "step": 7621000 }, { "epoch": 4.57, "learning_rate": 2.4044315873219676e-05, "loss": 1.1113, "step": 7621500 }, { "epoch": 4.57, "learning_rate": 2.4042215907659113e-05, "loss": 1.0915, "step": 7622000 }, { "epoch": 4.57, "learning_rate": 2.404012014202967e-05, "loss": 1.0937, "step": 7622500 }, { "epoch": 4.57, "learning_rate": 2.4038020176469107e-05, "loss": 1.0851, "step": 7623000 }, { "epoch": 4.57, "learning_rate": 2.4035920210908544e-05, "loss": 1.1007, "step": 7623500 }, { "epoch": 4.57, "learning_rate": 2.4033820245347974e-05, "loss": 1.1045, "step": 7624000 }, { "epoch": 4.57, "learning_rate": 2.403172447971853e-05, "loss": 1.1125, "step": 7624500 }, { "epoch": 4.57, "learning_rate": 2.4029624514157967e-05, "loss": 1.1316, "step": 7625000 }, { "epoch": 4.57, "learning_rate": 2.4027524548597404e-05, "loss": 1.1186, "step": 7625500 }, { "epoch": 4.57, "learning_rate": 2.4025424583036838e-05, "loss": 1.1098, "step": 7626000 }, { "epoch": 4.57, "learning_rate": 2.4023328817407395e-05, "loss": 1.1061, "step": 7626500 }, { "epoch": 4.57, "learning_rate": 2.4021228851846828e-05, "loss": 1.1125, "step": 7627000 }, { "epoch": 4.57, "learning_rate": 2.4019128886286265e-05, "loss": 1.1225, "step": 7627500 }, { "epoch": 4.57, "learning_rate": 2.4017028920725702e-05, "loss": 1.082, "step": 7628000 }, { "epoch": 4.57, "learning_rate": 2.4014928955165132e-05, "loss": 1.1111, "step": 7628500 }, { "epoch": 4.57, "learning_rate": 2.4012833189535692e-05, "loss": 1.1013, "step": 7629000 }, { "epoch": 4.57, "learning_rate": 2.4010733223975126e-05, "loss": 1.1021, "step": 7629500 }, { "epoch": 4.57, "learning_rate": 2.4008637458345683e-05, "loss": 1.0972, "step": 7630000 }, { "epoch": 4.57, "learning_rate": 2.4006537492785116e-05, "loss": 1.1151, "step": 7630500 }, { "epoch": 4.58, "learning_rate": 2.4004437527224553e-05, "loss": 1.0718, "step": 7631000 }, { "epoch": 4.58, "learning_rate": 2.4002337561663987e-05, "loss": 1.1087, "step": 7631500 }, { "epoch": 4.58, "learning_rate": 2.4000237596103423e-05, "loss": 1.0991, "step": 7632000 }, { "epoch": 4.58, "learning_rate": 2.399813763054286e-05, "loss": 1.093, "step": 7632500 }, { "epoch": 4.58, "learning_rate": 2.3996037664982297e-05, "loss": 1.0991, "step": 7633000 }, { "epoch": 4.58, "learning_rate": 2.399394189935285e-05, "loss": 1.1045, "step": 7633500 }, { "epoch": 4.58, "learning_rate": 2.3991841933792284e-05, "loss": 1.0938, "step": 7634000 }, { "epoch": 4.58, "learning_rate": 2.398974196823172e-05, "loss": 1.0882, "step": 7634500 }, { "epoch": 4.58, "learning_rate": 2.3987642002671158e-05, "loss": 1.0933, "step": 7635000 }, { "epoch": 4.58, "learning_rate": 2.3985542037110588e-05, "loss": 1.1075, "step": 7635500 }, { "epoch": 4.58, "learning_rate": 2.3983442071550025e-05, "loss": 1.1114, "step": 7636000 }, { "epoch": 4.58, "learning_rate": 2.398134630592058e-05, "loss": 1.1046, "step": 7636500 }, { "epoch": 4.58, "learning_rate": 2.397924634036002e-05, "loss": 1.111, "step": 7637000 }, { "epoch": 4.58, "learning_rate": 2.3977146374799455e-05, "loss": 1.0939, "step": 7637500 }, { "epoch": 4.58, "learning_rate": 2.3975046409238885e-05, "loss": 1.1377, "step": 7638000 }, { "epoch": 4.58, "learning_rate": 2.3972946443678322e-05, "loss": 1.0888, "step": 7638500 }, { "epoch": 4.58, "learning_rate": 2.397084647811776e-05, "loss": 1.1298, "step": 7639000 }, { "epoch": 4.58, "learning_rate": 2.3968750712488316e-05, "loss": 1.1001, "step": 7639500 }, { "epoch": 4.58, "learning_rate": 2.3966650746927753e-05, "loss": 1.0923, "step": 7640000 }, { "epoch": 4.58, "learning_rate": 2.3964550781367183e-05, "loss": 1.1002, "step": 7640500 }, { "epoch": 4.58, "learning_rate": 2.396245081580662e-05, "loss": 1.1146, "step": 7641000 }, { "epoch": 4.58, "learning_rate": 2.3960350850246057e-05, "loss": 1.099, "step": 7641500 }, { "epoch": 4.58, "learning_rate": 2.395825088468549e-05, "loss": 1.1138, "step": 7642000 }, { "epoch": 4.58, "learning_rate": 2.3956150919124927e-05, "loss": 1.093, "step": 7642500 }, { "epoch": 4.58, "learning_rate": 2.3954050953564364e-05, "loss": 1.1131, "step": 7643000 }, { "epoch": 4.58, "learning_rate": 2.3951955187934917e-05, "loss": 1.0896, "step": 7643500 }, { "epoch": 4.58, "learning_rate": 2.3949855222374354e-05, "loss": 1.1075, "step": 7644000 }, { "epoch": 4.58, "learning_rate": 2.3947755256813788e-05, "loss": 1.078, "step": 7644500 }, { "epoch": 4.58, "learning_rate": 2.3945655291253225e-05, "loss": 1.0913, "step": 7645000 }, { "epoch": 4.58, "learning_rate": 2.3943555325692658e-05, "loss": 1.087, "step": 7645500 }, { "epoch": 4.58, "learning_rate": 2.394145536013209e-05, "loss": 1.0893, "step": 7646000 }, { "epoch": 4.58, "learning_rate": 2.393935959450265e-05, "loss": 1.1255, "step": 7646500 }, { "epoch": 4.58, "learning_rate": 2.3937259628942085e-05, "loss": 1.0901, "step": 7647000 }, { "epoch": 4.58, "learning_rate": 2.3935159663381522e-05, "loss": 1.0986, "step": 7647500 }, { "epoch": 4.59, "learning_rate": 2.3933059697820956e-05, "loss": 1.1044, "step": 7648000 }, { "epoch": 4.59, "learning_rate": 2.393095973226039e-05, "loss": 1.0931, "step": 7648500 }, { "epoch": 4.59, "learning_rate": 2.3928863966630946e-05, "loss": 1.0822, "step": 7649000 }, { "epoch": 4.59, "learning_rate": 2.3926764001070383e-05, "loss": 1.0819, "step": 7649500 }, { "epoch": 4.59, "learning_rate": 2.392466403550982e-05, "loss": 1.1352, "step": 7650000 }, { "epoch": 4.59, "learning_rate": 2.392256406994925e-05, "loss": 1.074, "step": 7650500 }, { "epoch": 4.59, "learning_rate": 2.3920464104388687e-05, "loss": 1.1207, "step": 7651000 }, { "epoch": 4.59, "learning_rate": 2.3918368338759244e-05, "loss": 1.0744, "step": 7651500 }, { "epoch": 4.59, "learning_rate": 2.391626837319868e-05, "loss": 1.1079, "step": 7652000 }, { "epoch": 4.59, "learning_rate": 2.3914168407638117e-05, "loss": 1.0641, "step": 7652500 }, { "epoch": 4.59, "learning_rate": 2.3912068442077547e-05, "loss": 1.1141, "step": 7653000 }, { "epoch": 4.59, "learning_rate": 2.3909968476516984e-05, "loss": 1.102, "step": 7653500 }, { "epoch": 4.59, "learning_rate": 2.390787271088754e-05, "loss": 1.1043, "step": 7654000 }, { "epoch": 4.59, "learning_rate": 2.3905772745326978e-05, "loss": 1.0804, "step": 7654500 }, { "epoch": 4.59, "learning_rate": 2.390367277976641e-05, "loss": 1.1104, "step": 7655000 }, { "epoch": 4.59, "learning_rate": 2.3901572814205845e-05, "loss": 1.1126, "step": 7655500 }, { "epoch": 4.59, "learning_rate": 2.3899477048576402e-05, "loss": 1.1246, "step": 7656000 }, { "epoch": 4.59, "learning_rate": 2.389737708301584e-05, "loss": 1.0872, "step": 7656500 }, { "epoch": 4.59, "learning_rate": 2.3895277117455276e-05, "loss": 1.072, "step": 7657000 }, { "epoch": 4.59, "learning_rate": 2.3893177151894706e-05, "loss": 1.1222, "step": 7657500 }, { "epoch": 4.59, "learning_rate": 2.3891077186334143e-05, "loss": 1.0944, "step": 7658000 }, { "epoch": 4.59, "learning_rate": 2.38889814207047e-05, "loss": 1.1152, "step": 7658500 }, { "epoch": 4.59, "learning_rate": 2.3886881455144136e-05, "loss": 1.1287, "step": 7659000 }, { "epoch": 4.59, "learning_rate": 2.3884781489583573e-05, "loss": 1.0982, "step": 7659500 }, { "epoch": 4.59, "learning_rate": 2.3882685723954127e-05, "loss": 1.0966, "step": 7660000 }, { "epoch": 4.59, "learning_rate": 2.388058575839356e-05, "loss": 1.0934, "step": 7660500 }, { "epoch": 4.59, "learning_rate": 2.3878485792832997e-05, "loss": 1.1113, "step": 7661000 }, { "epoch": 4.59, "learning_rate": 2.3876385827272434e-05, "loss": 1.1002, "step": 7661500 }, { "epoch": 4.59, "learning_rate": 2.3874285861711867e-05, "loss": 1.1064, "step": 7662000 }, { "epoch": 4.59, "learning_rate": 2.38721858961513e-05, "loss": 1.0966, "step": 7662500 }, { "epoch": 4.59, "learning_rate": 2.3870085930590738e-05, "loss": 1.0796, "step": 7663000 }, { "epoch": 4.59, "learning_rate": 2.3867985965030175e-05, "loss": 1.112, "step": 7663500 }, { "epoch": 4.59, "learning_rate": 2.386589019940073e-05, "loss": 1.0933, "step": 7664000 }, { "epoch": 4.6, "learning_rate": 2.386379023384016e-05, "loss": 1.0954, "step": 7664500 }, { "epoch": 4.6, "learning_rate": 2.38616902682796e-05, "loss": 1.1018, "step": 7665000 }, { "epoch": 4.6, "learning_rate": 2.3859590302719035e-05, "loss": 1.099, "step": 7665500 }, { "epoch": 4.6, "learning_rate": 2.3857490337158472e-05, "loss": 1.1089, "step": 7666000 }, { "epoch": 4.6, "learning_rate": 2.3855390371597906e-05, "loss": 1.1102, "step": 7666500 }, { "epoch": 4.6, "learning_rate": 2.3853290406037342e-05, "loss": 1.0946, "step": 7667000 }, { "epoch": 4.6, "learning_rate": 2.3851190440476776e-05, "loss": 1.0937, "step": 7667500 }, { "epoch": 4.6, "learning_rate": 2.384909047491621e-05, "loss": 1.1065, "step": 7668000 }, { "epoch": 4.6, "learning_rate": 2.3846990509355646e-05, "loss": 1.1072, "step": 7668500 }, { "epoch": 4.6, "learning_rate": 2.3844894743726203e-05, "loss": 1.0872, "step": 7669000 }, { "epoch": 4.6, "learning_rate": 2.384279477816564e-05, "loss": 1.1156, "step": 7669500 }, { "epoch": 4.6, "learning_rate": 2.3840694812605073e-05, "loss": 1.1115, "step": 7670000 }, { "epoch": 4.6, "learning_rate": 2.3838594847044507e-05, "loss": 1.0662, "step": 7670500 }, { "epoch": 4.6, "learning_rate": 2.3836499081415064e-05, "loss": 1.1013, "step": 7671000 }, { "epoch": 4.6, "learning_rate": 2.38343991158545e-05, "loss": 1.0918, "step": 7671500 }, { "epoch": 4.6, "learning_rate": 2.3832299150293934e-05, "loss": 1.0846, "step": 7672000 }, { "epoch": 4.6, "learning_rate": 2.3830199184733368e-05, "loss": 1.112, "step": 7672500 }, { "epoch": 4.6, "learning_rate": 2.3828103419103928e-05, "loss": 1.107, "step": 7673000 }, { "epoch": 4.6, "learning_rate": 2.382600345354336e-05, "loss": 1.1193, "step": 7673500 }, { "epoch": 4.6, "learning_rate": 2.3823903487982798e-05, "loss": 1.0736, "step": 7674000 }, { "epoch": 4.6, "learning_rate": 2.3821803522422232e-05, "loss": 1.0843, "step": 7674500 }, { "epoch": 4.6, "learning_rate": 2.3819703556861665e-05, "loss": 1.0961, "step": 7675000 }, { "epoch": 4.6, "learning_rate": 2.3817603591301102e-05, "loss": 1.0881, "step": 7675500 }, { "epoch": 4.6, "learning_rate": 2.381550782567166e-05, "loss": 1.117, "step": 7676000 }, { "epoch": 4.6, "learning_rate": 2.3813407860111096e-05, "loss": 1.1087, "step": 7676500 }, { "epoch": 4.6, "learning_rate": 2.381130789455053e-05, "loss": 1.0821, "step": 7677000 }, { "epoch": 4.6, "learning_rate": 2.3809212128921086e-05, "loss": 1.0965, "step": 7677500 }, { "epoch": 4.6, "learning_rate": 2.380711216336052e-05, "loss": 1.0925, "step": 7678000 }, { "epoch": 4.6, "learning_rate": 2.3805012197799957e-05, "loss": 1.0834, "step": 7678500 }, { "epoch": 4.6, "learning_rate": 2.3802912232239393e-05, "loss": 1.1042, "step": 7679000 }, { "epoch": 4.6, "learning_rate": 2.3800812266678823e-05, "loss": 1.0787, "step": 7679500 }, { "epoch": 4.6, "learning_rate": 2.379871230111826e-05, "loss": 1.1067, "step": 7680000 }, { "epoch": 4.6, "learning_rate": 2.3796612335557697e-05, "loss": 1.0992, "step": 7680500 }, { "epoch": 4.61, "learning_rate": 2.379451236999713e-05, "loss": 1.0889, "step": 7681000 }, { "epoch": 4.61, "learning_rate": 2.379241660436769e-05, "loss": 1.089, "step": 7681500 }, { "epoch": 4.61, "learning_rate": 2.379031663880712e-05, "loss": 1.0948, "step": 7682000 }, { "epoch": 4.61, "learning_rate": 2.3788216673246558e-05, "loss": 1.1199, "step": 7682500 }, { "epoch": 4.61, "learning_rate": 2.3786116707685995e-05, "loss": 1.1059, "step": 7683000 }, { "epoch": 4.61, "learning_rate": 2.3784016742125428e-05, "loss": 1.071, "step": 7683500 }, { "epoch": 4.61, "learning_rate": 2.3781920976495985e-05, "loss": 1.094, "step": 7684000 }, { "epoch": 4.61, "learning_rate": 2.377982101093542e-05, "loss": 1.1005, "step": 7684500 }, { "epoch": 4.61, "learning_rate": 2.3777721045374855e-05, "loss": 1.0749, "step": 7685000 }, { "epoch": 4.61, "learning_rate": 2.3775621079814292e-05, "loss": 1.1404, "step": 7685500 }, { "epoch": 4.61, "learning_rate": 2.377352531418485e-05, "loss": 1.1056, "step": 7686000 }, { "epoch": 4.61, "learning_rate": 2.377142534862428e-05, "loss": 1.0792, "step": 7686500 }, { "epoch": 4.61, "learning_rate": 2.3769325383063716e-05, "loss": 1.0858, "step": 7687000 }, { "epoch": 4.61, "learning_rate": 2.3767225417503153e-05, "loss": 1.1124, "step": 7687500 }, { "epoch": 4.61, "learning_rate": 2.3765125451942587e-05, "loss": 1.0951, "step": 7688000 }, { "epoch": 4.61, "learning_rate": 2.3763029686313147e-05, "loss": 1.1255, "step": 7688500 }, { "epoch": 4.61, "learning_rate": 2.3760929720752577e-05, "loss": 1.1209, "step": 7689000 }, { "epoch": 4.61, "learning_rate": 2.3758829755192014e-05, "loss": 1.0718, "step": 7689500 }, { "epoch": 4.61, "learning_rate": 2.375672978963145e-05, "loss": 1.1181, "step": 7690000 }, { "epoch": 4.61, "learning_rate": 2.3754634024002008e-05, "loss": 1.1185, "step": 7690500 }, { "epoch": 4.61, "learning_rate": 2.375253405844144e-05, "loss": 1.1074, "step": 7691000 }, { "epoch": 4.61, "learning_rate": 2.3750434092880875e-05, "loss": 1.1004, "step": 7691500 }, { "epoch": 4.61, "learning_rate": 2.374833412732031e-05, "loss": 1.0975, "step": 7692000 }, { "epoch": 4.61, "learning_rate": 2.374623836169087e-05, "loss": 1.1074, "step": 7692500 }, { "epoch": 4.61, "learning_rate": 2.3744138396130305e-05, "loss": 1.0871, "step": 7693000 }, { "epoch": 4.61, "learning_rate": 2.3742038430569735e-05, "loss": 1.0955, "step": 7693500 }, { "epoch": 4.61, "learning_rate": 2.3739938465009172e-05, "loss": 1.1169, "step": 7694000 }, { "epoch": 4.61, "learning_rate": 2.373784269937973e-05, "loss": 1.1148, "step": 7694500 }, { "epoch": 4.61, "learning_rate": 2.3735742733819166e-05, "loss": 1.1137, "step": 7695000 }, { "epoch": 4.61, "learning_rate": 2.3733642768258603e-05, "loss": 1.065, "step": 7695500 }, { "epoch": 4.61, "learning_rate": 2.3731542802698033e-05, "loss": 1.0832, "step": 7696000 }, { "epoch": 4.61, "learning_rate": 2.372944283713747e-05, "loss": 1.0696, "step": 7696500 }, { "epoch": 4.61, "learning_rate": 2.3727347071508027e-05, "loss": 1.1001, "step": 7697000 }, { "epoch": 4.61, "learning_rate": 2.3725247105947463e-05, "loss": 1.0998, "step": 7697500 }, { "epoch": 4.62, "learning_rate": 2.3723147140386897e-05, "loss": 1.0862, "step": 7698000 }, { "epoch": 4.62, "learning_rate": 2.372104717482633e-05, "loss": 1.105, "step": 7698500 }, { "epoch": 4.62, "learning_rate": 2.3718947209265767e-05, "loss": 1.1149, "step": 7699000 }, { "epoch": 4.62, "learning_rate": 2.3716851443636324e-05, "loss": 1.1281, "step": 7699500 }, { "epoch": 4.62, "learning_rate": 2.371475147807576e-05, "loss": 1.1235, "step": 7700000 }, { "epoch": 4.62, "eval_loss": 1.0742263793945312, "eval_runtime": 1107.8327, "eval_samples_per_second": 475.451, "eval_steps_per_second": 79.242, "step": 7700000 }, { "epoch": 4.62, "learning_rate": 2.3712651512515195e-05, "loss": 1.107, "step": 7700500 }, { "epoch": 4.62, "learning_rate": 2.3710551546954628e-05, "loss": 1.0942, "step": 7701000 }, { "epoch": 4.62, "learning_rate": 2.3708451581394065e-05, "loss": 1.0996, "step": 7701500 }, { "epoch": 4.62, "learning_rate": 2.3706355815764622e-05, "loss": 1.1009, "step": 7702000 }, { "epoch": 4.62, "learning_rate": 2.370425585020406e-05, "loss": 1.0983, "step": 7702500 }, { "epoch": 4.62, "learning_rate": 2.370215588464349e-05, "loss": 1.1076, "step": 7703000 }, { "epoch": 4.62, "learning_rate": 2.3700055919082926e-05, "loss": 1.0944, "step": 7703500 }, { "epoch": 4.62, "learning_rate": 2.3697960153453483e-05, "loss": 1.109, "step": 7704000 }, { "epoch": 4.62, "learning_rate": 2.369586018789292e-05, "loss": 1.0914, "step": 7704500 }, { "epoch": 4.62, "learning_rate": 2.3693760222332356e-05, "loss": 1.1143, "step": 7705000 }, { "epoch": 4.62, "learning_rate": 2.3691660256771786e-05, "loss": 1.0804, "step": 7705500 }, { "epoch": 4.62, "learning_rate": 2.3689564491142343e-05, "loss": 1.1056, "step": 7706000 }, { "epoch": 4.62, "learning_rate": 2.368746452558178e-05, "loss": 1.0825, "step": 7706500 }, { "epoch": 4.62, "learning_rate": 2.3685364560021217e-05, "loss": 1.0701, "step": 7707000 }, { "epoch": 4.62, "learning_rate": 2.368326459446065e-05, "loss": 1.1058, "step": 7707500 }, { "epoch": 4.62, "learning_rate": 2.3681168828831207e-05, "loss": 1.1037, "step": 7708000 }, { "epoch": 4.62, "learning_rate": 2.367906886327064e-05, "loss": 1.1141, "step": 7708500 }, { "epoch": 4.62, "learning_rate": 2.3676973097641198e-05, "loss": 1.1344, "step": 7709000 }, { "epoch": 4.62, "learning_rate": 2.3674873132080635e-05, "loss": 1.1121, "step": 7709500 }, { "epoch": 4.62, "learning_rate": 2.3672773166520068e-05, "loss": 1.0875, "step": 7710000 }, { "epoch": 4.62, "learning_rate": 2.36706732009595e-05, "loss": 1.1022, "step": 7710500 }, { "epoch": 4.62, "learning_rate": 2.366857323539894e-05, "loss": 1.0852, "step": 7711000 }, { "epoch": 4.62, "learning_rate": 2.3666473269838375e-05, "loss": 1.0864, "step": 7711500 }, { "epoch": 4.62, "learning_rate": 2.3664373304277812e-05, "loss": 1.1243, "step": 7712000 }, { "epoch": 4.62, "learning_rate": 2.3662273338717246e-05, "loss": 1.0857, "step": 7712500 }, { "epoch": 4.62, "learning_rate": 2.36601775730878e-05, "loss": 1.1199, "step": 7713000 }, { "epoch": 4.62, "learning_rate": 2.3658077607527236e-05, "loss": 1.1077, "step": 7713500 }, { "epoch": 4.62, "learning_rate": 2.3655977641966673e-05, "loss": 1.1182, "step": 7714000 }, { "epoch": 4.63, "learning_rate": 2.3653877676406106e-05, "loss": 1.0874, "step": 7714500 }, { "epoch": 4.63, "learning_rate": 2.3651781910776663e-05, "loss": 1.1044, "step": 7715000 }, { "epoch": 4.63, "learning_rate": 2.3649681945216097e-05, "loss": 1.098, "step": 7715500 }, { "epoch": 4.63, "learning_rate": 2.3647581979655534e-05, "loss": 1.0991, "step": 7716000 }, { "epoch": 4.63, "learning_rate": 2.364548201409497e-05, "loss": 1.0922, "step": 7716500 }, { "epoch": 4.63, "learning_rate": 2.3643382048534404e-05, "loss": 1.0879, "step": 7717000 }, { "epoch": 4.63, "learning_rate": 2.364128628290496e-05, "loss": 1.1028, "step": 7717500 }, { "epoch": 4.63, "learning_rate": 2.3639186317344394e-05, "loss": 1.1196, "step": 7718000 }, { "epoch": 4.63, "learning_rate": 2.363708635178383e-05, "loss": 1.0566, "step": 7718500 }, { "epoch": 4.63, "learning_rate": 2.3634986386223268e-05, "loss": 1.0786, "step": 7719000 }, { "epoch": 4.63, "learning_rate": 2.36328864206627e-05, "loss": 1.1049, "step": 7719500 }, { "epoch": 4.63, "learning_rate": 2.3630790655033255e-05, "loss": 1.096, "step": 7720000 }, { "epoch": 4.63, "learning_rate": 2.3628690689472692e-05, "loss": 1.1126, "step": 7720500 }, { "epoch": 4.63, "learning_rate": 2.362659072391213e-05, "loss": 1.09, "step": 7721000 }, { "epoch": 4.63, "learning_rate": 2.3624490758351562e-05, "loss": 1.0929, "step": 7721500 }, { "epoch": 4.63, "learning_rate": 2.362239499272212e-05, "loss": 1.133, "step": 7722000 }, { "epoch": 4.63, "learning_rate": 2.3620295027161553e-05, "loss": 1.1035, "step": 7722500 }, { "epoch": 4.63, "learning_rate": 2.361819506160099e-05, "loss": 1.1105, "step": 7723000 }, { "epoch": 4.63, "learning_rate": 2.3616095096040426e-05, "loss": 1.1105, "step": 7723500 }, { "epoch": 4.63, "learning_rate": 2.361399513047986e-05, "loss": 1.1194, "step": 7724000 }, { "epoch": 4.63, "learning_rate": 2.3611899364850417e-05, "loss": 1.1006, "step": 7724500 }, { "epoch": 4.63, "learning_rate": 2.360979939928985e-05, "loss": 1.0837, "step": 7725000 }, { "epoch": 4.63, "learning_rate": 2.3607699433729287e-05, "loss": 1.1068, "step": 7725500 }, { "epoch": 4.63, "learning_rate": 2.3605599468168724e-05, "loss": 1.1314, "step": 7726000 }, { "epoch": 4.63, "learning_rate": 2.3603499502608157e-05, "loss": 1.0977, "step": 7726500 }, { "epoch": 4.63, "learning_rate": 2.3601407936909834e-05, "loss": 1.1217, "step": 7727000 }, { "epoch": 4.63, "learning_rate": 2.359930797134927e-05, "loss": 1.1109, "step": 7727500 }, { "epoch": 4.63, "learning_rate": 2.3597208005788705e-05, "loss": 1.0711, "step": 7728000 }, { "epoch": 4.63, "learning_rate": 2.359510804022814e-05, "loss": 1.0763, "step": 7728500 }, { "epoch": 4.63, "learning_rate": 2.3593008074667575e-05, "loss": 1.0967, "step": 7729000 }, { "epoch": 4.63, "learning_rate": 2.3590912309038132e-05, "loss": 1.1026, "step": 7729500 }, { "epoch": 4.63, "learning_rate": 2.3588812343477565e-05, "loss": 1.073, "step": 7730000 }, { "epoch": 4.63, "learning_rate": 2.3586712377917002e-05, "loss": 1.1141, "step": 7730500 }, { "epoch": 4.64, "learning_rate": 2.358461241235644e-05, "loss": 1.1, "step": 7731000 }, { "epoch": 4.64, "learning_rate": 2.3582512446795873e-05, "loss": 1.0817, "step": 7731500 }, { "epoch": 4.64, "learning_rate": 2.3580412481235306e-05, "loss": 1.1017, "step": 7732000 }, { "epoch": 4.64, "learning_rate": 2.3578312515674743e-05, "loss": 1.1101, "step": 7732500 }, { "epoch": 4.64, "learning_rate": 2.357621255011418e-05, "loss": 1.0934, "step": 7733000 }, { "epoch": 4.64, "learning_rate": 2.3574112584553613e-05, "loss": 1.0851, "step": 7733500 }, { "epoch": 4.64, "learning_rate": 2.3572016818924167e-05, "loss": 1.1208, "step": 7734000 }, { "epoch": 4.64, "learning_rate": 2.3569916853363604e-05, "loss": 1.1371, "step": 7734500 }, { "epoch": 4.64, "learning_rate": 2.356781688780304e-05, "loss": 1.1125, "step": 7735000 }, { "epoch": 4.64, "learning_rate": 2.3565716922242474e-05, "loss": 1.0814, "step": 7735500 }, { "epoch": 4.64, "learning_rate": 2.356362115661303e-05, "loss": 1.0998, "step": 7736000 }, { "epoch": 4.64, "learning_rate": 2.3561521191052464e-05, "loss": 1.1076, "step": 7736500 }, { "epoch": 4.64, "learning_rate": 2.35594212254919e-05, "loss": 1.1151, "step": 7737000 }, { "epoch": 4.64, "learning_rate": 2.3557321259931338e-05, "loss": 1.104, "step": 7737500 }, { "epoch": 4.64, "learning_rate": 2.355522129437077e-05, "loss": 1.1038, "step": 7738000 }, { "epoch": 4.64, "learning_rate": 2.355312132881021e-05, "loss": 1.1034, "step": 7738500 }, { "epoch": 4.64, "learning_rate": 2.3551021363249642e-05, "loss": 1.1056, "step": 7739000 }, { "epoch": 4.64, "learning_rate": 2.35489255976202e-05, "loss": 1.1084, "step": 7739500 }, { "epoch": 4.64, "learning_rate": 2.3546825632059636e-05, "loss": 1.0766, "step": 7740000 }, { "epoch": 4.64, "learning_rate": 2.354472566649907e-05, "loss": 1.0962, "step": 7740500 }, { "epoch": 4.64, "learning_rate": 2.3542625700938506e-05, "loss": 1.1266, "step": 7741000 }, { "epoch": 4.64, "learning_rate": 2.354052993530906e-05, "loss": 1.0921, "step": 7741500 }, { "epoch": 4.64, "learning_rate": 2.3538434169679616e-05, "loss": 1.1217, "step": 7742000 }, { "epoch": 4.64, "learning_rate": 2.3536334204119053e-05, "loss": 1.0665, "step": 7742500 }, { "epoch": 4.64, "learning_rate": 2.353423423855849e-05, "loss": 1.1176, "step": 7743000 }, { "epoch": 4.64, "learning_rate": 2.353213427299792e-05, "loss": 1.1188, "step": 7743500 }, { "epoch": 4.64, "learning_rate": 2.3530034307437357e-05, "loss": 1.0633, "step": 7744000 }, { "epoch": 4.64, "learning_rate": 2.3527934341876794e-05, "loss": 1.1089, "step": 7744500 }, { "epoch": 4.64, "learning_rate": 2.3525834376316227e-05, "loss": 1.0982, "step": 7745000 }, { "epoch": 4.64, "learning_rate": 2.3523734410755664e-05, "loss": 1.1132, "step": 7745500 }, { "epoch": 4.64, "learning_rate": 2.3521638645126218e-05, "loss": 1.0915, "step": 7746000 }, { "epoch": 4.64, "learning_rate": 2.3519542879496775e-05, "loss": 1.1118, "step": 7746500 }, { "epoch": 4.64, "learning_rate": 2.351744291393621e-05, "loss": 1.0535, "step": 7747000 }, { "epoch": 4.64, "learning_rate": 2.351534294837565e-05, "loss": 1.0695, "step": 7747500 }, { "epoch": 4.65, "learning_rate": 2.351324298281508e-05, "loss": 1.0902, "step": 7748000 }, { "epoch": 4.65, "learning_rate": 2.3511143017254515e-05, "loss": 1.0862, "step": 7748500 }, { "epoch": 4.65, "learning_rate": 2.3509043051693952e-05, "loss": 1.0994, "step": 7749000 }, { "epoch": 4.65, "learning_rate": 2.3506943086133386e-05, "loss": 1.0921, "step": 7749500 }, { "epoch": 4.65, "learning_rate": 2.3504843120572823e-05, "loss": 1.0997, "step": 7750000 }, { "epoch": 4.65, "learning_rate": 2.350274315501226e-05, "loss": 1.1014, "step": 7750500 }, { "epoch": 4.65, "learning_rate": 2.3500643189451693e-05, "loss": 1.1046, "step": 7751000 }, { "epoch": 4.65, "learning_rate": 2.349854742382225e-05, "loss": 1.1123, "step": 7751500 }, { "epoch": 4.65, "learning_rate": 2.3496447458261683e-05, "loss": 1.0987, "step": 7752000 }, { "epoch": 4.65, "learning_rate": 2.349434749270112e-05, "loss": 1.0887, "step": 7752500 }, { "epoch": 4.65, "learning_rate": 2.3492247527140557e-05, "loss": 1.1014, "step": 7753000 }, { "epoch": 4.65, "learning_rate": 2.349015176151111e-05, "loss": 1.089, "step": 7753500 }, { "epoch": 4.65, "learning_rate": 2.3488051795950547e-05, "loss": 1.1011, "step": 7754000 }, { "epoch": 4.65, "learning_rate": 2.348595183038998e-05, "loss": 1.1018, "step": 7754500 }, { "epoch": 4.65, "learning_rate": 2.3483851864829418e-05, "loss": 1.1081, "step": 7755000 }, { "epoch": 4.65, "learning_rate": 2.348175189926885e-05, "loss": 1.096, "step": 7755500 }, { "epoch": 4.65, "learning_rate": 2.3479656133639408e-05, "loss": 1.1115, "step": 7756000 }, { "epoch": 4.65, "learning_rate": 2.3477556168078845e-05, "loss": 1.0748, "step": 7756500 }, { "epoch": 4.65, "learning_rate": 2.347545620251828e-05, "loss": 1.1013, "step": 7757000 }, { "epoch": 4.65, "learning_rate": 2.3473356236957715e-05, "loss": 1.1052, "step": 7757500 }, { "epoch": 4.65, "learning_rate": 2.347125627139715e-05, "loss": 1.1033, "step": 7758000 }, { "epoch": 4.65, "learning_rate": 2.3469160505767706e-05, "loss": 1.1068, "step": 7758500 }, { "epoch": 4.65, "learning_rate": 2.346706054020714e-05, "loss": 1.0854, "step": 7759000 }, { "epoch": 4.65, "learning_rate": 2.3464960574646576e-05, "loss": 1.0882, "step": 7759500 }, { "epoch": 4.65, "learning_rate": 2.3462860609086013e-05, "loss": 1.0838, "step": 7760000 }, { "epoch": 4.65, "learning_rate": 2.3460760643525446e-05, "loss": 1.0753, "step": 7760500 }, { "epoch": 4.65, "learning_rate": 2.3458664877896003e-05, "loss": 1.1202, "step": 7761000 }, { "epoch": 4.65, "learning_rate": 2.3456564912335437e-05, "loss": 1.1069, "step": 7761500 }, { "epoch": 4.65, "learning_rate": 2.3454464946774874e-05, "loss": 1.1012, "step": 7762000 }, { "epoch": 4.65, "learning_rate": 2.345236498121431e-05, "loss": 1.1181, "step": 7762500 }, { "epoch": 4.65, "learning_rate": 2.3450273415515984e-05, "loss": 1.092, "step": 7763000 }, { "epoch": 4.65, "learning_rate": 2.344817344995542e-05, "loss": 1.1371, "step": 7763500 }, { "epoch": 4.65, "learning_rate": 2.3446073484394858e-05, "loss": 1.1011, "step": 7764000 }, { "epoch": 4.66, "learning_rate": 2.344397351883429e-05, "loss": 1.0994, "step": 7764500 }, { "epoch": 4.66, "learning_rate": 2.3441873553273725e-05, "loss": 1.0915, "step": 7765000 }, { "epoch": 4.66, "learning_rate": 2.343977358771316e-05, "loss": 1.128, "step": 7765500 }, { "epoch": 4.66, "learning_rate": 2.343767782208372e-05, "loss": 1.1048, "step": 7766000 }, { "epoch": 4.66, "learning_rate": 2.3435577856523155e-05, "loss": 1.0974, "step": 7766500 }, { "epoch": 4.66, "learning_rate": 2.3433477890962585e-05, "loss": 1.1101, "step": 7767000 }, { "epoch": 4.66, "learning_rate": 2.3431377925402022e-05, "loss": 1.1102, "step": 7767500 }, { "epoch": 4.66, "learning_rate": 2.342927795984146e-05, "loss": 1.1088, "step": 7768000 }, { "epoch": 4.66, "learning_rate": 2.3427177994280893e-05, "loss": 1.0915, "step": 7768500 }, { "epoch": 4.66, "learning_rate": 2.342507802872033e-05, "loss": 1.1053, "step": 7769000 }, { "epoch": 4.66, "learning_rate": 2.3422978063159766e-05, "loss": 1.1144, "step": 7769500 }, { "epoch": 4.66, "learning_rate": 2.342088229753032e-05, "loss": 1.1015, "step": 7770000 }, { "epoch": 4.66, "learning_rate": 2.3418782331969757e-05, "loss": 1.0935, "step": 7770500 }, { "epoch": 4.66, "learning_rate": 2.341668236640919e-05, "loss": 1.0877, "step": 7771000 }, { "epoch": 4.66, "learning_rate": 2.3414582400848627e-05, "loss": 1.1105, "step": 7771500 }, { "epoch": 4.66, "learning_rate": 2.341248663521918e-05, "loss": 1.1163, "step": 7772000 }, { "epoch": 4.66, "learning_rate": 2.3410386669658617e-05, "loss": 1.0767, "step": 7772500 }, { "epoch": 4.66, "learning_rate": 2.340828670409805e-05, "loss": 1.0923, "step": 7773000 }, { "epoch": 4.66, "learning_rate": 2.3406186738537488e-05, "loss": 1.1018, "step": 7773500 }, { "epoch": 4.66, "learning_rate": 2.3404086772976925e-05, "loss": 1.1022, "step": 7774000 }, { "epoch": 4.66, "learning_rate": 2.3401991007347478e-05, "loss": 1.1133, "step": 7774500 }, { "epoch": 4.66, "learning_rate": 2.3399891041786915e-05, "loss": 1.1084, "step": 7775000 }, { "epoch": 4.66, "learning_rate": 2.339779107622635e-05, "loss": 1.0933, "step": 7775500 }, { "epoch": 4.66, "learning_rate": 2.3395691110665785e-05, "loss": 1.0815, "step": 7776000 }, { "epoch": 4.66, "learning_rate": 2.339359534503634e-05, "loss": 1.1086, "step": 7776500 }, { "epoch": 4.66, "learning_rate": 2.3391495379475776e-05, "loss": 1.108, "step": 7777000 }, { "epoch": 4.66, "learning_rate": 2.3389395413915213e-05, "loss": 1.1067, "step": 7777500 }, { "epoch": 4.66, "learning_rate": 2.338729964828577e-05, "loss": 1.1222, "step": 7778000 }, { "epoch": 4.66, "learning_rate": 2.3385199682725203e-05, "loss": 1.1028, "step": 7778500 }, { "epoch": 4.66, "learning_rate": 2.3383099717164636e-05, "loss": 1.0836, "step": 7779000 }, { "epoch": 4.66, "learning_rate": 2.3380999751604073e-05, "loss": 1.08, "step": 7779500 }, { "epoch": 4.66, "learning_rate": 2.3378899786043507e-05, "loss": 1.116, "step": 7780000 }, { "epoch": 4.66, "learning_rate": 2.3376799820482944e-05, "loss": 1.0752, "step": 7780500 }, { "epoch": 4.67, "learning_rate": 2.337469985492238e-05, "loss": 1.112, "step": 7781000 }, { "epoch": 4.67, "learning_rate": 2.3372599889361814e-05, "loss": 1.1124, "step": 7781500 }, { "epoch": 4.67, "learning_rate": 2.3370499923801247e-05, "loss": 1.1059, "step": 7782000 }, { "epoch": 4.67, "learning_rate": 2.3368404158171804e-05, "loss": 1.0877, "step": 7782500 }, { "epoch": 4.67, "learning_rate": 2.336630419261124e-05, "loss": 1.0937, "step": 7783000 }, { "epoch": 4.67, "learning_rate": 2.3364204227050678e-05, "loss": 1.0958, "step": 7783500 }, { "epoch": 4.67, "learning_rate": 2.336210426149011e-05, "loss": 1.1025, "step": 7784000 }, { "epoch": 4.67, "learning_rate": 2.3360004295929545e-05, "loss": 1.0849, "step": 7784500 }, { "epoch": 4.67, "learning_rate": 2.3357908530300102e-05, "loss": 1.1066, "step": 7785000 }, { "epoch": 4.67, "learning_rate": 2.335580856473954e-05, "loss": 1.1063, "step": 7785500 }, { "epoch": 4.67, "learning_rate": 2.3353708599178976e-05, "loss": 1.1081, "step": 7786000 }, { "epoch": 4.67, "learning_rate": 2.3351608633618406e-05, "loss": 1.1038, "step": 7786500 }, { "epoch": 4.67, "learning_rate": 2.3349508668057843e-05, "loss": 1.0991, "step": 7787000 }, { "epoch": 4.67, "learning_rate": 2.33474129024284e-05, "loss": 1.1048, "step": 7787500 }, { "epoch": 4.67, "learning_rate": 2.3345312936867836e-05, "loss": 1.114, "step": 7788000 }, { "epoch": 4.67, "learning_rate": 2.334321297130727e-05, "loss": 1.0909, "step": 7788500 }, { "epoch": 4.67, "learning_rate": 2.3341113005746703e-05, "loss": 1.0969, "step": 7789000 }, { "epoch": 4.67, "learning_rate": 2.333901304018614e-05, "loss": 1.0743, "step": 7789500 }, { "epoch": 4.67, "learning_rate": 2.3336917274556697e-05, "loss": 1.0909, "step": 7790000 }, { "epoch": 4.67, "learning_rate": 2.3334817308996134e-05, "loss": 1.1108, "step": 7790500 }, { "epoch": 4.67, "learning_rate": 2.3332717343435567e-05, "loss": 1.087, "step": 7791000 }, { "epoch": 4.67, "learning_rate": 2.3330617377875e-05, "loss": 1.0918, "step": 7791500 }, { "epoch": 4.67, "learning_rate": 2.3328521612245558e-05, "loss": 1.1109, "step": 7792000 }, { "epoch": 4.67, "learning_rate": 2.3326425846616115e-05, "loss": 1.0906, "step": 7792500 }, { "epoch": 4.67, "learning_rate": 2.332432588105555e-05, "loss": 1.1065, "step": 7793000 }, { "epoch": 4.67, "learning_rate": 2.3322225915494985e-05, "loss": 1.1029, "step": 7793500 }, { "epoch": 4.67, "learning_rate": 2.332012594993442e-05, "loss": 1.1019, "step": 7794000 }, { "epoch": 4.67, "learning_rate": 2.3318025984373855e-05, "loss": 1.1024, "step": 7794500 }, { "epoch": 4.67, "learning_rate": 2.3315926018813292e-05, "loss": 1.0885, "step": 7795000 }, { "epoch": 4.67, "learning_rate": 2.331382605325273e-05, "loss": 1.1206, "step": 7795500 }, { "epoch": 4.67, "learning_rate": 2.331172608769216e-05, "loss": 1.1094, "step": 7796000 }, { "epoch": 4.67, "learning_rate": 2.3309626122131596e-05, "loss": 1.0872, "step": 7796500 }, { "epoch": 4.67, "learning_rate": 2.3307526156571033e-05, "loss": 1.0838, "step": 7797000 }, { "epoch": 4.67, "learning_rate": 2.3305426191010466e-05, "loss": 1.1017, "step": 7797500 }, { "epoch": 4.68, "learning_rate": 2.3303326225449903e-05, "loss": 1.1119, "step": 7798000 }, { "epoch": 4.68, "learning_rate": 2.3301230459820457e-05, "loss": 1.0664, "step": 7798500 }, { "epoch": 4.68, "learning_rate": 2.3299134694191014e-05, "loss": 1.1187, "step": 7799000 }, { "epoch": 4.68, "learning_rate": 2.329703472863045e-05, "loss": 1.1178, "step": 7799500 }, { "epoch": 4.68, "learning_rate": 2.3294934763069887e-05, "loss": 1.1132, "step": 7800000 }, { "epoch": 4.68, "eval_loss": 1.069945216178894, "eval_runtime": 1102.5508, "eval_samples_per_second": 477.729, "eval_steps_per_second": 79.622, "step": 7800000 }, { "epoch": 4.68, "learning_rate": 2.329283479750932e-05, "loss": 1.0898, "step": 7800500 }, { "epoch": 4.68, "learning_rate": 2.3290739031879874e-05, "loss": 1.0913, "step": 7801000 }, { "epoch": 4.68, "learning_rate": 2.328863906631931e-05, "loss": 1.0606, "step": 7801500 }, { "epoch": 4.68, "learning_rate": 2.3286539100758748e-05, "loss": 1.0769, "step": 7802000 }, { "epoch": 4.68, "learning_rate": 2.3284443335129305e-05, "loss": 1.0986, "step": 7802500 }, { "epoch": 4.68, "learning_rate": 2.328234336956874e-05, "loss": 1.0797, "step": 7803000 }, { "epoch": 4.68, "learning_rate": 2.3280243404008172e-05, "loss": 1.116, "step": 7803500 }, { "epoch": 4.68, "learning_rate": 2.327814343844761e-05, "loss": 1.0902, "step": 7804000 }, { "epoch": 4.68, "learning_rate": 2.3276043472887046e-05, "loss": 1.087, "step": 7804500 }, { "epoch": 4.68, "learning_rate": 2.327394350732648e-05, "loss": 1.1168, "step": 7805000 }, { "epoch": 4.68, "learning_rate": 2.3271843541765913e-05, "loss": 1.117, "step": 7805500 }, { "epoch": 4.68, "learning_rate": 2.326974357620535e-05, "loss": 1.1115, "step": 7806000 }, { "epoch": 4.68, "learning_rate": 2.3267647810575906e-05, "loss": 1.095, "step": 7806500 }, { "epoch": 4.68, "learning_rate": 2.3265547845015343e-05, "loss": 1.1224, "step": 7807000 }, { "epoch": 4.68, "learning_rate": 2.3263447879454777e-05, "loss": 1.1193, "step": 7807500 }, { "epoch": 4.68, "learning_rate": 2.326134791389421e-05, "loss": 1.1108, "step": 7808000 }, { "epoch": 4.68, "learning_rate": 2.3259247948333647e-05, "loss": 1.1047, "step": 7808500 }, { "epoch": 4.68, "learning_rate": 2.325714798277308e-05, "loss": 1.1293, "step": 7809000 }, { "epoch": 4.68, "learning_rate": 2.3255048017212517e-05, "loss": 1.0908, "step": 7809500 }, { "epoch": 4.68, "learning_rate": 2.3252952251583074e-05, "loss": 1.0694, "step": 7810000 }, { "epoch": 4.68, "learning_rate": 2.3250852286022508e-05, "loss": 1.095, "step": 7810500 }, { "epoch": 4.68, "learning_rate": 2.3248752320461945e-05, "loss": 1.1226, "step": 7811000 }, { "epoch": 4.68, "learning_rate": 2.3246652354901378e-05, "loss": 1.0723, "step": 7811500 }, { "epoch": 4.68, "learning_rate": 2.3244556589271935e-05, "loss": 1.1044, "step": 7812000 }, { "epoch": 4.68, "learning_rate": 2.3242456623711372e-05, "loss": 1.0751, "step": 7812500 }, { "epoch": 4.68, "learning_rate": 2.3240356658150805e-05, "loss": 1.0957, "step": 7813000 }, { "epoch": 4.68, "learning_rate": 2.3238256692590242e-05, "loss": 1.102, "step": 7813500 }, { "epoch": 4.68, "learning_rate": 2.32361609269608e-05, "loss": 1.1081, "step": 7814000 }, { "epoch": 4.69, "learning_rate": 2.3234060961400233e-05, "loss": 1.0671, "step": 7814500 }, { "epoch": 4.69, "learning_rate": 2.323196099583967e-05, "loss": 1.0773, "step": 7815000 }, { "epoch": 4.69, "learning_rate": 2.3229865230210223e-05, "loss": 1.093, "step": 7815500 }, { "epoch": 4.69, "learning_rate": 2.322776526464966e-05, "loss": 1.0839, "step": 7816000 }, { "epoch": 4.69, "learning_rate": 2.3225665299089097e-05, "loss": 1.0982, "step": 7816500 }, { "epoch": 4.69, "learning_rate": 2.322356533352853e-05, "loss": 1.1077, "step": 7817000 }, { "epoch": 4.69, "learning_rate": 2.3221465367967964e-05, "loss": 1.0843, "step": 7817500 }, { "epoch": 4.69, "learning_rate": 2.32193654024074e-05, "loss": 1.112, "step": 7818000 }, { "epoch": 4.69, "learning_rate": 2.3217265436846834e-05, "loss": 1.0835, "step": 7818500 }, { "epoch": 4.69, "learning_rate": 2.321516547128627e-05, "loss": 1.113, "step": 7819000 }, { "epoch": 4.69, "learning_rate": 2.3213065505725708e-05, "loss": 1.1277, "step": 7819500 }, { "epoch": 4.69, "learning_rate": 2.321096554016514e-05, "loss": 1.0933, "step": 7820000 }, { "epoch": 4.69, "learning_rate": 2.3208865574604575e-05, "loss": 1.0944, "step": 7820500 }, { "epoch": 4.69, "learning_rate": 2.320676560904401e-05, "loss": 1.0954, "step": 7821000 }, { "epoch": 4.69, "learning_rate": 2.3204665643483445e-05, "loss": 1.0768, "step": 7821500 }, { "epoch": 4.69, "learning_rate": 2.320256567792288e-05, "loss": 1.0982, "step": 7822000 }, { "epoch": 4.69, "learning_rate": 2.320046991229344e-05, "loss": 1.1071, "step": 7822500 }, { "epoch": 4.69, "learning_rate": 2.3198369946732872e-05, "loss": 1.0855, "step": 7823000 }, { "epoch": 4.69, "learning_rate": 2.319626998117231e-05, "loss": 1.1133, "step": 7823500 }, { "epoch": 4.69, "learning_rate": 2.3194170015611742e-05, "loss": 1.0733, "step": 7824000 }, { "epoch": 4.69, "learning_rate": 2.31920742499823e-05, "loss": 1.0916, "step": 7824500 }, { "epoch": 4.69, "learning_rate": 2.3189974284421733e-05, "loss": 1.1108, "step": 7825000 }, { "epoch": 4.69, "learning_rate": 2.318787431886117e-05, "loss": 1.0844, "step": 7825500 }, { "epoch": 4.69, "learning_rate": 2.3185774353300607e-05, "loss": 1.0787, "step": 7826000 }, { "epoch": 4.69, "learning_rate": 2.318367438774004e-05, "loss": 1.0802, "step": 7826500 }, { "epoch": 4.69, "learning_rate": 2.3181578622110597e-05, "loss": 1.0959, "step": 7827000 }, { "epoch": 4.69, "learning_rate": 2.317947865655003e-05, "loss": 1.1133, "step": 7827500 }, { "epoch": 4.69, "learning_rate": 2.3177378690989467e-05, "loss": 1.1144, "step": 7828000 }, { "epoch": 4.69, "learning_rate": 2.3175278725428904e-05, "loss": 1.0862, "step": 7828500 }, { "epoch": 4.69, "learning_rate": 2.317318295979946e-05, "loss": 1.0918, "step": 7829000 }, { "epoch": 4.69, "learning_rate": 2.3171082994238895e-05, "loss": 1.1053, "step": 7829500 }, { "epoch": 4.69, "learning_rate": 2.3168983028678328e-05, "loss": 1.0743, "step": 7830000 }, { "epoch": 4.69, "learning_rate": 2.3166883063117765e-05, "loss": 1.0979, "step": 7830500 }, { "epoch": 4.7, "learning_rate": 2.3164783097557198e-05, "loss": 1.1216, "step": 7831000 }, { "epoch": 4.7, "learning_rate": 2.3162683131996635e-05, "loss": 1.1136, "step": 7831500 }, { "epoch": 4.7, "learning_rate": 2.3160587366367192e-05, "loss": 1.086, "step": 7832000 }, { "epoch": 4.7, "learning_rate": 2.3158487400806626e-05, "loss": 1.1217, "step": 7832500 }, { "epoch": 4.7, "learning_rate": 2.3156387435246062e-05, "loss": 1.081, "step": 7833000 }, { "epoch": 4.7, "learning_rate": 2.3154287469685496e-05, "loss": 1.0924, "step": 7833500 }, { "epoch": 4.7, "learning_rate": 2.3152191704056053e-05, "loss": 1.1041, "step": 7834000 }, { "epoch": 4.7, "learning_rate": 2.3150091738495486e-05, "loss": 1.1054, "step": 7834500 }, { "epoch": 4.7, "learning_rate": 2.3147991772934923e-05, "loss": 1.1053, "step": 7835000 }, { "epoch": 4.7, "learning_rate": 2.314589180737436e-05, "loss": 1.0919, "step": 7835500 }, { "epoch": 4.7, "learning_rate": 2.3143796041744917e-05, "loss": 1.0865, "step": 7836000 }, { "epoch": 4.7, "learning_rate": 2.314169607618435e-05, "loss": 1.1106, "step": 7836500 }, { "epoch": 4.7, "learning_rate": 2.3139596110623784e-05, "loss": 1.0942, "step": 7837000 }, { "epoch": 4.7, "learning_rate": 2.313749614506322e-05, "loss": 1.109, "step": 7837500 }, { "epoch": 4.7, "learning_rate": 2.3135396179502654e-05, "loss": 1.1093, "step": 7838000 }, { "epoch": 4.7, "learning_rate": 2.3133300413873215e-05, "loss": 1.0909, "step": 7838500 }, { "epoch": 4.7, "learning_rate": 2.3131200448312648e-05, "loss": 1.1015, "step": 7839000 }, { "epoch": 4.7, "learning_rate": 2.312910048275208e-05, "loss": 1.101, "step": 7839500 }, { "epoch": 4.7, "learning_rate": 2.3127000517191518e-05, "loss": 1.1144, "step": 7840000 }, { "epoch": 4.7, "learning_rate": 2.3124904751562075e-05, "loss": 1.1133, "step": 7840500 }, { "epoch": 4.7, "learning_rate": 2.312280478600151e-05, "loss": 1.0728, "step": 7841000 }, { "epoch": 4.7, "learning_rate": 2.3120704820440946e-05, "loss": 1.1133, "step": 7841500 }, { "epoch": 4.7, "learning_rate": 2.311860485488038e-05, "loss": 1.0902, "step": 7842000 }, { "epoch": 4.7, "learning_rate": 2.3116509089250936e-05, "loss": 1.0797, "step": 7842500 }, { "epoch": 4.7, "learning_rate": 2.3114409123690373e-05, "loss": 1.0949, "step": 7843000 }, { "epoch": 4.7, "learning_rate": 2.3112309158129806e-05, "loss": 1.068, "step": 7843500 }, { "epoch": 4.7, "learning_rate": 2.311020919256924e-05, "loss": 1.1045, "step": 7844000 }, { "epoch": 4.7, "learning_rate": 2.3108109227008677e-05, "loss": 1.1103, "step": 7844500 }, { "epoch": 4.7, "learning_rate": 2.3106013461379234e-05, "loss": 1.1023, "step": 7845000 }, { "epoch": 4.7, "learning_rate": 2.310391349581867e-05, "loss": 1.0808, "step": 7845500 }, { "epoch": 4.7, "learning_rate": 2.3101813530258104e-05, "loss": 1.0947, "step": 7846000 }, { "epoch": 4.7, "learning_rate": 2.3099713564697537e-05, "loss": 1.0832, "step": 7846500 }, { "epoch": 4.7, "learning_rate": 2.3097617799068094e-05, "loss": 1.104, "step": 7847000 }, { "epoch": 4.7, "learning_rate": 2.309551783350753e-05, "loss": 1.102, "step": 7847500 }, { "epoch": 4.71, "learning_rate": 2.3093417867946965e-05, "loss": 1.0923, "step": 7848000 }, { "epoch": 4.71, "learning_rate": 2.30913179023864e-05, "loss": 1.0813, "step": 7848500 }, { "epoch": 4.71, "learning_rate": 2.3089222136756955e-05, "loss": 1.0966, "step": 7849000 }, { "epoch": 4.71, "learning_rate": 2.3087122171196392e-05, "loss": 1.0718, "step": 7849500 }, { "epoch": 4.71, "learning_rate": 2.308502220563583e-05, "loss": 1.1009, "step": 7850000 }, { "epoch": 4.71, "learning_rate": 2.3082922240075262e-05, "loss": 1.1032, "step": 7850500 }, { "epoch": 4.71, "learning_rate": 2.30808222745147e-05, "loss": 1.1121, "step": 7851000 }, { "epoch": 4.71, "learning_rate": 2.3078722308954132e-05, "loss": 1.1063, "step": 7851500 }, { "epoch": 4.71, "learning_rate": 2.3076622343393566e-05, "loss": 1.102, "step": 7852000 }, { "epoch": 4.71, "learning_rate": 2.3074522377833003e-05, "loss": 1.0842, "step": 7852500 }, { "epoch": 4.71, "learning_rate": 2.307242661220356e-05, "loss": 1.0787, "step": 7853000 }, { "epoch": 4.71, "learning_rate": 2.3070326646642997e-05, "loss": 1.0838, "step": 7853500 }, { "epoch": 4.71, "learning_rate": 2.306822668108243e-05, "loss": 1.0896, "step": 7854000 }, { "epoch": 4.71, "learning_rate": 2.3066126715521863e-05, "loss": 1.0963, "step": 7854500 }, { "epoch": 4.71, "learning_rate": 2.30640267499613e-05, "loss": 1.1017, "step": 7855000 }, { "epoch": 4.71, "learning_rate": 2.3061930984331857e-05, "loss": 1.0925, "step": 7855500 }, { "epoch": 4.71, "learning_rate": 2.305983101877129e-05, "loss": 1.0872, "step": 7856000 }, { "epoch": 4.71, "learning_rate": 2.3057731053210728e-05, "loss": 1.1192, "step": 7856500 }, { "epoch": 4.71, "learning_rate": 2.305563108765016e-05, "loss": 1.0918, "step": 7857000 }, { "epoch": 4.71, "learning_rate": 2.3053535322020718e-05, "loss": 1.1229, "step": 7857500 }, { "epoch": 4.71, "learning_rate": 2.3051435356460155e-05, "loss": 1.0879, "step": 7858000 }, { "epoch": 4.71, "learning_rate": 2.304933539089959e-05, "loss": 1.0876, "step": 7858500 }, { "epoch": 4.71, "learning_rate": 2.3047235425339022e-05, "loss": 1.0742, "step": 7859000 }, { "epoch": 4.71, "learning_rate": 2.304513545977846e-05, "loss": 1.0897, "step": 7859500 }, { "epoch": 4.71, "learning_rate": 2.3043035494217895e-05, "loss": 1.0635, "step": 7860000 }, { "epoch": 4.71, "learning_rate": 2.304093552865733e-05, "loss": 1.0995, "step": 7860500 }, { "epoch": 4.71, "learning_rate": 2.3038835563096766e-05, "loss": 1.1026, "step": 7861000 }, { "epoch": 4.71, "learning_rate": 2.303673979746732e-05, "loss": 1.0956, "step": 7861500 }, { "epoch": 4.71, "learning_rate": 2.3034639831906756e-05, "loss": 1.076, "step": 7862000 }, { "epoch": 4.71, "learning_rate": 2.3032539866346193e-05, "loss": 1.1067, "step": 7862500 }, { "epoch": 4.71, "learning_rate": 2.3030439900785627e-05, "loss": 1.0939, "step": 7863000 }, { "epoch": 4.71, "learning_rate": 2.3028344135156183e-05, "loss": 1.0732, "step": 7863500 }, { "epoch": 4.71, "learning_rate": 2.302624836952674e-05, "loss": 1.0891, "step": 7864000 }, { "epoch": 4.72, "learning_rate": 2.3024148403966174e-05, "loss": 1.111, "step": 7864500 }, { "epoch": 4.72, "learning_rate": 2.302204843840561e-05, "loss": 1.091, "step": 7865000 }, { "epoch": 4.72, "learning_rate": 2.3019948472845044e-05, "loss": 1.0763, "step": 7865500 }, { "epoch": 4.72, "learning_rate": 2.3017848507284478e-05, "loss": 1.1218, "step": 7866000 }, { "epoch": 4.72, "learning_rate": 2.3015748541723915e-05, "loss": 1.0819, "step": 7866500 }, { "epoch": 4.72, "learning_rate": 2.301364857616335e-05, "loss": 1.0858, "step": 7867000 }, { "epoch": 4.72, "learning_rate": 2.3011548610602788e-05, "loss": 1.1106, "step": 7867500 }, { "epoch": 4.72, "learning_rate": 2.3009452844973342e-05, "loss": 1.1212, "step": 7868000 }, { "epoch": 4.72, "learning_rate": 2.3007352879412775e-05, "loss": 1.0921, "step": 7868500 }, { "epoch": 4.72, "learning_rate": 2.3005252913852212e-05, "loss": 1.1165, "step": 7869000 }, { "epoch": 4.72, "learning_rate": 2.300315294829165e-05, "loss": 1.1072, "step": 7869500 }, { "epoch": 4.72, "learning_rate": 2.3001052982731082e-05, "loss": 1.111, "step": 7870000 }, { "epoch": 4.72, "learning_rate": 2.299895301717052e-05, "loss": 1.0833, "step": 7870500 }, { "epoch": 4.72, "learning_rate": 2.2996857251541073e-05, "loss": 1.0979, "step": 7871000 }, { "epoch": 4.72, "learning_rate": 2.299475728598051e-05, "loss": 1.1083, "step": 7871500 }, { "epoch": 4.72, "learning_rate": 2.2992657320419947e-05, "loss": 1.0819, "step": 7872000 }, { "epoch": 4.72, "learning_rate": 2.299055735485938e-05, "loss": 1.0938, "step": 7872500 }, { "epoch": 4.72, "learning_rate": 2.2988461589229934e-05, "loss": 1.1035, "step": 7873000 }, { "epoch": 4.72, "learning_rate": 2.298636162366937e-05, "loss": 1.0785, "step": 7873500 }, { "epoch": 4.72, "learning_rate": 2.2984261658108807e-05, "loss": 1.1128, "step": 7874000 }, { "epoch": 4.72, "learning_rate": 2.2982161692548244e-05, "loss": 1.1008, "step": 7874500 }, { "epoch": 4.72, "learning_rate": 2.2980065926918798e-05, "loss": 1.0896, "step": 7875000 }, { "epoch": 4.72, "learning_rate": 2.297796596135823e-05, "loss": 1.1069, "step": 7875500 }, { "epoch": 4.72, "learning_rate": 2.2975865995797668e-05, "loss": 1.1111, "step": 7876000 }, { "epoch": 4.72, "learning_rate": 2.2973766030237105e-05, "loss": 1.0973, "step": 7876500 }, { "epoch": 4.72, "learning_rate": 2.2971666064676538e-05, "loss": 1.0976, "step": 7877000 }, { "epoch": 4.72, "learning_rate": 2.2969566099115975e-05, "loss": 1.0722, "step": 7877500 }, { "epoch": 4.72, "learning_rate": 2.296747033348653e-05, "loss": 1.0866, "step": 7878000 }, { "epoch": 4.72, "learning_rate": 2.2965370367925966e-05, "loss": 1.0856, "step": 7878500 }, { "epoch": 4.72, "learning_rate": 2.2963270402365402e-05, "loss": 1.0937, "step": 7879000 }, { "epoch": 4.72, "learning_rate": 2.2961170436804836e-05, "loss": 1.0908, "step": 7879500 }, { "epoch": 4.72, "learning_rate": 2.2959070471244273e-05, "loss": 1.0793, "step": 7880000 }, { "epoch": 4.72, "learning_rate": 2.2956974705614826e-05, "loss": 1.1142, "step": 7880500 }, { "epoch": 4.72, "learning_rate": 2.2954874740054263e-05, "loss": 1.0919, "step": 7881000 }, { "epoch": 4.73, "learning_rate": 2.29527747744937e-05, "loss": 1.1087, "step": 7881500 }, { "epoch": 4.73, "learning_rate": 2.2950674808933133e-05, "loss": 1.0959, "step": 7882000 }, { "epoch": 4.73, "learning_rate": 2.2948579043303687e-05, "loss": 1.0954, "step": 7882500 }, { "epoch": 4.73, "learning_rate": 2.2946479077743124e-05, "loss": 1.0812, "step": 7883000 }, { "epoch": 4.73, "learning_rate": 2.294437911218256e-05, "loss": 1.0753, "step": 7883500 }, { "epoch": 4.73, "learning_rate": 2.2942279146621994e-05, "loss": 1.1152, "step": 7884000 }, { "epoch": 4.73, "learning_rate": 2.294017918106143e-05, "loss": 1.0856, "step": 7884500 }, { "epoch": 4.73, "learning_rate": 2.2938083415431985e-05, "loss": 1.1096, "step": 7885000 }, { "epoch": 4.73, "learning_rate": 2.293598344987142e-05, "loss": 1.0988, "step": 7885500 }, { "epoch": 4.73, "learning_rate": 2.2933883484310858e-05, "loss": 1.0961, "step": 7886000 }, { "epoch": 4.73, "learning_rate": 2.2931783518750292e-05, "loss": 1.0966, "step": 7886500 }, { "epoch": 4.73, "learning_rate": 2.292968355318973e-05, "loss": 1.0901, "step": 7887000 }, { "epoch": 4.73, "learning_rate": 2.2927587787560282e-05, "loss": 1.0854, "step": 7887500 }, { "epoch": 4.73, "learning_rate": 2.292548782199972e-05, "loss": 1.0875, "step": 7888000 }, { "epoch": 4.73, "learning_rate": 2.2923387856439156e-05, "loss": 1.1086, "step": 7888500 }, { "epoch": 4.73, "learning_rate": 2.292128789087859e-05, "loss": 1.1031, "step": 7889000 }, { "epoch": 4.73, "learning_rate": 2.2919187925318026e-05, "loss": 1.101, "step": 7889500 }, { "epoch": 4.73, "learning_rate": 2.291708795975746e-05, "loss": 1.0981, "step": 7890000 }, { "epoch": 4.73, "learning_rate": 2.2914987994196893e-05, "loss": 1.0891, "step": 7890500 }, { "epoch": 4.73, "learning_rate": 2.291288802863633e-05, "loss": 1.1273, "step": 7891000 }, { "epoch": 4.73, "learning_rate": 2.2910792263006887e-05, "loss": 1.1176, "step": 7891500 }, { "epoch": 4.73, "learning_rate": 2.2908692297446324e-05, "loss": 1.0817, "step": 7892000 }, { "epoch": 4.73, "learning_rate": 2.2906592331885757e-05, "loss": 1.0928, "step": 7892500 }, { "epoch": 4.73, "learning_rate": 2.290449236632519e-05, "loss": 1.1001, "step": 7893000 }, { "epoch": 4.73, "learning_rate": 2.2902396600695748e-05, "loss": 1.0843, "step": 7893500 }, { "epoch": 4.73, "learning_rate": 2.2900296635135184e-05, "loss": 1.0936, "step": 7894000 }, { "epoch": 4.73, "learning_rate": 2.2898196669574618e-05, "loss": 1.096, "step": 7894500 }, { "epoch": 4.73, "learning_rate": 2.289609670401405e-05, "loss": 1.1054, "step": 7895000 }, { "epoch": 4.73, "learning_rate": 2.2894000938384612e-05, "loss": 1.0833, "step": 7895500 }, { "epoch": 4.73, "learning_rate": 2.2891900972824045e-05, "loss": 1.0944, "step": 7896000 }, { "epoch": 4.73, "learning_rate": 2.2889801007263482e-05, "loss": 1.1057, "step": 7896500 }, { "epoch": 4.73, "learning_rate": 2.2887701041702915e-05, "loss": 1.1072, "step": 7897000 }, { "epoch": 4.73, "learning_rate": 2.288560107614235e-05, "loss": 1.0852, "step": 7897500 }, { "epoch": 4.74, "learning_rate": 2.2883505310512906e-05, "loss": 1.0901, "step": 7898000 }, { "epoch": 4.74, "learning_rate": 2.2881405344952343e-05, "loss": 1.1064, "step": 7898500 }, { "epoch": 4.74, "learning_rate": 2.287930537939178e-05, "loss": 1.1182, "step": 7899000 }, { "epoch": 4.74, "learning_rate": 2.2877205413831213e-05, "loss": 1.0951, "step": 7899500 }, { "epoch": 4.74, "learning_rate": 2.2875105448270647e-05, "loss": 1.0874, "step": 7900000 }, { "epoch": 4.74, "eval_loss": 1.0676285028457642, "eval_runtime": 1100.8847, "eval_samples_per_second": 478.452, "eval_steps_per_second": 79.742, "step": 7900000 }, { "epoch": 4.74, "learning_rate": 2.2873013882572327e-05, "loss": 1.1437, "step": 7900500 }, { "epoch": 4.74, "learning_rate": 2.287091391701176e-05, "loss": 1.1005, "step": 7901000 }, { "epoch": 4.74, "learning_rate": 2.2868813951451194e-05, "loss": 1.0894, "step": 7901500 }, { "epoch": 4.74, "learning_rate": 2.286671398589063e-05, "loss": 1.123, "step": 7902000 }, { "epoch": 4.74, "learning_rate": 2.2864614020330068e-05, "loss": 1.0945, "step": 7902500 }, { "epoch": 4.74, "learning_rate": 2.28625140547695e-05, "loss": 1.111, "step": 7903000 }, { "epoch": 4.74, "learning_rate": 2.2860414089208938e-05, "loss": 1.1178, "step": 7903500 }, { "epoch": 4.74, "learning_rate": 2.285831412364837e-05, "loss": 1.0933, "step": 7904000 }, { "epoch": 4.74, "learning_rate": 2.2856214158087805e-05, "loss": 1.0671, "step": 7904500 }, { "epoch": 4.74, "learning_rate": 2.2854118392458362e-05, "loss": 1.0718, "step": 7905000 }, { "epoch": 4.74, "learning_rate": 2.28520184268978e-05, "loss": 1.1045, "step": 7905500 }, { "epoch": 4.74, "learning_rate": 2.2849918461337235e-05, "loss": 1.106, "step": 7906000 }, { "epoch": 4.74, "learning_rate": 2.284781849577667e-05, "loss": 1.1085, "step": 7906500 }, { "epoch": 4.74, "learning_rate": 2.2845722730147226e-05, "loss": 1.1229, "step": 7907000 }, { "epoch": 4.74, "learning_rate": 2.284362276458666e-05, "loss": 1.1038, "step": 7907500 }, { "epoch": 4.74, "learning_rate": 2.2841522799026096e-05, "loss": 1.0665, "step": 7908000 }, { "epoch": 4.74, "learning_rate": 2.2839422833465533e-05, "loss": 1.0862, "step": 7908500 }, { "epoch": 4.74, "learning_rate": 2.2837327067836087e-05, "loss": 1.0953, "step": 7909000 }, { "epoch": 4.74, "learning_rate": 2.2835227102275523e-05, "loss": 1.0891, "step": 7909500 }, { "epoch": 4.74, "learning_rate": 2.2833127136714957e-05, "loss": 1.0798, "step": 7910000 }, { "epoch": 4.74, "learning_rate": 2.2831027171154394e-05, "loss": 1.0823, "step": 7910500 }, { "epoch": 4.74, "learning_rate": 2.282892720559383e-05, "loss": 1.094, "step": 7911000 }, { "epoch": 4.74, "learning_rate": 2.282682724003326e-05, "loss": 1.0886, "step": 7911500 }, { "epoch": 4.74, "learning_rate": 2.2824727274472698e-05, "loss": 1.0869, "step": 7912000 }, { "epoch": 4.74, "learning_rate": 2.2822631508843255e-05, "loss": 1.0965, "step": 7912500 }, { "epoch": 4.74, "learning_rate": 2.282053154328269e-05, "loss": 1.1041, "step": 7913000 }, { "epoch": 4.74, "learning_rate": 2.2818431577722125e-05, "loss": 1.1056, "step": 7913500 }, { "epoch": 4.74, "learning_rate": 2.2816331612161558e-05, "loss": 1.108, "step": 7914000 }, { "epoch": 4.75, "learning_rate": 2.281424004646324e-05, "loss": 1.0883, "step": 7914500 }, { "epoch": 4.75, "learning_rate": 2.2812140080902672e-05, "loss": 1.0984, "step": 7915000 }, { "epoch": 4.75, "learning_rate": 2.2810040115342106e-05, "loss": 1.0865, "step": 7915500 }, { "epoch": 4.75, "learning_rate": 2.2807940149781543e-05, "loss": 1.1085, "step": 7916000 }, { "epoch": 4.75, "learning_rate": 2.280584018422098e-05, "loss": 1.1246, "step": 7916500 }, { "epoch": 4.75, "learning_rate": 2.2803740218660413e-05, "loss": 1.046, "step": 7917000 }, { "epoch": 4.75, "learning_rate": 2.280164025309985e-05, "loss": 1.0921, "step": 7917500 }, { "epoch": 4.75, "learning_rate": 2.2799540287539287e-05, "loss": 1.0978, "step": 7918000 }, { "epoch": 4.75, "learning_rate": 2.279744452190984e-05, "loss": 1.1057, "step": 7918500 }, { "epoch": 4.75, "learning_rate": 2.2795344556349274e-05, "loss": 1.1132, "step": 7919000 }, { "epoch": 4.75, "learning_rate": 2.279324459078871e-05, "loss": 1.1156, "step": 7919500 }, { "epoch": 4.75, "learning_rate": 2.2791144625228147e-05, "loss": 1.077, "step": 7920000 }, { "epoch": 4.75, "learning_rate": 2.2789044659667584e-05, "loss": 1.0697, "step": 7920500 }, { "epoch": 4.75, "learning_rate": 2.2786948894038138e-05, "loss": 1.0933, "step": 7921000 }, { "epoch": 4.75, "learning_rate": 2.278484892847757e-05, "loss": 1.1319, "step": 7921500 }, { "epoch": 4.75, "learning_rate": 2.2782748962917008e-05, "loss": 1.1139, "step": 7922000 }, { "epoch": 4.75, "learning_rate": 2.2780648997356445e-05, "loss": 1.1121, "step": 7922500 }, { "epoch": 4.75, "learning_rate": 2.2778553231727e-05, "loss": 1.1149, "step": 7923000 }, { "epoch": 4.75, "learning_rate": 2.2776453266166435e-05, "loss": 1.0856, "step": 7923500 }, { "epoch": 4.75, "learning_rate": 2.277435330060587e-05, "loss": 1.0778, "step": 7924000 }, { "epoch": 4.75, "learning_rate": 2.2772253335045306e-05, "loss": 1.0712, "step": 7924500 }, { "epoch": 4.75, "learning_rate": 2.2770157569415863e-05, "loss": 1.0722, "step": 7925000 }, { "epoch": 4.75, "learning_rate": 2.2768057603855296e-05, "loss": 1.069, "step": 7925500 }, { "epoch": 4.75, "learning_rate": 2.2765957638294733e-05, "loss": 1.101, "step": 7926000 }, { "epoch": 4.75, "learning_rate": 2.2763857672734166e-05, "loss": 1.1014, "step": 7926500 }, { "epoch": 4.75, "learning_rate": 2.2761761907104723e-05, "loss": 1.1294, "step": 7927000 }, { "epoch": 4.75, "learning_rate": 2.2759661941544157e-05, "loss": 1.075, "step": 7927500 }, { "epoch": 4.75, "learning_rate": 2.2757561975983594e-05, "loss": 1.0867, "step": 7928000 }, { "epoch": 4.75, "learning_rate": 2.2755462010423027e-05, "loss": 1.0678, "step": 7928500 }, { "epoch": 4.75, "learning_rate": 2.2753362044862464e-05, "loss": 1.0928, "step": 7929000 }, { "epoch": 4.75, "learning_rate": 2.275126627923302e-05, "loss": 1.1144, "step": 7929500 }, { "epoch": 4.75, "learning_rate": 2.2749166313672454e-05, "loss": 1.0863, "step": 7930000 }, { "epoch": 4.75, "learning_rate": 2.274706634811189e-05, "loss": 1.1062, "step": 7930500 }, { "epoch": 4.75, "learning_rate": 2.2744966382551325e-05, "loss": 1.0822, "step": 7931000 }, { "epoch": 4.76, "learning_rate": 2.274287061692188e-05, "loss": 1.0906, "step": 7931500 }, { "epoch": 4.76, "learning_rate": 2.274077065136132e-05, "loss": 1.0809, "step": 7932000 }, { "epoch": 4.76, "learning_rate": 2.2738670685800752e-05, "loss": 1.1033, "step": 7932500 }, { "epoch": 4.76, "learning_rate": 2.273657072024019e-05, "loss": 1.124, "step": 7933000 }, { "epoch": 4.76, "learning_rate": 2.2734470754679622e-05, "loss": 1.0919, "step": 7933500 }, { "epoch": 4.76, "learning_rate": 2.2732379188981303e-05, "loss": 1.0688, "step": 7934000 }, { "epoch": 4.76, "learning_rate": 2.2730279223420733e-05, "loss": 1.0913, "step": 7934500 }, { "epoch": 4.76, "learning_rate": 2.272817925786017e-05, "loss": 1.0848, "step": 7935000 }, { "epoch": 4.76, "learning_rate": 2.2726079292299606e-05, "loss": 1.1064, "step": 7935500 }, { "epoch": 4.76, "learning_rate": 2.2723979326739043e-05, "loss": 1.0878, "step": 7936000 }, { "epoch": 4.76, "learning_rate": 2.2721879361178477e-05, "loss": 1.0949, "step": 7936500 }, { "epoch": 4.76, "learning_rate": 2.271977939561791e-05, "loss": 1.0975, "step": 7937000 }, { "epoch": 4.76, "learning_rate": 2.2717679430057347e-05, "loss": 1.1134, "step": 7937500 }, { "epoch": 4.76, "learning_rate": 2.2715583664427904e-05, "loss": 1.0759, "step": 7938000 }, { "epoch": 4.76, "learning_rate": 2.271348789879846e-05, "loss": 1.1072, "step": 7938500 }, { "epoch": 4.76, "learning_rate": 2.2711387933237894e-05, "loss": 1.0911, "step": 7939000 }, { "epoch": 4.76, "learning_rate": 2.2709287967677328e-05, "loss": 1.1049, "step": 7939500 }, { "epoch": 4.76, "learning_rate": 2.2707188002116765e-05, "loss": 1.0849, "step": 7940000 }, { "epoch": 4.76, "learning_rate": 2.27050880365562e-05, "loss": 1.092, "step": 7940500 }, { "epoch": 4.76, "learning_rate": 2.2702988070995635e-05, "loss": 1.1116, "step": 7941000 }, { "epoch": 4.76, "learning_rate": 2.2700888105435072e-05, "loss": 1.1286, "step": 7941500 }, { "epoch": 4.76, "learning_rate": 2.2698788139874505e-05, "loss": 1.0736, "step": 7942000 }, { "epoch": 4.76, "learning_rate": 2.2696692374245062e-05, "loss": 1.0872, "step": 7942500 }, { "epoch": 4.76, "learning_rate": 2.26945924086845e-05, "loss": 1.0819, "step": 7943000 }, { "epoch": 4.76, "learning_rate": 2.2692492443123933e-05, "loss": 1.104, "step": 7943500 }, { "epoch": 4.76, "learning_rate": 2.269039247756337e-05, "loss": 1.0834, "step": 7944000 }, { "epoch": 4.76, "learning_rate": 2.2688292512002803e-05, "loss": 1.1109, "step": 7944500 }, { "epoch": 4.76, "learning_rate": 2.2686192546442236e-05, "loss": 1.0966, "step": 7945000 }, { "epoch": 4.76, "learning_rate": 2.2684096780812793e-05, "loss": 1.0763, "step": 7945500 }, { "epoch": 4.76, "learning_rate": 2.268199681525223e-05, "loss": 1.107, "step": 7946000 }, { "epoch": 4.76, "learning_rate": 2.2679896849691664e-05, "loss": 1.0639, "step": 7946500 }, { "epoch": 4.76, "learning_rate": 2.267780108406222e-05, "loss": 1.0816, "step": 7947000 }, { "epoch": 4.76, "learning_rate": 2.2675701118501657e-05, "loss": 1.0802, "step": 7947500 }, { "epoch": 4.77, "learning_rate": 2.267360115294109e-05, "loss": 1.0719, "step": 7948000 }, { "epoch": 4.77, "learning_rate": 2.2671501187380528e-05, "loss": 1.1044, "step": 7948500 }, { "epoch": 4.77, "learning_rate": 2.266940122181996e-05, "loss": 1.1055, "step": 7949000 }, { "epoch": 4.77, "learning_rate": 2.2667301256259395e-05, "loss": 1.1048, "step": 7949500 }, { "epoch": 4.77, "learning_rate": 2.266520129069883e-05, "loss": 1.1016, "step": 7950000 }, { "epoch": 4.77, "learning_rate": 2.266310132513827e-05, "loss": 1.0991, "step": 7950500 }, { "epoch": 4.77, "learning_rate": 2.2661005559508825e-05, "loss": 1.0981, "step": 7951000 }, { "epoch": 4.77, "learning_rate": 2.265890559394826e-05, "loss": 1.1127, "step": 7951500 }, { "epoch": 4.77, "learning_rate": 2.2656805628387692e-05, "loss": 1.0729, "step": 7952000 }, { "epoch": 4.77, "learning_rate": 2.265470566282713e-05, "loss": 1.084, "step": 7952500 }, { "epoch": 4.77, "learning_rate": 2.2652605697266566e-05, "loss": 1.0756, "step": 7953000 }, { "epoch": 4.77, "learning_rate": 2.2650505731706e-05, "loss": 1.096, "step": 7953500 }, { "epoch": 4.77, "learning_rate": 2.2648405766145436e-05, "loss": 1.0728, "step": 7954000 }, { "epoch": 4.77, "learning_rate": 2.264631000051599e-05, "loss": 1.081, "step": 7954500 }, { "epoch": 4.77, "learning_rate": 2.2644210034955427e-05, "loss": 1.1107, "step": 7955000 }, { "epoch": 4.77, "learning_rate": 2.2642110069394863e-05, "loss": 1.0962, "step": 7955500 }, { "epoch": 4.77, "learning_rate": 2.2640010103834297e-05, "loss": 1.0979, "step": 7956000 }, { "epoch": 4.77, "learning_rate": 2.263791013827373e-05, "loss": 1.0821, "step": 7956500 }, { "epoch": 4.77, "learning_rate": 2.2635814372644287e-05, "loss": 1.0911, "step": 7957000 }, { "epoch": 4.77, "learning_rate": 2.2633714407083724e-05, "loss": 1.1026, "step": 7957500 }, { "epoch": 4.77, "learning_rate": 2.263161864145428e-05, "loss": 1.074, "step": 7958000 }, { "epoch": 4.77, "learning_rate": 2.2629518675893715e-05, "loss": 1.1007, "step": 7958500 }, { "epoch": 4.77, "learning_rate": 2.2627418710333148e-05, "loss": 1.0889, "step": 7959000 }, { "epoch": 4.77, "learning_rate": 2.2625318744772585e-05, "loss": 1.0927, "step": 7959500 }, { "epoch": 4.77, "learning_rate": 2.2623218779212022e-05, "loss": 1.085, "step": 7960000 }, { "epoch": 4.77, "learning_rate": 2.2621118813651455e-05, "loss": 1.0788, "step": 7960500 }, { "epoch": 4.77, "learning_rate": 2.2619018848090892e-05, "loss": 1.0884, "step": 7961000 }, { "epoch": 4.77, "learning_rate": 2.2616918882530326e-05, "loss": 1.103, "step": 7961500 }, { "epoch": 4.77, "learning_rate": 2.2614823116900883e-05, "loss": 1.0903, "step": 7962000 }, { "epoch": 4.77, "learning_rate": 2.261272315134032e-05, "loss": 1.0827, "step": 7962500 }, { "epoch": 4.77, "learning_rate": 2.2610623185779753e-05, "loss": 1.0897, "step": 7963000 }, { "epoch": 4.77, "learning_rate": 2.260852322021919e-05, "loss": 1.1329, "step": 7963500 }, { "epoch": 4.77, "learning_rate": 2.2606423254658623e-05, "loss": 1.0954, "step": 7964000 }, { "epoch": 4.78, "learning_rate": 2.260432748902918e-05, "loss": 1.0985, "step": 7964500 }, { "epoch": 4.78, "learning_rate": 2.2602227523468617e-05, "loss": 1.0713, "step": 7965000 }, { "epoch": 4.78, "learning_rate": 2.260012755790805e-05, "loss": 1.0904, "step": 7965500 }, { "epoch": 4.78, "learning_rate": 2.2598027592347484e-05, "loss": 1.1413, "step": 7966000 }, { "epoch": 4.78, "learning_rate": 2.259593602664916e-05, "loss": 1.103, "step": 7966500 }, { "epoch": 4.78, "learning_rate": 2.2593836061088598e-05, "loss": 1.061, "step": 7967000 }, { "epoch": 4.78, "learning_rate": 2.2591736095528035e-05, "loss": 1.0954, "step": 7967500 }, { "epoch": 4.78, "learning_rate": 2.2589636129967468e-05, "loss": 1.0896, "step": 7968000 }, { "epoch": 4.78, "learning_rate": 2.25875361644069e-05, "loss": 1.1173, "step": 7968500 }, { "epoch": 4.78, "learning_rate": 2.258543619884634e-05, "loss": 1.0622, "step": 7969000 }, { "epoch": 4.78, "learning_rate": 2.2583336233285775e-05, "loss": 1.1156, "step": 7969500 }, { "epoch": 4.78, "learning_rate": 2.2581240467656332e-05, "loss": 1.0745, "step": 7970000 }, { "epoch": 4.78, "learning_rate": 2.2579140502095762e-05, "loss": 1.1007, "step": 7970500 }, { "epoch": 4.78, "learning_rate": 2.25770405365352e-05, "loss": 1.0821, "step": 7971000 }, { "epoch": 4.78, "learning_rate": 2.2574940570974636e-05, "loss": 1.0737, "step": 7971500 }, { "epoch": 4.78, "learning_rate": 2.2572840605414073e-05, "loss": 1.1015, "step": 7972000 }, { "epoch": 4.78, "learning_rate": 2.2570740639853506e-05, "loss": 1.0993, "step": 7972500 }, { "epoch": 4.78, "learning_rate": 2.2568640674292943e-05, "loss": 1.1087, "step": 7973000 }, { "epoch": 4.78, "learning_rate": 2.2566540708732377e-05, "loss": 1.1032, "step": 7973500 }, { "epoch": 4.78, "learning_rate": 2.256444074317181e-05, "loss": 1.0769, "step": 7974000 }, { "epoch": 4.78, "learning_rate": 2.2562340777611247e-05, "loss": 1.0854, "step": 7974500 }, { "epoch": 4.78, "learning_rate": 2.2560245011981804e-05, "loss": 1.065, "step": 7975000 }, { "epoch": 4.78, "learning_rate": 2.2558145046421237e-05, "loss": 1.0904, "step": 7975500 }, { "epoch": 4.78, "learning_rate": 2.2556045080860674e-05, "loss": 1.0972, "step": 7976000 }, { "epoch": 4.78, "learning_rate": 2.2553945115300108e-05, "loss": 1.0865, "step": 7976500 }, { "epoch": 4.78, "learning_rate": 2.2551845149739544e-05, "loss": 1.0852, "step": 7977000 }, { "epoch": 4.78, "learning_rate": 2.25497493841101e-05, "loss": 1.0787, "step": 7977500 }, { "epoch": 4.78, "learning_rate": 2.2547649418549535e-05, "loss": 1.1129, "step": 7978000 }, { "epoch": 4.78, "learning_rate": 2.254554945298897e-05, "loss": 1.0747, "step": 7978500 }, { "epoch": 4.78, "learning_rate": 2.2543449487428405e-05, "loss": 1.0788, "step": 7979000 }, { "epoch": 4.78, "learning_rate": 2.2541349521867842e-05, "loss": 1.1049, "step": 7979500 }, { "epoch": 4.78, "learning_rate": 2.25392537562384e-05, "loss": 1.1188, "step": 7980000 }, { "epoch": 4.78, "learning_rate": 2.2537153790677832e-05, "loss": 1.0949, "step": 7980500 }, { "epoch": 4.78, "learning_rate": 2.2535053825117266e-05, "loss": 1.0846, "step": 7981000 }, { "epoch": 4.79, "learning_rate": 2.2532953859556703e-05, "loss": 1.101, "step": 7981500 }, { "epoch": 4.79, "learning_rate": 2.253085389399614e-05, "loss": 1.1128, "step": 7982000 }, { "epoch": 4.79, "learning_rate": 2.2528758128366697e-05, "loss": 1.1057, "step": 7982500 }, { "epoch": 4.79, "learning_rate": 2.252665816280613e-05, "loss": 1.1035, "step": 7983000 }, { "epoch": 4.79, "learning_rate": 2.2524558197245563e-05, "loss": 1.0907, "step": 7983500 }, { "epoch": 4.79, "learning_rate": 2.2522458231685e-05, "loss": 1.0898, "step": 7984000 }, { "epoch": 4.79, "learning_rate": 2.2520362466055557e-05, "loss": 1.0599, "step": 7984500 }, { "epoch": 4.79, "learning_rate": 2.251826250049499e-05, "loss": 1.0886, "step": 7985000 }, { "epoch": 4.79, "learning_rate": 2.2516162534934424e-05, "loss": 1.1069, "step": 7985500 }, { "epoch": 4.79, "learning_rate": 2.251406256937386e-05, "loss": 1.1, "step": 7986000 }, { "epoch": 4.79, "learning_rate": 2.2511962603813298e-05, "loss": 1.1138, "step": 7986500 }, { "epoch": 4.79, "learning_rate": 2.2509866838183855e-05, "loss": 1.0822, "step": 7987000 }, { "epoch": 4.79, "learning_rate": 2.250776687262329e-05, "loss": 1.0993, "step": 7987500 }, { "epoch": 4.79, "learning_rate": 2.2505666907062722e-05, "loss": 1.089, "step": 7988000 }, { "epoch": 4.79, "learning_rate": 2.250356694150216e-05, "loss": 1.1093, "step": 7988500 }, { "epoch": 4.79, "learning_rate": 2.2501466975941595e-05, "loss": 1.0576, "step": 7989000 }, { "epoch": 4.79, "learning_rate": 2.249937541024327e-05, "loss": 1.1065, "step": 7989500 }, { "epoch": 4.79, "learning_rate": 2.2497275444682706e-05, "loss": 1.0719, "step": 7990000 }, { "epoch": 4.79, "learning_rate": 2.2495175479122143e-05, "loss": 1.0914, "step": 7990500 }, { "epoch": 4.79, "learning_rate": 2.2493075513561576e-05, "loss": 1.0964, "step": 7991000 }, { "epoch": 4.79, "learning_rate": 2.2490975548001013e-05, "loss": 1.0738, "step": 7991500 }, { "epoch": 4.79, "learning_rate": 2.248887558244045e-05, "loss": 1.0782, "step": 7992000 }, { "epoch": 4.79, "learning_rate": 2.248677561687988e-05, "loss": 1.0911, "step": 7992500 }, { "epoch": 4.79, "learning_rate": 2.2484675651319317e-05, "loss": 1.0885, "step": 7993000 }, { "epoch": 4.79, "learning_rate": 2.2482575685758754e-05, "loss": 1.1287, "step": 7993500 }, { "epoch": 4.79, "learning_rate": 2.248047992012931e-05, "loss": 1.087, "step": 7994000 }, { "epoch": 4.79, "learning_rate": 2.2478384154499864e-05, "loss": 1.0979, "step": 7994500 }, { "epoch": 4.79, "learning_rate": 2.24762841889393e-05, "loss": 1.1095, "step": 7995000 }, { "epoch": 4.79, "learning_rate": 2.2474184223378735e-05, "loss": 1.067, "step": 7995500 }, { "epoch": 4.79, "learning_rate": 2.247208425781817e-05, "loss": 1.0771, "step": 7996000 }, { "epoch": 4.79, "learning_rate": 2.246998429225761e-05, "loss": 1.0995, "step": 7996500 }, { "epoch": 4.79, "learning_rate": 2.2467888526628162e-05, "loss": 1.0885, "step": 7997000 }, { "epoch": 4.79, "learning_rate": 2.24657885610676e-05, "loss": 1.091, "step": 7997500 }, { "epoch": 4.8, "learning_rate": 2.2463688595507032e-05, "loss": 1.1111, "step": 7998000 }, { "epoch": 4.8, "learning_rate": 2.246158862994647e-05, "loss": 1.1194, "step": 7998500 }, { "epoch": 4.8, "learning_rate": 2.2459488664385906e-05, "loss": 1.1093, "step": 7999000 }, { "epoch": 4.8, "learning_rate": 2.2457388698825336e-05, "loss": 1.0779, "step": 7999500 }, { "epoch": 4.8, "learning_rate": 2.2455288733264773e-05, "loss": 1.0806, "step": 8000000 }, { "epoch": 4.8, "eval_loss": 1.0654122829437256, "eval_runtime": 1096.7784, "eval_samples_per_second": 480.243, "eval_steps_per_second": 80.041, "step": 8000000 } ], "max_steps": 13343552, "num_train_epochs": 8, "total_flos": 4.236358805426442e+18, "trial_name": null, "trial_params": null }