diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,78536 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.897013328984666, + "global_step": 6500000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.7664e-06, + "loss": 7.1306, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 5.5664e-06, + "loss": 4.7491, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 8.3664e-06, + "loss": 4.5563, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 1.1166399999999999e-05, + "loss": 4.4179, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 1.39664e-05, + "loss": 4.2868, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 1.67664e-05, + "loss": 4.1711, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 1.95664e-05, + "loss": 4.1089, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 2.23664e-05, + "loss": 4.0387, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 2.51664e-05, + "loss": 3.9442, + "step": 4500 + }, + { + "epoch": 0.0, + "learning_rate": 2.79664e-05, + "loss": 3.8552, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 3.07664e-05, + "loss": 3.8383, + "step": 5500 + }, + { + "epoch": 0.0, + "learning_rate": 3.3566400000000004e-05, + "loss": 3.7433, + "step": 6000 + }, + { + "epoch": 0.0, + "learning_rate": 3.63664e-05, + "loss": 3.672, + "step": 6500 + }, + { + "epoch": 0.0, + "learning_rate": 3.91664e-05, + "loss": 3.5705, + "step": 7000 + }, + { + "epoch": 0.0, + "learning_rate": 4.19664e-05, + "loss": 3.4977, + "step": 7500 + }, + { + "epoch": 0.0, + "learning_rate": 4.47664e-05, + "loss": 3.4228, + "step": 8000 + }, + { + "epoch": 0.01, + "learning_rate": 4.75664e-05, + "loss": 3.3206, + "step": 8500 + }, + { + "epoch": 0.01, + "learning_rate": 5.03664e-05, + "loss": 3.3024, + "step": 9000 + }, + { + "epoch": 0.01, + "learning_rate": 5.31608e-05, + "loss": 3.238, + "step": 9500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5960799999999995e-05, + "loss": 3.1835, + "step": 10000 + }, + { + "epoch": 0.01, + "learning_rate": 5.599792943395728e-05, + "loss": 3.1321, + "step": 10500 + }, + { + "epoch": 0.01, + "learning_rate": 5.599582946839672e-05, + "loss": 2.9498, + "step": 11000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5993733702767274e-05, + "loss": 2.9067, + "step": 11500 + }, + { + "epoch": 0.01, + "learning_rate": 5.599163373720671e-05, + "loss": 2.8201, + "step": 12000 + }, + { + "epoch": 0.01, + "learning_rate": 5.598953377164614e-05, + "loss": 2.7697, + "step": 12500 + }, + { + "epoch": 0.01, + "learning_rate": 5.598743380608558e-05, + "loss": 2.6956, + "step": 13000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5985333840525015e-05, + "loss": 2.6767, + "step": 13500 + }, + { + "epoch": 0.01, + "learning_rate": 5.598323387496445e-05, + "loss": 2.6511, + "step": 14000 + }, + { + "epoch": 0.01, + "learning_rate": 5.598113390940389e-05, + "loss": 2.6005, + "step": 14500 + }, + { + "epoch": 0.01, + "learning_rate": 5.597903394384332e-05, + "loss": 2.5646, + "step": 15000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5976933978282755e-05, + "loss": 2.5398, + "step": 15500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5974838212653316e-05, + "loss": 2.5406, + "step": 16000 + }, + { + "epoch": 0.01, + "learning_rate": 5.597273824709275e-05, + "loss": 2.5244, + "step": 16500 + }, + { + "epoch": 0.01, + "learning_rate": 5.59706424814633e-05, + "loss": 2.4838, + "step": 17000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5968542515902736e-05, + "loss": 2.4476, + "step": 17500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5966442550342176e-05, + "loss": 2.3907, + "step": 18000 + }, + { + "epoch": 0.01, + "learning_rate": 5.596434258478161e-05, + "loss": 2.4071, + "step": 18500 + }, + { + "epoch": 0.01, + "learning_rate": 5.596224261922104e-05, + "loss": 2.3964, + "step": 19000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5960142653660483e-05, + "loss": 2.3317, + "step": 19500 + }, + { + "epoch": 0.01, + "learning_rate": 5.595804268809992e-05, + "loss": 2.3686, + "step": 20000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5955942722539344e-05, + "loss": 2.3225, + "step": 20500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5953842756978784e-05, + "loss": 2.2592, + "step": 21000 + }, + { + "epoch": 0.01, + "learning_rate": 5.595174279141822e-05, + "loss": 2.2759, + "step": 21500 + }, + { + "epoch": 0.01, + "learning_rate": 5.594964282585766e-05, + "loss": 2.2664, + "step": 22000 + }, + { + "epoch": 0.01, + "learning_rate": 5.594754286029709e-05, + "loss": 2.281, + "step": 22500 + }, + { + "epoch": 0.01, + "learning_rate": 5.594545129459877e-05, + "loss": 2.239, + "step": 23000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5943351329038205e-05, + "loss": 2.2078, + "step": 23500 + }, + { + "epoch": 0.01, + "learning_rate": 5.594125556340876e-05, + "loss": 2.2347, + "step": 24000 + }, + { + "epoch": 0.01, + "learning_rate": 5.593915559784819e-05, + "loss": 2.1976, + "step": 24500 + }, + { + "epoch": 0.01, + "learning_rate": 5.593705563228763e-05, + "loss": 2.1759, + "step": 25000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5934955666727066e-05, + "loss": 2.1614, + "step": 25500 + }, + { + "epoch": 0.02, + "learning_rate": 5.593285990109762e-05, + "loss": 2.2002, + "step": 26000 + }, + { + "epoch": 0.02, + "learning_rate": 5.593075993553705e-05, + "loss": 2.167, + "step": 26500 + }, + { + "epoch": 0.02, + "learning_rate": 5.592865996997649e-05, + "loss": 2.1416, + "step": 27000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5926560004415926e-05, + "loss": 2.1846, + "step": 27500 + }, + { + "epoch": 0.02, + "learning_rate": 5.592446003885537e-05, + "loss": 2.1457, + "step": 28000 + }, + { + "epoch": 0.02, + "learning_rate": 5.59223600732948e-05, + "loss": 2.1489, + "step": 28500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5920260107734234e-05, + "loss": 2.0987, + "step": 29000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5918160142173674e-05, + "loss": 2.102, + "step": 29500 + }, + { + "epoch": 0.02, + "learning_rate": 5.59160601766131e-05, + "loss": 2.1108, + "step": 30000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5913960211052534e-05, + "loss": 2.1382, + "step": 30500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5911864445423094e-05, + "loss": 2.0619, + "step": 31000 + }, + { + "epoch": 0.02, + "learning_rate": 5.590976867979365e-05, + "loss": 2.0802, + "step": 31500 + }, + { + "epoch": 0.02, + "learning_rate": 5.590766871423309e-05, + "loss": 2.0627, + "step": 32000 + }, + { + "epoch": 0.02, + "learning_rate": 5.590556874867252e-05, + "loss": 2.0826, + "step": 32500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5903468783111955e-05, + "loss": 2.0486, + "step": 33000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5901368817551395e-05, + "loss": 2.0652, + "step": 33500 + }, + { + "epoch": 0.02, + "learning_rate": 5.589926885199083e-05, + "loss": 2.048, + "step": 34000 + }, + { + "epoch": 0.02, + "learning_rate": 5.589716888643026e-05, + "loss": 2.0727, + "step": 34500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5895068920869696e-05, + "loss": 2.0037, + "step": 35000 + }, + { + "epoch": 0.02, + "learning_rate": 5.589296895530913e-05, + "loss": 2.0548, + "step": 35500 + }, + { + "epoch": 0.02, + "learning_rate": 5.589086898974857e-05, + "loss": 1.9999, + "step": 36000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5888769024188e-05, + "loss": 2.0511, + "step": 36500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5886669058627436e-05, + "loss": 1.982, + "step": 37000 + }, + { + "epoch": 0.02, + "learning_rate": 5.588457749292912e-05, + "loss": 2.007, + "step": 37500 + }, + { + "epoch": 0.02, + "learning_rate": 5.588247752736855e-05, + "loss": 2.0074, + "step": 38000 + }, + { + "epoch": 0.02, + "learning_rate": 5.588037756180799e-05, + "loss": 2.0068, + "step": 38500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5878277596247424e-05, + "loss": 2.0297, + "step": 39000 + }, + { + "epoch": 0.02, + "learning_rate": 5.587617763068685e-05, + "loss": 1.9801, + "step": 39500 + }, + { + "epoch": 0.02, + "learning_rate": 5.587408186505741e-05, + "loss": 1.968, + "step": 40000 + }, + { + "epoch": 0.02, + "learning_rate": 5.587198189949685e-05, + "loss": 1.9741, + "step": 40500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5869881933936285e-05, + "loss": 2.0031, + "step": 41000 + }, + { + "epoch": 0.02, + "learning_rate": 5.586778196837572e-05, + "loss": 1.9725, + "step": 41500 + }, + { + "epoch": 0.03, + "learning_rate": 5.586568200281515e-05, + "loss": 1.9465, + "step": 42000 + }, + { + "epoch": 0.03, + "learning_rate": 5.586358623718571e-05, + "loss": 1.9521, + "step": 42500 + }, + { + "epoch": 0.03, + "learning_rate": 5.5861490471556265e-05, + "loss": 1.96, + "step": 43000 + }, + { + "epoch": 0.03, + "learning_rate": 5.58593905059957e-05, + "loss": 1.9815, + "step": 43500 + }, + { + "epoch": 0.03, + "learning_rate": 5.585729054043514e-05, + "loss": 1.9682, + "step": 44000 + }, + { + "epoch": 0.03, + "learning_rate": 5.585519057487457e-05, + "loss": 1.9458, + "step": 44500 + }, + { + "epoch": 0.03, + "learning_rate": 5.5853090609314006e-05, + "loss": 1.9562, + "step": 45000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5850990643753446e-05, + "loss": 1.9166, + "step": 45500 + }, + { + "epoch": 0.03, + "learning_rate": 5.584889067819288e-05, + "loss": 1.9057, + "step": 46000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5846790712632306e-05, + "loss": 1.9332, + "step": 46500 + }, + { + "epoch": 0.03, + "learning_rate": 5.584469074707175e-05, + "loss": 1.9453, + "step": 47000 + }, + { + "epoch": 0.03, + "learning_rate": 5.584259078151118e-05, + "loss": 1.9018, + "step": 47500 + }, + { + "epoch": 0.03, + "learning_rate": 5.5840490815950614e-05, + "loss": 1.9193, + "step": 48000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5838390850390054e-05, + "loss": 1.9188, + "step": 48500 + }, + { + "epoch": 0.03, + "learning_rate": 5.583629088482949e-05, + "loss": 1.851, + "step": 49000 + }, + { + "epoch": 0.03, + "learning_rate": 5.583419511920004e-05, + "loss": 1.9075, + "step": 49500 + }, + { + "epoch": 0.03, + "learning_rate": 5.583209515363948e-05, + "loss": 1.8626, + "step": 50000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5829995188078915e-05, + "loss": 1.9197, + "step": 50500 + }, + { + "epoch": 0.03, + "learning_rate": 5.582789522251835e-05, + "loss": 1.8625, + "step": 51000 + }, + { + "epoch": 0.03, + "learning_rate": 5.582579525695779e-05, + "loss": 1.903, + "step": 51500 + }, + { + "epoch": 0.03, + "learning_rate": 5.582369949132834e-05, + "loss": 1.8981, + "step": 52000 + }, + { + "epoch": 0.03, + "learning_rate": 5.58216037256989e-05, + "loss": 1.8916, + "step": 52500 + }, + { + "epoch": 0.03, + "learning_rate": 5.5819503760138336e-05, + "loss": 1.9053, + "step": 53000 + }, + { + "epoch": 0.03, + "learning_rate": 5.581740379457776e-05, + "loss": 1.8785, + "step": 53500 + }, + { + "epoch": 0.03, + "learning_rate": 5.58153038290172e-05, + "loss": 1.8709, + "step": 54000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5813203863456636e-05, + "loss": 1.8518, + "step": 54500 + }, + { + "epoch": 0.03, + "learning_rate": 5.581110389789607e-05, + "loss": 1.8553, + "step": 55000 + }, + { + "epoch": 0.03, + "learning_rate": 5.580900393233551e-05, + "loss": 1.8435, + "step": 55500 + }, + { + "epoch": 0.03, + "learning_rate": 5.580690396677494e-05, + "loss": 1.8547, + "step": 56000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5804804001214377e-05, + "loss": 1.8521, + "step": 56500 + }, + { + "epoch": 0.03, + "learning_rate": 5.580270403565382e-05, + "loss": 1.8389, + "step": 57000 + }, + { + "epoch": 0.03, + "learning_rate": 5.580060407009325e-05, + "loss": 1.8671, + "step": 57500 + }, + { + "epoch": 0.03, + "learning_rate": 5.5798508304463804e-05, + "loss": 1.8519, + "step": 58000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5796408338903244e-05, + "loss": 1.8339, + "step": 58500 + }, + { + "epoch": 0.04, + "learning_rate": 5.579430837334268e-05, + "loss": 1.8504, + "step": 59000 + }, + { + "epoch": 0.04, + "learning_rate": 5.579220840778211e-05, + "loss": 1.8243, + "step": 59500 + }, + { + "epoch": 0.04, + "learning_rate": 5.579010844222155e-05, + "loss": 1.8522, + "step": 60000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5788012676592105e-05, + "loss": 1.8346, + "step": 60500 + }, + { + "epoch": 0.04, + "learning_rate": 5.578591271103154e-05, + "loss": 1.8302, + "step": 61000 + }, + { + "epoch": 0.04, + "learning_rate": 5.578381694540209e-05, + "loss": 1.8138, + "step": 61500 + }, + { + "epoch": 0.04, + "learning_rate": 5.5781716979841525e-05, + "loss": 1.8075, + "step": 62000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5779621214212086e-05, + "loss": 1.8539, + "step": 62500 + }, + { + "epoch": 0.04, + "learning_rate": 5.577752124865152e-05, + "loss": 1.8535, + "step": 63000 + }, + { + "epoch": 0.04, + "learning_rate": 5.577542128309095e-05, + "loss": 1.8144, + "step": 63500 + }, + { + "epoch": 0.04, + "learning_rate": 5.577332131753039e-05, + "loss": 1.8078, + "step": 64000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5771221351969826e-05, + "loss": 1.8121, + "step": 64500 + }, + { + "epoch": 0.04, + "learning_rate": 5.576912138640926e-05, + "loss": 1.819, + "step": 65000 + }, + { + "epoch": 0.04, + "learning_rate": 5.57670214208487e-05, + "loss": 1.7988, + "step": 65500 + }, + { + "epoch": 0.04, + "learning_rate": 5.5764921455288133e-05, + "loss": 1.8449, + "step": 66000 + }, + { + "epoch": 0.04, + "learning_rate": 5.576282148972757e-05, + "loss": 1.8113, + "step": 66500 + }, + { + "epoch": 0.04, + "learning_rate": 5.576072152416701e-05, + "loss": 1.8032, + "step": 67000 + }, + { + "epoch": 0.04, + "learning_rate": 5.575862155860644e-05, + "loss": 1.7805, + "step": 67500 + }, + { + "epoch": 0.04, + "learning_rate": 5.5756521593045874e-05, + "loss": 1.8332, + "step": 68000 + }, + { + "epoch": 0.04, + "learning_rate": 5.575442582741643e-05, + "loss": 1.8081, + "step": 68500 + }, + { + "epoch": 0.04, + "learning_rate": 5.575232586185587e-05, + "loss": 1.7867, + "step": 69000 + }, + { + "epoch": 0.04, + "learning_rate": 5.57502258962953e-05, + "loss": 1.8534, + "step": 69500 + }, + { + "epoch": 0.04, + "learning_rate": 5.5748125930734735e-05, + "loss": 1.7946, + "step": 70000 + }, + { + "epoch": 0.04, + "learning_rate": 5.574603016510529e-05, + "loss": 1.7681, + "step": 70500 + }, + { + "epoch": 0.04, + "learning_rate": 5.574393439947585e-05, + "loss": 1.8173, + "step": 71000 + }, + { + "epoch": 0.04, + "learning_rate": 5.574183443391528e-05, + "loss": 1.7993, + "step": 71500 + }, + { + "epoch": 0.04, + "learning_rate": 5.5739734468354716e-05, + "loss": 1.8121, + "step": 72000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5737634502794156e-05, + "loss": 1.7741, + "step": 72500 + }, + { + "epoch": 0.04, + "learning_rate": 5.573553453723359e-05, + "loss": 1.7632, + "step": 73000 + }, + { + "epoch": 0.04, + "learning_rate": 5.573343457167302e-05, + "loss": 1.7428, + "step": 73500 + }, + { + "epoch": 0.04, + "learning_rate": 5.573133460611246e-05, + "loss": 1.7824, + "step": 74000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5729234640551896e-05, + "loss": 1.7765, + "step": 74500 + }, + { + "epoch": 0.04, + "learning_rate": 5.572713467499133e-05, + "loss": 1.7981, + "step": 75000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5725038909361883e-05, + "loss": 1.7925, + "step": 75500 + }, + { + "epoch": 0.05, + "learning_rate": 5.5722938943801324e-05, + "loss": 1.7825, + "step": 76000 + }, + { + "epoch": 0.05, + "learning_rate": 5.572083897824076e-05, + "loss": 1.7829, + "step": 76500 + }, + { + "epoch": 0.05, + "learning_rate": 5.571873901268019e-05, + "loss": 1.79, + "step": 77000 + }, + { + "epoch": 0.05, + "learning_rate": 5.571663904711963e-05, + "loss": 1.799, + "step": 77500 + }, + { + "epoch": 0.05, + "learning_rate": 5.5714543281490184e-05, + "loss": 1.7995, + "step": 78000 + }, + { + "epoch": 0.05, + "learning_rate": 5.571244331592962e-05, + "loss": 1.806, + "step": 78500 + }, + { + "epoch": 0.05, + "learning_rate": 5.571034335036905e-05, + "loss": 1.7516, + "step": 79000 + }, + { + "epoch": 0.05, + "learning_rate": 5.570824338480849e-05, + "loss": 1.7678, + "step": 79500 + }, + { + "epoch": 0.05, + "learning_rate": 5.5706143419247925e-05, + "loss": 1.7673, + "step": 80000 + }, + { + "epoch": 0.05, + "learning_rate": 5.570404345368736e-05, + "loss": 1.7944, + "step": 80500 + }, + { + "epoch": 0.05, + "learning_rate": 5.570194348812679e-05, + "loss": 1.8062, + "step": 81000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5699843522566225e-05, + "loss": 1.7369, + "step": 81500 + }, + { + "epoch": 0.05, + "learning_rate": 5.5697747756936786e-05, + "loss": 1.7562, + "step": 82000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5695647791376226e-05, + "loss": 1.7348, + "step": 82500 + }, + { + "epoch": 0.05, + "learning_rate": 5.569355622567789e-05, + "loss": 1.8007, + "step": 83000 + }, + { + "epoch": 0.05, + "learning_rate": 5.569145626011733e-05, + "loss": 1.7627, + "step": 83500 + }, + { + "epoch": 0.05, + "learning_rate": 5.568935629455677e-05, + "loss": 1.7367, + "step": 84000 + }, + { + "epoch": 0.05, + "learning_rate": 5.56872563289962e-05, + "loss": 1.7428, + "step": 84500 + }, + { + "epoch": 0.05, + "learning_rate": 5.568515636343564e-05, + "loss": 1.7706, + "step": 85000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5683056397875074e-05, + "loss": 1.7478, + "step": 85500 + }, + { + "epoch": 0.05, + "learning_rate": 5.568095643231451e-05, + "loss": 1.7316, + "step": 86000 + }, + { + "epoch": 0.05, + "learning_rate": 5.567885646675395e-05, + "loss": 1.7544, + "step": 86500 + }, + { + "epoch": 0.05, + "learning_rate": 5.567675650119338e-05, + "loss": 1.7324, + "step": 87000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5674660735563935e-05, + "loss": 1.7232, + "step": 87500 + }, + { + "epoch": 0.05, + "learning_rate": 5.5672560770003375e-05, + "loss": 1.742, + "step": 88000 + }, + { + "epoch": 0.05, + "learning_rate": 5.567046080444281e-05, + "loss": 1.7131, + "step": 88500 + }, + { + "epoch": 0.05, + "learning_rate": 5.566836083888224e-05, + "loss": 1.7744, + "step": 89000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5666265073252795e-05, + "loss": 1.7089, + "step": 89500 + }, + { + "epoch": 0.05, + "learning_rate": 5.5664165107692235e-05, + "loss": 1.7256, + "step": 90000 + }, + { + "epoch": 0.05, + "learning_rate": 5.566206514213167e-05, + "loss": 1.7423, + "step": 90500 + }, + { + "epoch": 0.05, + "learning_rate": 5.56599651765711e-05, + "loss": 1.7465, + "step": 91000 + }, + { + "epoch": 0.05, + "learning_rate": 5.565786521101054e-05, + "loss": 1.7184, + "step": 91500 + }, + { + "epoch": 0.06, + "learning_rate": 5.5655769445381096e-05, + "loss": 1.7434, + "step": 92000 + }, + { + "epoch": 0.06, + "learning_rate": 5.565366947982053e-05, + "loss": 1.7604, + "step": 92500 + }, + { + "epoch": 0.06, + "learning_rate": 5.565157371419108e-05, + "loss": 1.7488, + "step": 93000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5649473748630523e-05, + "loss": 1.697, + "step": 93500 + }, + { + "epoch": 0.06, + "learning_rate": 5.564737378306996e-05, + "loss": 1.7406, + "step": 94000 + }, + { + "epoch": 0.06, + "learning_rate": 5.564527381750939e-05, + "loss": 1.756, + "step": 94500 + }, + { + "epoch": 0.06, + "learning_rate": 5.564317385194883e-05, + "loss": 1.7186, + "step": 95000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5641073886388264e-05, + "loss": 1.7215, + "step": 95500 + }, + { + "epoch": 0.06, + "learning_rate": 5.56389739208277e-05, + "loss": 1.7408, + "step": 96000 + }, + { + "epoch": 0.06, + "learning_rate": 5.563687395526714e-05, + "loss": 1.7173, + "step": 96500 + }, + { + "epoch": 0.06, + "learning_rate": 5.563477398970657e-05, + "loss": 1.7422, + "step": 97000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5632674024146e-05, + "loss": 1.7511, + "step": 97500 + }, + { + "epoch": 0.06, + "learning_rate": 5.563057405858544e-05, + "loss": 1.7128, + "step": 98000 + }, + { + "epoch": 0.06, + "learning_rate": 5.562847409302487e-05, + "loss": 1.7366, + "step": 98500 + }, + { + "epoch": 0.06, + "learning_rate": 5.562638252732655e-05, + "loss": 1.7102, + "step": 99000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5624282561765986e-05, + "loss": 1.6987, + "step": 99500 + }, + { + "epoch": 0.06, + "learning_rate": 5.5622182596205426e-05, + "loss": 1.7162, + "step": 100000 + }, + { + "epoch": 0.06, + "eval_loss": 1.6113003492355347, + "eval_runtime": 1095.3065, + "eval_samples_per_second": 480.888, + "eval_steps_per_second": 80.148, + "step": 100000 + }, + { + "epoch": 0.06, + "learning_rate": 5.562008263064486e-05, + "loss": 1.693, + "step": 100500 + }, + { + "epoch": 0.06, + "learning_rate": 5.561798266508429e-05, + "loss": 1.7246, + "step": 101000 + }, + { + "epoch": 0.06, + "learning_rate": 5.561588269952373e-05, + "loss": 1.7385, + "step": 101500 + }, + { + "epoch": 0.06, + "learning_rate": 5.561378273396316e-05, + "loss": 1.7152, + "step": 102000 + }, + { + "epoch": 0.06, + "learning_rate": 5.561168276840259e-05, + "loss": 1.6918, + "step": 102500 + }, + { + "epoch": 0.06, + "learning_rate": 5.5609587002773153e-05, + "loss": 1.6783, + "step": 103000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5607487037212594e-05, + "loss": 1.6992, + "step": 103500 + }, + { + "epoch": 0.06, + "learning_rate": 5.560538707165203e-05, + "loss": 1.7065, + "step": 104000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5603287106091454e-05, + "loss": 1.6912, + "step": 104500 + }, + { + "epoch": 0.06, + "learning_rate": 5.5601187140530894e-05, + "loss": 1.7006, + "step": 105000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5599091374901454e-05, + "loss": 1.7121, + "step": 105500 + }, + { + "epoch": 0.06, + "learning_rate": 5.559699560927201e-05, + "loss": 1.6932, + "step": 106000 + }, + { + "epoch": 0.06, + "learning_rate": 5.559489564371144e-05, + "loss": 1.6908, + "step": 106500 + }, + { + "epoch": 0.06, + "learning_rate": 5.559279567815088e-05, + "loss": 1.7198, + "step": 107000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5590695712590315e-05, + "loss": 1.7275, + "step": 107500 + }, + { + "epoch": 0.06, + "learning_rate": 5.558859574702975e-05, + "loss": 1.6924, + "step": 108000 + }, + { + "epoch": 0.07, + "learning_rate": 5.558649578146919e-05, + "loss": 1.6792, + "step": 108500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5584395815908616e-05, + "loss": 1.6821, + "step": 109000 + }, + { + "epoch": 0.07, + "learning_rate": 5.558229585034805e-05, + "loss": 1.6619, + "step": 109500 + }, + { + "epoch": 0.07, + "learning_rate": 5.558020008471861e-05, + "loss": 1.6954, + "step": 110000 + }, + { + "epoch": 0.07, + "learning_rate": 5.557810011915805e-05, + "loss": 1.7016, + "step": 110500 + }, + { + "epoch": 0.07, + "learning_rate": 5.557600015359748e-05, + "loss": 1.6689, + "step": 111000 + }, + { + "epoch": 0.07, + "learning_rate": 5.557390018803691e-05, + "loss": 1.7108, + "step": 111500 + }, + { + "epoch": 0.07, + "learning_rate": 5.557180022247635e-05, + "loss": 1.7, + "step": 112000 + }, + { + "epoch": 0.07, + "learning_rate": 5.556970445684691e-05, + "loss": 1.7109, + "step": 112500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5567608691217464e-05, + "loss": 1.6864, + "step": 113000 + }, + { + "epoch": 0.07, + "learning_rate": 5.55655087256569e-05, + "loss": 1.6809, + "step": 113500 + }, + { + "epoch": 0.07, + "learning_rate": 5.556341296002745e-05, + "loss": 1.7365, + "step": 114000 + }, + { + "epoch": 0.07, + "learning_rate": 5.556131299446689e-05, + "loss": 1.6716, + "step": 114500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5559213028906325e-05, + "loss": 1.7088, + "step": 115000 + }, + { + "epoch": 0.07, + "learning_rate": 5.555711306334576e-05, + "loss": 1.6476, + "step": 115500 + }, + { + "epoch": 0.07, + "learning_rate": 5.555501729771631e-05, + "loss": 1.6933, + "step": 116000 + }, + { + "epoch": 0.07, + "learning_rate": 5.555291733215575e-05, + "loss": 1.6808, + "step": 116500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5550817366595185e-05, + "loss": 1.7114, + "step": 117000 + }, + { + "epoch": 0.07, + "learning_rate": 5.554871740103462e-05, + "loss": 1.6573, + "step": 117500 + }, + { + "epoch": 0.07, + "learning_rate": 5.554661743547406e-05, + "loss": 1.6965, + "step": 118000 + }, + { + "epoch": 0.07, + "learning_rate": 5.554451746991349e-05, + "loss": 1.6986, + "step": 118500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5542417504352926e-05, + "loss": 1.6969, + "step": 119000 + }, + { + "epoch": 0.07, + "learning_rate": 5.5540317538792366e-05, + "loss": 1.6895, + "step": 119500 + }, + { + "epoch": 0.07, + "learning_rate": 5.55382175732318e-05, + "loss": 1.6495, + "step": 120000 + }, + { + "epoch": 0.07, + "learning_rate": 5.553611760767123e-05, + "loss": 1.6495, + "step": 120500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5534021842041793e-05, + "loss": 1.6729, + "step": 121000 + }, + { + "epoch": 0.07, + "learning_rate": 5.553192607641235e-05, + "loss": 1.706, + "step": 121500 + }, + { + "epoch": 0.07, + "learning_rate": 5.552982611085178e-05, + "loss": 1.672, + "step": 122000 + }, + { + "epoch": 0.07, + "learning_rate": 5.5527726145291214e-05, + "loss": 1.6255, + "step": 122500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5525626179730654e-05, + "loss": 1.6458, + "step": 123000 + }, + { + "epoch": 0.07, + "learning_rate": 5.552352621417009e-05, + "loss": 1.6765, + "step": 123500 + }, + { + "epoch": 0.07, + "learning_rate": 5.552142624860952e-05, + "loss": 1.6828, + "step": 124000 + }, + { + "epoch": 0.07, + "learning_rate": 5.551932628304896e-05, + "loss": 1.6512, + "step": 124500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5517226317488395e-05, + "loss": 1.6695, + "step": 125000 + }, + { + "epoch": 0.08, + "learning_rate": 5.551513055185895e-05, + "loss": 1.6789, + "step": 125500 + }, + { + "epoch": 0.08, + "learning_rate": 5.551303058629838e-05, + "loss": 1.6785, + "step": 126000 + }, + { + "epoch": 0.08, + "learning_rate": 5.551093062073782e-05, + "loss": 1.6435, + "step": 126500 + }, + { + "epoch": 0.08, + "learning_rate": 5.5508830655177255e-05, + "loss": 1.6531, + "step": 127000 + }, + { + "epoch": 0.08, + "learning_rate": 5.550673068961669e-05, + "loss": 1.6556, + "step": 127500 + }, + { + "epoch": 0.08, + "learning_rate": 5.550463072405613e-05, + "loss": 1.6563, + "step": 128000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5502530758495556e-05, + "loss": 1.6816, + "step": 128500 + }, + { + "epoch": 0.08, + "learning_rate": 5.5500430792934996e-05, + "loss": 1.67, + "step": 129000 + }, + { + "epoch": 0.08, + "learning_rate": 5.549833082737443e-05, + "loss": 1.6965, + "step": 129500 + }, + { + "epoch": 0.08, + "learning_rate": 5.549623506174499e-05, + "loss": 1.6701, + "step": 130000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5494139296115543e-05, + "loss": 1.6774, + "step": 130500 + }, + { + "epoch": 0.08, + "learning_rate": 5.549203933055498e-05, + "loss": 1.6849, + "step": 131000 + }, + { + "epoch": 0.08, + "learning_rate": 5.548993936499442e-05, + "loss": 1.6633, + "step": 131500 + }, + { + "epoch": 0.08, + "learning_rate": 5.548783939943385e-05, + "loss": 1.6135, + "step": 132000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5485739433873284e-05, + "loss": 1.6785, + "step": 132500 + }, + { + "epoch": 0.08, + "learning_rate": 5.548363946831272e-05, + "loss": 1.6718, + "step": 133000 + }, + { + "epoch": 0.08, + "learning_rate": 5.548154370268328e-05, + "loss": 1.6594, + "step": 133500 + }, + { + "epoch": 0.08, + "learning_rate": 5.547944373712271e-05, + "loss": 1.6686, + "step": 134000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5477343771562145e-05, + "loss": 1.6388, + "step": 134500 + }, + { + "epoch": 0.08, + "learning_rate": 5.5475243806001585e-05, + "loss": 1.6506, + "step": 135000 + }, + { + "epoch": 0.08, + "learning_rate": 5.547314804037214e-05, + "loss": 1.6689, + "step": 135500 + }, + { + "epoch": 0.08, + "learning_rate": 5.547104807481157e-05, + "loss": 1.6153, + "step": 136000 + }, + { + "epoch": 0.08, + "learning_rate": 5.546894810925101e-05, + "loss": 1.6636, + "step": 136500 + }, + { + "epoch": 0.08, + "learning_rate": 5.5466848143690446e-05, + "loss": 1.6691, + "step": 137000 + }, + { + "epoch": 0.08, + "learning_rate": 5.546474817812988e-05, + "loss": 1.6591, + "step": 137500 + }, + { + "epoch": 0.08, + "learning_rate": 5.546264821256931e-05, + "loss": 1.6408, + "step": 138000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5460548247008746e-05, + "loss": 1.6484, + "step": 138500 + }, + { + "epoch": 0.08, + "learning_rate": 5.545844828144818e-05, + "loss": 1.6661, + "step": 139000 + }, + { + "epoch": 0.08, + "learning_rate": 5.545634831588762e-05, + "loss": 1.6503, + "step": 139500 + }, + { + "epoch": 0.08, + "learning_rate": 5.5454252550258173e-05, + "loss": 1.6475, + "step": 140000 + }, + { + "epoch": 0.08, + "learning_rate": 5.545215258469761e-05, + "loss": 1.6633, + "step": 140500 + }, + { + "epoch": 0.08, + "learning_rate": 5.545005261913704e-05, + "loss": 1.6189, + "step": 141000 + }, + { + "epoch": 0.08, + "learning_rate": 5.544795265357648e-05, + "loss": 1.6433, + "step": 141500 + }, + { + "epoch": 0.09, + "learning_rate": 5.544585688794704e-05, + "loss": 1.5953, + "step": 142000 + }, + { + "epoch": 0.09, + "learning_rate": 5.544375692238647e-05, + "loss": 1.6335, + "step": 142500 + }, + { + "epoch": 0.09, + "learning_rate": 5.544165695682591e-05, + "loss": 1.6495, + "step": 143000 + }, + { + "epoch": 0.09, + "learning_rate": 5.543955699126534e-05, + "loss": 1.6592, + "step": 143500 + }, + { + "epoch": 0.09, + "learning_rate": 5.54374612256359e-05, + "loss": 1.6293, + "step": 144000 + }, + { + "epoch": 0.09, + "learning_rate": 5.5435361260075335e-05, + "loss": 1.6412, + "step": 144500 + }, + { + "epoch": 0.09, + "learning_rate": 5.543326129451477e-05, + "loss": 1.655, + "step": 145000 + }, + { + "epoch": 0.09, + "learning_rate": 5.54311613289542e-05, + "loss": 1.654, + "step": 145500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5429061363393636e-05, + "loss": 1.6185, + "step": 146000 + }, + { + "epoch": 0.09, + "learning_rate": 5.5426965597764196e-05, + "loss": 1.6205, + "step": 146500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5424865632203636e-05, + "loss": 1.6426, + "step": 147000 + }, + { + "epoch": 0.09, + "learning_rate": 5.542276566664306e-05, + "loss": 1.6475, + "step": 147500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5420665701082496e-05, + "loss": 1.6265, + "step": 148000 + }, + { + "epoch": 0.09, + "learning_rate": 5.5418565735521936e-05, + "loss": 1.6394, + "step": 148500 + }, + { + "epoch": 0.09, + "learning_rate": 5.541646576996137e-05, + "loss": 1.628, + "step": 149000 + }, + { + "epoch": 0.09, + "learning_rate": 5.54143658044008e-05, + "loss": 1.6245, + "step": 149500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5412265838840244e-05, + "loss": 1.6298, + "step": 150000 + }, + { + "epoch": 0.09, + "learning_rate": 5.541016587327968e-05, + "loss": 1.632, + "step": 150500 + }, + { + "epoch": 0.09, + "learning_rate": 5.540807010765023e-05, + "loss": 1.6021, + "step": 151000 + }, + { + "epoch": 0.09, + "learning_rate": 5.540597014208967e-05, + "loss": 1.6157, + "step": 151500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5403870176529104e-05, + "loss": 1.6264, + "step": 152000 + }, + { + "epoch": 0.09, + "learning_rate": 5.540177021096854e-05, + "loss": 1.6485, + "step": 152500 + }, + { + "epoch": 0.09, + "learning_rate": 5.539967444533909e-05, + "loss": 1.6493, + "step": 153000 + }, + { + "epoch": 0.09, + "learning_rate": 5.539757447977853e-05, + "loss": 1.6467, + "step": 153500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5395474514217965e-05, + "loss": 1.6432, + "step": 154000 + }, + { + "epoch": 0.09, + "learning_rate": 5.539337874858852e-05, + "loss": 1.6397, + "step": 154500 + }, + { + "epoch": 0.09, + "learning_rate": 5.539127878302795e-05, + "loss": 1.6255, + "step": 155000 + }, + { + "epoch": 0.09, + "learning_rate": 5.538917881746739e-05, + "loss": 1.6372, + "step": 155500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5387078851906826e-05, + "loss": 1.6155, + "step": 156000 + }, + { + "epoch": 0.09, + "learning_rate": 5.538497888634626e-05, + "loss": 1.6305, + "step": 156500 + }, + { + "epoch": 0.09, + "learning_rate": 5.538288312071682e-05, + "loss": 1.6082, + "step": 157000 + }, + { + "epoch": 0.09, + "learning_rate": 5.538078315515625e-05, + "loss": 1.6183, + "step": 157500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5378683189595687e-05, + "loss": 1.6143, + "step": 158000 + }, + { + "epoch": 0.1, + "learning_rate": 5.537658322403513e-05, + "loss": 1.6299, + "step": 158500 + }, + { + "epoch": 0.1, + "learning_rate": 5.537448325847456e-05, + "loss": 1.6296, + "step": 159000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5372383292913994e-05, + "loss": 1.6464, + "step": 159500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5370283327353434e-05, + "loss": 1.6393, + "step": 160000 + }, + { + "epoch": 0.1, + "learning_rate": 5.536818336179287e-05, + "loss": 1.6345, + "step": 160500 + }, + { + "epoch": 0.1, + "learning_rate": 5.536608759616342e-05, + "loss": 1.6183, + "step": 161000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5363987630602854e-05, + "loss": 1.62, + "step": 161500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5361887665042295e-05, + "loss": 1.62, + "step": 162000 + }, + { + "epoch": 0.1, + "learning_rate": 5.535978769948173e-05, + "loss": 1.6003, + "step": 162500 + }, + { + "epoch": 0.1, + "learning_rate": 5.535769193385228e-05, + "loss": 1.6346, + "step": 163000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5355591968291715e-05, + "loss": 1.6618, + "step": 163500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5353492002731155e-05, + "loss": 1.5938, + "step": 164000 + }, + { + "epoch": 0.1, + "learning_rate": 5.535139203717059e-05, + "loss": 1.6081, + "step": 164500 + }, + { + "epoch": 0.1, + "learning_rate": 5.534929207161002e-05, + "loss": 1.6232, + "step": 165000 + }, + { + "epoch": 0.1, + "learning_rate": 5.534719210604946e-05, + "loss": 1.6045, + "step": 165500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5345092140488896e-05, + "loss": 1.6061, + "step": 166000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5342992174928336e-05, + "loss": 1.6491, + "step": 166500 + }, + { + "epoch": 0.1, + "learning_rate": 5.534089220936776e-05, + "loss": 1.6291, + "step": 167000 + }, + { + "epoch": 0.1, + "learning_rate": 5.533879644373832e-05, + "loss": 1.6282, + "step": 167500 + }, + { + "epoch": 0.1, + "learning_rate": 5.533669647817776e-05, + "loss": 1.602, + "step": 168000 + }, + { + "epoch": 0.1, + "learning_rate": 5.53345965126172e-05, + "loss": 1.6096, + "step": 168500 + }, + { + "epoch": 0.1, + "learning_rate": 5.533249654705663e-05, + "loss": 1.6116, + "step": 169000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5330400781427184e-05, + "loss": 1.6204, + "step": 169500 + }, + { + "epoch": 0.1, + "learning_rate": 5.532830081586662e-05, + "loss": 1.6099, + "step": 170000 + }, + { + "epoch": 0.1, + "learning_rate": 5.532620085030606e-05, + "loss": 1.644, + "step": 170500 + }, + { + "epoch": 0.1, + "learning_rate": 5.532410088474549e-05, + "loss": 1.6269, + "step": 171000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5322000919184925e-05, + "loss": 1.5776, + "step": 171500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5319905153555485e-05, + "loss": 1.6387, + "step": 172000 + }, + { + "epoch": 0.1, + "learning_rate": 5.531780938792604e-05, + "loss": 1.6198, + "step": 172500 + }, + { + "epoch": 0.1, + "learning_rate": 5.531570942236547e-05, + "loss": 1.5853, + "step": 173000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5313609456804905e-05, + "loss": 1.6199, + "step": 173500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5311509491244346e-05, + "loss": 1.5931, + "step": 174000 + }, + { + "epoch": 0.1, + "learning_rate": 5.530940952568378e-05, + "loss": 1.6317, + "step": 174500 + }, + { + "epoch": 0.1, + "learning_rate": 5.530730956012321e-05, + "loss": 1.6126, + "step": 175000 + }, + { + "epoch": 0.11, + "learning_rate": 5.530520959456265e-05, + "loss": 1.6168, + "step": 175500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5303113828933206e-05, + "loss": 1.5932, + "step": 176000 + }, + { + "epoch": 0.11, + "learning_rate": 5.530101386337264e-05, + "loss": 1.6496, + "step": 176500 + }, + { + "epoch": 0.11, + "learning_rate": 5.529891389781207e-05, + "loss": 1.6044, + "step": 177000 + }, + { + "epoch": 0.11, + "learning_rate": 5.5296813932251514e-05, + "loss": 1.6055, + "step": 177500 + }, + { + "epoch": 0.11, + "learning_rate": 5.529471816662207e-05, + "loss": 1.5994, + "step": 178000 + }, + { + "epoch": 0.11, + "learning_rate": 5.52926182010615e-05, + "loss": 1.5919, + "step": 178500 + }, + { + "epoch": 0.11, + "learning_rate": 5.529051823550094e-05, + "loss": 1.5779, + "step": 179000 + }, + { + "epoch": 0.11, + "learning_rate": 5.5288418269940374e-05, + "loss": 1.6163, + "step": 179500 + }, + { + "epoch": 0.11, + "learning_rate": 5.528631830437981e-05, + "loss": 1.6164, + "step": 180000 + }, + { + "epoch": 0.11, + "learning_rate": 5.528421833881925e-05, + "loss": 1.6308, + "step": 180500 + }, + { + "epoch": 0.11, + "learning_rate": 5.528211837325868e-05, + "loss": 1.6218, + "step": 181000 + }, + { + "epoch": 0.11, + "learning_rate": 5.528001840769811e-05, + "loss": 1.5762, + "step": 181500 + }, + { + "epoch": 0.11, + "learning_rate": 5.527792264206867e-05, + "loss": 1.6014, + "step": 182000 + }, + { + "epoch": 0.11, + "learning_rate": 5.527582267650811e-05, + "loss": 1.6038, + "step": 182500 + }, + { + "epoch": 0.11, + "learning_rate": 5.527372271094754e-05, + "loss": 1.6381, + "step": 183000 + }, + { + "epoch": 0.11, + "learning_rate": 5.5271622745386976e-05, + "loss": 1.6253, + "step": 183500 + }, + { + "epoch": 0.11, + "learning_rate": 5.526952277982641e-05, + "loss": 1.6, + "step": 184000 + }, + { + "epoch": 0.11, + "learning_rate": 5.526742281426584e-05, + "loss": 1.5856, + "step": 184500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5265322848705276e-05, + "loss": 1.6109, + "step": 185000 + }, + { + "epoch": 0.11, + "learning_rate": 5.5263222883144716e-05, + "loss": 1.6121, + "step": 185500 + }, + { + "epoch": 0.11, + "learning_rate": 5.526112291758415e-05, + "loss": 1.6169, + "step": 186000 + }, + { + "epoch": 0.11, + "learning_rate": 5.52590271519547e-05, + "loss": 1.5976, + "step": 186500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5256927186394144e-05, + "loss": 1.6129, + "step": 187000 + }, + { + "epoch": 0.11, + "learning_rate": 5.525482722083358e-05, + "loss": 1.5925, + "step": 187500 + }, + { + "epoch": 0.11, + "learning_rate": 5.525272725527301e-05, + "loss": 1.6216, + "step": 188000 + }, + { + "epoch": 0.11, + "learning_rate": 5.525062728971245e-05, + "loss": 1.6201, + "step": 188500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5248531524083004e-05, + "loss": 1.6284, + "step": 189000 + }, + { + "epoch": 0.11, + "learning_rate": 5.5246435758453565e-05, + "loss": 1.5996, + "step": 189500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5244335792893e-05, + "loss": 1.5812, + "step": 190000 + }, + { + "epoch": 0.11, + "learning_rate": 5.524224002726355e-05, + "loss": 1.586, + "step": 190500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5240140061702985e-05, + "loss": 1.6248, + "step": 191000 + }, + { + "epoch": 0.11, + "learning_rate": 5.5238040096142425e-05, + "loss": 1.6143, + "step": 191500 + }, + { + "epoch": 0.12, + "learning_rate": 5.523594013058186e-05, + "loss": 1.581, + "step": 192000 + }, + { + "epoch": 0.12, + "learning_rate": 5.523384016502129e-05, + "loss": 1.6128, + "step": 192500 + }, + { + "epoch": 0.12, + "learning_rate": 5.523174019946073e-05, + "loss": 1.5824, + "step": 193000 + }, + { + "epoch": 0.12, + "learning_rate": 5.522964023390016e-05, + "loss": 1.6002, + "step": 193500 + }, + { + "epoch": 0.12, + "learning_rate": 5.52275402683396e-05, + "loss": 1.6012, + "step": 194000 + }, + { + "epoch": 0.12, + "learning_rate": 5.522544030277903e-05, + "loss": 1.5855, + "step": 194500 + }, + { + "epoch": 0.12, + "learning_rate": 5.522334453714959e-05, + "loss": 1.6029, + "step": 195000 + }, + { + "epoch": 0.12, + "learning_rate": 5.522124457158903e-05, + "loss": 1.6014, + "step": 195500 + }, + { + "epoch": 0.12, + "learning_rate": 5.521914460602846e-05, + "loss": 1.6003, + "step": 196000 + }, + { + "epoch": 0.12, + "learning_rate": 5.5217044640467894e-05, + "loss": 1.57, + "step": 196500 + }, + { + "epoch": 0.12, + "learning_rate": 5.521494467490733e-05, + "loss": 1.6085, + "step": 197000 + }, + { + "epoch": 0.12, + "learning_rate": 5.521284470934677e-05, + "loss": 1.6124, + "step": 197500 + }, + { + "epoch": 0.12, + "learning_rate": 5.52107447437862e-05, + "loss": 1.5989, + "step": 198000 + }, + { + "epoch": 0.12, + "learning_rate": 5.5208644778225634e-05, + "loss": 1.5842, + "step": 198500 + }, + { + "epoch": 0.12, + "learning_rate": 5.520654901259619e-05, + "loss": 1.6082, + "step": 199000 + }, + { + "epoch": 0.12, + "learning_rate": 5.520444904703563e-05, + "loss": 1.6194, + "step": 199500 + }, + { + "epoch": 0.12, + "learning_rate": 5.520235748133731e-05, + "loss": 1.5846, + "step": 200000 + }, + { + "epoch": 0.12, + "eval_loss": 1.5057743787765503, + "eval_runtime": 1098.6714, + "eval_samples_per_second": 479.415, + "eval_steps_per_second": 79.903, + "step": 200000 + }, + { + "epoch": 0.12, + "learning_rate": 5.520025751577674e-05, + "loss": 1.6172, + "step": 200500 + }, + { + "epoch": 0.12, + "learning_rate": 5.5198157550216175e-05, + "loss": 1.5929, + "step": 201000 + }, + { + "epoch": 0.12, + "learning_rate": 5.5196057584655616e-05, + "loss": 1.5772, + "step": 201500 + }, + { + "epoch": 0.12, + "learning_rate": 5.519395761909505e-05, + "loss": 1.5737, + "step": 202000 + }, + { + "epoch": 0.12, + "learning_rate": 5.519185765353448e-05, + "loss": 1.575, + "step": 202500 + }, + { + "epoch": 0.12, + "learning_rate": 5.5189757687973916e-05, + "loss": 1.6277, + "step": 203000 + }, + { + "epoch": 0.12, + "learning_rate": 5.518765772241335e-05, + "loss": 1.5874, + "step": 203500 + }, + { + "epoch": 0.12, + "learning_rate": 5.518555775685278e-05, + "loss": 1.5785, + "step": 204000 + }, + { + "epoch": 0.12, + "learning_rate": 5.518345779129222e-05, + "loss": 1.5956, + "step": 204500 + }, + { + "epoch": 0.12, + "learning_rate": 5.5181357825731657e-05, + "loss": 1.5306, + "step": 205000 + }, + { + "epoch": 0.12, + "learning_rate": 5.517925786017109e-05, + "loss": 1.6043, + "step": 205500 + }, + { + "epoch": 0.12, + "learning_rate": 5.517715789461053e-05, + "loss": 1.6062, + "step": 206000 + }, + { + "epoch": 0.12, + "learning_rate": 5.5175062128981084e-05, + "loss": 1.5506, + "step": 206500 + }, + { + "epoch": 0.12, + "learning_rate": 5.5172966363351644e-05, + "loss": 1.6059, + "step": 207000 + }, + { + "epoch": 0.12, + "learning_rate": 5.517086639779107e-05, + "loss": 1.6007, + "step": 207500 + }, + { + "epoch": 0.12, + "learning_rate": 5.516876643223051e-05, + "loss": 1.6055, + "step": 208000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5166666466669945e-05, + "loss": 1.6001, + "step": 208500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5164570701040505e-05, + "loss": 1.5949, + "step": 209000 + }, + { + "epoch": 0.13, + "learning_rate": 5.516247073547994e-05, + "loss": 1.599, + "step": 209500 + }, + { + "epoch": 0.13, + "learning_rate": 5.516037076991937e-05, + "loss": 1.5766, + "step": 210000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5158270804358805e-05, + "loss": 1.6037, + "step": 210500 + }, + { + "epoch": 0.13, + "learning_rate": 5.515617083879824e-05, + "loss": 1.567, + "step": 211000 + }, + { + "epoch": 0.13, + "learning_rate": 5.515407087323768e-05, + "loss": 1.5633, + "step": 211500 + }, + { + "epoch": 0.13, + "learning_rate": 5.515197510760824e-05, + "loss": 1.5883, + "step": 212000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5149875142047666e-05, + "loss": 1.5684, + "step": 212500 + }, + { + "epoch": 0.13, + "learning_rate": 5.51477751764871e-05, + "loss": 1.5685, + "step": 213000 + }, + { + "epoch": 0.13, + "learning_rate": 5.514567521092654e-05, + "loss": 1.5924, + "step": 213500 + }, + { + "epoch": 0.13, + "learning_rate": 5.514357524536597e-05, + "loss": 1.5858, + "step": 214000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5141479479736534e-05, + "loss": 1.5703, + "step": 214500 + }, + { + "epoch": 0.13, + "learning_rate": 5.513937951417597e-05, + "loss": 1.5536, + "step": 215000 + }, + { + "epoch": 0.13, + "learning_rate": 5.51372795486154e-05, + "loss": 1.5849, + "step": 215500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5135179583054834e-05, + "loss": 1.5829, + "step": 216000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5133079617494274e-05, + "loss": 1.579, + "step": 216500 + }, + { + "epoch": 0.13, + "learning_rate": 5.513097965193371e-05, + "loss": 1.5838, + "step": 217000 + }, + { + "epoch": 0.13, + "learning_rate": 5.512887968637314e-05, + "loss": 1.5788, + "step": 217500 + }, + { + "epoch": 0.13, + "learning_rate": 5.512677972081258e-05, + "loss": 1.6009, + "step": 218000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5124679755252015e-05, + "loss": 1.5677, + "step": 218500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5122588189553695e-05, + "loss": 1.5503, + "step": 219000 + }, + { + "epoch": 0.13, + "learning_rate": 5.512048822399312e-05, + "loss": 1.5778, + "step": 219500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5118388258432555e-05, + "loss": 1.5821, + "step": 220000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5116288292871996e-05, + "loss": 1.5648, + "step": 220500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5114192527242556e-05, + "loss": 1.5803, + "step": 221000 + }, + { + "epoch": 0.13, + "learning_rate": 5.511209256168199e-05, + "loss": 1.5933, + "step": 221500 + }, + { + "epoch": 0.13, + "learning_rate": 5.510999259612142e-05, + "loss": 1.6024, + "step": 222000 + }, + { + "epoch": 0.13, + "learning_rate": 5.510789683049198e-05, + "loss": 1.5776, + "step": 222500 + }, + { + "epoch": 0.13, + "learning_rate": 5.510579686493142e-05, + "loss": 1.6148, + "step": 223000 + }, + { + "epoch": 0.13, + "learning_rate": 5.510369689937085e-05, + "loss": 1.5659, + "step": 223500 + }, + { + "epoch": 0.13, + "learning_rate": 5.510159693381029e-05, + "loss": 1.5784, + "step": 224000 + }, + { + "epoch": 0.13, + "learning_rate": 5.509949696824972e-05, + "loss": 1.5568, + "step": 224500 + }, + { + "epoch": 0.13, + "learning_rate": 5.509739700268915e-05, + "loss": 1.5756, + "step": 225000 + }, + { + "epoch": 0.14, + "learning_rate": 5.509529703712859e-05, + "loss": 1.5393, + "step": 225500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5093197071568024e-05, + "loss": 1.5623, + "step": 226000 + }, + { + "epoch": 0.14, + "learning_rate": 5.509109710600746e-05, + "loss": 1.5507, + "step": 226500 + }, + { + "epoch": 0.14, + "learning_rate": 5.50889971404469e-05, + "loss": 1.5647, + "step": 227000 + }, + { + "epoch": 0.14, + "learning_rate": 5.508689717488633e-05, + "loss": 1.5549, + "step": 227500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5084797209325765e-05, + "loss": 1.5854, + "step": 228000 + }, + { + "epoch": 0.14, + "learning_rate": 5.508270144369632e-05, + "loss": 1.556, + "step": 228500 + }, + { + "epoch": 0.14, + "learning_rate": 5.508060147813576e-05, + "loss": 1.5822, + "step": 229000 + }, + { + "epoch": 0.14, + "learning_rate": 5.507850151257519e-05, + "loss": 1.5834, + "step": 229500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5076401547014626e-05, + "loss": 1.6046, + "step": 230000 + }, + { + "epoch": 0.14, + "learning_rate": 5.5074301581454066e-05, + "loss": 1.6046, + "step": 230500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5072210015755746e-05, + "loss": 1.5461, + "step": 231000 + }, + { + "epoch": 0.14, + "learning_rate": 5.507011005019517e-05, + "loss": 1.5611, + "step": 231500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5068010084634606e-05, + "loss": 1.5498, + "step": 232000 + }, + { + "epoch": 0.14, + "learning_rate": 5.506591011907405e-05, + "loss": 1.5501, + "step": 232500 + }, + { + "epoch": 0.14, + "learning_rate": 5.506381015351348e-05, + "loss": 1.548, + "step": 233000 + }, + { + "epoch": 0.14, + "learning_rate": 5.5061710187952914e-05, + "loss": 1.5708, + "step": 233500 + }, + { + "epoch": 0.14, + "learning_rate": 5.505961442232347e-05, + "loss": 1.5545, + "step": 234000 + }, + { + "epoch": 0.14, + "learning_rate": 5.505751865669403e-05, + "loss": 1.555, + "step": 234500 + }, + { + "epoch": 0.14, + "learning_rate": 5.505541869113347e-05, + "loss": 1.5698, + "step": 235000 + }, + { + "epoch": 0.14, + "learning_rate": 5.50533187255729e-05, + "loss": 1.5515, + "step": 235500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5051218760012335e-05, + "loss": 1.5406, + "step": 236000 + }, + { + "epoch": 0.14, + "learning_rate": 5.504911879445177e-05, + "loss": 1.5443, + "step": 236500 + }, + { + "epoch": 0.14, + "learning_rate": 5.50470188288912e-05, + "loss": 1.5536, + "step": 237000 + }, + { + "epoch": 0.14, + "learning_rate": 5.504491886333064e-05, + "loss": 1.5699, + "step": 237500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5042818897770075e-05, + "loss": 1.5656, + "step": 238000 + }, + { + "epoch": 0.14, + "learning_rate": 5.504072313214063e-05, + "loss": 1.5824, + "step": 238500 + }, + { + "epoch": 0.14, + "learning_rate": 5.503862316658006e-05, + "loss": 1.5363, + "step": 239000 + }, + { + "epoch": 0.14, + "learning_rate": 5.50365232010195e-05, + "loss": 1.5354, + "step": 239500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5034423235458936e-05, + "loss": 1.5804, + "step": 240000 + }, + { + "epoch": 0.14, + "learning_rate": 5.503232326989837e-05, + "loss": 1.5398, + "step": 240500 + }, + { + "epoch": 0.14, + "learning_rate": 5.503023170420005e-05, + "loss": 1.5531, + "step": 241000 + }, + { + "epoch": 0.14, + "learning_rate": 5.502813173863948e-05, + "loss": 1.5521, + "step": 241500 + }, + { + "epoch": 0.15, + "learning_rate": 5.5026031773078924e-05, + "loss": 1.5674, + "step": 242000 + }, + { + "epoch": 0.15, + "learning_rate": 5.502393180751836e-05, + "loss": 1.5475, + "step": 242500 + }, + { + "epoch": 0.15, + "learning_rate": 5.50218318419578e-05, + "loss": 1.549, + "step": 243000 + }, + { + "epoch": 0.15, + "learning_rate": 5.5019731876397224e-05, + "loss": 1.5572, + "step": 243500 + }, + { + "epoch": 0.15, + "learning_rate": 5.501763191083666e-05, + "loss": 1.5546, + "step": 244000 + }, + { + "epoch": 0.15, + "learning_rate": 5.50155319452761e-05, + "loss": 1.54, + "step": 244500 + }, + { + "epoch": 0.15, + "learning_rate": 5.501343197971553e-05, + "loss": 1.5534, + "step": 245000 + }, + { + "epoch": 0.15, + "learning_rate": 5.5011332014154965e-05, + "loss": 1.5464, + "step": 245500 + }, + { + "epoch": 0.15, + "learning_rate": 5.500923624852552e-05, + "loss": 1.5766, + "step": 246000 + }, + { + "epoch": 0.15, + "learning_rate": 5.500713628296496e-05, + "loss": 1.5508, + "step": 246500 + }, + { + "epoch": 0.15, + "learning_rate": 5.500503631740439e-05, + "loss": 1.5527, + "step": 247000 + }, + { + "epoch": 0.15, + "learning_rate": 5.5002936351843825e-05, + "loss": 1.5564, + "step": 247500 + }, + { + "epoch": 0.15, + "learning_rate": 5.5000836386283266e-05, + "loss": 1.5638, + "step": 248000 + }, + { + "epoch": 0.15, + "learning_rate": 5.49987364207227e-05, + "loss": 1.5471, + "step": 248500 + }, + { + "epoch": 0.15, + "learning_rate": 5.499663645516213e-05, + "loss": 1.5921, + "step": 249000 + }, + { + "epoch": 0.15, + "learning_rate": 5.499453648960157e-05, + "loss": 1.5616, + "step": 249500 + }, + { + "epoch": 0.15, + "learning_rate": 5.4992436524041006e-05, + "loss": 1.5572, + "step": 250000 + }, + { + "epoch": 0.15, + "learning_rate": 5.499034075841156e-05, + "loss": 1.5648, + "step": 250500 + }, + { + "epoch": 0.15, + "learning_rate": 5.4988240792851e-05, + "loss": 1.5501, + "step": 251000 + }, + { + "epoch": 0.15, + "learning_rate": 5.4986145027221554e-05, + "loss": 1.6062, + "step": 251500 + }, + { + "epoch": 0.15, + "learning_rate": 5.498404506166099e-05, + "loss": 1.5563, + "step": 252000 + }, + { + "epoch": 0.15, + "learning_rate": 5.498194509610042e-05, + "loss": 1.5646, + "step": 252500 + }, + { + "epoch": 0.15, + "learning_rate": 5.497984513053986e-05, + "loss": 1.5585, + "step": 253000 + }, + { + "epoch": 0.15, + "learning_rate": 5.4977745164979294e-05, + "loss": 1.5076, + "step": 253500 + }, + { + "epoch": 0.15, + "learning_rate": 5.497564519941873e-05, + "loss": 1.5712, + "step": 254000 + }, + { + "epoch": 0.15, + "learning_rate": 5.497354523385817e-05, + "loss": 1.5462, + "step": 254500 + }, + { + "epoch": 0.15, + "learning_rate": 5.49714452682976e-05, + "loss": 1.5614, + "step": 255000 + }, + { + "epoch": 0.15, + "learning_rate": 5.4969349502668155e-05, + "loss": 1.5738, + "step": 255500 + }, + { + "epoch": 0.15, + "learning_rate": 5.496724953710759e-05, + "loss": 1.5401, + "step": 256000 + }, + { + "epoch": 0.15, + "learning_rate": 5.496514957154703e-05, + "loss": 1.548, + "step": 256500 + }, + { + "epoch": 0.15, + "learning_rate": 5.496304960598646e-05, + "loss": 1.5519, + "step": 257000 + }, + { + "epoch": 0.15, + "learning_rate": 5.4960953840357016e-05, + "loss": 1.5678, + "step": 257500 + }, + { + "epoch": 0.15, + "learning_rate": 5.495885807472757e-05, + "loss": 1.5166, + "step": 258000 + }, + { + "epoch": 0.15, + "learning_rate": 5.495675810916701e-05, + "loss": 1.5571, + "step": 258500 + }, + { + "epoch": 0.16, + "learning_rate": 5.495465814360644e-05, + "loss": 1.5538, + "step": 259000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4952558178045876e-05, + "loss": 1.5246, + "step": 259500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4950458212485317e-05, + "loss": 1.5855, + "step": 260000 + }, + { + "epoch": 0.16, + "learning_rate": 5.494835824692475e-05, + "loss": 1.5427, + "step": 260500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4946258281364184e-05, + "loss": 1.5513, + "step": 261000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4944158315803624e-05, + "loss": 1.5516, + "step": 261500 + }, + { + "epoch": 0.16, + "learning_rate": 5.494205835024306e-05, + "loss": 1.5543, + "step": 262000 + }, + { + "epoch": 0.16, + "learning_rate": 5.493995838468249e-05, + "loss": 1.5701, + "step": 262500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4937862619053044e-05, + "loss": 1.5495, + "step": 263000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4935762653492484e-05, + "loss": 1.5427, + "step": 263500 + }, + { + "epoch": 0.16, + "learning_rate": 5.493366268793192e-05, + "loss": 1.5414, + "step": 264000 + }, + { + "epoch": 0.16, + "learning_rate": 5.493156272237135e-05, + "loss": 1.5612, + "step": 264500 + }, + { + "epoch": 0.16, + "learning_rate": 5.492946275681079e-05, + "loss": 1.5663, + "step": 265000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4927366991181345e-05, + "loss": 1.5413, + "step": 265500 + }, + { + "epoch": 0.16, + "learning_rate": 5.492526702562078e-05, + "loss": 1.5568, + "step": 266000 + }, + { + "epoch": 0.16, + "learning_rate": 5.492316706006022e-05, + "loss": 1.5595, + "step": 266500 + }, + { + "epoch": 0.16, + "learning_rate": 5.492106709449965e-05, + "loss": 1.5211, + "step": 267000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4918967128939086e-05, + "loss": 1.5112, + "step": 267500 + }, + { + "epoch": 0.16, + "learning_rate": 5.491686716337852e-05, + "loss": 1.5416, + "step": 268000 + }, + { + "epoch": 0.16, + "learning_rate": 5.491476719781795e-05, + "loss": 1.5689, + "step": 268500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4912667232257386e-05, + "loss": 1.5339, + "step": 269000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4910567266696826e-05, + "loss": 1.5484, + "step": 269500 + }, + { + "epoch": 0.16, + "learning_rate": 5.490847150106739e-05, + "loss": 1.5524, + "step": 270000 + }, + { + "epoch": 0.16, + "learning_rate": 5.490637573543794e-05, + "loss": 1.5528, + "step": 270500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4904279969808494e-05, + "loss": 1.5559, + "step": 271000 + }, + { + "epoch": 0.16, + "learning_rate": 5.490218000424793e-05, + "loss": 1.5594, + "step": 271500 + }, + { + "epoch": 0.16, + "learning_rate": 5.490008003868737e-05, + "loss": 1.5377, + "step": 272000 + }, + { + "epoch": 0.16, + "learning_rate": 5.48979800731268e-05, + "loss": 1.537, + "step": 272500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4895880107566235e-05, + "loss": 1.5462, + "step": 273000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4893780142005675e-05, + "loss": 1.5742, + "step": 273500 + }, + { + "epoch": 0.16, + "learning_rate": 5.489168017644511e-05, + "loss": 1.5052, + "step": 274000 + }, + { + "epoch": 0.16, + "learning_rate": 5.488958021088454e-05, + "loss": 1.583, + "step": 274500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4887480245323975e-05, + "loss": 1.5206, + "step": 275000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4885384479694536e-05, + "loss": 1.576, + "step": 275500 + }, + { + "epoch": 0.17, + "learning_rate": 5.488328451413397e-05, + "loss": 1.5534, + "step": 276000 + }, + { + "epoch": 0.17, + "learning_rate": 5.488118874850452e-05, + "loss": 1.5095, + "step": 276500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4879088782943956e-05, + "loss": 1.5479, + "step": 277000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4876988817383396e-05, + "loss": 1.5154, + "step": 277500 + }, + { + "epoch": 0.17, + "learning_rate": 5.487488885182283e-05, + "loss": 1.5269, + "step": 278000 + }, + { + "epoch": 0.17, + "learning_rate": 5.487278888626226e-05, + "loss": 1.5386, + "step": 278500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4870688920701703e-05, + "loss": 1.5105, + "step": 279000 + }, + { + "epoch": 0.17, + "learning_rate": 5.486858895514114e-05, + "loss": 1.5905, + "step": 279500 + }, + { + "epoch": 0.17, + "learning_rate": 5.486648898958057e-05, + "loss": 1.5191, + "step": 280000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4864389024020004e-05, + "loss": 1.5372, + "step": 280500 + }, + { + "epoch": 0.17, + "learning_rate": 5.486228905845944e-05, + "loss": 1.5744, + "step": 281000 + }, + { + "epoch": 0.17, + "learning_rate": 5.486018909289888e-05, + "loss": 1.5372, + "step": 281500 + }, + { + "epoch": 0.17, + "learning_rate": 5.485809332726944e-05, + "loss": 1.5391, + "step": 282000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4855993361708864e-05, + "loss": 1.5146, + "step": 282500 + }, + { + "epoch": 0.17, + "learning_rate": 5.48538933961483e-05, + "loss": 1.522, + "step": 283000 + }, + { + "epoch": 0.17, + "learning_rate": 5.485179343058774e-05, + "loss": 1.5382, + "step": 283500 + }, + { + "epoch": 0.17, + "learning_rate": 5.48496976649583e-05, + "loss": 1.5458, + "step": 284000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4847597699397725e-05, + "loss": 1.5505, + "step": 284500 + }, + { + "epoch": 0.17, + "learning_rate": 5.484549773383716e-05, + "loss": 1.5513, + "step": 285000 + }, + { + "epoch": 0.17, + "learning_rate": 5.48433977682766e-05, + "loss": 1.5208, + "step": 285500 + }, + { + "epoch": 0.17, + "learning_rate": 5.484129780271603e-05, + "loss": 1.5379, + "step": 286000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4839197837155466e-05, + "loss": 1.5663, + "step": 286500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4837097871594906e-05, + "loss": 1.5263, + "step": 287000 + }, + { + "epoch": 0.17, + "learning_rate": 5.483499790603434e-05, + "loss": 1.5318, + "step": 287500 + }, + { + "epoch": 0.17, + "learning_rate": 5.483289794047377e-05, + "loss": 1.5053, + "step": 288000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4830806374775453e-05, + "loss": 1.528, + "step": 288500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4828706409214894e-05, + "loss": 1.5249, + "step": 289000 + }, + { + "epoch": 0.17, + "learning_rate": 5.482660644365432e-05, + "loss": 1.5616, + "step": 289500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4824506478093754e-05, + "loss": 1.5175, + "step": 290000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4822406512533194e-05, + "loss": 1.5088, + "step": 290500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4820310746903754e-05, + "loss": 1.5718, + "step": 291000 + }, + { + "epoch": 0.17, + "learning_rate": 5.481821078134319e-05, + "loss": 1.5678, + "step": 291500 + }, + { + "epoch": 0.18, + "learning_rate": 5.4816110815782615e-05, + "loss": 1.5313, + "step": 292000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4814010850222055e-05, + "loss": 1.5149, + "step": 292500 + }, + { + "epoch": 0.18, + "learning_rate": 5.481191088466149e-05, + "loss": 1.542, + "step": 293000 + }, + { + "epoch": 0.18, + "learning_rate": 5.480981091910092e-05, + "loss": 1.5365, + "step": 293500 + }, + { + "epoch": 0.18, + "learning_rate": 5.480771095354036e-05, + "loss": 1.5118, + "step": 294000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4805610987979795e-05, + "loss": 1.5491, + "step": 294500 + }, + { + "epoch": 0.18, + "learning_rate": 5.480351102241923e-05, + "loss": 1.5336, + "step": 295000 + }, + { + "epoch": 0.18, + "learning_rate": 5.480141105685867e-05, + "loss": 1.5175, + "step": 295500 + }, + { + "epoch": 0.18, + "learning_rate": 5.479931529122922e-05, + "loss": 1.5407, + "step": 296000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4797215325668656e-05, + "loss": 1.5076, + "step": 296500 + }, + { + "epoch": 0.18, + "learning_rate": 5.479511956003921e-05, + "loss": 1.5337, + "step": 297000 + }, + { + "epoch": 0.18, + "learning_rate": 5.479301959447865e-05, + "loss": 1.5312, + "step": 297500 + }, + { + "epoch": 0.18, + "learning_rate": 5.4790919628918083e-05, + "loss": 1.5084, + "step": 298000 + }, + { + "epoch": 0.18, + "learning_rate": 5.478881966335752e-05, + "loss": 1.5111, + "step": 298500 + }, + { + "epoch": 0.18, + "learning_rate": 5.478671969779696e-05, + "loss": 1.5434, + "step": 299000 + }, + { + "epoch": 0.18, + "learning_rate": 5.478461973223639e-05, + "loss": 1.5214, + "step": 299500 + }, + { + "epoch": 0.18, + "learning_rate": 5.4782519766675824e-05, + "loss": 1.54, + "step": 300000 + }, + { + "epoch": 0.18, + "eval_loss": 1.45011568069458, + "eval_runtime": 1097.0232, + "eval_samples_per_second": 480.136, + "eval_steps_per_second": 80.023, + "step": 300000 + }, + { + "epoch": 0.18, + "learning_rate": 5.478042400104638e-05, + "loss": 1.5421, + "step": 300500 + }, + { + "epoch": 0.18, + "learning_rate": 5.477832403548582e-05, + "loss": 1.5362, + "step": 301000 + }, + { + "epoch": 0.18, + "learning_rate": 5.477622406992525e-05, + "loss": 1.5563, + "step": 301500 + }, + { + "epoch": 0.18, + "learning_rate": 5.4774124104364685e-05, + "loss": 1.5506, + "step": 302000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4772024138804125e-05, + "loss": 1.5259, + "step": 302500 + }, + { + "epoch": 0.18, + "learning_rate": 5.476992417324356e-05, + "loss": 1.5519, + "step": 303000 + }, + { + "epoch": 0.18, + "learning_rate": 5.476782840761411e-05, + "loss": 1.5289, + "step": 303500 + }, + { + "epoch": 0.18, + "learning_rate": 5.476572844205355e-05, + "loss": 1.536, + "step": 304000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4763628476492986e-05, + "loss": 1.5506, + "step": 304500 + }, + { + "epoch": 0.18, + "learning_rate": 5.476152851093242e-05, + "loss": 1.4987, + "step": 305000 + }, + { + "epoch": 0.18, + "learning_rate": 5.475942854537186e-05, + "loss": 1.5034, + "step": 305500 + }, + { + "epoch": 0.18, + "learning_rate": 5.475732857981129e-05, + "loss": 1.5534, + "step": 306000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4755228614250726e-05, + "loss": 1.5103, + "step": 306500 + }, + { + "epoch": 0.18, + "learning_rate": 5.475312864869016e-05, + "loss": 1.5517, + "step": 307000 + }, + { + "epoch": 0.18, + "learning_rate": 5.475103288306072e-05, + "loss": 1.5442, + "step": 307500 + }, + { + "epoch": 0.18, + "learning_rate": 5.4748937117431274e-05, + "loss": 1.5199, + "step": 308000 + }, + { + "epoch": 0.18, + "learning_rate": 5.474683715187071e-05, + "loss": 1.4951, + "step": 308500 + }, + { + "epoch": 0.19, + "learning_rate": 5.474473718631015e-05, + "loss": 1.5294, + "step": 309000 + }, + { + "epoch": 0.19, + "learning_rate": 5.474263722074958e-05, + "loss": 1.5289, + "step": 309500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4740541455120134e-05, + "loss": 1.4918, + "step": 310000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4738445689490695e-05, + "loss": 1.5376, + "step": 310500 + }, + { + "epoch": 0.19, + "learning_rate": 5.473634572393012e-05, + "loss": 1.5349, + "step": 311000 + }, + { + "epoch": 0.19, + "learning_rate": 5.473424575836956e-05, + "loss": 1.4939, + "step": 311500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4732145792808995e-05, + "loss": 1.5329, + "step": 312000 + }, + { + "epoch": 0.19, + "learning_rate": 5.473004582724843e-05, + "loss": 1.5036, + "step": 312500 + }, + { + "epoch": 0.19, + "learning_rate": 5.472794586168787e-05, + "loss": 1.5368, + "step": 313000 + }, + { + "epoch": 0.19, + "learning_rate": 5.47258458961273e-05, + "loss": 1.5455, + "step": 313500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4723745930566736e-05, + "loss": 1.5161, + "step": 314000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4721645965006176e-05, + "loss": 1.4974, + "step": 314500 + }, + { + "epoch": 0.19, + "learning_rate": 5.471954599944561e-05, + "loss": 1.5229, + "step": 315000 + }, + { + "epoch": 0.19, + "learning_rate": 5.471744603388504e-05, + "loss": 1.5048, + "step": 315500 + }, + { + "epoch": 0.19, + "learning_rate": 5.471534606832448e-05, + "loss": 1.551, + "step": 316000 + }, + { + "epoch": 0.19, + "learning_rate": 5.471324610276391e-05, + "loss": 1.5474, + "step": 316500 + }, + { + "epoch": 0.19, + "learning_rate": 5.471114613720335e-05, + "loss": 1.5466, + "step": 317000 + }, + { + "epoch": 0.19, + "learning_rate": 5.470905037157391e-05, + "loss": 1.5042, + "step": 317500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4706950406013344e-05, + "loss": 1.5298, + "step": 318000 + }, + { + "epoch": 0.19, + "learning_rate": 5.470485044045278e-05, + "loss": 1.5123, + "step": 318500 + }, + { + "epoch": 0.19, + "learning_rate": 5.470275047489221e-05, + "loss": 1.5178, + "step": 319000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4700650509331644e-05, + "loss": 1.5563, + "step": 319500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4698554743702205e-05, + "loss": 1.5241, + "step": 320000 + }, + { + "epoch": 0.19, + "learning_rate": 5.469645477814164e-05, + "loss": 1.5211, + "step": 320500 + }, + { + "epoch": 0.19, + "learning_rate": 5.469435901251219e-05, + "loss": 1.5086, + "step": 321000 + }, + { + "epoch": 0.19, + "learning_rate": 5.469225904695163e-05, + "loss": 1.5145, + "step": 321500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4690159081391065e-05, + "loss": 1.5319, + "step": 322000 + }, + { + "epoch": 0.19, + "learning_rate": 5.46880591158305e-05, + "loss": 1.5272, + "step": 322500 + }, + { + "epoch": 0.19, + "learning_rate": 5.468595915026994e-05, + "loss": 1.5315, + "step": 323000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4683859184709366e-05, + "loss": 1.5158, + "step": 323500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4681759219148806e-05, + "loss": 1.5042, + "step": 324000 + }, + { + "epoch": 0.19, + "learning_rate": 5.467965925358824e-05, + "loss": 1.5135, + "step": 324500 + }, + { + "epoch": 0.19, + "learning_rate": 5.46775634879588e-05, + "loss": 1.5164, + "step": 325000 + }, + { + "epoch": 0.2, + "learning_rate": 5.467546352239823e-05, + "loss": 1.578, + "step": 325500 + }, + { + "epoch": 0.2, + "learning_rate": 5.467336355683767e-05, + "loss": 1.5135, + "step": 326000 + }, + { + "epoch": 0.2, + "learning_rate": 5.46712635912771e-05, + "loss": 1.5154, + "step": 326500 + }, + { + "epoch": 0.2, + "learning_rate": 5.4669163625716534e-05, + "loss": 1.5164, + "step": 327000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4667063660155974e-05, + "loss": 1.5277, + "step": 327500 + }, + { + "epoch": 0.2, + "learning_rate": 5.466496369459541e-05, + "loss": 1.5319, + "step": 328000 + }, + { + "epoch": 0.2, + "learning_rate": 5.466286372903484e-05, + "loss": 1.517, + "step": 328500 + }, + { + "epoch": 0.2, + "learning_rate": 5.4660767963405394e-05, + "loss": 1.5135, + "step": 329000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4658667997844835e-05, + "loss": 1.5282, + "step": 329500 + }, + { + "epoch": 0.2, + "learning_rate": 5.465656803228427e-05, + "loss": 1.5398, + "step": 330000 + }, + { + "epoch": 0.2, + "learning_rate": 5.465447226665483e-05, + "loss": 1.5406, + "step": 330500 + }, + { + "epoch": 0.2, + "learning_rate": 5.465237230109426e-05, + "loss": 1.4958, + "step": 331000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4650272335533695e-05, + "loss": 1.5157, + "step": 331500 + }, + { + "epoch": 0.2, + "learning_rate": 5.464817236997313e-05, + "loss": 1.5218, + "step": 332000 + }, + { + "epoch": 0.2, + "learning_rate": 5.464607660434369e-05, + "loss": 1.5262, + "step": 332500 + }, + { + "epoch": 0.2, + "learning_rate": 5.464397663878312e-05, + "loss": 1.5212, + "step": 333000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4641876673222556e-05, + "loss": 1.5404, + "step": 333500 + }, + { + "epoch": 0.2, + "learning_rate": 5.463977670766199e-05, + "loss": 1.5006, + "step": 334000 + }, + { + "epoch": 0.2, + "learning_rate": 5.463767674210143e-05, + "loss": 1.5156, + "step": 334500 + }, + { + "epoch": 0.2, + "learning_rate": 5.463558097647199e-05, + "loss": 1.5274, + "step": 335000 + }, + { + "epoch": 0.2, + "learning_rate": 5.463348101091142e-05, + "loss": 1.5069, + "step": 335500 + }, + { + "epoch": 0.2, + "learning_rate": 5.463138104535085e-05, + "loss": 1.5011, + "step": 336000 + }, + { + "epoch": 0.2, + "learning_rate": 5.462928107979029e-05, + "loss": 1.5012, + "step": 336500 + }, + { + "epoch": 0.2, + "learning_rate": 5.462718531416085e-05, + "loss": 1.4935, + "step": 337000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4625085348600284e-05, + "loss": 1.5293, + "step": 337500 + }, + { + "epoch": 0.2, + "learning_rate": 5.462298538303972e-05, + "loss": 1.4987, + "step": 338000 + }, + { + "epoch": 0.2, + "learning_rate": 5.462088961741028e-05, + "loss": 1.5118, + "step": 338500 + }, + { + "epoch": 0.2, + "learning_rate": 5.461878965184971e-05, + "loss": 1.5351, + "step": 339000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4616689686289145e-05, + "loss": 1.4981, + "step": 339500 + }, + { + "epoch": 0.2, + "learning_rate": 5.46145939206597e-05, + "loss": 1.4874, + "step": 340000 + }, + { + "epoch": 0.2, + "learning_rate": 5.461249395509914e-05, + "loss": 1.5241, + "step": 340500 + }, + { + "epoch": 0.2, + "learning_rate": 5.461039398953857e-05, + "loss": 1.5351, + "step": 341000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4608294023978006e-05, + "loss": 1.5116, + "step": 341500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4606194058417446e-05, + "loss": 1.4991, + "step": 342000 + }, + { + "epoch": 0.21, + "learning_rate": 5.460409409285687e-05, + "loss": 1.5235, + "step": 342500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4601994127296306e-05, + "loss": 1.4995, + "step": 343000 + }, + { + "epoch": 0.21, + "learning_rate": 5.4599898361666866e-05, + "loss": 1.4979, + "step": 343500 + }, + { + "epoch": 0.21, + "learning_rate": 5.459779839610631e-05, + "loss": 1.4825, + "step": 344000 + }, + { + "epoch": 0.21, + "learning_rate": 5.459569843054574e-05, + "loss": 1.527, + "step": 344500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4593598464985174e-05, + "loss": 1.5359, + "step": 345000 + }, + { + "epoch": 0.21, + "learning_rate": 5.4591502699355734e-05, + "loss": 1.4982, + "step": 345500 + }, + { + "epoch": 0.21, + "learning_rate": 5.458940273379517e-05, + "loss": 1.5081, + "step": 346000 + }, + { + "epoch": 0.21, + "learning_rate": 5.45873027682346e-05, + "loss": 1.5767, + "step": 346500 + }, + { + "epoch": 0.21, + "learning_rate": 5.458520280267404e-05, + "loss": 1.5586, + "step": 347000 + }, + { + "epoch": 0.21, + "learning_rate": 5.458310283711347e-05, + "loss": 1.4968, + "step": 347500 + }, + { + "epoch": 0.21, + "learning_rate": 5.45810028715529e-05, + "loss": 1.5282, + "step": 348000 + }, + { + "epoch": 0.21, + "learning_rate": 5.457890290599234e-05, + "loss": 1.5299, + "step": 348500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4576802940431775e-05, + "loss": 1.5139, + "step": 349000 + }, + { + "epoch": 0.21, + "learning_rate": 5.457470297487121e-05, + "loss": 1.5233, + "step": 349500 + }, + { + "epoch": 0.21, + "learning_rate": 5.457260300931065e-05, + "loss": 1.5259, + "step": 350000 + }, + { + "epoch": 0.21, + "learning_rate": 5.457050304375008e-05, + "loss": 1.5051, + "step": 350500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4568403078189516e-05, + "loss": 1.5282, + "step": 351000 + }, + { + "epoch": 0.21, + "learning_rate": 5.4566303112628956e-05, + "loss": 1.4965, + "step": 351500 + }, + { + "epoch": 0.21, + "learning_rate": 5.456420734699951e-05, + "loss": 1.5031, + "step": 352000 + }, + { + "epoch": 0.21, + "learning_rate": 5.456210738143894e-05, + "loss": 1.5164, + "step": 352500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4560011615809496e-05, + "loss": 1.5002, + "step": 353000 + }, + { + "epoch": 0.21, + "learning_rate": 5.455791165024894e-05, + "loss": 1.5069, + "step": 353500 + }, + { + "epoch": 0.21, + "learning_rate": 5.455581168468837e-05, + "loss": 1.551, + "step": 354000 + }, + { + "epoch": 0.21, + "learning_rate": 5.4553711719127804e-05, + "loss": 1.5206, + "step": 354500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4551611753567244e-05, + "loss": 1.505, + "step": 355000 + }, + { + "epoch": 0.21, + "learning_rate": 5.454951178800668e-05, + "loss": 1.4968, + "step": 355500 + }, + { + "epoch": 0.21, + "learning_rate": 5.454741182244611e-05, + "loss": 1.5227, + "step": 356000 + }, + { + "epoch": 0.21, + "learning_rate": 5.454531185688555e-05, + "loss": 1.5417, + "step": 356500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4543211891324984e-05, + "loss": 1.4893, + "step": 357000 + }, + { + "epoch": 0.21, + "learning_rate": 5.454111612569554e-05, + "loss": 1.4669, + "step": 357500 + }, + { + "epoch": 0.21, + "learning_rate": 5.453901616013497e-05, + "loss": 1.4808, + "step": 358000 + }, + { + "epoch": 0.21, + "learning_rate": 5.453691619457441e-05, + "loss": 1.4919, + "step": 358500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4534816229013845e-05, + "loss": 1.5133, + "step": 359000 + }, + { + "epoch": 0.22, + "learning_rate": 5.453272466331552e-05, + "loss": 1.5113, + "step": 359500 + }, + { + "epoch": 0.22, + "learning_rate": 5.453062889768608e-05, + "loss": 1.4929, + "step": 360000 + }, + { + "epoch": 0.22, + "learning_rate": 5.452852893212551e-05, + "loss": 1.5048, + "step": 360500 + }, + { + "epoch": 0.22, + "learning_rate": 5.452642896656495e-05, + "loss": 1.5149, + "step": 361000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4524329001004386e-05, + "loss": 1.523, + "step": 361500 + }, + { + "epoch": 0.22, + "learning_rate": 5.452222903544381e-05, + "loss": 1.5208, + "step": 362000 + }, + { + "epoch": 0.22, + "learning_rate": 5.452013326981437e-05, + "loss": 1.5038, + "step": 362500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4518033304253814e-05, + "loss": 1.5023, + "step": 363000 + }, + { + "epoch": 0.22, + "learning_rate": 5.451593333869325e-05, + "loss": 1.507, + "step": 363500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4513833373132674e-05, + "loss": 1.508, + "step": 364000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4511733407572114e-05, + "loss": 1.506, + "step": 364500 + }, + { + "epoch": 0.22, + "learning_rate": 5.450963344201155e-05, + "loss": 1.5085, + "step": 365000 + }, + { + "epoch": 0.22, + "learning_rate": 5.450753767638211e-05, + "loss": 1.5063, + "step": 365500 + }, + { + "epoch": 0.22, + "learning_rate": 5.450543771082154e-05, + "loss": 1.5293, + "step": 366000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4503337745260975e-05, + "loss": 1.4851, + "step": 366500 + }, + { + "epoch": 0.22, + "learning_rate": 5.450123777970041e-05, + "loss": 1.5298, + "step": 367000 + }, + { + "epoch": 0.22, + "learning_rate": 5.449913781413985e-05, + "loss": 1.4931, + "step": 367500 + }, + { + "epoch": 0.22, + "learning_rate": 5.449703784857928e-05, + "loss": 1.5235, + "step": 368000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4494937883018715e-05, + "loss": 1.5155, + "step": 368500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4492837917458156e-05, + "loss": 1.4876, + "step": 369000 + }, + { + "epoch": 0.22, + "learning_rate": 5.449073795189759e-05, + "loss": 1.5209, + "step": 369500 + }, + { + "epoch": 0.22, + "learning_rate": 5.448864218626814e-05, + "loss": 1.5165, + "step": 370000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4486542220707576e-05, + "loss": 1.5166, + "step": 370500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4484442255147016e-05, + "loss": 1.4801, + "step": 371000 + }, + { + "epoch": 0.22, + "learning_rate": 5.448234648951757e-05, + "loss": 1.4763, + "step": 371500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4480246523957e-05, + "loss": 1.4971, + "step": 372000 + }, + { + "epoch": 0.22, + "learning_rate": 5.447814655839644e-05, + "loss": 1.5724, + "step": 372500 + }, + { + "epoch": 0.22, + "learning_rate": 5.447604659283588e-05, + "loss": 1.5202, + "step": 373000 + }, + { + "epoch": 0.22, + "learning_rate": 5.447394662727531e-05, + "loss": 1.4959, + "step": 373500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4471846661714744e-05, + "loss": 1.525, + "step": 374000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4469746696154184e-05, + "loss": 1.5232, + "step": 374500 + }, + { + "epoch": 0.22, + "learning_rate": 5.446764673059362e-05, + "loss": 1.4812, + "step": 375000 + }, + { + "epoch": 0.23, + "learning_rate": 5.446554676503306e-05, + "loss": 1.5136, + "step": 375500 + }, + { + "epoch": 0.23, + "learning_rate": 5.446345099940361e-05, + "loss": 1.528, + "step": 376000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4461351033843045e-05, + "loss": 1.4843, + "step": 376500 + }, + { + "epoch": 0.23, + "learning_rate": 5.44592552682136e-05, + "loss": 1.5356, + "step": 377000 + }, + { + "epoch": 0.23, + "learning_rate": 5.445715530265303e-05, + "loss": 1.4876, + "step": 377500 + }, + { + "epoch": 0.23, + "learning_rate": 5.445505533709247e-05, + "loss": 1.4893, + "step": 378000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4452955371531906e-05, + "loss": 1.4816, + "step": 378500 + }, + { + "epoch": 0.23, + "learning_rate": 5.445085540597134e-05, + "loss": 1.4766, + "step": 379000 + }, + { + "epoch": 0.23, + "learning_rate": 5.444875544041078e-05, + "loss": 1.5296, + "step": 379500 + }, + { + "epoch": 0.23, + "learning_rate": 5.444665547485021e-05, + "loss": 1.4964, + "step": 380000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4444555509289646e-05, + "loss": 1.4718, + "step": 380500 + }, + { + "epoch": 0.23, + "learning_rate": 5.4442455543729086e-05, + "loss": 1.5037, + "step": 381000 + }, + { + "epoch": 0.23, + "learning_rate": 5.444035977809964e-05, + "loss": 1.5162, + "step": 381500 + }, + { + "epoch": 0.23, + "learning_rate": 5.4438259812539073e-05, + "loss": 1.468, + "step": 382000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4436159846978514e-05, + "loss": 1.5389, + "step": 382500 + }, + { + "epoch": 0.23, + "learning_rate": 5.443405988141795e-05, + "loss": 1.4856, + "step": 383000 + }, + { + "epoch": 0.23, + "learning_rate": 5.44319641157885e-05, + "loss": 1.518, + "step": 383500 + }, + { + "epoch": 0.23, + "learning_rate": 5.4429864150227934e-05, + "loss": 1.5001, + "step": 384000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4427764184667374e-05, + "loss": 1.5253, + "step": 384500 + }, + { + "epoch": 0.23, + "learning_rate": 5.442566421910681e-05, + "loss": 1.5021, + "step": 385000 + }, + { + "epoch": 0.23, + "learning_rate": 5.442356845347736e-05, + "loss": 1.4786, + "step": 385500 + }, + { + "epoch": 0.23, + "learning_rate": 5.4421468487916795e-05, + "loss": 1.4512, + "step": 386000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4419368522356235e-05, + "loss": 1.4892, + "step": 386500 + }, + { + "epoch": 0.23, + "learning_rate": 5.441726855679567e-05, + "loss": 1.4961, + "step": 387000 + }, + { + "epoch": 0.23, + "learning_rate": 5.44151685912351e-05, + "loss": 1.48, + "step": 387500 + }, + { + "epoch": 0.23, + "learning_rate": 5.441307282560566e-05, + "loss": 1.4948, + "step": 388000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4410972860045096e-05, + "loss": 1.4842, + "step": 388500 + }, + { + "epoch": 0.23, + "learning_rate": 5.440887289448453e-05, + "loss": 1.4811, + "step": 389000 + }, + { + "epoch": 0.23, + "learning_rate": 5.440677292892397e-05, + "loss": 1.4699, + "step": 389500 + }, + { + "epoch": 0.23, + "learning_rate": 5.44046729633634e-05, + "loss": 1.5123, + "step": 390000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4402572997802837e-05, + "loss": 1.4864, + "step": 390500 + }, + { + "epoch": 0.23, + "learning_rate": 5.440047303224227e-05, + "loss": 1.5081, + "step": 391000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4398373066681703e-05, + "loss": 1.4964, + "step": 391500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4396277301052264e-05, + "loss": 1.5164, + "step": 392000 + }, + { + "epoch": 0.24, + "learning_rate": 5.439418153542282e-05, + "loss": 1.4802, + "step": 392500 + }, + { + "epoch": 0.24, + "learning_rate": 5.439208156986225e-05, + "loss": 1.5002, + "step": 393000 + }, + { + "epoch": 0.24, + "learning_rate": 5.438998160430169e-05, + "loss": 1.5082, + "step": 393500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4387881638741125e-05, + "loss": 1.5304, + "step": 394000 + }, + { + "epoch": 0.24, + "learning_rate": 5.438578167318056e-05, + "loss": 1.4962, + "step": 394500 + }, + { + "epoch": 0.24, + "learning_rate": 5.438368590755112e-05, + "loss": 1.4784, + "step": 395000 + }, + { + "epoch": 0.24, + "learning_rate": 5.438158594199055e-05, + "loss": 1.4968, + "step": 395500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4379485976429985e-05, + "loss": 1.4957, + "step": 396000 + }, + { + "epoch": 0.24, + "learning_rate": 5.4377386010869425e-05, + "loss": 1.5053, + "step": 396500 + }, + { + "epoch": 0.24, + "learning_rate": 5.437528604530886e-05, + "loss": 1.5131, + "step": 397000 + }, + { + "epoch": 0.24, + "learning_rate": 5.437318607974829e-05, + "loss": 1.4995, + "step": 397500 + }, + { + "epoch": 0.24, + "learning_rate": 5.437108611418773e-05, + "loss": 1.4867, + "step": 398000 + }, + { + "epoch": 0.24, + "learning_rate": 5.436898614862716e-05, + "loss": 1.4864, + "step": 398500 + }, + { + "epoch": 0.24, + "learning_rate": 5.436688618306659e-05, + "loss": 1.485, + "step": 399000 + }, + { + "epoch": 0.24, + "learning_rate": 5.436478621750603e-05, + "loss": 1.5123, + "step": 399500 + }, + { + "epoch": 0.24, + "learning_rate": 5.436269045187659e-05, + "loss": 1.5177, + "step": 400000 + }, + { + "epoch": 0.24, + "eval_loss": 1.4208892583847046, + "eval_runtime": 1102.0508, + "eval_samples_per_second": 477.945, + "eval_steps_per_second": 79.658, + "step": 400000 + }, + { + "epoch": 0.24, + "learning_rate": 5.436059048631602e-05, + "loss": 1.4802, + "step": 400500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4358490520755453e-05, + "loss": 1.4977, + "step": 401000 + }, + { + "epoch": 0.24, + "learning_rate": 5.4356394755126014e-05, + "loss": 1.5022, + "step": 401500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4354294789565454e-05, + "loss": 1.4652, + "step": 402000 + }, + { + "epoch": 0.24, + "learning_rate": 5.435219902393601e-05, + "loss": 1.4737, + "step": 402500 + }, + { + "epoch": 0.24, + "learning_rate": 5.435009905837544e-05, + "loss": 1.5345, + "step": 403000 + }, + { + "epoch": 0.24, + "learning_rate": 5.434799909281488e-05, + "loss": 1.5055, + "step": 403500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4345899127254315e-05, + "loss": 1.5234, + "step": 404000 + }, + { + "epoch": 0.24, + "learning_rate": 5.434379916169375e-05, + "loss": 1.4716, + "step": 404500 + }, + { + "epoch": 0.24, + "learning_rate": 5.434169919613319e-05, + "loss": 1.4981, + "step": 405000 + }, + { + "epoch": 0.24, + "learning_rate": 5.4339599230572615e-05, + "loss": 1.5184, + "step": 405500 + }, + { + "epoch": 0.24, + "learning_rate": 5.433749926501205e-05, + "loss": 1.4997, + "step": 406000 + }, + { + "epoch": 0.24, + "learning_rate": 5.433540349938261e-05, + "loss": 1.4904, + "step": 406500 + }, + { + "epoch": 0.24, + "learning_rate": 5.433330353382205e-05, + "loss": 1.476, + "step": 407000 + }, + { + "epoch": 0.24, + "learning_rate": 5.433120356826148e-05, + "loss": 1.4736, + "step": 407500 + }, + { + "epoch": 0.24, + "learning_rate": 5.432910360270091e-05, + "loss": 1.5078, + "step": 408000 + }, + { + "epoch": 0.24, + "learning_rate": 5.432700783707147e-05, + "loss": 1.4695, + "step": 408500 + }, + { + "epoch": 0.25, + "learning_rate": 5.432491207144203e-05, + "loss": 1.5021, + "step": 409000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4322812105881464e-05, + "loss": 1.5098, + "step": 409500 + }, + { + "epoch": 0.25, + "learning_rate": 5.43207121403209e-05, + "loss": 1.491, + "step": 410000 + }, + { + "epoch": 0.25, + "learning_rate": 5.431861217476034e-05, + "loss": 1.4809, + "step": 410500 + }, + { + "epoch": 0.25, + "learning_rate": 5.431651220919977e-05, + "loss": 1.4879, + "step": 411000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4314412243639204e-05, + "loss": 1.5112, + "step": 411500 + }, + { + "epoch": 0.25, + "learning_rate": 5.4312312278078644e-05, + "loss": 1.5208, + "step": 412000 + }, + { + "epoch": 0.25, + "learning_rate": 5.431021231251807e-05, + "loss": 1.4875, + "step": 412500 + }, + { + "epoch": 0.25, + "learning_rate": 5.4308112346957505e-05, + "loss": 1.4839, + "step": 413000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4306012381396945e-05, + "loss": 1.5076, + "step": 413500 + }, + { + "epoch": 0.25, + "learning_rate": 5.430391241583638e-05, + "loss": 1.5046, + "step": 414000 + }, + { + "epoch": 0.25, + "learning_rate": 5.430181245027581e-05, + "loss": 1.5053, + "step": 414500 + }, + { + "epoch": 0.25, + "learning_rate": 5.429971248471525e-05, + "loss": 1.479, + "step": 415000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4297616719085805e-05, + "loss": 1.5168, + "step": 415500 + }, + { + "epoch": 0.25, + "learning_rate": 5.429551675352524e-05, + "loss": 1.46, + "step": 416000 + }, + { + "epoch": 0.25, + "learning_rate": 5.429341678796467e-05, + "loss": 1.4813, + "step": 416500 + }, + { + "epoch": 0.25, + "learning_rate": 5.429131682240411e-05, + "loss": 1.4848, + "step": 417000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4289221056774666e-05, + "loss": 1.504, + "step": 417500 + }, + { + "epoch": 0.25, + "learning_rate": 5.42871210912141e-05, + "loss": 1.4767, + "step": 418000 + }, + { + "epoch": 0.25, + "learning_rate": 5.428502532558466e-05, + "loss": 1.4702, + "step": 418500 + }, + { + "epoch": 0.25, + "learning_rate": 5.42829253600241e-05, + "loss": 1.5099, + "step": 419000 + }, + { + "epoch": 0.25, + "learning_rate": 5.428082539446353e-05, + "loss": 1.4795, + "step": 419500 + }, + { + "epoch": 0.25, + "learning_rate": 5.427872962883409e-05, + "loss": 1.5049, + "step": 420000 + }, + { + "epoch": 0.25, + "learning_rate": 5.427662966327352e-05, + "loss": 1.5336, + "step": 420500 + }, + { + "epoch": 0.25, + "learning_rate": 5.427452969771296e-05, + "loss": 1.5044, + "step": 421000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4272429732152394e-05, + "loss": 1.5018, + "step": 421500 + }, + { + "epoch": 0.25, + "learning_rate": 5.427032976659182e-05, + "loss": 1.4736, + "step": 422000 + }, + { + "epoch": 0.25, + "learning_rate": 5.426822980103126e-05, + "loss": 1.4819, + "step": 422500 + }, + { + "epoch": 0.25, + "learning_rate": 5.4266129835470695e-05, + "loss": 1.4975, + "step": 423000 + }, + { + "epoch": 0.25, + "learning_rate": 5.426402986991013e-05, + "loss": 1.497, + "step": 423500 + }, + { + "epoch": 0.25, + "learning_rate": 5.426192990434957e-05, + "loss": 1.4882, + "step": 424000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4259829938789e-05, + "loss": 1.4715, + "step": 424500 + }, + { + "epoch": 0.25, + "learning_rate": 5.4257729973228435e-05, + "loss": 1.4678, + "step": 425000 + }, + { + "epoch": 0.26, + "learning_rate": 5.4255630007667876e-05, + "loss": 1.5004, + "step": 425500 + }, + { + "epoch": 0.26, + "learning_rate": 5.425353004210731e-05, + "loss": 1.4857, + "step": 426000 + }, + { + "epoch": 0.26, + "learning_rate": 5.425143007654674e-05, + "loss": 1.4719, + "step": 426500 + }, + { + "epoch": 0.26, + "learning_rate": 5.424933011098618e-05, + "loss": 1.5231, + "step": 427000 + }, + { + "epoch": 0.26, + "learning_rate": 5.424723014542561e-05, + "loss": 1.51, + "step": 427500 + }, + { + "epoch": 0.26, + "learning_rate": 5.424513437979617e-05, + "loss": 1.4926, + "step": 428000 + }, + { + "epoch": 0.26, + "learning_rate": 5.424303441423561e-05, + "loss": 1.4765, + "step": 428500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4240934448675044e-05, + "loss": 1.4609, + "step": 429000 + }, + { + "epoch": 0.26, + "learning_rate": 5.42388386830456e-05, + "loss": 1.5168, + "step": 429500 + }, + { + "epoch": 0.26, + "learning_rate": 5.423674291741615e-05, + "loss": 1.481, + "step": 430000 + }, + { + "epoch": 0.26, + "learning_rate": 5.4234642951855584e-05, + "loss": 1.4786, + "step": 430500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4232542986295024e-05, + "loss": 1.4847, + "step": 431000 + }, + { + "epoch": 0.26, + "learning_rate": 5.423044302073446e-05, + "loss": 1.4862, + "step": 431500 + }, + { + "epoch": 0.26, + "learning_rate": 5.422834305517389e-05, + "loss": 1.4737, + "step": 432000 + }, + { + "epoch": 0.26, + "learning_rate": 5.422624308961333e-05, + "loss": 1.4638, + "step": 432500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4224143124052765e-05, + "loss": 1.4685, + "step": 433000 + }, + { + "epoch": 0.26, + "learning_rate": 5.4222043158492205e-05, + "loss": 1.5195, + "step": 433500 + }, + { + "epoch": 0.26, + "learning_rate": 5.421994319293164e-05, + "loss": 1.5087, + "step": 434000 + }, + { + "epoch": 0.26, + "learning_rate": 5.421784322737107e-05, + "loss": 1.469, + "step": 434500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4215743261810506e-05, + "loss": 1.4829, + "step": 435000 + }, + { + "epoch": 0.26, + "learning_rate": 5.421364329624994e-05, + "loss": 1.4669, + "step": 435500 + }, + { + "epoch": 0.26, + "learning_rate": 5.421154333068937e-05, + "loss": 1.4557, + "step": 436000 + }, + { + "epoch": 0.26, + "learning_rate": 5.420945176499105e-05, + "loss": 1.5271, + "step": 436500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4207355999361607e-05, + "loss": 1.5119, + "step": 437000 + }, + { + "epoch": 0.26, + "learning_rate": 5.420525603380104e-05, + "loss": 1.4962, + "step": 437500 + }, + { + "epoch": 0.26, + "learning_rate": 5.420315606824048e-05, + "loss": 1.4752, + "step": 438000 + }, + { + "epoch": 0.26, + "learning_rate": 5.4201056102679914e-05, + "loss": 1.4952, + "step": 438500 + }, + { + "epoch": 0.26, + "learning_rate": 5.419895613711935e-05, + "loss": 1.5031, + "step": 439000 + }, + { + "epoch": 0.26, + "learning_rate": 5.419685617155879e-05, + "loss": 1.5013, + "step": 439500 + }, + { + "epoch": 0.26, + "learning_rate": 5.419476040592934e-05, + "loss": 1.4796, + "step": 440000 + }, + { + "epoch": 0.26, + "learning_rate": 5.4192660440368774e-05, + "loss": 1.4715, + "step": 440500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4190560474808215e-05, + "loss": 1.5148, + "step": 441000 + }, + { + "epoch": 0.26, + "learning_rate": 5.418846050924765e-05, + "loss": 1.4945, + "step": 441500 + }, + { + "epoch": 0.26, + "learning_rate": 5.418636054368708e-05, + "loss": 1.4679, + "step": 442000 + }, + { + "epoch": 0.27, + "learning_rate": 5.418426057812652e-05, + "loss": 1.4923, + "step": 442500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4182160612565955e-05, + "loss": 1.4764, + "step": 443000 + }, + { + "epoch": 0.27, + "learning_rate": 5.418006064700539e-05, + "loss": 1.4813, + "step": 443500 + }, + { + "epoch": 0.27, + "learning_rate": 5.417796488137594e-05, + "loss": 1.5003, + "step": 444000 + }, + { + "epoch": 0.27, + "learning_rate": 5.417586491581538e-05, + "loss": 1.4896, + "step": 444500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4173764950254816e-05, + "loss": 1.458, + "step": 445000 + }, + { + "epoch": 0.27, + "learning_rate": 5.417166498469425e-05, + "loss": 1.4845, + "step": 445500 + }, + { + "epoch": 0.27, + "learning_rate": 5.416956501913369e-05, + "loss": 1.4785, + "step": 446000 + }, + { + "epoch": 0.27, + "learning_rate": 5.416746925350424e-05, + "loss": 1.4766, + "step": 446500 + }, + { + "epoch": 0.27, + "learning_rate": 5.416536928794368e-05, + "loss": 1.4629, + "step": 447000 + }, + { + "epoch": 0.27, + "learning_rate": 5.416327352231423e-05, + "loss": 1.4764, + "step": 447500 + }, + { + "epoch": 0.27, + "learning_rate": 5.416117355675367e-05, + "loss": 1.4984, + "step": 448000 + }, + { + "epoch": 0.27, + "learning_rate": 5.4159073591193104e-05, + "loss": 1.5047, + "step": 448500 + }, + { + "epoch": 0.27, + "learning_rate": 5.415697362563254e-05, + "loss": 1.483, + "step": 449000 + }, + { + "epoch": 0.27, + "learning_rate": 5.415487366007198e-05, + "loss": 1.4727, + "step": 449500 + }, + { + "epoch": 0.27, + "learning_rate": 5.415277369451141e-05, + "loss": 1.466, + "step": 450000 + }, + { + "epoch": 0.27, + "learning_rate": 5.4150673728950845e-05, + "loss": 1.4967, + "step": 450500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4148573763390285e-05, + "loss": 1.5263, + "step": 451000 + }, + { + "epoch": 0.27, + "learning_rate": 5.414647379782971e-05, + "loss": 1.4971, + "step": 451500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4144373832269145e-05, + "loss": 1.4895, + "step": 452000 + }, + { + "epoch": 0.27, + "learning_rate": 5.4142278066639705e-05, + "loss": 1.4727, + "step": 452500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4140178101079146e-05, + "loss": 1.5039, + "step": 453000 + }, + { + "epoch": 0.27, + "learning_rate": 5.41380823354497e-05, + "loss": 1.4509, + "step": 453500 + }, + { + "epoch": 0.27, + "learning_rate": 5.413598236988913e-05, + "loss": 1.4789, + "step": 454000 + }, + { + "epoch": 0.27, + "learning_rate": 5.413388240432857e-05, + "loss": 1.4908, + "step": 454500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4131782438768006e-05, + "loss": 1.4847, + "step": 455000 + }, + { + "epoch": 0.27, + "learning_rate": 5.412968247320744e-05, + "loss": 1.4833, + "step": 455500 + }, + { + "epoch": 0.27, + "learning_rate": 5.412758250764687e-05, + "loss": 1.4864, + "step": 456000 + }, + { + "epoch": 0.27, + "learning_rate": 5.412548254208631e-05, + "loss": 1.4836, + "step": 456500 + }, + { + "epoch": 0.27, + "learning_rate": 5.412338257652574e-05, + "loss": 1.4851, + "step": 457000 + }, + { + "epoch": 0.27, + "learning_rate": 5.412128261096518e-05, + "loss": 1.4735, + "step": 457500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4119182645404614e-05, + "loss": 1.4734, + "step": 458000 + }, + { + "epoch": 0.27, + "learning_rate": 5.411708267984405e-05, + "loss": 1.4755, + "step": 458500 + }, + { + "epoch": 0.28, + "learning_rate": 5.41149869142146e-05, + "loss": 1.4784, + "step": 459000 + }, + { + "epoch": 0.28, + "learning_rate": 5.411288694865404e-05, + "loss": 1.4712, + "step": 459500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4110786983093475e-05, + "loss": 1.4509, + "step": 460000 + }, + { + "epoch": 0.28, + "learning_rate": 5.410868701753291e-05, + "loss": 1.4527, + "step": 460500 + }, + { + "epoch": 0.28, + "learning_rate": 5.410658705197235e-05, + "loss": 1.4796, + "step": 461000 + }, + { + "epoch": 0.28, + "learning_rate": 5.41044912863429e-05, + "loss": 1.4916, + "step": 461500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4102391320782335e-05, + "loss": 1.4592, + "step": 462000 + }, + { + "epoch": 0.28, + "learning_rate": 5.4100291355221776e-05, + "loss": 1.5036, + "step": 462500 + }, + { + "epoch": 0.28, + "learning_rate": 5.409819138966121e-05, + "loss": 1.4846, + "step": 463000 + }, + { + "epoch": 0.28, + "learning_rate": 5.409609142410064e-05, + "loss": 1.4955, + "step": 463500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4093995658471196e-05, + "loss": 1.4611, + "step": 464000 + }, + { + "epoch": 0.28, + "learning_rate": 5.4091895692910636e-05, + "loss": 1.4841, + "step": 464500 + }, + { + "epoch": 0.28, + "learning_rate": 5.408979572735007e-05, + "loss": 1.4605, + "step": 465000 + }, + { + "epoch": 0.28, + "learning_rate": 5.40876957617895e-05, + "loss": 1.4769, + "step": 465500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4085595796228943e-05, + "loss": 1.4751, + "step": 466000 + }, + { + "epoch": 0.28, + "learning_rate": 5.408349583066838e-05, + "loss": 1.485, + "step": 466500 + }, + { + "epoch": 0.28, + "learning_rate": 5.408139586510781e-05, + "loss": 1.4905, + "step": 467000 + }, + { + "epoch": 0.28, + "learning_rate": 5.407929589954725e-05, + "loss": 1.4479, + "step": 467500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4077195933986684e-05, + "loss": 1.4639, + "step": 468000 + }, + { + "epoch": 0.28, + "learning_rate": 5.407509596842612e-05, + "loss": 1.5038, + "step": 468500 + }, + { + "epoch": 0.28, + "learning_rate": 5.407300020279667e-05, + "loss": 1.4514, + "step": 469000 + }, + { + "epoch": 0.28, + "learning_rate": 5.407090443716723e-05, + "loss": 1.4981, + "step": 469500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4068804471606665e-05, + "loss": 1.4475, + "step": 470000 + }, + { + "epoch": 0.28, + "learning_rate": 5.40667045060461e-05, + "loss": 1.4781, + "step": 470500 + }, + { + "epoch": 0.28, + "learning_rate": 5.406460454048554e-05, + "loss": 1.4666, + "step": 471000 + }, + { + "epoch": 0.28, + "learning_rate": 5.406250877485609e-05, + "loss": 1.4754, + "step": 471500 + }, + { + "epoch": 0.28, + "learning_rate": 5.406041300922665e-05, + "loss": 1.4858, + "step": 472000 + }, + { + "epoch": 0.28, + "learning_rate": 5.4058313043666086e-05, + "loss": 1.4444, + "step": 472500 + }, + { + "epoch": 0.28, + "learning_rate": 5.405621307810551e-05, + "loss": 1.5036, + "step": 473000 + }, + { + "epoch": 0.28, + "learning_rate": 5.405411311254495e-05, + "loss": 1.4703, + "step": 473500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4052013146984386e-05, + "loss": 1.4395, + "step": 474000 + }, + { + "epoch": 0.28, + "learning_rate": 5.404991318142382e-05, + "loss": 1.458, + "step": 474500 + }, + { + "epoch": 0.28, + "learning_rate": 5.404781321586326e-05, + "loss": 1.4904, + "step": 475000 + }, + { + "epoch": 0.29, + "learning_rate": 5.4045713250302694e-05, + "loss": 1.4722, + "step": 475500 + }, + { + "epoch": 0.29, + "learning_rate": 5.404361328474213e-05, + "loss": 1.4849, + "step": 476000 + }, + { + "epoch": 0.29, + "learning_rate": 5.404151331918157e-05, + "loss": 1.4861, + "step": 476500 + }, + { + "epoch": 0.29, + "learning_rate": 5.4039413353621e-05, + "loss": 1.4941, + "step": 477000 + }, + { + "epoch": 0.29, + "learning_rate": 5.4037313388060434e-05, + "loss": 1.4892, + "step": 477500 + }, + { + "epoch": 0.29, + "learning_rate": 5.4035213422499874e-05, + "loss": 1.4417, + "step": 478000 + }, + { + "epoch": 0.29, + "learning_rate": 5.40331134569393e-05, + "loss": 1.4741, + "step": 478500 + }, + { + "epoch": 0.29, + "learning_rate": 5.403101349137874e-05, + "loss": 1.4821, + "step": 479000 + }, + { + "epoch": 0.29, + "learning_rate": 5.4028913525818175e-05, + "loss": 1.4759, + "step": 479500 + }, + { + "epoch": 0.29, + "learning_rate": 5.4026817760188735e-05, + "loss": 1.4759, + "step": 480000 + }, + { + "epoch": 0.29, + "learning_rate": 5.402472199455929e-05, + "loss": 1.4996, + "step": 480500 + }, + { + "epoch": 0.29, + "learning_rate": 5.402262202899872e-05, + "loss": 1.4596, + "step": 481000 + }, + { + "epoch": 0.29, + "learning_rate": 5.402052206343816e-05, + "loss": 1.4686, + "step": 481500 + }, + { + "epoch": 0.29, + "learning_rate": 5.4018422097877596e-05, + "loss": 1.4766, + "step": 482000 + }, + { + "epoch": 0.29, + "learning_rate": 5.401632633224815e-05, + "loss": 1.4807, + "step": 482500 + }, + { + "epoch": 0.29, + "learning_rate": 5.401422636668758e-05, + "loss": 1.4772, + "step": 483000 + }, + { + "epoch": 0.29, + "learning_rate": 5.401212640112702e-05, + "loss": 1.4749, + "step": 483500 + }, + { + "epoch": 0.29, + "learning_rate": 5.4010026435566457e-05, + "loss": 1.4796, + "step": 484000 + }, + { + "epoch": 0.29, + "learning_rate": 5.400792647000589e-05, + "loss": 1.4645, + "step": 484500 + }, + { + "epoch": 0.29, + "learning_rate": 5.400582650444533e-05, + "loss": 1.486, + "step": 485000 + }, + { + "epoch": 0.29, + "learning_rate": 5.400372653888476e-05, + "loss": 1.4682, + "step": 485500 + }, + { + "epoch": 0.29, + "learning_rate": 5.40016265733242e-05, + "loss": 1.4471, + "step": 486000 + }, + { + "epoch": 0.29, + "learning_rate": 5.399952660776363e-05, + "loss": 1.4638, + "step": 486500 + }, + { + "epoch": 0.29, + "learning_rate": 5.399743084213419e-05, + "loss": 1.4751, + "step": 487000 + }, + { + "epoch": 0.29, + "learning_rate": 5.3995330876573624e-05, + "loss": 1.4749, + "step": 487500 + }, + { + "epoch": 0.29, + "learning_rate": 5.399323511094418e-05, + "loss": 1.4473, + "step": 488000 + }, + { + "epoch": 0.29, + "learning_rate": 5.399113514538362e-05, + "loss": 1.4827, + "step": 488500 + }, + { + "epoch": 0.29, + "learning_rate": 5.398903517982305e-05, + "loss": 1.4956, + "step": 489000 + }, + { + "epoch": 0.29, + "learning_rate": 5.3986935214262485e-05, + "loss": 1.4725, + "step": 489500 + }, + { + "epoch": 0.29, + "learning_rate": 5.3984835248701925e-05, + "loss": 1.4544, + "step": 490000 + }, + { + "epoch": 0.29, + "learning_rate": 5.398273948307248e-05, + "loss": 1.4674, + "step": 490500 + }, + { + "epoch": 0.29, + "learning_rate": 5.398063951751191e-05, + "loss": 1.48, + "step": 491000 + }, + { + "epoch": 0.29, + "learning_rate": 5.3978539551951346e-05, + "loss": 1.4429, + "step": 491500 + }, + { + "epoch": 0.29, + "learning_rate": 5.3976439586390786e-05, + "loss": 1.4687, + "step": 492000 + }, + { + "epoch": 0.3, + "learning_rate": 5.397433962083021e-05, + "loss": 1.4786, + "step": 492500 + }, + { + "epoch": 0.3, + "learning_rate": 5.397223965526965e-05, + "loss": 1.4867, + "step": 493000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3970139689709086e-05, + "loss": 1.5085, + "step": 493500 + }, + { + "epoch": 0.3, + "learning_rate": 5.396803972414852e-05, + "loss": 1.4532, + "step": 494000 + }, + { + "epoch": 0.3, + "learning_rate": 5.396594395851908e-05, + "loss": 1.4871, + "step": 494500 + }, + { + "epoch": 0.3, + "learning_rate": 5.3963843992958514e-05, + "loss": 1.4505, + "step": 495000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3961748227329074e-05, + "loss": 1.485, + "step": 495500 + }, + { + "epoch": 0.3, + "learning_rate": 5.395964826176851e-05, + "loss": 1.4968, + "step": 496000 + }, + { + "epoch": 0.3, + "learning_rate": 5.395754829620794e-05, + "loss": 1.4476, + "step": 496500 + }, + { + "epoch": 0.3, + "learning_rate": 5.395544833064738e-05, + "loss": 1.4873, + "step": 497000 + }, + { + "epoch": 0.3, + "learning_rate": 5.395334836508681e-05, + "loss": 1.4653, + "step": 497500 + }, + { + "epoch": 0.3, + "learning_rate": 5.395124839952624e-05, + "loss": 1.4962, + "step": 498000 + }, + { + "epoch": 0.3, + "learning_rate": 5.394914843396568e-05, + "loss": 1.4837, + "step": 498500 + }, + { + "epoch": 0.3, + "learning_rate": 5.3947048468405115e-05, + "loss": 1.4668, + "step": 499000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3944956902706796e-05, + "loss": 1.4675, + "step": 499500 + }, + { + "epoch": 0.3, + "learning_rate": 5.394285693714623e-05, + "loss": 1.4509, + "step": 500000 + }, + { + "epoch": 0.3, + "eval_loss": 1.39423406124115, + "eval_runtime": 1102.7794, + "eval_samples_per_second": 477.63, + "eval_steps_per_second": 79.605, + "step": 500000 + }, + { + "epoch": 0.3, + "learning_rate": 5.394075697158567e-05, + "loss": 1.4921, + "step": 500500 + }, + { + "epoch": 0.3, + "learning_rate": 5.39386570060251e-05, + "loss": 1.4708, + "step": 501000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3936557040464536e-05, + "loss": 1.4328, + "step": 501500 + }, + { + "epoch": 0.3, + "learning_rate": 5.3934457074903976e-05, + "loss": 1.4618, + "step": 502000 + }, + { + "epoch": 0.3, + "learning_rate": 5.393236130927453e-05, + "loss": 1.469, + "step": 502500 + }, + { + "epoch": 0.3, + "learning_rate": 5.3930261343713963e-05, + "loss": 1.4599, + "step": 503000 + }, + { + "epoch": 0.3, + "learning_rate": 5.39281613781534e-05, + "loss": 1.4738, + "step": 503500 + }, + { + "epoch": 0.3, + "learning_rate": 5.392606141259284e-05, + "loss": 1.4419, + "step": 504000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3923961447032264e-05, + "loss": 1.481, + "step": 504500 + }, + { + "epoch": 0.3, + "learning_rate": 5.39218614814717e-05, + "loss": 1.4842, + "step": 505000 + }, + { + "epoch": 0.3, + "learning_rate": 5.391976151591114e-05, + "loss": 1.4802, + "step": 505500 + }, + { + "epoch": 0.3, + "learning_rate": 5.391766155035057e-05, + "loss": 1.4596, + "step": 506000 + }, + { + "epoch": 0.3, + "learning_rate": 5.391556578472113e-05, + "loss": 1.4874, + "step": 506500 + }, + { + "epoch": 0.3, + "learning_rate": 5.3913465819160565e-05, + "loss": 1.4527, + "step": 507000 + }, + { + "epoch": 0.3, + "learning_rate": 5.39113658536e-05, + "loss": 1.483, + "step": 507500 + }, + { + "epoch": 0.3, + "learning_rate": 5.390926588803943e-05, + "loss": 1.4635, + "step": 508000 + }, + { + "epoch": 0.3, + "learning_rate": 5.390716592247887e-05, + "loss": 1.4686, + "step": 508500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3905065956918305e-05, + "loss": 1.4861, + "step": 509000 + }, + { + "epoch": 0.31, + "learning_rate": 5.390296599135774e-05, + "loss": 1.4563, + "step": 509500 + }, + { + "epoch": 0.31, + "learning_rate": 5.390087022572829e-05, + "loss": 1.4874, + "step": 510000 + }, + { + "epoch": 0.31, + "learning_rate": 5.389877026016773e-05, + "loss": 1.4642, + "step": 510500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3896670294607166e-05, + "loss": 1.4704, + "step": 511000 + }, + { + "epoch": 0.31, + "learning_rate": 5.38945703290466e-05, + "loss": 1.4708, + "step": 511500 + }, + { + "epoch": 0.31, + "learning_rate": 5.389247456341716e-05, + "loss": 1.464, + "step": 512000 + }, + { + "epoch": 0.31, + "learning_rate": 5.389037879778772e-05, + "loss": 1.459, + "step": 512500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3888278832227154e-05, + "loss": 1.4629, + "step": 513000 + }, + { + "epoch": 0.31, + "learning_rate": 5.388618306659771e-05, + "loss": 1.4731, + "step": 513500 + }, + { + "epoch": 0.31, + "learning_rate": 5.388408310103714e-05, + "loss": 1.4593, + "step": 514000 + }, + { + "epoch": 0.31, + "learning_rate": 5.388198313547658e-05, + "loss": 1.4806, + "step": 514500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3879883169916014e-05, + "loss": 1.4702, + "step": 515000 + }, + { + "epoch": 0.31, + "learning_rate": 5.387778320435545e-05, + "loss": 1.4674, + "step": 515500 + }, + { + "epoch": 0.31, + "learning_rate": 5.387568323879489e-05, + "loss": 1.4989, + "step": 516000 + }, + { + "epoch": 0.31, + "learning_rate": 5.3873583273234315e-05, + "loss": 1.4581, + "step": 516500 + }, + { + "epoch": 0.31, + "learning_rate": 5.387148330767375e-05, + "loss": 1.4312, + "step": 517000 + }, + { + "epoch": 0.31, + "learning_rate": 5.386938334211319e-05, + "loss": 1.4704, + "step": 517500 + }, + { + "epoch": 0.31, + "learning_rate": 5.386728337655262e-05, + "loss": 1.474, + "step": 518000 + }, + { + "epoch": 0.31, + "learning_rate": 5.3865183410992055e-05, + "loss": 1.4743, + "step": 518500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3863083445431496e-05, + "loss": 1.4557, + "step": 519000 + }, + { + "epoch": 0.31, + "learning_rate": 5.386098347987093e-05, + "loss": 1.4193, + "step": 519500 + }, + { + "epoch": 0.31, + "learning_rate": 5.385888771424148e-05, + "loss": 1.4183, + "step": 520000 + }, + { + "epoch": 0.31, + "learning_rate": 5.385678774868092e-05, + "loss": 1.4506, + "step": 520500 + }, + { + "epoch": 0.31, + "learning_rate": 5.385469198305148e-05, + "loss": 1.4805, + "step": 521000 + }, + { + "epoch": 0.31, + "learning_rate": 5.385259201749091e-05, + "loss": 1.4719, + "step": 521500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3850492051930343e-05, + "loss": 1.4597, + "step": 522000 + }, + { + "epoch": 0.31, + "learning_rate": 5.3848392086369784e-05, + "loss": 1.4465, + "step": 522500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3846296320740344e-05, + "loss": 1.4688, + "step": 523000 + }, + { + "epoch": 0.31, + "learning_rate": 5.384419635517977e-05, + "loss": 1.4534, + "step": 523500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3842096389619204e-05, + "loss": 1.4553, + "step": 524000 + }, + { + "epoch": 0.31, + "learning_rate": 5.3839996424058644e-05, + "loss": 1.4524, + "step": 524500 + }, + { + "epoch": 0.31, + "learning_rate": 5.383789645849808e-05, + "loss": 1.4509, + "step": 525000 + }, + { + "epoch": 0.32, + "learning_rate": 5.383579649293751e-05, + "loss": 1.5044, + "step": 525500 + }, + { + "epoch": 0.32, + "learning_rate": 5.383369652737695e-05, + "loss": 1.5006, + "step": 526000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3831600761747505e-05, + "loss": 1.4834, + "step": 526500 + }, + { + "epoch": 0.32, + "learning_rate": 5.382950079618694e-05, + "loss": 1.4554, + "step": 527000 + }, + { + "epoch": 0.32, + "learning_rate": 5.382740083062638e-05, + "loss": 1.4694, + "step": 527500 + }, + { + "epoch": 0.32, + "learning_rate": 5.382530086506581e-05, + "loss": 1.4621, + "step": 528000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3823200899505246e-05, + "loss": 1.4831, + "step": 528500 + }, + { + "epoch": 0.32, + "learning_rate": 5.3821100933944686e-05, + "loss": 1.4699, + "step": 529000 + }, + { + "epoch": 0.32, + "learning_rate": 5.381900096838412e-05, + "loss": 1.4635, + "step": 529500 + }, + { + "epoch": 0.32, + "learning_rate": 5.381690100282355e-05, + "loss": 1.4493, + "step": 530000 + }, + { + "epoch": 0.32, + "learning_rate": 5.381480103726299e-05, + "loss": 1.4588, + "step": 530500 + }, + { + "epoch": 0.32, + "learning_rate": 5.3812701071702427e-05, + "loss": 1.4337, + "step": 531000 + }, + { + "epoch": 0.32, + "learning_rate": 5.381060110614185e-05, + "loss": 1.4663, + "step": 531500 + }, + { + "epoch": 0.32, + "learning_rate": 5.3808505340512414e-05, + "loss": 1.4553, + "step": 532000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3806405374951854e-05, + "loss": 1.4686, + "step": 532500 + }, + { + "epoch": 0.32, + "learning_rate": 5.380430540939129e-05, + "loss": 1.4886, + "step": 533000 + }, + { + "epoch": 0.32, + "learning_rate": 5.380220544383072e-05, + "loss": 1.4579, + "step": 533500 + }, + { + "epoch": 0.32, + "learning_rate": 5.3800105478270154e-05, + "loss": 1.4527, + "step": 534000 + }, + { + "epoch": 0.32, + "learning_rate": 5.379800551270959e-05, + "loss": 1.4672, + "step": 534500 + }, + { + "epoch": 0.32, + "learning_rate": 5.379590554714902e-05, + "loss": 1.4291, + "step": 535000 + }, + { + "epoch": 0.32, + "learning_rate": 5.379380978151958e-05, + "loss": 1.477, + "step": 535500 + }, + { + "epoch": 0.32, + "learning_rate": 5.379170981595902e-05, + "loss": 1.4374, + "step": 536000 + }, + { + "epoch": 0.32, + "learning_rate": 5.378960985039845e-05, + "loss": 1.4743, + "step": 536500 + }, + { + "epoch": 0.32, + "learning_rate": 5.378750988483789e-05, + "loss": 1.4532, + "step": 537000 + }, + { + "epoch": 0.32, + "learning_rate": 5.378540991927732e-05, + "loss": 1.4681, + "step": 537500 + }, + { + "epoch": 0.32, + "learning_rate": 5.3783309953716756e-05, + "loss": 1.4418, + "step": 538000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3781214188087316e-05, + "loss": 1.4785, + "step": 538500 + }, + { + "epoch": 0.32, + "learning_rate": 5.377911422252675e-05, + "loss": 1.4813, + "step": 539000 + }, + { + "epoch": 0.32, + "learning_rate": 5.377701845689731e-05, + "loss": 1.4728, + "step": 539500 + }, + { + "epoch": 0.32, + "learning_rate": 5.377491849133674e-05, + "loss": 1.4488, + "step": 540000 + }, + { + "epoch": 0.32, + "learning_rate": 5.377281852577618e-05, + "loss": 1.4475, + "step": 540500 + }, + { + "epoch": 0.32, + "learning_rate": 5.377071856021561e-05, + "loss": 1.4984, + "step": 541000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3768618594655044e-05, + "loss": 1.4632, + "step": 541500 + }, + { + "epoch": 0.32, + "learning_rate": 5.376651862909448e-05, + "loss": 1.418, + "step": 542000 + }, + { + "epoch": 0.33, + "learning_rate": 5.376441866353392e-05, + "loss": 1.4526, + "step": 542500 + }, + { + "epoch": 0.33, + "learning_rate": 5.376231869797335e-05, + "loss": 1.4693, + "step": 543000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3760218732412784e-05, + "loss": 1.4522, + "step": 543500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3758118766852224e-05, + "loss": 1.4922, + "step": 544000 + }, + { + "epoch": 0.33, + "learning_rate": 5.375601880129166e-05, + "loss": 1.433, + "step": 544500 + }, + { + "epoch": 0.33, + "learning_rate": 5.375391883573109e-05, + "loss": 1.4408, + "step": 545000 + }, + { + "epoch": 0.33, + "learning_rate": 5.375181887017053e-05, + "loss": 1.4421, + "step": 545500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3749718904609965e-05, + "loss": 1.468, + "step": 546000 + }, + { + "epoch": 0.33, + "learning_rate": 5.37476189390494e-05, + "loss": 1.4545, + "step": 546500 + }, + { + "epoch": 0.33, + "learning_rate": 5.374552737335107e-05, + "loss": 1.4541, + "step": 547000 + }, + { + "epoch": 0.33, + "learning_rate": 5.374342740779051e-05, + "loss": 1.4912, + "step": 547500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3741327442229946e-05, + "loss": 1.4484, + "step": 548000 + }, + { + "epoch": 0.33, + "learning_rate": 5.373922747666938e-05, + "loss": 1.4795, + "step": 548500 + }, + { + "epoch": 0.33, + "learning_rate": 5.373712751110882e-05, + "loss": 1.45, + "step": 549000 + }, + { + "epoch": 0.33, + "learning_rate": 5.373502754554825e-05, + "loss": 1.4788, + "step": 549500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3732927579987687e-05, + "loss": 1.4774, + "step": 550000 + }, + { + "epoch": 0.33, + "learning_rate": 5.373082761442713e-05, + "loss": 1.4913, + "step": 550500 + }, + { + "epoch": 0.33, + "learning_rate": 5.372872764886656e-05, + "loss": 1.436, + "step": 551000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3726636083168234e-05, + "loss": 1.4616, + "step": 551500 + }, + { + "epoch": 0.33, + "learning_rate": 5.372453611760767e-05, + "loss": 1.4495, + "step": 552000 + }, + { + "epoch": 0.33, + "learning_rate": 5.372243615204711e-05, + "loss": 1.4402, + "step": 552500 + }, + { + "epoch": 0.33, + "learning_rate": 5.372033618648654e-05, + "loss": 1.4483, + "step": 553000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3718236220925975e-05, + "loss": 1.4489, + "step": 553500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3716136255365415e-05, + "loss": 1.4528, + "step": 554000 + }, + { + "epoch": 0.33, + "learning_rate": 5.371403628980485e-05, + "loss": 1.4725, + "step": 554500 + }, + { + "epoch": 0.33, + "learning_rate": 5.371193632424428e-05, + "loss": 1.4451, + "step": 555000 + }, + { + "epoch": 0.33, + "learning_rate": 5.370983635868372e-05, + "loss": 1.4319, + "step": 555500 + }, + { + "epoch": 0.33, + "learning_rate": 5.370774479298539e-05, + "loss": 1.4718, + "step": 556000 + }, + { + "epoch": 0.33, + "learning_rate": 5.370564482742483e-05, + "loss": 1.4565, + "step": 556500 + }, + { + "epoch": 0.33, + "learning_rate": 5.370354486186426e-05, + "loss": 1.4427, + "step": 557000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3701444896303696e-05, + "loss": 1.4279, + "step": 557500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3699344930743136e-05, + "loss": 1.4681, + "step": 558000 + }, + { + "epoch": 0.33, + "learning_rate": 5.369724916511369e-05, + "loss": 1.4486, + "step": 558500 + }, + { + "epoch": 0.34, + "learning_rate": 5.369514919955312e-05, + "loss": 1.4583, + "step": 559000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3693049233992563e-05, + "loss": 1.4572, + "step": 559500 + }, + { + "epoch": 0.34, + "learning_rate": 5.3690949268432e-05, + "loss": 1.4543, + "step": 560000 + }, + { + "epoch": 0.34, + "learning_rate": 5.368884930287143e-05, + "loss": 1.453, + "step": 560500 + }, + { + "epoch": 0.34, + "learning_rate": 5.3686753537241984e-05, + "loss": 1.4382, + "step": 561000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3684653571681424e-05, + "loss": 1.4591, + "step": 561500 + }, + { + "epoch": 0.34, + "learning_rate": 5.3682557806051985e-05, + "loss": 1.4615, + "step": 562000 + }, + { + "epoch": 0.34, + "learning_rate": 5.368045784049141e-05, + "loss": 1.4834, + "step": 562500 + }, + { + "epoch": 0.34, + "learning_rate": 5.3678357874930845e-05, + "loss": 1.4664, + "step": 563000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3676257909370285e-05, + "loss": 1.4678, + "step": 563500 + }, + { + "epoch": 0.34, + "learning_rate": 5.367415794380972e-05, + "loss": 1.4438, + "step": 564000 + }, + { + "epoch": 0.34, + "learning_rate": 5.367205797824916e-05, + "loss": 1.464, + "step": 564500 + }, + { + "epoch": 0.34, + "learning_rate": 5.366995801268859e-05, + "loss": 1.4623, + "step": 565000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3667858047128026e-05, + "loss": 1.4471, + "step": 565500 + }, + { + "epoch": 0.34, + "learning_rate": 5.3665758081567466e-05, + "loss": 1.4515, + "step": 566000 + }, + { + "epoch": 0.34, + "learning_rate": 5.36636581160069e-05, + "loss": 1.4244, + "step": 566500 + }, + { + "epoch": 0.34, + "learning_rate": 5.366155815044633e-05, + "loss": 1.4558, + "step": 567000 + }, + { + "epoch": 0.34, + "learning_rate": 5.365945818488577e-05, + "loss": 1.4595, + "step": 567500 + }, + { + "epoch": 0.34, + "learning_rate": 5.36573582193252e-05, + "loss": 1.4305, + "step": 568000 + }, + { + "epoch": 0.34, + "learning_rate": 5.365526245369576e-05, + "loss": 1.4406, + "step": 568500 + }, + { + "epoch": 0.34, + "learning_rate": 5.3653162488135193e-05, + "loss": 1.462, + "step": 569000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3651062522574634e-05, + "loss": 1.4595, + "step": 569500 + }, + { + "epoch": 0.34, + "learning_rate": 5.364896255701407e-05, + "loss": 1.4909, + "step": 570000 + }, + { + "epoch": 0.34, + "learning_rate": 5.364687099131574e-05, + "loss": 1.4719, + "step": 570500 + }, + { + "epoch": 0.34, + "learning_rate": 5.3644771025755174e-05, + "loss": 1.4807, + "step": 571000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3642671060194614e-05, + "loss": 1.4544, + "step": 571500 + }, + { + "epoch": 0.34, + "learning_rate": 5.364057109463405e-05, + "loss": 1.4577, + "step": 572000 + }, + { + "epoch": 0.34, + "learning_rate": 5.363847112907348e-05, + "loss": 1.4659, + "step": 572500 + }, + { + "epoch": 0.34, + "learning_rate": 5.363637116351292e-05, + "loss": 1.454, + "step": 573000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3634271197952355e-05, + "loss": 1.4702, + "step": 573500 + }, + { + "epoch": 0.34, + "learning_rate": 5.363217543232291e-05, + "loss": 1.4803, + "step": 574000 + }, + { + "epoch": 0.34, + "learning_rate": 5.363007546676234e-05, + "loss": 1.4818, + "step": 574500 + }, + { + "epoch": 0.34, + "learning_rate": 5.362797550120178e-05, + "loss": 1.4441, + "step": 575000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3625875535641216e-05, + "loss": 1.4358, + "step": 575500 + }, + { + "epoch": 0.35, + "learning_rate": 5.362377557008065e-05, + "loss": 1.4538, + "step": 576000 + }, + { + "epoch": 0.35, + "learning_rate": 5.362167560452009e-05, + "loss": 1.469, + "step": 576500 + }, + { + "epoch": 0.35, + "learning_rate": 5.361957563895952e-05, + "loss": 1.4577, + "step": 577000 + }, + { + "epoch": 0.35, + "learning_rate": 5.361747567339895e-05, + "loss": 1.4498, + "step": 577500 + }, + { + "epoch": 0.35, + "learning_rate": 5.361537570783839e-05, + "loss": 1.4132, + "step": 578000 + }, + { + "epoch": 0.35, + "learning_rate": 5.361327994220895e-05, + "loss": 1.4081, + "step": 578500 + }, + { + "epoch": 0.35, + "learning_rate": 5.3611179976648384e-05, + "loss": 1.4546, + "step": 579000 + }, + { + "epoch": 0.35, + "learning_rate": 5.360908001108782e-05, + "loss": 1.4465, + "step": 579500 + }, + { + "epoch": 0.35, + "learning_rate": 5.360698004552725e-05, + "loss": 1.4338, + "step": 580000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3604880079966684e-05, + "loss": 1.4582, + "step": 580500 + }, + { + "epoch": 0.35, + "learning_rate": 5.3602784314337244e-05, + "loss": 1.4748, + "step": 581000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3600684348776685e-05, + "loss": 1.4371, + "step": 581500 + }, + { + "epoch": 0.35, + "learning_rate": 5.359858438321612e-05, + "loss": 1.4423, + "step": 582000 + }, + { + "epoch": 0.35, + "learning_rate": 5.359648861758667e-05, + "loss": 1.4646, + "step": 582500 + }, + { + "epoch": 0.35, + "learning_rate": 5.3594388652026105e-05, + "loss": 1.4183, + "step": 583000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3592288686465545e-05, + "loss": 1.4732, + "step": 583500 + }, + { + "epoch": 0.35, + "learning_rate": 5.359018872090498e-05, + "loss": 1.4349, + "step": 584000 + }, + { + "epoch": 0.35, + "learning_rate": 5.358808875534441e-05, + "loss": 1.4537, + "step": 584500 + }, + { + "epoch": 0.35, + "learning_rate": 5.3585992989714966e-05, + "loss": 1.4631, + "step": 585000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3583893024154406e-05, + "loss": 1.4499, + "step": 585500 + }, + { + "epoch": 0.35, + "learning_rate": 5.358179305859384e-05, + "loss": 1.4533, + "step": 586000 + }, + { + "epoch": 0.35, + "learning_rate": 5.357969309303327e-05, + "loss": 1.4466, + "step": 586500 + }, + { + "epoch": 0.35, + "learning_rate": 5.3577593127472707e-05, + "loss": 1.4415, + "step": 587000 + }, + { + "epoch": 0.35, + "learning_rate": 5.357549316191214e-05, + "loss": 1.4763, + "step": 587500 + }, + { + "epoch": 0.35, + "learning_rate": 5.35733973962827e-05, + "loss": 1.4567, + "step": 588000 + }, + { + "epoch": 0.35, + "learning_rate": 5.357129743072214e-05, + "loss": 1.4361, + "step": 588500 + }, + { + "epoch": 0.35, + "learning_rate": 5.3569197465161574e-05, + "loss": 1.4296, + "step": 589000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3567097499601e-05, + "loss": 1.496, + "step": 589500 + }, + { + "epoch": 0.35, + "learning_rate": 5.356499753404044e-05, + "loss": 1.4276, + "step": 590000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3562897568479874e-05, + "loss": 1.4369, + "step": 590500 + }, + { + "epoch": 0.35, + "learning_rate": 5.356079760291931e-05, + "loss": 1.4548, + "step": 591000 + }, + { + "epoch": 0.35, + "learning_rate": 5.355869763735875e-05, + "loss": 1.4422, + "step": 591500 + }, + { + "epoch": 0.35, + "learning_rate": 5.35566018717293e-05, + "loss": 1.4806, + "step": 592000 + }, + { + "epoch": 0.36, + "learning_rate": 5.3554501906168735e-05, + "loss": 1.4277, + "step": 592500 + }, + { + "epoch": 0.36, + "learning_rate": 5.355240194060817e-05, + "loss": 1.4337, + "step": 593000 + }, + { + "epoch": 0.36, + "learning_rate": 5.355030197504761e-05, + "loss": 1.4104, + "step": 593500 + }, + { + "epoch": 0.36, + "learning_rate": 5.354820620941817e-05, + "loss": 1.467, + "step": 594000 + }, + { + "epoch": 0.36, + "learning_rate": 5.354611044378872e-05, + "loss": 1.4678, + "step": 594500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3544010478228156e-05, + "loss": 1.4167, + "step": 595000 + }, + { + "epoch": 0.36, + "learning_rate": 5.3541910512667596e-05, + "loss": 1.4224, + "step": 595500 + }, + { + "epoch": 0.36, + "learning_rate": 5.353981054710703e-05, + "loss": 1.4826, + "step": 596000 + }, + { + "epoch": 0.36, + "learning_rate": 5.3537710581546457e-05, + "loss": 1.4465, + "step": 596500 + }, + { + "epoch": 0.36, + "learning_rate": 5.35356106159859e-05, + "loss": 1.4431, + "step": 597000 + }, + { + "epoch": 0.36, + "learning_rate": 5.353351065042533e-05, + "loss": 1.4756, + "step": 597500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3531410684864764e-05, + "loss": 1.4438, + "step": 598000 + }, + { + "epoch": 0.36, + "learning_rate": 5.3529314919235324e-05, + "loss": 1.45, + "step": 598500 + }, + { + "epoch": 0.36, + "learning_rate": 5.352721495367476e-05, + "loss": 1.445, + "step": 599000 + }, + { + "epoch": 0.36, + "learning_rate": 5.352511498811419e-05, + "loss": 1.4542, + "step": 599500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3523015022553624e-05, + "loss": 1.4337, + "step": 600000 + }, + { + "epoch": 0.36, + "eval_loss": 1.3768744468688965, + "eval_runtime": 1111.0659, + "eval_samples_per_second": 474.067, + "eval_steps_per_second": 79.012, + "step": 600000 + }, + { + "epoch": 0.36, + "learning_rate": 5.3520915056993065e-05, + "loss": 1.4555, + "step": 600500 + }, + { + "epoch": 0.36, + "learning_rate": 5.35188150914325e-05, + "loss": 1.4661, + "step": 601000 + }, + { + "epoch": 0.36, + "learning_rate": 5.351671932580305e-05, + "loss": 1.4178, + "step": 601500 + }, + { + "epoch": 0.36, + "learning_rate": 5.351461936024249e-05, + "loss": 1.4682, + "step": 602000 + }, + { + "epoch": 0.36, + "learning_rate": 5.351252359461305e-05, + "loss": 1.427, + "step": 602500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3510423629052486e-05, + "loss": 1.4369, + "step": 603000 + }, + { + "epoch": 0.36, + "learning_rate": 5.350832786342304e-05, + "loss": 1.4255, + "step": 603500 + }, + { + "epoch": 0.36, + "learning_rate": 5.350622789786247e-05, + "loss": 1.4287, + "step": 604000 + }, + { + "epoch": 0.36, + "learning_rate": 5.350412793230191e-05, + "loss": 1.4319, + "step": 604500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3502027966741346e-05, + "loss": 1.4201, + "step": 605000 + }, + { + "epoch": 0.36, + "learning_rate": 5.349992800118078e-05, + "loss": 1.4515, + "step": 605500 + }, + { + "epoch": 0.36, + "learning_rate": 5.349782803562022e-05, + "loss": 1.4629, + "step": 606000 + }, + { + "epoch": 0.36, + "learning_rate": 5.349572807005965e-05, + "loss": 1.4473, + "step": 606500 + }, + { + "epoch": 0.36, + "learning_rate": 5.349362810449908e-05, + "loss": 1.4352, + "step": 607000 + }, + { + "epoch": 0.36, + "learning_rate": 5.349153233886964e-05, + "loss": 1.471, + "step": 607500 + }, + { + "epoch": 0.36, + "learning_rate": 5.348943237330908e-05, + "loss": 1.4547, + "step": 608000 + }, + { + "epoch": 0.36, + "learning_rate": 5.348733240774851e-05, + "loss": 1.4612, + "step": 608500 + }, + { + "epoch": 0.37, + "learning_rate": 5.348523244218795e-05, + "loss": 1.4406, + "step": 609000 + }, + { + "epoch": 0.37, + "learning_rate": 5.348313247662738e-05, + "loss": 1.4493, + "step": 609500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3481032511066815e-05, + "loss": 1.451, + "step": 610000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3478932545506255e-05, + "loss": 1.4441, + "step": 610500 + }, + { + "epoch": 0.37, + "learning_rate": 5.347683257994569e-05, + "loss": 1.4575, + "step": 611000 + }, + { + "epoch": 0.37, + "learning_rate": 5.347473261438512e-05, + "loss": 1.4635, + "step": 611500 + }, + { + "epoch": 0.37, + "learning_rate": 5.34726410486868e-05, + "loss": 1.4496, + "step": 612000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3470541083126236e-05, + "loss": 1.469, + "step": 612500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3468441117565676e-05, + "loss": 1.448, + "step": 613000 + }, + { + "epoch": 0.37, + "learning_rate": 5.34663411520051e-05, + "loss": 1.4622, + "step": 613500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3464241186444536e-05, + "loss": 1.4492, + "step": 614000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3462145420815097e-05, + "loss": 1.4249, + "step": 614500 + }, + { + "epoch": 0.37, + "learning_rate": 5.346004545525454e-05, + "loss": 1.4879, + "step": 615000 + }, + { + "epoch": 0.37, + "learning_rate": 5.345794548969397e-05, + "loss": 1.4732, + "step": 615500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3455845524133404e-05, + "loss": 1.4502, + "step": 616000 + }, + { + "epoch": 0.37, + "learning_rate": 5.345374555857284e-05, + "loss": 1.423, + "step": 616500 + }, + { + "epoch": 0.37, + "learning_rate": 5.345164559301227e-05, + "loss": 1.4802, + "step": 617000 + }, + { + "epoch": 0.37, + "learning_rate": 5.344954562745171e-05, + "loss": 1.4899, + "step": 617500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3447449861822264e-05, + "loss": 1.4651, + "step": 618000 + }, + { + "epoch": 0.37, + "learning_rate": 5.34453498962617e-05, + "loss": 1.4203, + "step": 618500 + }, + { + "epoch": 0.37, + "learning_rate": 5.344324993070113e-05, + "loss": 1.4236, + "step": 619000 + }, + { + "epoch": 0.37, + "learning_rate": 5.344115416507169e-05, + "loss": 1.4513, + "step": 619500 + }, + { + "epoch": 0.37, + "learning_rate": 5.343905419951113e-05, + "loss": 1.4665, + "step": 620000 + }, + { + "epoch": 0.37, + "learning_rate": 5.343695423395056e-05, + "loss": 1.4464, + "step": 620500 + }, + { + "epoch": 0.37, + "learning_rate": 5.343485426838999e-05, + "loss": 1.43, + "step": 621000 + }, + { + "epoch": 0.37, + "learning_rate": 5.343275430282943e-05, + "loss": 1.4221, + "step": 621500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3430654337268866e-05, + "loss": 1.4269, + "step": 622000 + }, + { + "epoch": 0.37, + "learning_rate": 5.34285543717083e-05, + "loss": 1.4491, + "step": 622500 + }, + { + "epoch": 0.37, + "learning_rate": 5.342645440614774e-05, + "loss": 1.425, + "step": 623000 + }, + { + "epoch": 0.37, + "learning_rate": 5.342435444058717e-05, + "loss": 1.4478, + "step": 623500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3422258674957727e-05, + "loss": 1.4586, + "step": 624000 + }, + { + "epoch": 0.37, + "learning_rate": 5.342015870939717e-05, + "loss": 1.448, + "step": 624500 + }, + { + "epoch": 0.37, + "learning_rate": 5.34180587438366e-05, + "loss": 1.4555, + "step": 625000 + }, + { + "epoch": 0.38, + "learning_rate": 5.3415958778276034e-05, + "loss": 1.4411, + "step": 625500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3413858812715474e-05, + "loss": 1.4469, + "step": 626000 + }, + { + "epoch": 0.38, + "learning_rate": 5.341175884715491e-05, + "loss": 1.4492, + "step": 626500 + }, + { + "epoch": 0.38, + "learning_rate": 5.340965888159434e-05, + "loss": 1.4286, + "step": 627000 + }, + { + "epoch": 0.38, + "learning_rate": 5.340755891603378e-05, + "loss": 1.4145, + "step": 627500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3405463150404335e-05, + "loss": 1.4191, + "step": 628000 + }, + { + "epoch": 0.38, + "learning_rate": 5.340336318484377e-05, + "loss": 1.4256, + "step": 628500 + }, + { + "epoch": 0.38, + "learning_rate": 5.34012632192832e-05, + "loss": 1.4367, + "step": 629000 + }, + { + "epoch": 0.38, + "learning_rate": 5.3399167453653755e-05, + "loss": 1.4325, + "step": 629500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3397067488093195e-05, + "loss": 1.4389, + "step": 630000 + }, + { + "epoch": 0.38, + "learning_rate": 5.339496752253263e-05, + "loss": 1.452, + "step": 630500 + }, + { + "epoch": 0.38, + "learning_rate": 5.339286755697207e-05, + "loss": 1.4202, + "step": 631000 + }, + { + "epoch": 0.38, + "learning_rate": 5.339077179134262e-05, + "loss": 1.4379, + "step": 631500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3388671825782056e-05, + "loss": 1.4422, + "step": 632000 + }, + { + "epoch": 0.38, + "learning_rate": 5.338657186022149e-05, + "loss": 1.4148, + "step": 632500 + }, + { + "epoch": 0.38, + "learning_rate": 5.338447189466093e-05, + "loss": 1.4406, + "step": 633000 + }, + { + "epoch": 0.38, + "learning_rate": 5.338237192910036e-05, + "loss": 1.4397, + "step": 633500 + }, + { + "epoch": 0.38, + "learning_rate": 5.33802719635398e-05, + "loss": 1.4582, + "step": 634000 + }, + { + "epoch": 0.38, + "learning_rate": 5.337817199797924e-05, + "loss": 1.4508, + "step": 634500 + }, + { + "epoch": 0.38, + "learning_rate": 5.337607203241867e-05, + "loss": 1.4748, + "step": 635000 + }, + { + "epoch": 0.38, + "learning_rate": 5.33739720668581e-05, + "loss": 1.4191, + "step": 635500 + }, + { + "epoch": 0.38, + "learning_rate": 5.337187210129754e-05, + "loss": 1.4539, + "step": 636000 + }, + { + "epoch": 0.38, + "learning_rate": 5.33697763356681e-05, + "loss": 1.4653, + "step": 636500 + }, + { + "epoch": 0.38, + "learning_rate": 5.336767637010753e-05, + "loss": 1.4614, + "step": 637000 + }, + { + "epoch": 0.38, + "learning_rate": 5.3365580604478085e-05, + "loss": 1.4223, + "step": 637500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3363480638917525e-05, + "loss": 1.448, + "step": 638000 + }, + { + "epoch": 0.38, + "learning_rate": 5.336138067335696e-05, + "loss": 1.447, + "step": 638500 + }, + { + "epoch": 0.38, + "learning_rate": 5.335928070779639e-05, + "loss": 1.4191, + "step": 639000 + }, + { + "epoch": 0.38, + "learning_rate": 5.335718074223583e-05, + "loss": 1.4401, + "step": 639500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3355080776675266e-05, + "loss": 1.4308, + "step": 640000 + }, + { + "epoch": 0.38, + "learning_rate": 5.335298081111469e-05, + "loss": 1.4421, + "step": 640500 + }, + { + "epoch": 0.38, + "learning_rate": 5.335088504548525e-05, + "loss": 1.4242, + "step": 641000 + }, + { + "epoch": 0.38, + "learning_rate": 5.3348789279855806e-05, + "loss": 1.4478, + "step": 641500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3346689314295246e-05, + "loss": 1.4296, + "step": 642000 + }, + { + "epoch": 0.39, + "learning_rate": 5.334458934873468e-05, + "loss": 1.4458, + "step": 642500 + }, + { + "epoch": 0.39, + "learning_rate": 5.334248938317411e-05, + "loss": 1.4488, + "step": 643000 + }, + { + "epoch": 0.39, + "learning_rate": 5.3340389417613554e-05, + "loss": 1.4374, + "step": 643500 + }, + { + "epoch": 0.39, + "learning_rate": 5.333828945205299e-05, + "loss": 1.4245, + "step": 644000 + }, + { + "epoch": 0.39, + "learning_rate": 5.333618948649242e-05, + "loss": 1.4593, + "step": 644500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3334089520931854e-05, + "loss": 1.431, + "step": 645000 + }, + { + "epoch": 0.39, + "learning_rate": 5.333198955537129e-05, + "loss": 1.4644, + "step": 645500 + }, + { + "epoch": 0.39, + "learning_rate": 5.332988958981073e-05, + "loss": 1.4543, + "step": 646000 + }, + { + "epoch": 0.39, + "learning_rate": 5.332778962425016e-05, + "loss": 1.442, + "step": 646500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3325689658689595e-05, + "loss": 1.4349, + "step": 647000 + }, + { + "epoch": 0.39, + "learning_rate": 5.332359389306015e-05, + "loss": 1.4316, + "step": 647500 + }, + { + "epoch": 0.39, + "learning_rate": 5.332149392749959e-05, + "loss": 1.4708, + "step": 648000 + }, + { + "epoch": 0.39, + "learning_rate": 5.331939816187015e-05, + "loss": 1.4376, + "step": 648500 + }, + { + "epoch": 0.39, + "learning_rate": 5.331729819630958e-05, + "loss": 1.4279, + "step": 649000 + }, + { + "epoch": 0.39, + "learning_rate": 5.3315198230749016e-05, + "loss": 1.4284, + "step": 649500 + }, + { + "epoch": 0.39, + "learning_rate": 5.331309826518845e-05, + "loss": 1.4341, + "step": 650000 + }, + { + "epoch": 0.39, + "learning_rate": 5.331099829962788e-05, + "loss": 1.4804, + "step": 650500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3308898334067316e-05, + "loss": 1.4413, + "step": 651000 + }, + { + "epoch": 0.39, + "learning_rate": 5.3306798368506756e-05, + "loss": 1.4023, + "step": 651500 + }, + { + "epoch": 0.39, + "learning_rate": 5.330469840294619e-05, + "loss": 1.43, + "step": 652000 + }, + { + "epoch": 0.39, + "learning_rate": 5.330260263731674e-05, + "loss": 1.4476, + "step": 652500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3300506871687304e-05, + "loss": 1.4248, + "step": 653000 + }, + { + "epoch": 0.39, + "learning_rate": 5.3298406906126744e-05, + "loss": 1.4464, + "step": 653500 + }, + { + "epoch": 0.39, + "learning_rate": 5.329630694056618e-05, + "loss": 1.4334, + "step": 654000 + }, + { + "epoch": 0.39, + "learning_rate": 5.3294206975005604e-05, + "loss": 1.4424, + "step": 654500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3292107009445044e-05, + "loss": 1.4482, + "step": 655000 + }, + { + "epoch": 0.39, + "learning_rate": 5.329000704388448e-05, + "loss": 1.4251, + "step": 655500 + }, + { + "epoch": 0.39, + "learning_rate": 5.328791127825504e-05, + "loss": 1.444, + "step": 656000 + }, + { + "epoch": 0.39, + "learning_rate": 5.328581131269447e-05, + "loss": 1.4468, + "step": 656500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3283711347133905e-05, + "loss": 1.4211, + "step": 657000 + }, + { + "epoch": 0.39, + "learning_rate": 5.328161138157334e-05, + "loss": 1.4287, + "step": 657500 + }, + { + "epoch": 0.39, + "learning_rate": 5.327951141601277e-05, + "loss": 1.432, + "step": 658000 + }, + { + "epoch": 0.39, + "learning_rate": 5.327741145045221e-05, + "loss": 1.4243, + "step": 658500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3275311484891646e-05, + "loss": 1.4279, + "step": 659000 + }, + { + "epoch": 0.4, + "learning_rate": 5.327321151933108e-05, + "loss": 1.471, + "step": 659500 + }, + { + "epoch": 0.4, + "learning_rate": 5.327111155377052e-05, + "loss": 1.4446, + "step": 660000 + }, + { + "epoch": 0.4, + "learning_rate": 5.326901578814107e-05, + "loss": 1.4615, + "step": 660500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3266915822580506e-05, + "loss": 1.4471, + "step": 661000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3264815857019947e-05, + "loss": 1.4619, + "step": 661500 + }, + { + "epoch": 0.4, + "learning_rate": 5.326271589145938e-05, + "loss": 1.4318, + "step": 662000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3260615925898813e-05, + "loss": 1.4415, + "step": 662500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3258515960338254e-05, + "loss": 1.4262, + "step": 663000 + }, + { + "epoch": 0.4, + "learning_rate": 5.325641599477769e-05, + "loss": 1.4446, + "step": 663500 + }, + { + "epoch": 0.4, + "learning_rate": 5.325431602921712e-05, + "loss": 1.4169, + "step": 664000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3252220263587674e-05, + "loss": 1.4126, + "step": 664500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3250120298027114e-05, + "loss": 1.4523, + "step": 665000 + }, + { + "epoch": 0.4, + "learning_rate": 5.324802033246655e-05, + "loss": 1.4505, + "step": 665500 + }, + { + "epoch": 0.4, + "learning_rate": 5.32459245668371e-05, + "loss": 1.4749, + "step": 666000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3243824601276535e-05, + "loss": 1.4397, + "step": 666500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3241724635715975e-05, + "loss": 1.4256, + "step": 667000 + }, + { + "epoch": 0.4, + "learning_rate": 5.323962467015541e-05, + "loss": 1.4611, + "step": 667500 + }, + { + "epoch": 0.4, + "learning_rate": 5.323752890452596e-05, + "loss": 1.4506, + "step": 668000 + }, + { + "epoch": 0.4, + "learning_rate": 5.32354289389654e-05, + "loss": 1.456, + "step": 668500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3233337373267076e-05, + "loss": 1.4112, + "step": 669000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3231237407706516e-05, + "loss": 1.4675, + "step": 669500 + }, + { + "epoch": 0.4, + "learning_rate": 5.322913744214595e-05, + "loss": 1.4507, + "step": 670000 + }, + { + "epoch": 0.4, + "learning_rate": 5.322703747658538e-05, + "loss": 1.4317, + "step": 670500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3224937511024823e-05, + "loss": 1.4341, + "step": 671000 + }, + { + "epoch": 0.4, + "learning_rate": 5.322283754546425e-05, + "loss": 1.4129, + "step": 671500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3220737579903684e-05, + "loss": 1.4284, + "step": 672000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3218637614343124e-05, + "loss": 1.4382, + "step": 672500 + }, + { + "epoch": 0.4, + "learning_rate": 5.321653764878256e-05, + "loss": 1.4176, + "step": 673000 + }, + { + "epoch": 0.4, + "learning_rate": 5.321443768322199e-05, + "loss": 1.4286, + "step": 673500 + }, + { + "epoch": 0.4, + "learning_rate": 5.321233771766143e-05, + "loss": 1.4349, + "step": 674000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3210237752100864e-05, + "loss": 1.4427, + "step": 674500 + }, + { + "epoch": 0.4, + "learning_rate": 5.32081377865403e-05, + "loss": 1.4347, + "step": 675000 + }, + { + "epoch": 0.4, + "learning_rate": 5.320604202091086e-05, + "loss": 1.4143, + "step": 675500 + }, + { + "epoch": 0.41, + "learning_rate": 5.320394625528141e-05, + "loss": 1.4515, + "step": 676000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3201846289720845e-05, + "loss": 1.4268, + "step": 676500 + }, + { + "epoch": 0.41, + "learning_rate": 5.319974632416028e-05, + "loss": 1.4481, + "step": 677000 + }, + { + "epoch": 0.41, + "learning_rate": 5.319764635859972e-05, + "loss": 1.4725, + "step": 677500 + }, + { + "epoch": 0.41, + "learning_rate": 5.319554639303915e-05, + "loss": 1.4098, + "step": 678000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3193446427478586e-05, + "loss": 1.4437, + "step": 678500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3191346461918026e-05, + "loss": 1.3956, + "step": 679000 + }, + { + "epoch": 0.41, + "learning_rate": 5.318925069628858e-05, + "loss": 1.4491, + "step": 679500 + }, + { + "epoch": 0.41, + "learning_rate": 5.318715493065914e-05, + "loss": 1.4341, + "step": 680000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3185054965098574e-05, + "loss": 1.4301, + "step": 680500 + }, + { + "epoch": 0.41, + "learning_rate": 5.318295499953801e-05, + "loss": 1.3966, + "step": 681000 + }, + { + "epoch": 0.41, + "learning_rate": 5.318085503397744e-05, + "loss": 1.4268, + "step": 681500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3178755068416874e-05, + "loss": 1.4356, + "step": 682000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3176655102856314e-05, + "loss": 1.4598, + "step": 682500 + }, + { + "epoch": 0.41, + "learning_rate": 5.317455513729575e-05, + "loss": 1.4122, + "step": 683000 + }, + { + "epoch": 0.41, + "learning_rate": 5.317245517173518e-05, + "loss": 1.4324, + "step": 683500 + }, + { + "epoch": 0.41, + "learning_rate": 5.317035520617462e-05, + "loss": 1.4231, + "step": 684000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3168259440545175e-05, + "loss": 1.3964, + "step": 684500 + }, + { + "epoch": 0.41, + "learning_rate": 5.316615947498461e-05, + "loss": 1.4346, + "step": 685000 + }, + { + "epoch": 0.41, + "learning_rate": 5.316405950942404e-05, + "loss": 1.4072, + "step": 685500 + }, + { + "epoch": 0.41, + "learning_rate": 5.316195954386348e-05, + "loss": 1.4381, + "step": 686000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3159859578302915e-05, + "loss": 1.4389, + "step": 686500 + }, + { + "epoch": 0.41, + "learning_rate": 5.315775961274235e-05, + "loss": 1.4436, + "step": 687000 + }, + { + "epoch": 0.41, + "learning_rate": 5.315565964718179e-05, + "loss": 1.4558, + "step": 687500 + }, + { + "epoch": 0.41, + "learning_rate": 5.315355968162122e-05, + "loss": 1.4319, + "step": 688000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3151459716060656e-05, + "loss": 1.4506, + "step": 688500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3149363950431216e-05, + "loss": 1.4629, + "step": 689000 + }, + { + "epoch": 0.41, + "learning_rate": 5.314726398487065e-05, + "loss": 1.4172, + "step": 689500 + }, + { + "epoch": 0.41, + "learning_rate": 5.314516401931008e-05, + "loss": 1.4572, + "step": 690000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3143064053749524e-05, + "loss": 1.4266, + "step": 690500 + }, + { + "epoch": 0.41, + "learning_rate": 5.314096408818895e-05, + "loss": 1.4258, + "step": 691000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3138864122628384e-05, + "loss": 1.4577, + "step": 691500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3136764157067824e-05, + "loss": 1.4386, + "step": 692000 + }, + { + "epoch": 0.42, + "learning_rate": 5.313466419150726e-05, + "loss": 1.4349, + "step": 692500 + }, + { + "epoch": 0.42, + "learning_rate": 5.313257262580894e-05, + "loss": 1.4594, + "step": 693000 + }, + { + "epoch": 0.42, + "learning_rate": 5.313047266024837e-05, + "loss": 1.4586, + "step": 693500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3128372694687805e-05, + "loss": 1.4089, + "step": 694000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3126272729127245e-05, + "loss": 1.4087, + "step": 694500 + }, + { + "epoch": 0.42, + "learning_rate": 5.312417276356668e-05, + "loss": 1.4497, + "step": 695000 + }, + { + "epoch": 0.42, + "learning_rate": 5.312207279800611e-05, + "loss": 1.4195, + "step": 695500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3119972832445545e-05, + "loss": 1.3999, + "step": 696000 + }, + { + "epoch": 0.42, + "learning_rate": 5.311787286688498e-05, + "loss": 1.429, + "step": 696500 + }, + { + "epoch": 0.42, + "learning_rate": 5.311577710125554e-05, + "loss": 1.4587, + "step": 697000 + }, + { + "epoch": 0.42, + "learning_rate": 5.311368133562609e-05, + "loss": 1.437, + "step": 697500 + }, + { + "epoch": 0.42, + "learning_rate": 5.311158137006553e-05, + "loss": 1.4342, + "step": 698000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3109481404504967e-05, + "loss": 1.4075, + "step": 698500 + }, + { + "epoch": 0.42, + "learning_rate": 5.31073814389444e-05, + "loss": 1.4341, + "step": 699000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3105285673314954e-05, + "loss": 1.4322, + "step": 699500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3103185707754394e-05, + "loss": 1.4627, + "step": 700000 + }, + { + "epoch": 0.42, + "eval_loss": 1.3590091466903687, + "eval_runtime": 1101.3908, + "eval_samples_per_second": 478.232, + "eval_steps_per_second": 79.706, + "step": 700000 + }, + { + "epoch": 0.42, + "learning_rate": 5.310108574219383e-05, + "loss": 1.408, + "step": 700500 + }, + { + "epoch": 0.42, + "learning_rate": 5.309898577663326e-05, + "loss": 1.4505, + "step": 701000 + }, + { + "epoch": 0.42, + "learning_rate": 5.30968858110727e-05, + "loss": 1.4231, + "step": 701500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3094785845512134e-05, + "loss": 1.4554, + "step": 702000 + }, + { + "epoch": 0.42, + "learning_rate": 5.309268587995157e-05, + "loss": 1.4322, + "step": 702500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3090585914391e-05, + "loss": 1.4357, + "step": 703000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3088485948830435e-05, + "loss": 1.4412, + "step": 703500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3086385983269875e-05, + "loss": 1.4213, + "step": 704000 + }, + { + "epoch": 0.42, + "learning_rate": 5.308428601770931e-05, + "loss": 1.4415, + "step": 704500 + }, + { + "epoch": 0.42, + "learning_rate": 5.308218605214874e-05, + "loss": 1.4051, + "step": 705000 + }, + { + "epoch": 0.42, + "learning_rate": 5.308008608658818e-05, + "loss": 1.4593, + "step": 705500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3077994520889856e-05, + "loss": 1.4116, + "step": 706000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3075894555329296e-05, + "loss": 1.3986, + "step": 706500 + }, + { + "epoch": 0.42, + "learning_rate": 5.307379458976873e-05, + "loss": 1.4291, + "step": 707000 + }, + { + "epoch": 0.42, + "learning_rate": 5.307169462420816e-05, + "loss": 1.4258, + "step": 707500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3069594658647596e-05, + "loss": 1.43, + "step": 708000 + }, + { + "epoch": 0.42, + "learning_rate": 5.306749889301816e-05, + "loss": 1.4185, + "step": 708500 + }, + { + "epoch": 0.43, + "learning_rate": 5.306539892745759e-05, + "loss": 1.4218, + "step": 709000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3063298961897024e-05, + "loss": 1.4296, + "step": 709500 + }, + { + "epoch": 0.43, + "learning_rate": 5.306119899633646e-05, + "loss": 1.425, + "step": 710000 + }, + { + "epoch": 0.43, + "learning_rate": 5.305909903077589e-05, + "loss": 1.4172, + "step": 710500 + }, + { + "epoch": 0.43, + "learning_rate": 5.305699906521533e-05, + "loss": 1.4091, + "step": 711000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3054899099654764e-05, + "loss": 1.4459, + "step": 711500 + }, + { + "epoch": 0.43, + "learning_rate": 5.30527991340942e-05, + "loss": 1.4144, + "step": 712000 + }, + { + "epoch": 0.43, + "learning_rate": 5.305069916853364e-05, + "loss": 1.4102, + "step": 712500 + }, + { + "epoch": 0.43, + "learning_rate": 5.304860340290419e-05, + "loss": 1.4436, + "step": 713000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3046503437343625e-05, + "loss": 1.4086, + "step": 713500 + }, + { + "epoch": 0.43, + "learning_rate": 5.3044407671714185e-05, + "loss": 1.4445, + "step": 714000 + }, + { + "epoch": 0.43, + "learning_rate": 5.304230770615362e-05, + "loss": 1.4127, + "step": 714500 + }, + { + "epoch": 0.43, + "learning_rate": 5.304020774059305e-05, + "loss": 1.4149, + "step": 715000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3038107775032486e-05, + "loss": 1.4366, + "step": 715500 + }, + { + "epoch": 0.43, + "learning_rate": 5.303600780947192e-05, + "loss": 1.4374, + "step": 716000 + }, + { + "epoch": 0.43, + "learning_rate": 5.303390784391136e-05, + "loss": 1.4664, + "step": 716500 + }, + { + "epoch": 0.43, + "learning_rate": 5.303181207828192e-05, + "loss": 1.46, + "step": 717000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3029712112721347e-05, + "loss": 1.4286, + "step": 717500 + }, + { + "epoch": 0.43, + "learning_rate": 5.302761214716079e-05, + "loss": 1.4548, + "step": 718000 + }, + { + "epoch": 0.43, + "learning_rate": 5.302551218160022e-05, + "loss": 1.4518, + "step": 718500 + }, + { + "epoch": 0.43, + "learning_rate": 5.3023412216039654e-05, + "loss": 1.4134, + "step": 719000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3021312250479094e-05, + "loss": 1.4766, + "step": 719500 + }, + { + "epoch": 0.43, + "learning_rate": 5.301921648484965e-05, + "loss": 1.4393, + "step": 720000 + }, + { + "epoch": 0.43, + "learning_rate": 5.301711651928908e-05, + "loss": 1.4194, + "step": 720500 + }, + { + "epoch": 0.43, + "learning_rate": 5.3015016553728514e-05, + "loss": 1.4593, + "step": 721000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3012916588167955e-05, + "loss": 1.4598, + "step": 721500 + }, + { + "epoch": 0.43, + "learning_rate": 5.301081662260739e-05, + "loss": 1.4184, + "step": 722000 + }, + { + "epoch": 0.43, + "learning_rate": 5.300872085697794e-05, + "loss": 1.4309, + "step": 722500 + }, + { + "epoch": 0.43, + "learning_rate": 5.3006620891417375e-05, + "loss": 1.4349, + "step": 723000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3004520925856815e-05, + "loss": 1.4393, + "step": 723500 + }, + { + "epoch": 0.43, + "learning_rate": 5.300242096029625e-05, + "loss": 1.4495, + "step": 724000 + }, + { + "epoch": 0.43, + "learning_rate": 5.300032099473568e-05, + "loss": 1.433, + "step": 724500 + }, + { + "epoch": 0.43, + "learning_rate": 5.299822102917512e-05, + "loss": 1.4518, + "step": 725000 + }, + { + "epoch": 0.43, + "learning_rate": 5.2996121063614556e-05, + "loss": 1.4506, + "step": 725500 + }, + { + "epoch": 0.44, + "learning_rate": 5.299402109805399e-05, + "loss": 1.4156, + "step": 726000 + }, + { + "epoch": 0.44, + "learning_rate": 5.299192533242455e-05, + "loss": 1.4393, + "step": 726500 + }, + { + "epoch": 0.44, + "learning_rate": 5.298982536686398e-05, + "loss": 1.4054, + "step": 727000 + }, + { + "epoch": 0.44, + "learning_rate": 5.298772540130342e-05, + "loss": 1.4535, + "step": 727500 + }, + { + "epoch": 0.44, + "learning_rate": 5.298562543574286e-05, + "loss": 1.4119, + "step": 728000 + }, + { + "epoch": 0.44, + "learning_rate": 5.298352547018229e-05, + "loss": 1.4354, + "step": 728500 + }, + { + "epoch": 0.44, + "learning_rate": 5.298143390448397e-05, + "loss": 1.4135, + "step": 729000 + }, + { + "epoch": 0.44, + "learning_rate": 5.29793339389234e-05, + "loss": 1.4155, + "step": 729500 + }, + { + "epoch": 0.44, + "learning_rate": 5.297723397336283e-05, + "loss": 1.4293, + "step": 730000 + }, + { + "epoch": 0.44, + "learning_rate": 5.297513400780227e-05, + "loss": 1.4225, + "step": 730500 + }, + { + "epoch": 0.44, + "learning_rate": 5.297303824217283e-05, + "loss": 1.4443, + "step": 731000 + }, + { + "epoch": 0.44, + "learning_rate": 5.297093827661226e-05, + "loss": 1.4244, + "step": 731500 + }, + { + "epoch": 0.44, + "learning_rate": 5.29688383110517e-05, + "loss": 1.4241, + "step": 732000 + }, + { + "epoch": 0.44, + "learning_rate": 5.296673834549113e-05, + "loss": 1.4309, + "step": 732500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2964638379930565e-05, + "loss": 1.4142, + "step": 733000 + }, + { + "epoch": 0.44, + "learning_rate": 5.2962542614301126e-05, + "loss": 1.4607, + "step": 733500 + }, + { + "epoch": 0.44, + "learning_rate": 5.296044264874056e-05, + "loss": 1.4232, + "step": 734000 + }, + { + "epoch": 0.44, + "learning_rate": 5.295834268317999e-05, + "loss": 1.4346, + "step": 734500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2956242717619426e-05, + "loss": 1.429, + "step": 735000 + }, + { + "epoch": 0.44, + "learning_rate": 5.2954146951989987e-05, + "loss": 1.4275, + "step": 735500 + }, + { + "epoch": 0.44, + "learning_rate": 5.295204698642943e-05, + "loss": 1.4596, + "step": 736000 + }, + { + "epoch": 0.44, + "learning_rate": 5.2949947020868853e-05, + "loss": 1.4197, + "step": 736500 + }, + { + "epoch": 0.44, + "learning_rate": 5.294784705530829e-05, + "loss": 1.4253, + "step": 737000 + }, + { + "epoch": 0.44, + "learning_rate": 5.294574708974773e-05, + "loss": 1.4374, + "step": 737500 + }, + { + "epoch": 0.44, + "learning_rate": 5.294364712418716e-05, + "loss": 1.4415, + "step": 738000 + }, + { + "epoch": 0.44, + "learning_rate": 5.2941547158626594e-05, + "loss": 1.4352, + "step": 738500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2939447193066034e-05, + "loss": 1.4322, + "step": 739000 + }, + { + "epoch": 0.44, + "learning_rate": 5.2937355627367715e-05, + "loss": 1.4446, + "step": 739500 + }, + { + "epoch": 0.44, + "learning_rate": 5.293525566180715e-05, + "loss": 1.4068, + "step": 740000 + }, + { + "epoch": 0.44, + "learning_rate": 5.293315569624658e-05, + "loss": 1.4331, + "step": 740500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2931055730686015e-05, + "loss": 1.4115, + "step": 741000 + }, + { + "epoch": 0.44, + "learning_rate": 5.292895576512545e-05, + "loss": 1.4116, + "step": 741500 + }, + { + "epoch": 0.44, + "learning_rate": 5.292685579956488e-05, + "loss": 1.426, + "step": 742000 + }, + { + "epoch": 0.45, + "learning_rate": 5.292475583400432e-05, + "loss": 1.4512, + "step": 742500 + }, + { + "epoch": 0.45, + "learning_rate": 5.2922655868443756e-05, + "loss": 1.4143, + "step": 743000 + }, + { + "epoch": 0.45, + "learning_rate": 5.292056010281431e-05, + "loss": 1.4206, + "step": 743500 + }, + { + "epoch": 0.45, + "learning_rate": 5.291846013725374e-05, + "loss": 1.4405, + "step": 744000 + }, + { + "epoch": 0.45, + "learning_rate": 5.291636017169318e-05, + "loss": 1.4426, + "step": 744500 + }, + { + "epoch": 0.45, + "learning_rate": 5.291426440606374e-05, + "loss": 1.4342, + "step": 745000 + }, + { + "epoch": 0.45, + "learning_rate": 5.291216444050318e-05, + "loss": 1.389, + "step": 745500 + }, + { + "epoch": 0.45, + "learning_rate": 5.291006447494261e-05, + "loss": 1.3899, + "step": 746000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2907964509382044e-05, + "loss": 1.4248, + "step": 746500 + }, + { + "epoch": 0.45, + "learning_rate": 5.2905868743752604e-05, + "loss": 1.4283, + "step": 747000 + }, + { + "epoch": 0.45, + "learning_rate": 5.290376877819204e-05, + "loss": 1.4424, + "step": 747500 + }, + { + "epoch": 0.45, + "learning_rate": 5.290167301256259e-05, + "loss": 1.4209, + "step": 748000 + }, + { + "epoch": 0.45, + "learning_rate": 5.289957304700203e-05, + "loss": 1.4141, + "step": 748500 + }, + { + "epoch": 0.45, + "learning_rate": 5.2897473081441465e-05, + "loss": 1.4175, + "step": 749000 + }, + { + "epoch": 0.45, + "learning_rate": 5.28953731158809e-05, + "loss": 1.4353, + "step": 749500 + }, + { + "epoch": 0.45, + "learning_rate": 5.289327315032034e-05, + "loss": 1.4151, + "step": 750000 + }, + { + "epoch": 0.45, + "learning_rate": 5.289117318475977e-05, + "loss": 1.432, + "step": 750500 + }, + { + "epoch": 0.45, + "learning_rate": 5.28890732191992e-05, + "loss": 1.4435, + "step": 751000 + }, + { + "epoch": 0.45, + "learning_rate": 5.288697325363864e-05, + "loss": 1.4726, + "step": 751500 + }, + { + "epoch": 0.45, + "learning_rate": 5.288487328807807e-05, + "loss": 1.4564, + "step": 752000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2882773322517506e-05, + "loss": 1.4647, + "step": 752500 + }, + { + "epoch": 0.45, + "learning_rate": 5.2880673356956946e-05, + "loss": 1.4249, + "step": 753000 + }, + { + "epoch": 0.45, + "learning_rate": 5.287857339139638e-05, + "loss": 1.4816, + "step": 753500 + }, + { + "epoch": 0.45, + "learning_rate": 5.287647762576693e-05, + "loss": 1.422, + "step": 754000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2874386060068614e-05, + "loss": 1.4455, + "step": 754500 + }, + { + "epoch": 0.45, + "learning_rate": 5.287228609450805e-05, + "loss": 1.421, + "step": 755000 + }, + { + "epoch": 0.45, + "learning_rate": 5.287018612894749e-05, + "loss": 1.4052, + "step": 755500 + }, + { + "epoch": 0.45, + "learning_rate": 5.286808616338692e-05, + "loss": 1.3963, + "step": 756000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2865986197826354e-05, + "loss": 1.4512, + "step": 756500 + }, + { + "epoch": 0.45, + "learning_rate": 5.2863886232265794e-05, + "loss": 1.4022, + "step": 757000 + }, + { + "epoch": 0.45, + "learning_rate": 5.286178626670523e-05, + "loss": 1.4024, + "step": 757500 + }, + { + "epoch": 0.45, + "learning_rate": 5.285969050107578e-05, + "loss": 1.4019, + "step": 758000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2857590535515215e-05, + "loss": 1.4412, + "step": 758500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2855490569954655e-05, + "loss": 1.4478, + "step": 759000 + }, + { + "epoch": 0.46, + "learning_rate": 5.285339060439409e-05, + "loss": 1.4091, + "step": 759500 + }, + { + "epoch": 0.46, + "learning_rate": 5.285129063883352e-05, + "loss": 1.4316, + "step": 760000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2849190673272955e-05, + "loss": 1.436, + "step": 760500 + }, + { + "epoch": 0.46, + "learning_rate": 5.284709070771239e-05, + "loss": 1.4195, + "step": 761000 + }, + { + "epoch": 0.46, + "learning_rate": 5.284499494208295e-05, + "loss": 1.3842, + "step": 761500 + }, + { + "epoch": 0.46, + "learning_rate": 5.284289497652239e-05, + "loss": 1.4408, + "step": 762000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2840795010961816e-05, + "loss": 1.4332, + "step": 762500 + }, + { + "epoch": 0.46, + "learning_rate": 5.283869504540125e-05, + "loss": 1.3958, + "step": 763000 + }, + { + "epoch": 0.46, + "learning_rate": 5.283659507984069e-05, + "loss": 1.4334, + "step": 763500 + }, + { + "epoch": 0.46, + "learning_rate": 5.283449511428012e-05, + "loss": 1.4235, + "step": 764000 + }, + { + "epoch": 0.46, + "learning_rate": 5.283239514871956e-05, + "loss": 1.4217, + "step": 764500 + }, + { + "epoch": 0.46, + "learning_rate": 5.283029938309011e-05, + "loss": 1.4225, + "step": 765000 + }, + { + "epoch": 0.46, + "learning_rate": 5.282819941752955e-05, + "loss": 1.4362, + "step": 765500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2826099451968984e-05, + "loss": 1.396, + "step": 766000 + }, + { + "epoch": 0.46, + "learning_rate": 5.282399948640842e-05, + "loss": 1.4001, + "step": 766500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2821903720778985e-05, + "loss": 1.4292, + "step": 767000 + }, + { + "epoch": 0.46, + "learning_rate": 5.281980375521841e-05, + "loss": 1.4172, + "step": 767500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2817703789657845e-05, + "loss": 1.4426, + "step": 768000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2815603824097285e-05, + "loss": 1.4257, + "step": 768500 + }, + { + "epoch": 0.46, + "learning_rate": 5.281350385853672e-05, + "loss": 1.4549, + "step": 769000 + }, + { + "epoch": 0.46, + "learning_rate": 5.281140389297615e-05, + "loss": 1.429, + "step": 769500 + }, + { + "epoch": 0.46, + "learning_rate": 5.280930392741559e-05, + "loss": 1.4024, + "step": 770000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2807203961855026e-05, + "loss": 1.3868, + "step": 770500 + }, + { + "epoch": 0.46, + "learning_rate": 5.280510399629446e-05, + "loss": 1.4544, + "step": 771000 + }, + { + "epoch": 0.46, + "learning_rate": 5.280300823066501e-05, + "loss": 1.402, + "step": 771500 + }, + { + "epoch": 0.46, + "learning_rate": 5.280090826510445e-05, + "loss": 1.4316, + "step": 772000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2798808299543886e-05, + "loss": 1.409, + "step": 772500 + }, + { + "epoch": 0.46, + "learning_rate": 5.279670833398332e-05, + "loss": 1.4215, + "step": 773000 + }, + { + "epoch": 0.46, + "learning_rate": 5.279460836842276e-05, + "loss": 1.4543, + "step": 773500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2792508402862194e-05, + "loss": 1.4367, + "step": 774000 + }, + { + "epoch": 0.46, + "learning_rate": 5.279040843730163e-05, + "loss": 1.4511, + "step": 774500 + }, + { + "epoch": 0.46, + "learning_rate": 5.278831267167219e-05, + "loss": 1.4217, + "step": 775000 + }, + { + "epoch": 0.46, + "learning_rate": 5.278621270611162e-05, + "loss": 1.423, + "step": 775500 + }, + { + "epoch": 0.47, + "learning_rate": 5.2784112740551054e-05, + "loss": 1.4249, + "step": 776000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2782012774990495e-05, + "loss": 1.4343, + "step": 776500 + }, + { + "epoch": 0.47, + "learning_rate": 5.277991280942993e-05, + "loss": 1.4311, + "step": 777000 + }, + { + "epoch": 0.47, + "learning_rate": 5.277781704380048e-05, + "loss": 1.3929, + "step": 777500 + }, + { + "epoch": 0.47, + "learning_rate": 5.2775721278171035e-05, + "loss": 1.384, + "step": 778000 + }, + { + "epoch": 0.47, + "learning_rate": 5.277362131261047e-05, + "loss": 1.4355, + "step": 778500 + }, + { + "epoch": 0.47, + "learning_rate": 5.277152134704991e-05, + "loss": 1.4185, + "step": 779000 + }, + { + "epoch": 0.47, + "learning_rate": 5.276942138148934e-05, + "loss": 1.4229, + "step": 779500 + }, + { + "epoch": 0.47, + "learning_rate": 5.2767321415928776e-05, + "loss": 1.4343, + "step": 780000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2765221450368216e-05, + "loss": 1.4136, + "step": 780500 + }, + { + "epoch": 0.47, + "learning_rate": 5.276312148480765e-05, + "loss": 1.3813, + "step": 781000 + }, + { + "epoch": 0.47, + "learning_rate": 5.276102151924708e-05, + "loss": 1.4223, + "step": 781500 + }, + { + "epoch": 0.47, + "learning_rate": 5.275892155368652e-05, + "loss": 1.3987, + "step": 782000 + }, + { + "epoch": 0.47, + "learning_rate": 5.275682158812595e-05, + "loss": 1.4144, + "step": 782500 + }, + { + "epoch": 0.47, + "learning_rate": 5.275472162256539e-05, + "loss": 1.4169, + "step": 783000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2752621657004824e-05, + "loss": 1.4241, + "step": 783500 + }, + { + "epoch": 0.47, + "learning_rate": 5.275052169144426e-05, + "loss": 1.423, + "step": 784000 + }, + { + "epoch": 0.47, + "learning_rate": 5.274842592581482e-05, + "loss": 1.4311, + "step": 784500 + }, + { + "epoch": 0.47, + "learning_rate": 5.274633016018537e-05, + "loss": 1.4093, + "step": 785000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2744234394555924e-05, + "loss": 1.4348, + "step": 785500 + }, + { + "epoch": 0.47, + "learning_rate": 5.2742134428995365e-05, + "loss": 1.4067, + "step": 786000 + }, + { + "epoch": 0.47, + "learning_rate": 5.27400344634348e-05, + "loss": 1.4248, + "step": 786500 + }, + { + "epoch": 0.47, + "learning_rate": 5.273793449787423e-05, + "loss": 1.4434, + "step": 787000 + }, + { + "epoch": 0.47, + "learning_rate": 5.273583453231367e-05, + "loss": 1.4103, + "step": 787500 + }, + { + "epoch": 0.47, + "learning_rate": 5.2733734566753105e-05, + "loss": 1.4276, + "step": 788000 + }, + { + "epoch": 0.47, + "learning_rate": 5.273163460119254e-05, + "loss": 1.4342, + "step": 788500 + }, + { + "epoch": 0.47, + "learning_rate": 5.272953463563198e-05, + "loss": 1.4141, + "step": 789000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2727434670071406e-05, + "loss": 1.392, + "step": 789500 + }, + { + "epoch": 0.47, + "learning_rate": 5.2725338904441966e-05, + "loss": 1.4136, + "step": 790000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2723238938881406e-05, + "loss": 1.3944, + "step": 790500 + }, + { + "epoch": 0.47, + "learning_rate": 5.272113897332084e-05, + "loss": 1.4292, + "step": 791000 + }, + { + "epoch": 0.47, + "learning_rate": 5.271903900776027e-05, + "loss": 1.4284, + "step": 791500 + }, + { + "epoch": 0.47, + "learning_rate": 5.271693904219971e-05, + "loss": 1.4478, + "step": 792000 + }, + { + "epoch": 0.48, + "learning_rate": 5.271483907663914e-05, + "loss": 1.4383, + "step": 792500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2712739111078574e-05, + "loss": 1.433, + "step": 793000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2710639145518014e-05, + "loss": 1.4345, + "step": 793500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2708543379888574e-05, + "loss": 1.4657, + "step": 794000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2706443414328e-05, + "loss": 1.4015, + "step": 794500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2704343448767434e-05, + "loss": 1.3839, + "step": 795000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2702243483206875e-05, + "loss": 1.4436, + "step": 795500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2700147717577435e-05, + "loss": 1.4201, + "step": 796000 + }, + { + "epoch": 0.48, + "learning_rate": 5.269805195194799e-05, + "loss": 1.4199, + "step": 796500 + }, + { + "epoch": 0.48, + "learning_rate": 5.269595198638742e-05, + "loss": 1.4173, + "step": 797000 + }, + { + "epoch": 0.48, + "learning_rate": 5.269385202082686e-05, + "loss": 1.4289, + "step": 797500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2691752055266296e-05, + "loss": 1.4226, + "step": 798000 + }, + { + "epoch": 0.48, + "learning_rate": 5.268965628963685e-05, + "loss": 1.3627, + "step": 798500 + }, + { + "epoch": 0.48, + "learning_rate": 5.268755632407628e-05, + "loss": 1.4062, + "step": 799000 + }, + { + "epoch": 0.48, + "learning_rate": 5.268545635851572e-05, + "loss": 1.4044, + "step": 799500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2683356392955156e-05, + "loss": 1.3971, + "step": 800000 + }, + { + "epoch": 0.48, + "eval_loss": 1.3476606607437134, + "eval_runtime": 1101.7337, + "eval_samples_per_second": 478.083, + "eval_steps_per_second": 79.681, + "step": 800000 + }, + { + "epoch": 0.48, + "learning_rate": 5.268125642739459e-05, + "loss": 1.4049, + "step": 800500 + }, + { + "epoch": 0.48, + "learning_rate": 5.267915646183403e-05, + "loss": 1.4219, + "step": 801000 + }, + { + "epoch": 0.48, + "learning_rate": 5.267705649627346e-05, + "loss": 1.4239, + "step": 801500 + }, + { + "epoch": 0.48, + "learning_rate": 5.267495653071289e-05, + "loss": 1.4172, + "step": 802000 + }, + { + "epoch": 0.48, + "learning_rate": 5.267285656515233e-05, + "loss": 1.4057, + "step": 802500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2670756599591764e-05, + "loss": 1.4081, + "step": 803000 + }, + { + "epoch": 0.48, + "learning_rate": 5.26686566340312e-05, + "loss": 1.4203, + "step": 803500 + }, + { + "epoch": 0.48, + "learning_rate": 5.266655666847064e-05, + "loss": 1.4057, + "step": 804000 + }, + { + "epoch": 0.48, + "learning_rate": 5.266446090284119e-05, + "loss": 1.415, + "step": 804500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2662360937280625e-05, + "loss": 1.4025, + "step": 805000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2660260971720065e-05, + "loss": 1.4336, + "step": 805500 + }, + { + "epoch": 0.48, + "learning_rate": 5.26581610061595e-05, + "loss": 1.3758, + "step": 806000 + }, + { + "epoch": 0.48, + "learning_rate": 5.265606944046118e-05, + "loss": 1.4085, + "step": 806500 + }, + { + "epoch": 0.48, + "learning_rate": 5.265396947490061e-05, + "loss": 1.402, + "step": 807000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2651869509340046e-05, + "loss": 1.4093, + "step": 807500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2649769543779486e-05, + "loss": 1.4118, + "step": 808000 + }, + { + "epoch": 0.48, + "learning_rate": 5.264766957821891e-05, + "loss": 1.4126, + "step": 808500 + }, + { + "epoch": 0.49, + "learning_rate": 5.2645569612658346e-05, + "loss": 1.4374, + "step": 809000 + }, + { + "epoch": 0.49, + "learning_rate": 5.2643469647097786e-05, + "loss": 1.4125, + "step": 809500 + }, + { + "epoch": 0.49, + "learning_rate": 5.264136968153722e-05, + "loss": 1.4105, + "step": 810000 + }, + { + "epoch": 0.49, + "learning_rate": 5.263926971597665e-05, + "loss": 1.4576, + "step": 810500 + }, + { + "epoch": 0.49, + "learning_rate": 5.2637178150278334e-05, + "loss": 1.423, + "step": 811000 + }, + { + "epoch": 0.49, + "learning_rate": 5.2635078184717774e-05, + "loss": 1.4182, + "step": 811500 + }, + { + "epoch": 0.49, + "learning_rate": 5.263297821915721e-05, + "loss": 1.4275, + "step": 812000 + }, + { + "epoch": 0.49, + "learning_rate": 5.263087825359664e-05, + "loss": 1.4098, + "step": 812500 + }, + { + "epoch": 0.49, + "learning_rate": 5.262877828803608e-05, + "loss": 1.4135, + "step": 813000 + }, + { + "epoch": 0.49, + "learning_rate": 5.262667832247551e-05, + "loss": 1.3993, + "step": 813500 + }, + { + "epoch": 0.49, + "learning_rate": 5.262457835691494e-05, + "loss": 1.4268, + "step": 814000 + }, + { + "epoch": 0.49, + "learning_rate": 5.262247839135438e-05, + "loss": 1.4105, + "step": 814500 + }, + { + "epoch": 0.49, + "learning_rate": 5.262038262572494e-05, + "loss": 1.4069, + "step": 815000 + }, + { + "epoch": 0.49, + "learning_rate": 5.2618282660164375e-05, + "loss": 1.3967, + "step": 815500 + }, + { + "epoch": 0.49, + "learning_rate": 5.261618689453493e-05, + "loss": 1.3951, + "step": 816000 + }, + { + "epoch": 0.49, + "learning_rate": 5.261408692897436e-05, + "loss": 1.4378, + "step": 816500 + }, + { + "epoch": 0.49, + "learning_rate": 5.26119869634138e-05, + "loss": 1.3984, + "step": 817000 + }, + { + "epoch": 0.49, + "learning_rate": 5.2609886997853236e-05, + "loss": 1.3945, + "step": 817500 + }, + { + "epoch": 0.49, + "learning_rate": 5.260779123222379e-05, + "loss": 1.4236, + "step": 818000 + }, + { + "epoch": 0.49, + "learning_rate": 5.260569126666323e-05, + "loss": 1.3945, + "step": 818500 + }, + { + "epoch": 0.49, + "learning_rate": 5.260359130110266e-05, + "loss": 1.4138, + "step": 819000 + }, + { + "epoch": 0.49, + "learning_rate": 5.26014913355421e-05, + "loss": 1.3774, + "step": 819500 + }, + { + "epoch": 0.49, + "learning_rate": 5.259939136998154e-05, + "loss": 1.4063, + "step": 820000 + }, + { + "epoch": 0.49, + "learning_rate": 5.2597291404420964e-05, + "loss": 1.4424, + "step": 820500 + }, + { + "epoch": 0.49, + "learning_rate": 5.25951914388604e-05, + "loss": 1.4055, + "step": 821000 + }, + { + "epoch": 0.49, + "learning_rate": 5.259309147329984e-05, + "loss": 1.3729, + "step": 821500 + }, + { + "epoch": 0.49, + "learning_rate": 5.259099150773927e-05, + "loss": 1.4052, + "step": 822000 + }, + { + "epoch": 0.49, + "learning_rate": 5.258889574210983e-05, + "loss": 1.4193, + "step": 822500 + }, + { + "epoch": 0.49, + "learning_rate": 5.258679577654926e-05, + "loss": 1.4445, + "step": 823000 + }, + { + "epoch": 0.49, + "learning_rate": 5.25846958109887e-05, + "loss": 1.4077, + "step": 823500 + }, + { + "epoch": 0.49, + "learning_rate": 5.258260004535926e-05, + "loss": 1.4075, + "step": 824000 + }, + { + "epoch": 0.49, + "learning_rate": 5.258050007979869e-05, + "loss": 1.3979, + "step": 824500 + }, + { + "epoch": 0.49, + "learning_rate": 5.2578400114238125e-05, + "loss": 1.4419, + "step": 825000 + }, + { + "epoch": 0.49, + "learning_rate": 5.257630014867756e-05, + "loss": 1.4302, + "step": 825500 + }, + { + "epoch": 0.5, + "learning_rate": 5.257420018311699e-05, + "loss": 1.4607, + "step": 826000 + }, + { + "epoch": 0.5, + "learning_rate": 5.257210441748755e-05, + "loss": 1.4044, + "step": 826500 + }, + { + "epoch": 0.5, + "learning_rate": 5.257000445192699e-05, + "loss": 1.414, + "step": 827000 + }, + { + "epoch": 0.5, + "learning_rate": 5.2567908686297546e-05, + "loss": 1.412, + "step": 827500 + }, + { + "epoch": 0.5, + "learning_rate": 5.256580872073698e-05, + "loss": 1.4107, + "step": 828000 + }, + { + "epoch": 0.5, + "learning_rate": 5.256370875517641e-05, + "loss": 1.4276, + "step": 828500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2561608789615854e-05, + "loss": 1.4317, + "step": 829000 + }, + { + "epoch": 0.5, + "learning_rate": 5.255951302398641e-05, + "loss": 1.3977, + "step": 829500 + }, + { + "epoch": 0.5, + "learning_rate": 5.255741305842584e-05, + "loss": 1.4031, + "step": 830000 + }, + { + "epoch": 0.5, + "learning_rate": 5.2555317292796394e-05, + "loss": 1.4017, + "step": 830500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2553217327235834e-05, + "loss": 1.4126, + "step": 831000 + }, + { + "epoch": 0.5, + "learning_rate": 5.255111736167527e-05, + "loss": 1.408, + "step": 831500 + }, + { + "epoch": 0.5, + "learning_rate": 5.25490173961147e-05, + "loss": 1.3863, + "step": 832000 + }, + { + "epoch": 0.5, + "learning_rate": 5.254691743055414e-05, + "loss": 1.4112, + "step": 832500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2544817464993575e-05, + "loss": 1.3932, + "step": 833000 + }, + { + "epoch": 0.5, + "learning_rate": 5.254271749943301e-05, + "loss": 1.429, + "step": 833500 + }, + { + "epoch": 0.5, + "learning_rate": 5.254061753387245e-05, + "loss": 1.4309, + "step": 834000 + }, + { + "epoch": 0.5, + "learning_rate": 5.253851756831188e-05, + "loss": 1.4212, + "step": 834500 + }, + { + "epoch": 0.5, + "learning_rate": 5.253641760275131e-05, + "loss": 1.4225, + "step": 835000 + }, + { + "epoch": 0.5, + "learning_rate": 5.253431763719075e-05, + "loss": 1.3975, + "step": 835500 + }, + { + "epoch": 0.5, + "learning_rate": 5.253221767163018e-05, + "loss": 1.4305, + "step": 836000 + }, + { + "epoch": 0.5, + "learning_rate": 5.2530117706069616e-05, + "loss": 1.4204, + "step": 836500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2528017740509056e-05, + "loss": 1.4108, + "step": 837000 + }, + { + "epoch": 0.5, + "learning_rate": 5.252592197487961e-05, + "loss": 1.3828, + "step": 837500 + }, + { + "epoch": 0.5, + "learning_rate": 5.252382200931904e-05, + "loss": 1.4142, + "step": 838000 + }, + { + "epoch": 0.5, + "learning_rate": 5.252172204375848e-05, + "loss": 1.4294, + "step": 838500 + }, + { + "epoch": 0.5, + "learning_rate": 5.251962207819792e-05, + "loss": 1.4154, + "step": 839000 + }, + { + "epoch": 0.5, + "learning_rate": 5.251752211263735e-05, + "loss": 1.4359, + "step": 839500 + }, + { + "epoch": 0.5, + "learning_rate": 5.251542214707679e-05, + "loss": 1.4021, + "step": 840000 + }, + { + "epoch": 0.5, + "learning_rate": 5.2513326381447344e-05, + "loss": 1.4131, + "step": 840500 + }, + { + "epoch": 0.5, + "learning_rate": 5.251122641588678e-05, + "loss": 1.4293, + "step": 841000 + }, + { + "epoch": 0.5, + "learning_rate": 5.250912645032621e-05, + "loss": 1.4133, + "step": 841500 + }, + { + "epoch": 0.5, + "learning_rate": 5.250702648476565e-05, + "loss": 1.441, + "step": 842000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2504926519205085e-05, + "loss": 1.4341, + "step": 842500 + }, + { + "epoch": 0.51, + "learning_rate": 5.250282655364452e-05, + "loss": 1.4481, + "step": 843000 + }, + { + "epoch": 0.51, + "learning_rate": 5.250073078801507e-05, + "loss": 1.3972, + "step": 843500 + }, + { + "epoch": 0.51, + "learning_rate": 5.249863082245451e-05, + "loss": 1.428, + "step": 844000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2496530856893946e-05, + "loss": 1.4243, + "step": 844500 + }, + { + "epoch": 0.51, + "learning_rate": 5.249443089133338e-05, + "loss": 1.4263, + "step": 845000 + }, + { + "epoch": 0.51, + "learning_rate": 5.249233512570393e-05, + "loss": 1.4228, + "step": 845500 + }, + { + "epoch": 0.51, + "learning_rate": 5.249023516014337e-05, + "loss": 1.3805, + "step": 846000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2488135194582806e-05, + "loss": 1.4233, + "step": 846500 + }, + { + "epoch": 0.51, + "learning_rate": 5.2486035229022247e-05, + "loss": 1.4401, + "step": 847000 + }, + { + "epoch": 0.51, + "learning_rate": 5.248393526346168e-05, + "loss": 1.4353, + "step": 847500 + }, + { + "epoch": 0.51, + "learning_rate": 5.2481839497832234e-05, + "loss": 1.4342, + "step": 848000 + }, + { + "epoch": 0.51, + "learning_rate": 5.247973953227167e-05, + "loss": 1.4169, + "step": 848500 + }, + { + "epoch": 0.51, + "learning_rate": 5.247763956671111e-05, + "loss": 1.4189, + "step": 849000 + }, + { + "epoch": 0.51, + "learning_rate": 5.247553960115054e-05, + "loss": 1.3913, + "step": 849500 + }, + { + "epoch": 0.51, + "learning_rate": 5.2473439635589974e-05, + "loss": 1.4473, + "step": 850000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2471339670029414e-05, + "loss": 1.4138, + "step": 850500 + }, + { + "epoch": 0.51, + "learning_rate": 5.246924390439997e-05, + "loss": 1.4174, + "step": 851000 + }, + { + "epoch": 0.51, + "learning_rate": 5.24671439388394e-05, + "loss": 1.4409, + "step": 851500 + }, + { + "epoch": 0.51, + "learning_rate": 5.2465043973278835e-05, + "loss": 1.4366, + "step": 852000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2462944007718275e-05, + "loss": 1.3988, + "step": 852500 + }, + { + "epoch": 0.51, + "learning_rate": 5.246084404215771e-05, + "loss": 1.4026, + "step": 853000 + }, + { + "epoch": 0.51, + "learning_rate": 5.245874407659714e-05, + "loss": 1.4102, + "step": 853500 + }, + { + "epoch": 0.51, + "learning_rate": 5.24566483109677e-05, + "loss": 1.4261, + "step": 854000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2454548345407136e-05, + "loss": 1.4146, + "step": 854500 + }, + { + "epoch": 0.51, + "learning_rate": 5.245244837984657e-05, + "loss": 1.4159, + "step": 855000 + }, + { + "epoch": 0.51, + "learning_rate": 5.245034841428601e-05, + "loss": 1.3839, + "step": 855500 + }, + { + "epoch": 0.51, + "learning_rate": 5.244824844872544e-05, + "loss": 1.4077, + "step": 856000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2446148483164876e-05, + "loss": 1.3936, + "step": 856500 + }, + { + "epoch": 0.51, + "learning_rate": 5.244404851760431e-05, + "loss": 1.3762, + "step": 857000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2441948552043743e-05, + "loss": 1.4056, + "step": 857500 + }, + { + "epoch": 0.51, + "learning_rate": 5.243984858648318e-05, + "loss": 1.4031, + "step": 858000 + }, + { + "epoch": 0.51, + "learning_rate": 5.243775702078486e-05, + "loss": 1.4041, + "step": 858500 + }, + { + "epoch": 0.52, + "learning_rate": 5.243565705522429e-05, + "loss": 1.4188, + "step": 859000 + }, + { + "epoch": 0.52, + "learning_rate": 5.243355708966373e-05, + "loss": 1.4126, + "step": 859500 + }, + { + "epoch": 0.52, + "learning_rate": 5.2431457124103164e-05, + "loss": 1.4103, + "step": 860000 + }, + { + "epoch": 0.52, + "learning_rate": 5.24293571585426e-05, + "loss": 1.4154, + "step": 860500 + }, + { + "epoch": 0.52, + "learning_rate": 5.242725719298204e-05, + "loss": 1.4055, + "step": 861000 + }, + { + "epoch": 0.52, + "learning_rate": 5.242516142735259e-05, + "loss": 1.3874, + "step": 861500 + }, + { + "epoch": 0.52, + "learning_rate": 5.2423061461792025e-05, + "loss": 1.387, + "step": 862000 + }, + { + "epoch": 0.52, + "learning_rate": 5.2420961496231465e-05, + "loss": 1.4242, + "step": 862500 + }, + { + "epoch": 0.52, + "learning_rate": 5.24188615306709e-05, + "loss": 1.4214, + "step": 863000 + }, + { + "epoch": 0.52, + "learning_rate": 5.241676156511033e-05, + "loss": 1.4212, + "step": 863500 + }, + { + "epoch": 0.52, + "learning_rate": 5.241466159954977e-05, + "loss": 1.4259, + "step": 864000 + }, + { + "epoch": 0.52, + "learning_rate": 5.24125616339892e-05, + "loss": 1.4294, + "step": 864500 + }, + { + "epoch": 0.52, + "learning_rate": 5.241046586835976e-05, + "loss": 1.3904, + "step": 865000 + }, + { + "epoch": 0.52, + "learning_rate": 5.240836590279919e-05, + "loss": 1.3872, + "step": 865500 + }, + { + "epoch": 0.52, + "learning_rate": 5.240626593723863e-05, + "loss": 1.415, + "step": 866000 + }, + { + "epoch": 0.52, + "learning_rate": 5.240416597167806e-05, + "loss": 1.4467, + "step": 866500 + }, + { + "epoch": 0.52, + "learning_rate": 5.2402066006117493e-05, + "loss": 1.3677, + "step": 867000 + }, + { + "epoch": 0.52, + "learning_rate": 5.2399966040556934e-05, + "loss": 1.3957, + "step": 867500 + }, + { + "epoch": 0.52, + "learning_rate": 5.2397870274927494e-05, + "loss": 1.4113, + "step": 868000 + }, + { + "epoch": 0.52, + "learning_rate": 5.239577450929805e-05, + "loss": 1.4239, + "step": 868500 + }, + { + "epoch": 0.52, + "learning_rate": 5.239367454373748e-05, + "loss": 1.3956, + "step": 869000 + }, + { + "epoch": 0.52, + "learning_rate": 5.239157457817692e-05, + "loss": 1.3879, + "step": 869500 + }, + { + "epoch": 0.52, + "learning_rate": 5.2389474612616355e-05, + "loss": 1.4304, + "step": 870000 + }, + { + "epoch": 0.52, + "learning_rate": 5.238737464705579e-05, + "loss": 1.4105, + "step": 870500 + }, + { + "epoch": 0.52, + "learning_rate": 5.238527468149523e-05, + "loss": 1.4289, + "step": 871000 + }, + { + "epoch": 0.52, + "learning_rate": 5.2383174715934655e-05, + "loss": 1.3973, + "step": 871500 + }, + { + "epoch": 0.52, + "learning_rate": 5.238107475037409e-05, + "loss": 1.3981, + "step": 872000 + }, + { + "epoch": 0.52, + "learning_rate": 5.237897478481353e-05, + "loss": 1.3973, + "step": 872500 + }, + { + "epoch": 0.52, + "learning_rate": 5.237687481925296e-05, + "loss": 1.4019, + "step": 873000 + }, + { + "epoch": 0.52, + "learning_rate": 5.2374774853692396e-05, + "loss": 1.4261, + "step": 873500 + }, + { + "epoch": 0.52, + "learning_rate": 5.2372674888131836e-05, + "loss": 1.4133, + "step": 874000 + }, + { + "epoch": 0.52, + "learning_rate": 5.237058332243351e-05, + "loss": 1.4314, + "step": 874500 + }, + { + "epoch": 0.52, + "learning_rate": 5.236848335687295e-05, + "loss": 1.3962, + "step": 875000 + }, + { + "epoch": 0.52, + "learning_rate": 5.2366383391312383e-05, + "loss": 1.4012, + "step": 875500 + }, + { + "epoch": 0.53, + "learning_rate": 5.236428762568294e-05, + "loss": 1.4301, + "step": 876000 + }, + { + "epoch": 0.53, + "learning_rate": 5.236218766012238e-05, + "loss": 1.4135, + "step": 876500 + }, + { + "epoch": 0.53, + "learning_rate": 5.236008769456181e-05, + "loss": 1.3894, + "step": 877000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2357987729001244e-05, + "loss": 1.4161, + "step": 877500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2355887763440684e-05, + "loss": 1.4214, + "step": 878000 + }, + { + "epoch": 0.53, + "learning_rate": 5.235378779788011e-05, + "loss": 1.4302, + "step": 878500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2351687832319544e-05, + "loss": 1.4385, + "step": 879000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2349587866758985e-05, + "loss": 1.4295, + "step": 879500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2347492101129545e-05, + "loss": 1.4233, + "step": 880000 + }, + { + "epoch": 0.53, + "learning_rate": 5.234539213556898e-05, + "loss": 1.3985, + "step": 880500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2343292170008405e-05, + "loss": 1.4143, + "step": 881000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2341192204447845e-05, + "loss": 1.4033, + "step": 881500 + }, + { + "epoch": 0.53, + "learning_rate": 5.233909223888728e-05, + "loss": 1.3976, + "step": 882000 + }, + { + "epoch": 0.53, + "learning_rate": 5.233699227332671e-05, + "loss": 1.3902, + "step": 882500 + }, + { + "epoch": 0.53, + "learning_rate": 5.233489230776615e-05, + "loss": 1.4114, + "step": 883000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2332792342205586e-05, + "loss": 1.4045, + "step": 883500 + }, + { + "epoch": 0.53, + "learning_rate": 5.233069657657614e-05, + "loss": 1.3903, + "step": 884000 + }, + { + "epoch": 0.53, + "learning_rate": 5.232859661101558e-05, + "loss": 1.4144, + "step": 884500 + }, + { + "epoch": 0.53, + "learning_rate": 5.232649664545501e-05, + "loss": 1.4088, + "step": 885000 + }, + { + "epoch": 0.53, + "learning_rate": 5.232439667989445e-05, + "loss": 1.364, + "step": 885500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2322300914265e-05, + "loss": 1.3881, + "step": 886000 + }, + { + "epoch": 0.53, + "learning_rate": 5.232020514863556e-05, + "loss": 1.418, + "step": 886500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2318105183075e-05, + "loss": 1.4143, + "step": 887000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2316005217514434e-05, + "loss": 1.4366, + "step": 887500 + }, + { + "epoch": 0.53, + "learning_rate": 5.231390525195386e-05, + "loss": 1.4213, + "step": 888000 + }, + { + "epoch": 0.53, + "learning_rate": 5.23118052863933e-05, + "loss": 1.3788, + "step": 888500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2309705320832735e-05, + "loss": 1.3851, + "step": 889000 + }, + { + "epoch": 0.53, + "learning_rate": 5.230760535527217e-05, + "loss": 1.3879, + "step": 889500 + }, + { + "epoch": 0.53, + "learning_rate": 5.230550958964273e-05, + "loss": 1.386, + "step": 890000 + }, + { + "epoch": 0.53, + "learning_rate": 5.230340962408216e-05, + "loss": 1.4073, + "step": 890500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2301309658521596e-05, + "loss": 1.4101, + "step": 891000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2299209692961036e-05, + "loss": 1.4105, + "step": 891500 + }, + { + "epoch": 0.53, + "learning_rate": 5.229710972740047e-05, + "loss": 1.4069, + "step": 892000 + }, + { + "epoch": 0.54, + "learning_rate": 5.22950097618399e-05, + "loss": 1.4192, + "step": 892500 + }, + { + "epoch": 0.54, + "learning_rate": 5.229290979627934e-05, + "loss": 1.3792, + "step": 893000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2290809830718776e-05, + "loss": 1.3804, + "step": 893500 + }, + { + "epoch": 0.54, + "learning_rate": 5.228871406508933e-05, + "loss": 1.4097, + "step": 894000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2286614099528763e-05, + "loss": 1.4191, + "step": 894500 + }, + { + "epoch": 0.54, + "learning_rate": 5.2284514133968204e-05, + "loss": 1.4171, + "step": 895000 + }, + { + "epoch": 0.54, + "learning_rate": 5.228241416840764e-05, + "loss": 1.4221, + "step": 895500 + }, + { + "epoch": 0.54, + "learning_rate": 5.228032260270932e-05, + "loss": 1.3626, + "step": 896000 + }, + { + "epoch": 0.54, + "learning_rate": 5.227822263714875e-05, + "loss": 1.3973, + "step": 896500 + }, + { + "epoch": 0.54, + "learning_rate": 5.2276122671588184e-05, + "loss": 1.3964, + "step": 897000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2274026905958745e-05, + "loss": 1.4159, + "step": 897500 + }, + { + "epoch": 0.54, + "learning_rate": 5.227192694039818e-05, + "loss": 1.4249, + "step": 898000 + }, + { + "epoch": 0.54, + "learning_rate": 5.226982697483761e-05, + "loss": 1.4169, + "step": 898500 + }, + { + "epoch": 0.54, + "learning_rate": 5.226772700927705e-05, + "loss": 1.4336, + "step": 899000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2265631243647606e-05, + "loss": 1.4122, + "step": 899500 + }, + { + "epoch": 0.54, + "learning_rate": 5.226353127808704e-05, + "loss": 1.4346, + "step": 900000 + }, + { + "epoch": 0.54, + "eval_loss": 1.338213324546814, + "eval_runtime": 1101.9411, + "eval_samples_per_second": 477.993, + "eval_steps_per_second": 79.666, + "step": 900000 + }, + { + "epoch": 0.54, + "learning_rate": 5.226143131252647e-05, + "loss": 1.4128, + "step": 900500 + }, + { + "epoch": 0.54, + "learning_rate": 5.225933134696591e-05, + "loss": 1.4096, + "step": 901000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2257231381405346e-05, + "loss": 1.3951, + "step": 901500 + }, + { + "epoch": 0.54, + "learning_rate": 5.225513141584478e-05, + "loss": 1.4103, + "step": 902000 + }, + { + "epoch": 0.54, + "learning_rate": 5.225303145028421e-05, + "loss": 1.3779, + "step": 902500 + }, + { + "epoch": 0.54, + "learning_rate": 5.2250935684654773e-05, + "loss": 1.3767, + "step": 903000 + }, + { + "epoch": 0.54, + "learning_rate": 5.224883571909421e-05, + "loss": 1.3837, + "step": 903500 + }, + { + "epoch": 0.54, + "learning_rate": 5.224673575353364e-05, + "loss": 1.4177, + "step": 904000 + }, + { + "epoch": 0.54, + "learning_rate": 5.224463578797308e-05, + "loss": 1.3836, + "step": 904500 + }, + { + "epoch": 0.54, + "learning_rate": 5.224253582241251e-05, + "loss": 1.3993, + "step": 905000 + }, + { + "epoch": 0.54, + "learning_rate": 5.224043585685195e-05, + "loss": 1.4055, + "step": 905500 + }, + { + "epoch": 0.54, + "learning_rate": 5.223833589129138e-05, + "loss": 1.4267, + "step": 906000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2236235925730814e-05, + "loss": 1.4204, + "step": 906500 + }, + { + "epoch": 0.54, + "learning_rate": 5.2234135960170255e-05, + "loss": 1.4353, + "step": 907000 + }, + { + "epoch": 0.54, + "learning_rate": 5.223203599460969e-05, + "loss": 1.3914, + "step": 907500 + }, + { + "epoch": 0.54, + "learning_rate": 5.222993602904912e-05, + "loss": 1.3852, + "step": 908000 + }, + { + "epoch": 0.54, + "learning_rate": 5.222783606348856e-05, + "loss": 1.3969, + "step": 908500 + }, + { + "epoch": 0.54, + "learning_rate": 5.2225740297859115e-05, + "loss": 1.4055, + "step": 909000 + }, + { + "epoch": 0.55, + "learning_rate": 5.222364033229855e-05, + "loss": 1.3707, + "step": 909500 + }, + { + "epoch": 0.55, + "learning_rate": 5.222154036673798e-05, + "loss": 1.3909, + "step": 910000 + }, + { + "epoch": 0.55, + "learning_rate": 5.221944040117742e-05, + "loss": 1.3987, + "step": 910500 + }, + { + "epoch": 0.55, + "learning_rate": 5.2217340435616856e-05, + "loss": 1.4216, + "step": 911000 + }, + { + "epoch": 0.55, + "learning_rate": 5.221524047005629e-05, + "loss": 1.4172, + "step": 911500 + }, + { + "epoch": 0.55, + "learning_rate": 5.221314470442685e-05, + "loss": 1.3801, + "step": 912000 + }, + { + "epoch": 0.55, + "learning_rate": 5.221104473886628e-05, + "loss": 1.4128, + "step": 912500 + }, + { + "epoch": 0.55, + "learning_rate": 5.220894477330572e-05, + "loss": 1.4, + "step": 913000 + }, + { + "epoch": 0.55, + "learning_rate": 5.220684480774516e-05, + "loss": 1.411, + "step": 913500 + }, + { + "epoch": 0.55, + "learning_rate": 5.220474484218459e-05, + "loss": 1.4058, + "step": 914000 + }, + { + "epoch": 0.55, + "learning_rate": 5.2202649076555144e-05, + "loss": 1.4101, + "step": 914500 + }, + { + "epoch": 0.55, + "learning_rate": 5.220054911099458e-05, + "loss": 1.406, + "step": 915000 + }, + { + "epoch": 0.55, + "learning_rate": 5.219844914543402e-05, + "loss": 1.4022, + "step": 915500 + }, + { + "epoch": 0.55, + "learning_rate": 5.219635337980457e-05, + "loss": 1.3702, + "step": 916000 + }, + { + "epoch": 0.55, + "learning_rate": 5.2194253414244005e-05, + "loss": 1.4078, + "step": 916500 + }, + { + "epoch": 0.55, + "learning_rate": 5.219215344868344e-05, + "loss": 1.3671, + "step": 917000 + }, + { + "epoch": 0.55, + "learning_rate": 5.219005348312288e-05, + "loss": 1.4178, + "step": 917500 + }, + { + "epoch": 0.55, + "learning_rate": 5.218795351756231e-05, + "loss": 1.4283, + "step": 918000 + }, + { + "epoch": 0.55, + "learning_rate": 5.2185853552001745e-05, + "loss": 1.3755, + "step": 918500 + }, + { + "epoch": 0.55, + "learning_rate": 5.2183753586441186e-05, + "loss": 1.4042, + "step": 919000 + }, + { + "epoch": 0.55, + "learning_rate": 5.218165362088062e-05, + "loss": 1.3998, + "step": 919500 + }, + { + "epoch": 0.55, + "learning_rate": 5.217955365532005e-05, + "loss": 1.4169, + "step": 920000 + }, + { + "epoch": 0.55, + "learning_rate": 5.2177453689759486e-05, + "loss": 1.4142, + "step": 920500 + }, + { + "epoch": 0.55, + "learning_rate": 5.217535372419892e-05, + "loss": 1.4231, + "step": 921000 + }, + { + "epoch": 0.55, + "learning_rate": 5.217325375863836e-05, + "loss": 1.393, + "step": 921500 + }, + { + "epoch": 0.55, + "learning_rate": 5.217115799300891e-05, + "loss": 1.3594, + "step": 922000 + }, + { + "epoch": 0.55, + "learning_rate": 5.216905802744835e-05, + "loss": 1.4134, + "step": 922500 + }, + { + "epoch": 0.55, + "learning_rate": 5.216695806188778e-05, + "loss": 1.3847, + "step": 923000 + }, + { + "epoch": 0.55, + "learning_rate": 5.216485809632722e-05, + "loss": 1.4215, + "step": 923500 + }, + { + "epoch": 0.55, + "learning_rate": 5.216276233069778e-05, + "loss": 1.3818, + "step": 924000 + }, + { + "epoch": 0.55, + "learning_rate": 5.216066236513721e-05, + "loss": 1.3955, + "step": 924500 + }, + { + "epoch": 0.55, + "learning_rate": 5.215856659950777e-05, + "loss": 1.3995, + "step": 925000 + }, + { + "epoch": 0.55, + "learning_rate": 5.21564666339472e-05, + "loss": 1.4, + "step": 925500 + }, + { + "epoch": 0.56, + "learning_rate": 5.215436666838664e-05, + "loss": 1.4416, + "step": 926000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2152266702826075e-05, + "loss": 1.3824, + "step": 926500 + }, + { + "epoch": 0.56, + "learning_rate": 5.215017093719663e-05, + "loss": 1.3972, + "step": 927000 + }, + { + "epoch": 0.56, + "learning_rate": 5.214807097163607e-05, + "loss": 1.395, + "step": 927500 + }, + { + "epoch": 0.56, + "learning_rate": 5.21459710060755e-05, + "loss": 1.4043, + "step": 928000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2143871040514936e-05, + "loss": 1.4319, + "step": 928500 + }, + { + "epoch": 0.56, + "learning_rate": 5.2141771074954376e-05, + "loss": 1.4073, + "step": 929000 + }, + { + "epoch": 0.56, + "learning_rate": 5.213967530932493e-05, + "loss": 1.4013, + "step": 929500 + }, + { + "epoch": 0.56, + "learning_rate": 5.213757534376436e-05, + "loss": 1.4218, + "step": 930000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2135475378203796e-05, + "loss": 1.4094, + "step": 930500 + }, + { + "epoch": 0.56, + "learning_rate": 5.213337961257435e-05, + "loss": 1.4099, + "step": 931000 + }, + { + "epoch": 0.56, + "learning_rate": 5.213127964701379e-05, + "loss": 1.3959, + "step": 931500 + }, + { + "epoch": 0.56, + "learning_rate": 5.2129179681453224e-05, + "loss": 1.4247, + "step": 932000 + }, + { + "epoch": 0.56, + "learning_rate": 5.212707971589266e-05, + "loss": 1.4103, + "step": 932500 + }, + { + "epoch": 0.56, + "learning_rate": 5.21249797503321e-05, + "loss": 1.4104, + "step": 933000 + }, + { + "epoch": 0.56, + "learning_rate": 5.212287978477153e-05, + "loss": 1.3538, + "step": 933500 + }, + { + "epoch": 0.56, + "learning_rate": 5.2120779819210964e-05, + "loss": 1.4161, + "step": 934000 + }, + { + "epoch": 0.56, + "learning_rate": 5.21186798536504e-05, + "loss": 1.4061, + "step": 934500 + }, + { + "epoch": 0.56, + "learning_rate": 5.211657988808983e-05, + "loss": 1.3926, + "step": 935000 + }, + { + "epoch": 0.56, + "learning_rate": 5.211448412246039e-05, + "loss": 1.4, + "step": 935500 + }, + { + "epoch": 0.56, + "learning_rate": 5.211238415689983e-05, + "loss": 1.3897, + "step": 936000 + }, + { + "epoch": 0.56, + "learning_rate": 5.211028419133926e-05, + "loss": 1.3829, + "step": 936500 + }, + { + "epoch": 0.56, + "learning_rate": 5.210818422577869e-05, + "loss": 1.4139, + "step": 937000 + }, + { + "epoch": 0.56, + "learning_rate": 5.210608426021813e-05, + "loss": 1.4418, + "step": 937500 + }, + { + "epoch": 0.56, + "learning_rate": 5.2103984294657566e-05, + "loss": 1.4129, + "step": 938000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2101884329097e-05, + "loss": 1.4144, + "step": 938500 + }, + { + "epoch": 0.56, + "learning_rate": 5.209978436353644e-05, + "loss": 1.3888, + "step": 939000 + }, + { + "epoch": 0.56, + "learning_rate": 5.209768859790699e-05, + "loss": 1.3965, + "step": 939500 + }, + { + "epoch": 0.56, + "learning_rate": 5.2095588632346426e-05, + "loss": 1.4176, + "step": 940000 + }, + { + "epoch": 0.56, + "learning_rate": 5.209348866678586e-05, + "loss": 1.3752, + "step": 940500 + }, + { + "epoch": 0.56, + "learning_rate": 5.20913887012253e-05, + "loss": 1.4028, + "step": 941000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2089288735664733e-05, + "loss": 1.4089, + "step": 941500 + }, + { + "epoch": 0.56, + "learning_rate": 5.208718877010417e-05, + "loss": 1.4531, + "step": 942000 + }, + { + "epoch": 0.57, + "learning_rate": 5.208508880454361e-05, + "loss": 1.4071, + "step": 942500 + }, + { + "epoch": 0.57, + "learning_rate": 5.208298883898304e-05, + "loss": 1.3919, + "step": 943000 + }, + { + "epoch": 0.57, + "learning_rate": 5.2080893073353594e-05, + "loss": 1.3715, + "step": 943500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2078793107793034e-05, + "loss": 1.4305, + "step": 944000 + }, + { + "epoch": 0.57, + "learning_rate": 5.207669314223247e-05, + "loss": 1.4003, + "step": 944500 + }, + { + "epoch": 0.57, + "learning_rate": 5.20745931766719e-05, + "loss": 1.3806, + "step": 945000 + }, + { + "epoch": 0.57, + "learning_rate": 5.207249321111134e-05, + "loss": 1.4218, + "step": 945500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2070397445481895e-05, + "loss": 1.4129, + "step": 946000 + }, + { + "epoch": 0.57, + "learning_rate": 5.206829747992133e-05, + "loss": 1.3931, + "step": 946500 + }, + { + "epoch": 0.57, + "learning_rate": 5.206619751436076e-05, + "loss": 1.4204, + "step": 947000 + }, + { + "epoch": 0.57, + "learning_rate": 5.20640975488002e-05, + "loss": 1.431, + "step": 947500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2062001783170756e-05, + "loss": 1.4018, + "step": 948000 + }, + { + "epoch": 0.57, + "learning_rate": 5.205990601754131e-05, + "loss": 1.3723, + "step": 948500 + }, + { + "epoch": 0.57, + "learning_rate": 5.205781445184299e-05, + "loss": 1.3877, + "step": 949000 + }, + { + "epoch": 0.57, + "learning_rate": 5.2055714486282423e-05, + "loss": 1.4077, + "step": 949500 + }, + { + "epoch": 0.57, + "learning_rate": 5.205361452072186e-05, + "loss": 1.4215, + "step": 950000 + }, + { + "epoch": 0.57, + "learning_rate": 5.20515145551613e-05, + "loss": 1.3674, + "step": 950500 + }, + { + "epoch": 0.57, + "learning_rate": 5.204941458960073e-05, + "loss": 1.4133, + "step": 951000 + }, + { + "epoch": 0.57, + "learning_rate": 5.2047314624040164e-05, + "loss": 1.4096, + "step": 951500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2045214658479604e-05, + "loss": 1.3995, + "step": 952000 + }, + { + "epoch": 0.57, + "learning_rate": 5.204311469291904e-05, + "loss": 1.4209, + "step": 952500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2041014727358464e-05, + "loss": 1.4311, + "step": 953000 + }, + { + "epoch": 0.57, + "learning_rate": 5.2038914761797905e-05, + "loss": 1.4108, + "step": 953500 + }, + { + "epoch": 0.57, + "learning_rate": 5.203681479623734e-05, + "loss": 1.3646, + "step": 954000 + }, + { + "epoch": 0.57, + "learning_rate": 5.203471483067677e-05, + "loss": 1.3945, + "step": 954500 + }, + { + "epoch": 0.57, + "learning_rate": 5.203261486511621e-05, + "loss": 1.3849, + "step": 955000 + }, + { + "epoch": 0.57, + "learning_rate": 5.2030519099486765e-05, + "loss": 1.3742, + "step": 955500 + }, + { + "epoch": 0.57, + "learning_rate": 5.20284191339262e-05, + "loss": 1.3872, + "step": 956000 + }, + { + "epoch": 0.57, + "learning_rate": 5.202631916836564e-05, + "loss": 1.4085, + "step": 956500 + }, + { + "epoch": 0.57, + "learning_rate": 5.202421920280507e-05, + "loss": 1.4047, + "step": 957000 + }, + { + "epoch": 0.57, + "learning_rate": 5.2022119237244506e-05, + "loss": 1.3947, + "step": 957500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2020019271683946e-05, + "loss": 1.3956, + "step": 958000 + }, + { + "epoch": 0.57, + "learning_rate": 5.201791930612338e-05, + "loss": 1.421, + "step": 958500 + }, + { + "epoch": 0.57, + "learning_rate": 5.201582354049393e-05, + "loss": 1.3871, + "step": 959000 + }, + { + "epoch": 0.58, + "learning_rate": 5.201372357493337e-05, + "loss": 1.4278, + "step": 959500 + }, + { + "epoch": 0.58, + "learning_rate": 5.201162360937281e-05, + "loss": 1.3927, + "step": 960000 + }, + { + "epoch": 0.58, + "learning_rate": 5.200952364381224e-05, + "loss": 1.384, + "step": 960500 + }, + { + "epoch": 0.58, + "learning_rate": 5.2007423678251674e-05, + "loss": 1.4166, + "step": 961000 + }, + { + "epoch": 0.58, + "learning_rate": 5.200532791262223e-05, + "loss": 1.408, + "step": 961500 + }, + { + "epoch": 0.58, + "learning_rate": 5.200322794706167e-05, + "loss": 1.4496, + "step": 962000 + }, + { + "epoch": 0.58, + "learning_rate": 5.20011279815011e-05, + "loss": 1.4173, + "step": 962500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1999028015940535e-05, + "loss": 1.3771, + "step": 963000 + }, + { + "epoch": 0.58, + "learning_rate": 5.1996928050379975e-05, + "loss": 1.4008, + "step": 963500 + }, + { + "epoch": 0.58, + "learning_rate": 5.199482808481941e-05, + "loss": 1.4063, + "step": 964000 + }, + { + "epoch": 0.58, + "learning_rate": 5.199272811925885e-05, + "loss": 1.4071, + "step": 964500 + }, + { + "epoch": 0.58, + "learning_rate": 5.199062815369828e-05, + "loss": 1.3668, + "step": 965000 + }, + { + "epoch": 0.58, + "learning_rate": 5.1988532388068836e-05, + "loss": 1.3878, + "step": 965500 + }, + { + "epoch": 0.58, + "learning_rate": 5.198643242250827e-05, + "loss": 1.4048, + "step": 966000 + }, + { + "epoch": 0.58, + "learning_rate": 5.198433665687882e-05, + "loss": 1.3872, + "step": 966500 + }, + { + "epoch": 0.58, + "learning_rate": 5.198224089124938e-05, + "loss": 1.4358, + "step": 967000 + }, + { + "epoch": 0.58, + "learning_rate": 5.1980140925688816e-05, + "loss": 1.3811, + "step": 967500 + }, + { + "epoch": 0.58, + "learning_rate": 5.197804096012825e-05, + "loss": 1.3875, + "step": 968000 + }, + { + "epoch": 0.58, + "learning_rate": 5.197594099456768e-05, + "loss": 1.3947, + "step": 968500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1973841029007124e-05, + "loss": 1.4098, + "step": 969000 + }, + { + "epoch": 0.58, + "learning_rate": 5.197174106344656e-05, + "loss": 1.4383, + "step": 969500 + }, + { + "epoch": 0.58, + "learning_rate": 5.196964109788599e-05, + "loss": 1.4002, + "step": 970000 + }, + { + "epoch": 0.58, + "learning_rate": 5.196754113232543e-05, + "loss": 1.4456, + "step": 970500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1965441166764864e-05, + "loss": 1.4, + "step": 971000 + }, + { + "epoch": 0.58, + "learning_rate": 5.1963341201204304e-05, + "loss": 1.377, + "step": 971500 + }, + { + "epoch": 0.58, + "learning_rate": 5.196124123564374e-05, + "loss": 1.3814, + "step": 972000 + }, + { + "epoch": 0.58, + "learning_rate": 5.195914127008317e-05, + "loss": 1.4064, + "step": 972500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1957041304522605e-05, + "loss": 1.378, + "step": 973000 + }, + { + "epoch": 0.58, + "learning_rate": 5.195494133896204e-05, + "loss": 1.382, + "step": 973500 + }, + { + "epoch": 0.58, + "learning_rate": 5.195284977326372e-05, + "loss": 1.3616, + "step": 974000 + }, + { + "epoch": 0.58, + "learning_rate": 5.195074980770315e-05, + "loss": 1.4069, + "step": 974500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1948649842142586e-05, + "loss": 1.3978, + "step": 975000 + }, + { + "epoch": 0.58, + "learning_rate": 5.1946549876582026e-05, + "loss": 1.3999, + "step": 975500 + }, + { + "epoch": 0.59, + "learning_rate": 5.194444991102146e-05, + "loss": 1.3601, + "step": 976000 + }, + { + "epoch": 0.59, + "learning_rate": 5.194234994546089e-05, + "loss": 1.3785, + "step": 976500 + }, + { + "epoch": 0.59, + "learning_rate": 5.194025417983145e-05, + "loss": 1.4267, + "step": 977000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1938154214270887e-05, + "loss": 1.386, + "step": 977500 + }, + { + "epoch": 0.59, + "learning_rate": 5.193605424871032e-05, + "loss": 1.4166, + "step": 978000 + }, + { + "epoch": 0.59, + "learning_rate": 5.193395428314976e-05, + "loss": 1.3764, + "step": 978500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1931854317589194e-05, + "loss": 1.3911, + "step": 979000 + }, + { + "epoch": 0.59, + "learning_rate": 5.192975435202863e-05, + "loss": 1.4092, + "step": 979500 + }, + { + "epoch": 0.59, + "learning_rate": 5.192765438646806e-05, + "loss": 1.3693, + "step": 980000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1925554420907494e-05, + "loss": 1.4016, + "step": 980500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1923458655278054e-05, + "loss": 1.4067, + "step": 981000 + }, + { + "epoch": 0.59, + "learning_rate": 5.192135868971749e-05, + "loss": 1.4018, + "step": 981500 + }, + { + "epoch": 0.59, + "learning_rate": 5.191925872415693e-05, + "loss": 1.3851, + "step": 982000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1917158758596355e-05, + "loss": 1.4042, + "step": 982500 + }, + { + "epoch": 0.59, + "learning_rate": 5.191505879303579e-05, + "loss": 1.412, + "step": 983000 + }, + { + "epoch": 0.59, + "learning_rate": 5.191295882747523e-05, + "loss": 1.3879, + "step": 983500 + }, + { + "epoch": 0.59, + "learning_rate": 5.191086306184579e-05, + "loss": 1.4056, + "step": 984000 + }, + { + "epoch": 0.59, + "learning_rate": 5.190876309628522e-05, + "loss": 1.3936, + "step": 984500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1906663130724656e-05, + "loss": 1.3976, + "step": 985000 + }, + { + "epoch": 0.59, + "learning_rate": 5.190456316516409e-05, + "loss": 1.3847, + "step": 985500 + }, + { + "epoch": 0.59, + "learning_rate": 5.190246319960352e-05, + "loss": 1.4073, + "step": 986000 + }, + { + "epoch": 0.59, + "learning_rate": 5.190036323404296e-05, + "loss": 1.4018, + "step": 986500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1898263268482396e-05, + "loss": 1.4115, + "step": 987000 + }, + { + "epoch": 0.59, + "learning_rate": 5.189616330292183e-05, + "loss": 1.3592, + "step": 987500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1894067537292383e-05, + "loss": 1.389, + "step": 988000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1891967571731824e-05, + "loss": 1.3925, + "step": 988500 + }, + { + "epoch": 0.59, + "learning_rate": 5.188986760617126e-05, + "loss": 1.3924, + "step": 989000 + }, + { + "epoch": 0.59, + "learning_rate": 5.188777184054181e-05, + "loss": 1.3927, + "step": 989500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1885671874981244e-05, + "loss": 1.3955, + "step": 990000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1883571909420684e-05, + "loss": 1.3992, + "step": 990500 + }, + { + "epoch": 0.59, + "learning_rate": 5.188147194386012e-05, + "loss": 1.4168, + "step": 991000 + }, + { + "epoch": 0.59, + "learning_rate": 5.187937197829955e-05, + "loss": 1.3828, + "step": 991500 + }, + { + "epoch": 0.59, + "learning_rate": 5.187727621267011e-05, + "loss": 1.4009, + "step": 992000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1875176247109545e-05, + "loss": 1.4023, + "step": 992500 + }, + { + "epoch": 0.6, + "learning_rate": 5.187307628154898e-05, + "loss": 1.3965, + "step": 993000 + }, + { + "epoch": 0.6, + "learning_rate": 5.187097631598842e-05, + "loss": 1.3953, + "step": 993500 + }, + { + "epoch": 0.6, + "learning_rate": 5.186888055035898e-05, + "loss": 1.3709, + "step": 994000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1866780584798406e-05, + "loss": 1.3969, + "step": 994500 + }, + { + "epoch": 0.6, + "learning_rate": 5.186468061923784e-05, + "loss": 1.3666, + "step": 995000 + }, + { + "epoch": 0.6, + "learning_rate": 5.186258065367728e-05, + "loss": 1.3968, + "step": 995500 + }, + { + "epoch": 0.6, + "learning_rate": 5.186048068811671e-05, + "loss": 1.4132, + "step": 996000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1858380722556146e-05, + "loss": 1.3949, + "step": 996500 + }, + { + "epoch": 0.6, + "learning_rate": 5.185628075699559e-05, + "loss": 1.3795, + "step": 997000 + }, + { + "epoch": 0.6, + "learning_rate": 5.185418499136614e-05, + "loss": 1.3726, + "step": 997500 + }, + { + "epoch": 0.6, + "learning_rate": 5.18520892257367e-05, + "loss": 1.3921, + "step": 998000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1849989260176134e-05, + "loss": 1.3885, + "step": 998500 + }, + { + "epoch": 0.6, + "learning_rate": 5.184788929461557e-05, + "loss": 1.3876, + "step": 999000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1845789329055e-05, + "loss": 1.3769, + "step": 999500 + }, + { + "epoch": 0.6, + "learning_rate": 5.1843689363494434e-05, + "loss": 1.4139, + "step": 1000000 + }, + { + "epoch": 0.6, + "eval_loss": 1.3278173208236694, + "eval_runtime": 1111.6546, + "eval_samples_per_second": 473.816, + "eval_steps_per_second": 78.97, + "step": 1000000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1841589397933875e-05, + "loss": 1.3798, + "step": 1000500 + }, + { + "epoch": 0.6, + "learning_rate": 5.183948943237331e-05, + "loss": 1.4033, + "step": 1001000 + }, + { + "epoch": 0.6, + "learning_rate": 5.183738946681274e-05, + "loss": 1.4, + "step": 1001500 + }, + { + "epoch": 0.6, + "learning_rate": 5.183528950125218e-05, + "loss": 1.4045, + "step": 1002000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1833193735622735e-05, + "loss": 1.4086, + "step": 1002500 + }, + { + "epoch": 0.6, + "learning_rate": 5.183109377006217e-05, + "loss": 1.3815, + "step": 1003000 + }, + { + "epoch": 0.6, + "learning_rate": 5.182899800443273e-05, + "loss": 1.3827, + "step": 1003500 + }, + { + "epoch": 0.6, + "learning_rate": 5.1826898038872156e-05, + "loss": 1.3701, + "step": 1004000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1824798073311596e-05, + "loss": 1.3901, + "step": 1004500 + }, + { + "epoch": 0.6, + "learning_rate": 5.182269810775103e-05, + "loss": 1.3954, + "step": 1005000 + }, + { + "epoch": 0.6, + "learning_rate": 5.182059814219046e-05, + "loss": 1.3861, + "step": 1005500 + }, + { + "epoch": 0.6, + "learning_rate": 5.18184981766299e-05, + "loss": 1.385, + "step": 1006000 + }, + { + "epoch": 0.6, + "learning_rate": 5.181639821106934e-05, + "loss": 1.3888, + "step": 1006500 + }, + { + "epoch": 0.6, + "learning_rate": 5.181429824550877e-05, + "loss": 1.3929, + "step": 1007000 + }, + { + "epoch": 0.6, + "learning_rate": 5.181220247987933e-05, + "loss": 1.3979, + "step": 1007500 + }, + { + "epoch": 0.6, + "learning_rate": 5.181010671424989e-05, + "loss": 1.4017, + "step": 1008000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1808010948620445e-05, + "loss": 1.3863, + "step": 1008500 + }, + { + "epoch": 0.6, + "learning_rate": 5.180591098305988e-05, + "loss": 1.4291, + "step": 1009000 + }, + { + "epoch": 0.61, + "learning_rate": 5.180381101749931e-05, + "loss": 1.4162, + "step": 1009500 + }, + { + "epoch": 0.61, + "learning_rate": 5.180171105193875e-05, + "loss": 1.3728, + "step": 1010000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1799611086378185e-05, + "loss": 1.3718, + "step": 1010500 + }, + { + "epoch": 0.61, + "learning_rate": 5.179751112081761e-05, + "loss": 1.412, + "step": 1011000 + }, + { + "epoch": 0.61, + "learning_rate": 5.179541535518817e-05, + "loss": 1.3689, + "step": 1011500 + }, + { + "epoch": 0.61, + "learning_rate": 5.179331538962761e-05, + "loss": 1.3711, + "step": 1012000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1791215424067046e-05, + "loss": 1.3999, + "step": 1012500 + }, + { + "epoch": 0.61, + "learning_rate": 5.178911545850648e-05, + "loss": 1.3754, + "step": 1013000 + }, + { + "epoch": 0.61, + "learning_rate": 5.178701549294591e-05, + "loss": 1.4067, + "step": 1013500 + }, + { + "epoch": 0.61, + "learning_rate": 5.1784915527385346e-05, + "loss": 1.4159, + "step": 1014000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1782815561824786e-05, + "loss": 1.3835, + "step": 1014500 + }, + { + "epoch": 0.61, + "learning_rate": 5.178071559626422e-05, + "loss": 1.3775, + "step": 1015000 + }, + { + "epoch": 0.61, + "learning_rate": 5.177861563070365e-05, + "loss": 1.3898, + "step": 1015500 + }, + { + "epoch": 0.61, + "learning_rate": 5.177651986507421e-05, + "loss": 1.3919, + "step": 1016000 + }, + { + "epoch": 0.61, + "learning_rate": 5.177441989951365e-05, + "loss": 1.3715, + "step": 1016500 + }, + { + "epoch": 0.61, + "learning_rate": 5.177231993395308e-05, + "loss": 1.3596, + "step": 1017000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1770219968392514e-05, + "loss": 1.3755, + "step": 1017500 + }, + { + "epoch": 0.61, + "learning_rate": 5.1768120002831954e-05, + "loss": 1.3783, + "step": 1018000 + }, + { + "epoch": 0.61, + "learning_rate": 5.176602003727139e-05, + "loss": 1.404, + "step": 1018500 + }, + { + "epoch": 0.61, + "learning_rate": 5.176392007171082e-05, + "loss": 1.4128, + "step": 1019000 + }, + { + "epoch": 0.61, + "learning_rate": 5.176182010615026e-05, + "loss": 1.4049, + "step": 1019500 + }, + { + "epoch": 0.61, + "learning_rate": 5.1759724340520815e-05, + "loss": 1.4072, + "step": 1020000 + }, + { + "epoch": 0.61, + "learning_rate": 5.175762857489137e-05, + "loss": 1.3885, + "step": 1020500 + }, + { + "epoch": 0.61, + "learning_rate": 5.17555286093308e-05, + "loss": 1.3825, + "step": 1021000 + }, + { + "epoch": 0.61, + "learning_rate": 5.175342864377024e-05, + "loss": 1.4055, + "step": 1021500 + }, + { + "epoch": 0.61, + "learning_rate": 5.1751328678209676e-05, + "loss": 1.3783, + "step": 1022000 + }, + { + "epoch": 0.61, + "learning_rate": 5.174922871264911e-05, + "loss": 1.3786, + "step": 1022500 + }, + { + "epoch": 0.61, + "learning_rate": 5.174712874708855e-05, + "loss": 1.4117, + "step": 1023000 + }, + { + "epoch": 0.61, + "learning_rate": 5.174502878152798e-05, + "loss": 1.3867, + "step": 1023500 + }, + { + "epoch": 0.61, + "learning_rate": 5.1742928815967416e-05, + "loss": 1.4044, + "step": 1024000 + }, + { + "epoch": 0.61, + "learning_rate": 5.174082885040686e-05, + "loss": 1.3679, + "step": 1024500 + }, + { + "epoch": 0.61, + "learning_rate": 5.173872888484629e-05, + "loss": 1.4149, + "step": 1025000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1736633119216844e-05, + "loss": 1.3592, + "step": 1025500 + }, + { + "epoch": 0.62, + "learning_rate": 5.173453315365628e-05, + "loss": 1.408, + "step": 1026000 + }, + { + "epoch": 0.62, + "learning_rate": 5.173243318809572e-05, + "loss": 1.4198, + "step": 1026500 + }, + { + "epoch": 0.62, + "learning_rate": 5.173033322253515e-05, + "loss": 1.3621, + "step": 1027000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1728233256974584e-05, + "loss": 1.3688, + "step": 1027500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1726133291414025e-05, + "loss": 1.4138, + "step": 1028000 + }, + { + "epoch": 0.62, + "learning_rate": 5.172403332585345e-05, + "loss": 1.37, + "step": 1028500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1721933360292885e-05, + "loss": 1.402, + "step": 1029000 + }, + { + "epoch": 0.62, + "learning_rate": 5.171983759466345e-05, + "loss": 1.4203, + "step": 1029500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1717737629102885e-05, + "loss": 1.3896, + "step": 1030000 + }, + { + "epoch": 0.62, + "learning_rate": 5.171563766354232e-05, + "loss": 1.3662, + "step": 1030500 + }, + { + "epoch": 0.62, + "learning_rate": 5.171354189791287e-05, + "loss": 1.44, + "step": 1031000 + }, + { + "epoch": 0.62, + "learning_rate": 5.171144193235231e-05, + "loss": 1.384, + "step": 1031500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1709341966791746e-05, + "loss": 1.3914, + "step": 1032000 + }, + { + "epoch": 0.62, + "learning_rate": 5.170724200123118e-05, + "loss": 1.3873, + "step": 1032500 + }, + { + "epoch": 0.62, + "learning_rate": 5.170514203567062e-05, + "loss": 1.4175, + "step": 1033000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1703042070110046e-05, + "loss": 1.3952, + "step": 1033500 + }, + { + "epoch": 0.62, + "learning_rate": 5.170094210454948e-05, + "loss": 1.4053, + "step": 1034000 + }, + { + "epoch": 0.62, + "learning_rate": 5.169884213898892e-05, + "loss": 1.3834, + "step": 1034500 + }, + { + "epoch": 0.62, + "learning_rate": 5.169674637335948e-05, + "loss": 1.4075, + "step": 1035000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1694646407798914e-05, + "loss": 1.3729, + "step": 1035500 + }, + { + "epoch": 0.62, + "learning_rate": 5.169254644223835e-05, + "loss": 1.3867, + "step": 1036000 + }, + { + "epoch": 0.62, + "learning_rate": 5.169044647667778e-05, + "loss": 1.4124, + "step": 1036500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1688346511117214e-05, + "loss": 1.4109, + "step": 1037000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1686246545556654e-05, + "loss": 1.4018, + "step": 1037500 + }, + { + "epoch": 0.62, + "learning_rate": 5.168414657999609e-05, + "loss": 1.402, + "step": 1038000 + }, + { + "epoch": 0.62, + "learning_rate": 5.168204661443552e-05, + "loss": 1.4015, + "step": 1038500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1679950848806075e-05, + "loss": 1.366, + "step": 1039000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1677850883245515e-05, + "loss": 1.4079, + "step": 1039500 + }, + { + "epoch": 0.62, + "learning_rate": 5.167575091768495e-05, + "loss": 1.368, + "step": 1040000 + }, + { + "epoch": 0.62, + "learning_rate": 5.167365095212438e-05, + "loss": 1.4189, + "step": 1040500 + }, + { + "epoch": 0.62, + "learning_rate": 5.167155098656382e-05, + "loss": 1.3794, + "step": 1041000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1669455220934376e-05, + "loss": 1.3755, + "step": 1041500 + }, + { + "epoch": 0.62, + "learning_rate": 5.166735525537381e-05, + "loss": 1.3377, + "step": 1042000 + }, + { + "epoch": 0.63, + "learning_rate": 5.166525528981324e-05, + "loss": 1.3978, + "step": 1042500 + }, + { + "epoch": 0.63, + "learning_rate": 5.166315532425268e-05, + "loss": 1.4078, + "step": 1043000 + }, + { + "epoch": 0.63, + "learning_rate": 5.1661055358692117e-05, + "loss": 1.4194, + "step": 1043500 + }, + { + "epoch": 0.63, + "learning_rate": 5.165895539313155e-05, + "loss": 1.368, + "step": 1044000 + }, + { + "epoch": 0.63, + "learning_rate": 5.165685542757099e-05, + "loss": 1.3645, + "step": 1044500 + }, + { + "epoch": 0.63, + "learning_rate": 5.165476386187267e-05, + "loss": 1.3715, + "step": 1045000 + }, + { + "epoch": 0.63, + "learning_rate": 5.16526638963121e-05, + "loss": 1.3751, + "step": 1045500 + }, + { + "epoch": 0.63, + "learning_rate": 5.165056393075153e-05, + "loss": 1.4012, + "step": 1046000 + }, + { + "epoch": 0.63, + "learning_rate": 5.164846396519097e-05, + "loss": 1.3949, + "step": 1046500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1646363999630405e-05, + "loss": 1.3993, + "step": 1047000 + }, + { + "epoch": 0.63, + "learning_rate": 5.164426403406984e-05, + "loss": 1.3863, + "step": 1047500 + }, + { + "epoch": 0.63, + "learning_rate": 5.164216406850928e-05, + "loss": 1.4035, + "step": 1048000 + }, + { + "epoch": 0.63, + "learning_rate": 5.164006410294871e-05, + "loss": 1.3584, + "step": 1048500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1637968337319265e-05, + "loss": 1.3973, + "step": 1049000 + }, + { + "epoch": 0.63, + "learning_rate": 5.16358683717587e-05, + "loss": 1.3706, + "step": 1049500 + }, + { + "epoch": 0.63, + "learning_rate": 5.163376840619814e-05, + "loss": 1.4108, + "step": 1050000 + }, + { + "epoch": 0.63, + "learning_rate": 5.163166844063757e-05, + "loss": 1.3854, + "step": 1050500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1629568475077006e-05, + "loss": 1.3789, + "step": 1051000 + }, + { + "epoch": 0.63, + "learning_rate": 5.1627468509516446e-05, + "loss": 1.3949, + "step": 1051500 + }, + { + "epoch": 0.63, + "learning_rate": 5.162536854395588e-05, + "loss": 1.4109, + "step": 1052000 + }, + { + "epoch": 0.63, + "learning_rate": 5.162326857839531e-05, + "loss": 1.3556, + "step": 1052500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1621168612834746e-05, + "loss": 1.4084, + "step": 1053000 + }, + { + "epoch": 0.63, + "learning_rate": 5.161907284720531e-05, + "loss": 1.3938, + "step": 1053500 + }, + { + "epoch": 0.63, + "learning_rate": 5.161697288164474e-05, + "loss": 1.3817, + "step": 1054000 + }, + { + "epoch": 0.63, + "learning_rate": 5.1614877116015294e-05, + "loss": 1.3785, + "step": 1054500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1612777150454734e-05, + "loss": 1.3916, + "step": 1055000 + }, + { + "epoch": 0.63, + "learning_rate": 5.161067718489417e-05, + "loss": 1.4128, + "step": 1055500 + }, + { + "epoch": 0.63, + "learning_rate": 5.16085772193336e-05, + "loss": 1.3998, + "step": 1056000 + }, + { + "epoch": 0.63, + "learning_rate": 5.160647725377304e-05, + "loss": 1.4013, + "step": 1056500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1604381488143595e-05, + "loss": 1.3908, + "step": 1057000 + }, + { + "epoch": 0.63, + "learning_rate": 5.160228152258303e-05, + "loss": 1.3517, + "step": 1057500 + }, + { + "epoch": 0.63, + "learning_rate": 5.160018155702246e-05, + "loss": 1.3747, + "step": 1058000 + }, + { + "epoch": 0.63, + "learning_rate": 5.15980815914619e-05, + "loss": 1.4061, + "step": 1058500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1595981625901335e-05, + "loss": 1.3756, + "step": 1059000 + }, + { + "epoch": 0.64, + "learning_rate": 5.159388166034077e-05, + "loss": 1.3746, + "step": 1059500 + }, + { + "epoch": 0.64, + "learning_rate": 5.159178169478021e-05, + "loss": 1.3892, + "step": 1060000 + }, + { + "epoch": 0.64, + "learning_rate": 5.158968592915076e-05, + "loss": 1.4025, + "step": 1060500 + }, + { + "epoch": 0.64, + "learning_rate": 5.1587585963590196e-05, + "loss": 1.3828, + "step": 1061000 + }, + { + "epoch": 0.64, + "learning_rate": 5.1585485998029636e-05, + "loss": 1.3817, + "step": 1061500 + }, + { + "epoch": 0.64, + "learning_rate": 5.158338603246907e-05, + "loss": 1.3874, + "step": 1062000 + }, + { + "epoch": 0.64, + "learning_rate": 5.1581286066908497e-05, + "loss": 1.4019, + "step": 1062500 + }, + { + "epoch": 0.64, + "learning_rate": 5.157918610134794e-05, + "loss": 1.3728, + "step": 1063000 + }, + { + "epoch": 0.64, + "learning_rate": 5.157708613578737e-05, + "loss": 1.4161, + "step": 1063500 + }, + { + "epoch": 0.64, + "learning_rate": 5.1574986170226804e-05, + "loss": 1.3771, + "step": 1064000 + }, + { + "epoch": 0.64, + "learning_rate": 5.1572890404597364e-05, + "loss": 1.413, + "step": 1064500 + }, + { + "epoch": 0.64, + "learning_rate": 5.157079463896792e-05, + "loss": 1.3853, + "step": 1065000 + }, + { + "epoch": 0.64, + "learning_rate": 5.156869467340736e-05, + "loss": 1.3814, + "step": 1065500 + }, + { + "epoch": 0.64, + "learning_rate": 5.156659470784679e-05, + "loss": 1.3648, + "step": 1066000 + }, + { + "epoch": 0.64, + "learning_rate": 5.1564494742286225e-05, + "loss": 1.3922, + "step": 1066500 + }, + { + "epoch": 0.64, + "learning_rate": 5.1562394776725665e-05, + "loss": 1.3924, + "step": 1067000 + }, + { + "epoch": 0.64, + "learning_rate": 5.156029481116509e-05, + "loss": 1.3848, + "step": 1067500 + }, + { + "epoch": 0.64, + "learning_rate": 5.155819904553565e-05, + "loss": 1.399, + "step": 1068000 + }, + { + "epoch": 0.64, + "learning_rate": 5.155609907997509e-05, + "loss": 1.3675, + "step": 1068500 + }, + { + "epoch": 0.64, + "learning_rate": 5.1553999114414526e-05, + "loss": 1.4029, + "step": 1069000 + }, + { + "epoch": 0.64, + "learning_rate": 5.155189914885396e-05, + "loss": 1.3658, + "step": 1069500 + }, + { + "epoch": 0.64, + "learning_rate": 5.154979918329339e-05, + "loss": 1.3809, + "step": 1070000 + }, + { + "epoch": 0.64, + "learning_rate": 5.1547699217732826e-05, + "loss": 1.3865, + "step": 1070500 + }, + { + "epoch": 0.64, + "learning_rate": 5.154559925217226e-05, + "loss": 1.3994, + "step": 1071000 + }, + { + "epoch": 0.64, + "learning_rate": 5.154350348654282e-05, + "loss": 1.4062, + "step": 1071500 + }, + { + "epoch": 0.64, + "learning_rate": 5.154140352098226e-05, + "loss": 1.3787, + "step": 1072000 + }, + { + "epoch": 0.64, + "learning_rate": 5.153930355542169e-05, + "loss": 1.3769, + "step": 1072500 + }, + { + "epoch": 0.64, + "learning_rate": 5.153720358986112e-05, + "loss": 1.3793, + "step": 1073000 + }, + { + "epoch": 0.64, + "learning_rate": 5.153510362430056e-05, + "loss": 1.3987, + "step": 1073500 + }, + { + "epoch": 0.64, + "learning_rate": 5.1533003658739994e-05, + "loss": 1.3735, + "step": 1074000 + }, + { + "epoch": 0.64, + "learning_rate": 5.153090369317943e-05, + "loss": 1.3841, + "step": 1074500 + }, + { + "epoch": 0.64, + "learning_rate": 5.152880372761887e-05, + "loss": 1.3733, + "step": 1075000 + }, + { + "epoch": 0.64, + "learning_rate": 5.15267037620583e-05, + "loss": 1.379, + "step": 1075500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1524607996428855e-05, + "loss": 1.3643, + "step": 1076000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1522508030868295e-05, + "loss": 1.4039, + "step": 1076500 + }, + { + "epoch": 0.65, + "learning_rate": 5.152041226523885e-05, + "loss": 1.422, + "step": 1077000 + }, + { + "epoch": 0.65, + "learning_rate": 5.151831229967828e-05, + "loss": 1.3997, + "step": 1077500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1516212334117715e-05, + "loss": 1.3428, + "step": 1078000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1514112368557156e-05, + "loss": 1.4137, + "step": 1078500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1512016602927716e-05, + "loss": 1.3951, + "step": 1079000 + }, + { + "epoch": 0.65, + "learning_rate": 5.150991663736714e-05, + "loss": 1.4016, + "step": 1079500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1507816671806576e-05, + "loss": 1.4085, + "step": 1080000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1505716706246016e-05, + "loss": 1.3827, + "step": 1080500 + }, + { + "epoch": 0.65, + "learning_rate": 5.150361674068545e-05, + "loss": 1.349, + "step": 1081000 + }, + { + "epoch": 0.65, + "learning_rate": 5.150151677512488e-05, + "loss": 1.3886, + "step": 1081500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1499416809564324e-05, + "loss": 1.414, + "step": 1082000 + }, + { + "epoch": 0.65, + "learning_rate": 5.149732104393488e-05, + "loss": 1.4363, + "step": 1082500 + }, + { + "epoch": 0.65, + "learning_rate": 5.149522107837431e-05, + "loss": 1.369, + "step": 1083000 + }, + { + "epoch": 0.65, + "learning_rate": 5.149312111281375e-05, + "loss": 1.3952, + "step": 1083500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1491021147253184e-05, + "loss": 1.3957, + "step": 1084000 + }, + { + "epoch": 0.65, + "learning_rate": 5.148892118169262e-05, + "loss": 1.4046, + "step": 1084500 + }, + { + "epoch": 0.65, + "learning_rate": 5.148682121613206e-05, + "loss": 1.3808, + "step": 1085000 + }, + { + "epoch": 0.65, + "learning_rate": 5.148472545050261e-05, + "loss": 1.3724, + "step": 1085500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1482625484942045e-05, + "loss": 1.402, + "step": 1086000 + }, + { + "epoch": 0.65, + "learning_rate": 5.148052551938148e-05, + "loss": 1.4139, + "step": 1086500 + }, + { + "epoch": 0.65, + "learning_rate": 5.147842555382092e-05, + "loss": 1.3749, + "step": 1087000 + }, + { + "epoch": 0.65, + "learning_rate": 5.147632558826035e-05, + "loss": 1.4002, + "step": 1087500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1474225622699786e-05, + "loss": 1.3736, + "step": 1088000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1472125657139226e-05, + "loss": 1.3747, + "step": 1088500 + }, + { + "epoch": 0.65, + "learning_rate": 5.147002569157866e-05, + "loss": 1.3658, + "step": 1089000 + }, + { + "epoch": 0.65, + "learning_rate": 5.146792992594921e-05, + "loss": 1.3934, + "step": 1089500 + }, + { + "epoch": 0.65, + "learning_rate": 5.146582996038865e-05, + "loss": 1.3778, + "step": 1090000 + }, + { + "epoch": 0.65, + "learning_rate": 5.146373419475921e-05, + "loss": 1.3887, + "step": 1090500 + }, + { + "epoch": 0.65, + "learning_rate": 5.146163422919864e-05, + "loss": 1.3824, + "step": 1091000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1459534263638074e-05, + "loss": 1.41, + "step": 1091500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1457434298077514e-05, + "loss": 1.4074, + "step": 1092000 + }, + { + "epoch": 0.65, + "learning_rate": 5.145533853244807e-05, + "loss": 1.4086, + "step": 1092500 + }, + { + "epoch": 0.66, + "learning_rate": 5.14532385668875e-05, + "loss": 1.3811, + "step": 1093000 + }, + { + "epoch": 0.66, + "learning_rate": 5.1451138601326934e-05, + "loss": 1.3733, + "step": 1093500 + }, + { + "epoch": 0.66, + "learning_rate": 5.1449038635766375e-05, + "loss": 1.4263, + "step": 1094000 + }, + { + "epoch": 0.66, + "learning_rate": 5.144693867020581e-05, + "loss": 1.3807, + "step": 1094500 + }, + { + "epoch": 0.66, + "learning_rate": 5.144483870464524e-05, + "loss": 1.3812, + "step": 1095000 + }, + { + "epoch": 0.66, + "learning_rate": 5.144273873908468e-05, + "loss": 1.3496, + "step": 1095500 + }, + { + "epoch": 0.66, + "learning_rate": 5.1440638773524115e-05, + "loss": 1.3953, + "step": 1096000 + }, + { + "epoch": 0.66, + "learning_rate": 5.143853880796355e-05, + "loss": 1.3786, + "step": 1096500 + }, + { + "epoch": 0.66, + "learning_rate": 5.143644304233411e-05, + "loss": 1.3497, + "step": 1097000 + }, + { + "epoch": 0.66, + "learning_rate": 5.143434307677354e-05, + "loss": 1.3816, + "step": 1097500 + }, + { + "epoch": 0.66, + "learning_rate": 5.1432243111212976e-05, + "loss": 1.3858, + "step": 1098000 + }, + { + "epoch": 0.66, + "learning_rate": 5.1430143145652416e-05, + "loss": 1.3926, + "step": 1098500 + }, + { + "epoch": 0.66, + "learning_rate": 5.142804318009184e-05, + "loss": 1.4155, + "step": 1099000 + }, + { + "epoch": 0.66, + "learning_rate": 5.14259474144624e-05, + "loss": 1.378, + "step": 1099500 + }, + { + "epoch": 0.66, + "learning_rate": 5.142384744890184e-05, + "loss": 1.3969, + "step": 1100000 + }, + { + "epoch": 0.66, + "eval_loss": 1.3153480291366577, + "eval_runtime": 1111.5226, + "eval_samples_per_second": 473.873, + "eval_steps_per_second": 78.979, + "step": 1100000 + }, + { + "epoch": 0.66, + "learning_rate": 5.142174748334128e-05, + "loss": 1.3804, + "step": 1100500 + }, + { + "epoch": 0.66, + "learning_rate": 5.141964751778071e-05, + "loss": 1.3754, + "step": 1101000 + }, + { + "epoch": 0.66, + "learning_rate": 5.141754755222014e-05, + "loss": 1.3715, + "step": 1101500 + }, + { + "epoch": 0.66, + "learning_rate": 5.141544758665958e-05, + "loss": 1.3656, + "step": 1102000 + }, + { + "epoch": 0.66, + "learning_rate": 5.141335182103014e-05, + "loss": 1.3822, + "step": 1102500 + }, + { + "epoch": 0.66, + "learning_rate": 5.141125185546957e-05, + "loss": 1.3963, + "step": 1103000 + }, + { + "epoch": 0.66, + "learning_rate": 5.1409151889909005e-05, + "loss": 1.3675, + "step": 1103500 + }, + { + "epoch": 0.66, + "learning_rate": 5.140705192434844e-05, + "loss": 1.415, + "step": 1104000 + }, + { + "epoch": 0.66, + "learning_rate": 5.140495195878787e-05, + "loss": 1.3495, + "step": 1104500 + }, + { + "epoch": 0.66, + "learning_rate": 5.140285199322731e-05, + "loss": 1.3917, + "step": 1105000 + }, + { + "epoch": 0.66, + "learning_rate": 5.1400752027666745e-05, + "loss": 1.3534, + "step": 1105500 + }, + { + "epoch": 0.66, + "learning_rate": 5.1398656262037306e-05, + "loss": 1.4196, + "step": 1106000 + }, + { + "epoch": 0.66, + "learning_rate": 5.139655629647673e-05, + "loss": 1.3883, + "step": 1106500 + }, + { + "epoch": 0.66, + "learning_rate": 5.139445633091617e-05, + "loss": 1.3661, + "step": 1107000 + }, + { + "epoch": 0.66, + "learning_rate": 5.1392356365355606e-05, + "loss": 1.3944, + "step": 1107500 + }, + { + "epoch": 0.66, + "learning_rate": 5.139025639979504e-05, + "loss": 1.3743, + "step": 1108000 + }, + { + "epoch": 0.66, + "learning_rate": 5.138815643423448e-05, + "loss": 1.3976, + "step": 1108500 + }, + { + "epoch": 0.66, + "learning_rate": 5.138605646867391e-05, + "loss": 1.4238, + "step": 1109000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1383956503113347e-05, + "loss": 1.3618, + "step": 1109500 + }, + { + "epoch": 0.67, + "learning_rate": 5.138186493741503e-05, + "loss": 1.3853, + "step": 1110000 + }, + { + "epoch": 0.67, + "learning_rate": 5.137976497185446e-05, + "loss": 1.36, + "step": 1110500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1377665006293894e-05, + "loss": 1.4155, + "step": 1111000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1375569240664454e-05, + "loss": 1.3881, + "step": 1111500 + }, + { + "epoch": 0.67, + "learning_rate": 5.137346927510389e-05, + "loss": 1.3841, + "step": 1112000 + }, + { + "epoch": 0.67, + "learning_rate": 5.137136930954333e-05, + "loss": 1.3756, + "step": 1112500 + }, + { + "epoch": 0.67, + "learning_rate": 5.136926934398276e-05, + "loss": 1.4265, + "step": 1113000 + }, + { + "epoch": 0.67, + "learning_rate": 5.136716937842219e-05, + "loss": 1.3672, + "step": 1113500 + }, + { + "epoch": 0.67, + "learning_rate": 5.136506941286163e-05, + "loss": 1.3761, + "step": 1114000 + }, + { + "epoch": 0.67, + "learning_rate": 5.136296944730106e-05, + "loss": 1.3674, + "step": 1114500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1360869481740495e-05, + "loss": 1.3773, + "step": 1115000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1358769516179935e-05, + "loss": 1.3895, + "step": 1115500 + }, + { + "epoch": 0.67, + "learning_rate": 5.135666955061937e-05, + "loss": 1.3748, + "step": 1116000 + }, + { + "epoch": 0.67, + "learning_rate": 5.13545695850588e-05, + "loss": 1.4154, + "step": 1116500 + }, + { + "epoch": 0.67, + "learning_rate": 5.135246961949824e-05, + "loss": 1.401, + "step": 1117000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1350373853868796e-05, + "loss": 1.4003, + "step": 1117500 + }, + { + "epoch": 0.67, + "learning_rate": 5.134827388830823e-05, + "loss": 1.3916, + "step": 1118000 + }, + { + "epoch": 0.67, + "learning_rate": 5.134617392274766e-05, + "loss": 1.3667, + "step": 1118500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1344078157118223e-05, + "loss": 1.3732, + "step": 1119000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1341982391488784e-05, + "loss": 1.3752, + "step": 1119500 + }, + { + "epoch": 0.67, + "learning_rate": 5.133988242592822e-05, + "loss": 1.3812, + "step": 1120000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1337782460367644e-05, + "loss": 1.4159, + "step": 1120500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1335682494807084e-05, + "loss": 1.3821, + "step": 1121000 + }, + { + "epoch": 0.67, + "learning_rate": 5.133358252924652e-05, + "loss": 1.3776, + "step": 1121500 + }, + { + "epoch": 0.67, + "learning_rate": 5.133148256368595e-05, + "loss": 1.4106, + "step": 1122000 + }, + { + "epoch": 0.67, + "learning_rate": 5.132938259812539e-05, + "loss": 1.3728, + "step": 1122500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1327282632564825e-05, + "loss": 1.4052, + "step": 1123000 + }, + { + "epoch": 0.67, + "learning_rate": 5.132518266700426e-05, + "loss": 1.3816, + "step": 1123500 + }, + { + "epoch": 0.67, + "learning_rate": 5.13230827014437e-05, + "loss": 1.405, + "step": 1124000 + }, + { + "epoch": 0.67, + "learning_rate": 5.132098693581425e-05, + "loss": 1.3953, + "step": 1124500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1318886970253686e-05, + "loss": 1.3854, + "step": 1125000 + }, + { + "epoch": 0.67, + "learning_rate": 5.131678700469312e-05, + "loss": 1.3799, + "step": 1125500 + }, + { + "epoch": 0.68, + "learning_rate": 5.131468703913256e-05, + "loss": 1.3544, + "step": 1126000 + }, + { + "epoch": 0.68, + "learning_rate": 5.131258707357199e-05, + "loss": 1.3627, + "step": 1126500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1310487108011426e-05, + "loss": 1.3785, + "step": 1127000 + }, + { + "epoch": 0.68, + "learning_rate": 5.1308387142450866e-05, + "loss": 1.4006, + "step": 1127500 + }, + { + "epoch": 0.68, + "learning_rate": 5.130629137682142e-05, + "loss": 1.3694, + "step": 1128000 + }, + { + "epoch": 0.68, + "learning_rate": 5.1304191411260853e-05, + "loss": 1.3698, + "step": 1128500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1302091445700294e-05, + "loss": 1.4122, + "step": 1129000 + }, + { + "epoch": 0.68, + "learning_rate": 5.129999148013973e-05, + "loss": 1.3834, + "step": 1129500 + }, + { + "epoch": 0.68, + "learning_rate": 5.129789151457916e-05, + "loss": 1.3871, + "step": 1130000 + }, + { + "epoch": 0.68, + "learning_rate": 5.1295795748949714e-05, + "loss": 1.3673, + "step": 1130500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1293695783389154e-05, + "loss": 1.4203, + "step": 1131000 + }, + { + "epoch": 0.68, + "learning_rate": 5.129159581782859e-05, + "loss": 1.3797, + "step": 1131500 + }, + { + "epoch": 0.68, + "learning_rate": 5.128949585226802e-05, + "loss": 1.3973, + "step": 1132000 + }, + { + "epoch": 0.68, + "learning_rate": 5.1287400086638575e-05, + "loss": 1.3759, + "step": 1132500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1285300121078015e-05, + "loss": 1.4024, + "step": 1133000 + }, + { + "epoch": 0.68, + "learning_rate": 5.128320435544857e-05, + "loss": 1.3668, + "step": 1133500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1281104389888e-05, + "loss": 1.3785, + "step": 1134000 + }, + { + "epoch": 0.68, + "learning_rate": 5.127900442432744e-05, + "loss": 1.3942, + "step": 1134500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1276904458766876e-05, + "loss": 1.3998, + "step": 1135000 + }, + { + "epoch": 0.68, + "learning_rate": 5.127480449320631e-05, + "loss": 1.3801, + "step": 1135500 + }, + { + "epoch": 0.68, + "learning_rate": 5.127270452764575e-05, + "loss": 1.375, + "step": 1136000 + }, + { + "epoch": 0.68, + "learning_rate": 5.127060456208518e-05, + "loss": 1.3782, + "step": 1136500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1268508796455737e-05, + "loss": 1.3587, + "step": 1137000 + }, + { + "epoch": 0.68, + "learning_rate": 5.126640883089517e-05, + "loss": 1.3792, + "step": 1137500 + }, + { + "epoch": 0.68, + "learning_rate": 5.126430886533461e-05, + "loss": 1.3931, + "step": 1138000 + }, + { + "epoch": 0.68, + "learning_rate": 5.1262208899774044e-05, + "loss": 1.3647, + "step": 1138500 + }, + { + "epoch": 0.68, + "learning_rate": 5.126010893421348e-05, + "loss": 1.3771, + "step": 1139000 + }, + { + "epoch": 0.68, + "learning_rate": 5.125800896865292e-05, + "loss": 1.4079, + "step": 1139500 + }, + { + "epoch": 0.68, + "learning_rate": 5.125590900309235e-05, + "loss": 1.3752, + "step": 1140000 + }, + { + "epoch": 0.68, + "learning_rate": 5.125380903753178e-05, + "loss": 1.3997, + "step": 1140500 + }, + { + "epoch": 0.68, + "learning_rate": 5.125170907197122e-05, + "loss": 1.4085, + "step": 1141000 + }, + { + "epoch": 0.68, + "learning_rate": 5.124960910641065e-05, + "loss": 1.3524, + "step": 1141500 + }, + { + "epoch": 0.68, + "learning_rate": 5.124750914085009e-05, + "loss": 1.3721, + "step": 1142000 + }, + { + "epoch": 0.68, + "learning_rate": 5.1245409175289525e-05, + "loss": 1.3779, + "step": 1142500 + }, + { + "epoch": 0.69, + "learning_rate": 5.124331340966008e-05, + "loss": 1.3825, + "step": 1143000 + }, + { + "epoch": 0.69, + "learning_rate": 5.124121344409951e-05, + "loss": 1.4193, + "step": 1143500 + }, + { + "epoch": 0.69, + "learning_rate": 5.123911347853895e-05, + "loss": 1.3746, + "step": 1144000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1237013512978386e-05, + "loss": 1.4017, + "step": 1144500 + }, + { + "epoch": 0.69, + "learning_rate": 5.123491354741782e-05, + "loss": 1.3472, + "step": 1145000 + }, + { + "epoch": 0.69, + "learning_rate": 5.123281358185726e-05, + "loss": 1.3713, + "step": 1145500 + }, + { + "epoch": 0.69, + "learning_rate": 5.123072201615893e-05, + "loss": 1.3892, + "step": 1146000 + }, + { + "epoch": 0.69, + "learning_rate": 5.122862205059837e-05, + "loss": 1.3863, + "step": 1146500 + }, + { + "epoch": 0.69, + "learning_rate": 5.122652208503781e-05, + "loss": 1.3757, + "step": 1147000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1224422119477233e-05, + "loss": 1.3846, + "step": 1147500 + }, + { + "epoch": 0.69, + "learning_rate": 5.1222322153916674e-05, + "loss": 1.3781, + "step": 1148000 + }, + { + "epoch": 0.69, + "learning_rate": 5.122022218835611e-05, + "loss": 1.3856, + "step": 1148500 + }, + { + "epoch": 0.69, + "learning_rate": 5.121812222279555e-05, + "loss": 1.3714, + "step": 1149000 + }, + { + "epoch": 0.69, + "learning_rate": 5.121602225723498e-05, + "loss": 1.3471, + "step": 1149500 + }, + { + "epoch": 0.69, + "learning_rate": 5.1213922291674414e-05, + "loss": 1.3798, + "step": 1150000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1211822326113855e-05, + "loss": 1.4012, + "step": 1150500 + }, + { + "epoch": 0.69, + "learning_rate": 5.120972236055329e-05, + "loss": 1.3677, + "step": 1151000 + }, + { + "epoch": 0.69, + "learning_rate": 5.120762239499272e-05, + "loss": 1.3716, + "step": 1151500 + }, + { + "epoch": 0.69, + "learning_rate": 5.1205526629363275e-05, + "loss": 1.3841, + "step": 1152000 + }, + { + "epoch": 0.69, + "learning_rate": 5.120343086373383e-05, + "loss": 1.3865, + "step": 1152500 + }, + { + "epoch": 0.69, + "learning_rate": 5.120133089817327e-05, + "loss": 1.3806, + "step": 1153000 + }, + { + "epoch": 0.69, + "learning_rate": 5.11992309326127e-05, + "loss": 1.3734, + "step": 1153500 + }, + { + "epoch": 0.69, + "learning_rate": 5.1197130967052136e-05, + "loss": 1.3676, + "step": 1154000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1195031001491576e-05, + "loss": 1.3616, + "step": 1154500 + }, + { + "epoch": 0.69, + "learning_rate": 5.119293103593101e-05, + "loss": 1.3571, + "step": 1155000 + }, + { + "epoch": 0.69, + "learning_rate": 5.119083107037044e-05, + "loss": 1.3581, + "step": 1155500 + }, + { + "epoch": 0.69, + "learning_rate": 5.1188735304741e-05, + "loss": 1.4098, + "step": 1156000 + }, + { + "epoch": 0.69, + "learning_rate": 5.118663533918044e-05, + "loss": 1.3855, + "step": 1156500 + }, + { + "epoch": 0.69, + "learning_rate": 5.118453537361987e-05, + "loss": 1.3983, + "step": 1157000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1182439607990424e-05, + "loss": 1.3756, + "step": 1157500 + }, + { + "epoch": 0.69, + "learning_rate": 5.1180339642429864e-05, + "loss": 1.3781, + "step": 1158000 + }, + { + "epoch": 0.69, + "learning_rate": 5.11782396768693e-05, + "loss": 1.3963, + "step": 1158500 + }, + { + "epoch": 0.69, + "learning_rate": 5.117613971130873e-05, + "loss": 1.3835, + "step": 1159000 + }, + { + "epoch": 0.7, + "learning_rate": 5.117403974574817e-05, + "loss": 1.3928, + "step": 1159500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1171939780187605e-05, + "loss": 1.3423, + "step": 1160000 + }, + { + "epoch": 0.7, + "learning_rate": 5.116983981462704e-05, + "loss": 1.3655, + "step": 1160500 + }, + { + "epoch": 0.7, + "learning_rate": 5.116773984906648e-05, + "loss": 1.3685, + "step": 1161000 + }, + { + "epoch": 0.7, + "learning_rate": 5.116563988350591e-05, + "loss": 1.3539, + "step": 1161500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1163539917945345e-05, + "loss": 1.3701, + "step": 1162000 + }, + { + "epoch": 0.7, + "learning_rate": 5.11614441523159e-05, + "loss": 1.3552, + "step": 1162500 + }, + { + "epoch": 0.7, + "learning_rate": 5.115934418675534e-05, + "loss": 1.4087, + "step": 1163000 + }, + { + "epoch": 0.7, + "learning_rate": 5.115724422119477e-05, + "loss": 1.3697, + "step": 1163500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1155148455565326e-05, + "loss": 1.3504, + "step": 1164000 + }, + { + "epoch": 0.7, + "learning_rate": 5.1153048490004766e-05, + "loss": 1.3818, + "step": 1164500 + }, + { + "epoch": 0.7, + "learning_rate": 5.11509485244442e-05, + "loss": 1.3863, + "step": 1165000 + }, + { + "epoch": 0.7, + "learning_rate": 5.114884855888363e-05, + "loss": 1.3985, + "step": 1165500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1146748593323073e-05, + "loss": 1.3701, + "step": 1166000 + }, + { + "epoch": 0.7, + "learning_rate": 5.114464862776251e-05, + "loss": 1.403, + "step": 1166500 + }, + { + "epoch": 0.7, + "learning_rate": 5.114254866220194e-05, + "loss": 1.3908, + "step": 1167000 + }, + { + "epoch": 0.7, + "learning_rate": 5.1140448696641374e-05, + "loss": 1.365, + "step": 1167500 + }, + { + "epoch": 0.7, + "learning_rate": 5.113834873108081e-05, + "loss": 1.3751, + "step": 1168000 + }, + { + "epoch": 0.7, + "learning_rate": 5.113625296545137e-05, + "loss": 1.4, + "step": 1168500 + }, + { + "epoch": 0.7, + "learning_rate": 5.11341529998908e-05, + "loss": 1.3502, + "step": 1169000 + }, + { + "epoch": 0.7, + "learning_rate": 5.113205303433024e-05, + "loss": 1.3935, + "step": 1169500 + }, + { + "epoch": 0.7, + "learning_rate": 5.112995306876967e-05, + "loss": 1.3827, + "step": 1170000 + }, + { + "epoch": 0.7, + "learning_rate": 5.112785730314023e-05, + "loss": 1.3787, + "step": 1170500 + }, + { + "epoch": 0.7, + "learning_rate": 5.112575733757966e-05, + "loss": 1.3628, + "step": 1171000 + }, + { + "epoch": 0.7, + "learning_rate": 5.11236573720191e-05, + "loss": 1.3505, + "step": 1171500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1121557406458535e-05, + "loss": 1.3781, + "step": 1172000 + }, + { + "epoch": 0.7, + "learning_rate": 5.111945744089797e-05, + "loss": 1.3364, + "step": 1172500 + }, + { + "epoch": 0.7, + "learning_rate": 5.11173574753374e-05, + "loss": 1.3908, + "step": 1173000 + }, + { + "epoch": 0.7, + "learning_rate": 5.1115257509776836e-05, + "loss": 1.3812, + "step": 1173500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1113161744147396e-05, + "loss": 1.4128, + "step": 1174000 + }, + { + "epoch": 0.7, + "learning_rate": 5.111106177858683e-05, + "loss": 1.3733, + "step": 1174500 + }, + { + "epoch": 0.7, + "learning_rate": 5.110896181302626e-05, + "loss": 1.3805, + "step": 1175000 + }, + { + "epoch": 0.7, + "learning_rate": 5.1106861847465697e-05, + "loss": 1.3571, + "step": 1175500 + }, + { + "epoch": 0.71, + "learning_rate": 5.110476188190514e-05, + "loss": 1.3847, + "step": 1176000 + }, + { + "epoch": 0.71, + "learning_rate": 5.110266191634457e-05, + "loss": 1.4097, + "step": 1176500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1100566150715124e-05, + "loss": 1.4068, + "step": 1177000 + }, + { + "epoch": 0.71, + "learning_rate": 5.109846618515456e-05, + "loss": 1.3897, + "step": 1177500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1096366219594e-05, + "loss": 1.3688, + "step": 1178000 + }, + { + "epoch": 0.71, + "learning_rate": 5.109426625403343e-05, + "loss": 1.3812, + "step": 1178500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1092166288472864e-05, + "loss": 1.3976, + "step": 1179000 + }, + { + "epoch": 0.71, + "learning_rate": 5.1090066322912305e-05, + "loss": 1.3727, + "step": 1179500 + }, + { + "epoch": 0.71, + "learning_rate": 5.108796635735174e-05, + "loss": 1.3653, + "step": 1180000 + }, + { + "epoch": 0.71, + "learning_rate": 5.108587059172229e-05, + "loss": 1.3896, + "step": 1180500 + }, + { + "epoch": 0.71, + "learning_rate": 5.108377062616173e-05, + "loss": 1.3743, + "step": 1181000 + }, + { + "epoch": 0.71, + "learning_rate": 5.1081670660601165e-05, + "loss": 1.3712, + "step": 1181500 + }, + { + "epoch": 0.71, + "learning_rate": 5.10795706950406e-05, + "loss": 1.3768, + "step": 1182000 + }, + { + "epoch": 0.71, + "learning_rate": 5.107747072948004e-05, + "loss": 1.3707, + "step": 1182500 + }, + { + "epoch": 0.71, + "learning_rate": 5.107537076391947e-05, + "loss": 1.386, + "step": 1183000 + }, + { + "epoch": 0.71, + "learning_rate": 5.1073274998290026e-05, + "loss": 1.3595, + "step": 1183500 + }, + { + "epoch": 0.71, + "learning_rate": 5.107117503272946e-05, + "loss": 1.3561, + "step": 1184000 + }, + { + "epoch": 0.71, + "learning_rate": 5.10690750671689e-05, + "loss": 1.3615, + "step": 1184500 + }, + { + "epoch": 0.71, + "learning_rate": 5.106697510160833e-05, + "loss": 1.3894, + "step": 1185000 + }, + { + "epoch": 0.71, + "learning_rate": 5.106487513604777e-05, + "loss": 1.3899, + "step": 1185500 + }, + { + "epoch": 0.71, + "learning_rate": 5.106277517048721e-05, + "loss": 1.3547, + "step": 1186000 + }, + { + "epoch": 0.71, + "learning_rate": 5.106067940485776e-05, + "loss": 1.3683, + "step": 1186500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1058579439297194e-05, + "loss": 1.3676, + "step": 1187000 + }, + { + "epoch": 0.71, + "learning_rate": 5.105647947373663e-05, + "loss": 1.3993, + "step": 1187500 + }, + { + "epoch": 0.71, + "learning_rate": 5.105437950817607e-05, + "loss": 1.3662, + "step": 1188000 + }, + { + "epoch": 0.71, + "learning_rate": 5.10522795426155e-05, + "loss": 1.3727, + "step": 1188500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1050183776986055e-05, + "loss": 1.4103, + "step": 1189000 + }, + { + "epoch": 0.71, + "learning_rate": 5.1048083811425495e-05, + "loss": 1.3652, + "step": 1189500 + }, + { + "epoch": 0.71, + "learning_rate": 5.104598384586493e-05, + "loss": 1.3977, + "step": 1190000 + }, + { + "epoch": 0.71, + "learning_rate": 5.104388388030436e-05, + "loss": 1.3614, + "step": 1190500 + }, + { + "epoch": 0.71, + "learning_rate": 5.10417839147438e-05, + "loss": 1.414, + "step": 1191000 + }, + { + "epoch": 0.71, + "learning_rate": 5.1039683949183236e-05, + "loss": 1.3397, + "step": 1191500 + }, + { + "epoch": 0.71, + "learning_rate": 5.103758818355379e-05, + "loss": 1.3673, + "step": 1192000 + }, + { + "epoch": 0.71, + "learning_rate": 5.103548821799322e-05, + "loss": 1.3586, + "step": 1192500 + }, + { + "epoch": 0.72, + "learning_rate": 5.103338825243266e-05, + "loss": 1.3636, + "step": 1193000 + }, + { + "epoch": 0.72, + "learning_rate": 5.1031288286872096e-05, + "loss": 1.3844, + "step": 1193500 + }, + { + "epoch": 0.72, + "learning_rate": 5.102918832131153e-05, + "loss": 1.3879, + "step": 1194000 + }, + { + "epoch": 0.72, + "learning_rate": 5.102709255568209e-05, + "loss": 1.3613, + "step": 1194500 + }, + { + "epoch": 0.72, + "learning_rate": 5.1024992590121524e-05, + "loss": 1.3877, + "step": 1195000 + }, + { + "epoch": 0.72, + "learning_rate": 5.102289262456096e-05, + "loss": 1.3694, + "step": 1195500 + }, + { + "epoch": 0.72, + "learning_rate": 5.102079685893151e-05, + "loss": 1.3863, + "step": 1196000 + }, + { + "epoch": 0.72, + "learning_rate": 5.101869689337095e-05, + "loss": 1.3787, + "step": 1196500 + }, + { + "epoch": 0.72, + "learning_rate": 5.1016596927810384e-05, + "loss": 1.3816, + "step": 1197000 + }, + { + "epoch": 0.72, + "learning_rate": 5.101449696224982e-05, + "loss": 1.3487, + "step": 1197500 + }, + { + "epoch": 0.72, + "learning_rate": 5.101240119662037e-05, + "loss": 1.3569, + "step": 1198000 + }, + { + "epoch": 0.72, + "learning_rate": 5.101030123105981e-05, + "loss": 1.3908, + "step": 1198500 + }, + { + "epoch": 0.72, + "learning_rate": 5.1008201265499245e-05, + "loss": 1.3867, + "step": 1199000 + }, + { + "epoch": 0.72, + "learning_rate": 5.100610129993868e-05, + "loss": 1.3663, + "step": 1199500 + }, + { + "epoch": 0.72, + "learning_rate": 5.100400133437812e-05, + "loss": 1.3723, + "step": 1200000 + }, + { + "epoch": 0.72, + "eval_loss": 1.3151705265045166, + "eval_runtime": 1104.7712, + "eval_samples_per_second": 476.768, + "eval_steps_per_second": 79.462, + "step": 1200000 + }, + { + "epoch": 0.72, + "learning_rate": 5.100190136881755e-05, + "loss": 1.3804, + "step": 1200500 + }, + { + "epoch": 0.72, + "learning_rate": 5.0999801403256986e-05, + "loss": 1.3675, + "step": 1201000 + }, + { + "epoch": 0.72, + "learning_rate": 5.099770143769642e-05, + "loss": 1.3808, + "step": 1201500 + }, + { + "epoch": 0.72, + "learning_rate": 5.099560147213585e-05, + "loss": 1.3655, + "step": 1202000 + }, + { + "epoch": 0.72, + "learning_rate": 5.099350150657529e-05, + "loss": 1.3646, + "step": 1202500 + }, + { + "epoch": 0.72, + "learning_rate": 5.0991401541014726e-05, + "loss": 1.377, + "step": 1203000 + }, + { + "epoch": 0.72, + "learning_rate": 5.098930157545416e-05, + "loss": 1.3981, + "step": 1203500 + }, + { + "epoch": 0.72, + "learning_rate": 5.09872016098936e-05, + "loss": 1.3675, + "step": 1204000 + }, + { + "epoch": 0.72, + "learning_rate": 5.0985105844264154e-05, + "loss": 1.3753, + "step": 1204500 + }, + { + "epoch": 0.72, + "learning_rate": 5.098300587870359e-05, + "loss": 1.3472, + "step": 1205000 + }, + { + "epoch": 0.72, + "learning_rate": 5.098090591314302e-05, + "loss": 1.357, + "step": 1205500 + }, + { + "epoch": 0.72, + "learning_rate": 5.097880594758246e-05, + "loss": 1.3797, + "step": 1206000 + }, + { + "epoch": 0.72, + "learning_rate": 5.0976705982021894e-05, + "loss": 1.4025, + "step": 1206500 + }, + { + "epoch": 0.72, + "learning_rate": 5.097461021639245e-05, + "loss": 1.375, + "step": 1207000 + }, + { + "epoch": 0.72, + "learning_rate": 5.097251445076301e-05, + "loss": 1.3654, + "step": 1207500 + }, + { + "epoch": 0.72, + "learning_rate": 5.097041448520244e-05, + "loss": 1.3685, + "step": 1208000 + }, + { + "epoch": 0.72, + "learning_rate": 5.0968318719573e-05, + "loss": 1.3489, + "step": 1208500 + }, + { + "epoch": 0.72, + "learning_rate": 5.0966218754012435e-05, + "loss": 1.3542, + "step": 1209000 + }, + { + "epoch": 0.73, + "learning_rate": 5.096411878845187e-05, + "loss": 1.3754, + "step": 1209500 + }, + { + "epoch": 0.73, + "learning_rate": 5.096201882289131e-05, + "loss": 1.391, + "step": 1210000 + }, + { + "epoch": 0.73, + "learning_rate": 5.095991885733074e-05, + "loss": 1.379, + "step": 1210500 + }, + { + "epoch": 0.73, + "learning_rate": 5.095781889177017e-05, + "loss": 1.3446, + "step": 1211000 + }, + { + "epoch": 0.73, + "learning_rate": 5.095571892620961e-05, + "loss": 1.3724, + "step": 1211500 + }, + { + "epoch": 0.73, + "learning_rate": 5.095361896064904e-05, + "loss": 1.3668, + "step": 1212000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0951518995088476e-05, + "loss": 1.3661, + "step": 1212500 + }, + { + "epoch": 0.73, + "learning_rate": 5.0949419029527917e-05, + "loss": 1.3654, + "step": 1213000 + }, + { + "epoch": 0.73, + "learning_rate": 5.094731906396735e-05, + "loss": 1.3541, + "step": 1213500 + }, + { + "epoch": 0.73, + "learning_rate": 5.0945219098406784e-05, + "loss": 1.3598, + "step": 1214000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0943119132846224e-05, + "loss": 1.3503, + "step": 1214500 + }, + { + "epoch": 0.73, + "learning_rate": 5.094101916728566e-05, + "loss": 1.3971, + "step": 1215000 + }, + { + "epoch": 0.73, + "learning_rate": 5.093892340165621e-05, + "loss": 1.3821, + "step": 1215500 + }, + { + "epoch": 0.73, + "learning_rate": 5.0936823436095644e-05, + "loss": 1.377, + "step": 1216000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0934723470535084e-05, + "loss": 1.356, + "step": 1216500 + }, + { + "epoch": 0.73, + "learning_rate": 5.093262350497452e-05, + "loss": 1.3789, + "step": 1217000 + }, + { + "epoch": 0.73, + "learning_rate": 5.093052353941395e-05, + "loss": 1.3561, + "step": 1217500 + }, + { + "epoch": 0.73, + "learning_rate": 5.092842357385339e-05, + "loss": 1.3694, + "step": 1218000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0926327808223945e-05, + "loss": 1.3678, + "step": 1218500 + }, + { + "epoch": 0.73, + "learning_rate": 5.092422784266338e-05, + "loss": 1.3942, + "step": 1219000 + }, + { + "epoch": 0.73, + "learning_rate": 5.092212787710282e-05, + "loss": 1.3781, + "step": 1219500 + }, + { + "epoch": 0.73, + "learning_rate": 5.092002791154225e-05, + "loss": 1.4007, + "step": 1220000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0917927945981686e-05, + "loss": 1.3577, + "step": 1220500 + }, + { + "epoch": 0.73, + "learning_rate": 5.091583218035224e-05, + "loss": 1.372, + "step": 1221000 + }, + { + "epoch": 0.73, + "learning_rate": 5.091373641472279e-05, + "loss": 1.3697, + "step": 1221500 + }, + { + "epoch": 0.73, + "learning_rate": 5.091164064909335e-05, + "loss": 1.3779, + "step": 1222000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0909540683532794e-05, + "loss": 1.3555, + "step": 1222500 + }, + { + "epoch": 0.73, + "learning_rate": 5.090744071797222e-05, + "loss": 1.3911, + "step": 1223000 + }, + { + "epoch": 0.73, + "learning_rate": 5.090534075241166e-05, + "loss": 1.3517, + "step": 1223500 + }, + { + "epoch": 0.73, + "learning_rate": 5.0903240786851094e-05, + "loss": 1.3597, + "step": 1224000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0901145021221654e-05, + "loss": 1.3848, + "step": 1224500 + }, + { + "epoch": 0.73, + "learning_rate": 5.089904505566109e-05, + "loss": 1.3372, + "step": 1225000 + }, + { + "epoch": 0.73, + "learning_rate": 5.089694509010052e-05, + "loss": 1.4007, + "step": 1225500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0894845124539955e-05, + "loss": 1.3585, + "step": 1226000 + }, + { + "epoch": 0.74, + "learning_rate": 5.089274515897939e-05, + "loss": 1.3632, + "step": 1226500 + }, + { + "epoch": 0.74, + "learning_rate": 5.089064519341883e-05, + "loss": 1.3868, + "step": 1227000 + }, + { + "epoch": 0.74, + "learning_rate": 5.088854522785826e-05, + "loss": 1.3611, + "step": 1227500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0886445262297695e-05, + "loss": 1.3871, + "step": 1228000 + }, + { + "epoch": 0.74, + "learning_rate": 5.088434949666825e-05, + "loss": 1.3915, + "step": 1228500 + }, + { + "epoch": 0.74, + "learning_rate": 5.088224953110769e-05, + "loss": 1.3762, + "step": 1229000 + }, + { + "epoch": 0.74, + "learning_rate": 5.088014956554712e-05, + "loss": 1.3556, + "step": 1229500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0878049599986556e-05, + "loss": 1.3397, + "step": 1230000 + }, + { + "epoch": 0.74, + "learning_rate": 5.0875949634425996e-05, + "loss": 1.38, + "step": 1230500 + }, + { + "epoch": 0.74, + "learning_rate": 5.087384966886543e-05, + "loss": 1.3747, + "step": 1231000 + }, + { + "epoch": 0.74, + "learning_rate": 5.087174970330486e-05, + "loss": 1.359, + "step": 1231500 + }, + { + "epoch": 0.74, + "learning_rate": 5.08696497377443e-05, + "loss": 1.3434, + "step": 1232000 + }, + { + "epoch": 0.74, + "learning_rate": 5.086754977218374e-05, + "loss": 1.3631, + "step": 1232500 + }, + { + "epoch": 0.74, + "learning_rate": 5.086544980662317e-05, + "loss": 1.3705, + "step": 1233000 + }, + { + "epoch": 0.74, + "learning_rate": 5.0863349841062604e-05, + "loss": 1.3553, + "step": 1233500 + }, + { + "epoch": 0.74, + "learning_rate": 5.086124987550204e-05, + "loss": 1.3719, + "step": 1234000 + }, + { + "epoch": 0.74, + "learning_rate": 5.08591541098726e-05, + "loss": 1.3385, + "step": 1234500 + }, + { + "epoch": 0.74, + "learning_rate": 5.085705414431204e-05, + "loss": 1.3611, + "step": 1235000 + }, + { + "epoch": 0.74, + "learning_rate": 5.0854954178751464e-05, + "loss": 1.3961, + "step": 1235500 + }, + { + "epoch": 0.74, + "learning_rate": 5.08528542131909e-05, + "loss": 1.3432, + "step": 1236000 + }, + { + "epoch": 0.74, + "learning_rate": 5.085075424763034e-05, + "loss": 1.3536, + "step": 1236500 + }, + { + "epoch": 0.74, + "learning_rate": 5.084866268193201e-05, + "loss": 1.3644, + "step": 1237000 + }, + { + "epoch": 0.74, + "learning_rate": 5.084656271637145e-05, + "loss": 1.3721, + "step": 1237500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0844466950742006e-05, + "loss": 1.4002, + "step": 1238000 + }, + { + "epoch": 0.74, + "learning_rate": 5.084236698518144e-05, + "loss": 1.3527, + "step": 1238500 + }, + { + "epoch": 0.74, + "learning_rate": 5.084026701962088e-05, + "loss": 1.3735, + "step": 1239000 + }, + { + "epoch": 0.74, + "learning_rate": 5.083816705406031e-05, + "loss": 1.3966, + "step": 1239500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0836067088499746e-05, + "loss": 1.3662, + "step": 1240000 + }, + { + "epoch": 0.74, + "learning_rate": 5.0833967122939187e-05, + "loss": 1.3885, + "step": 1240500 + }, + { + "epoch": 0.74, + "learning_rate": 5.083186715737862e-05, + "loss": 1.3659, + "step": 1241000 + }, + { + "epoch": 0.74, + "learning_rate": 5.0829767191818053e-05, + "loss": 1.3429, + "step": 1241500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0827667226257494e-05, + "loss": 1.3731, + "step": 1242000 + }, + { + "epoch": 0.74, + "learning_rate": 5.082556726069693e-05, + "loss": 1.3712, + "step": 1242500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0823467295136354e-05, + "loss": 1.3826, + "step": 1243000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0821371529506914e-05, + "loss": 1.3889, + "step": 1243500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0819271563946354e-05, + "loss": 1.3738, + "step": 1244000 + }, + { + "epoch": 0.75, + "learning_rate": 5.081717159838579e-05, + "loss": 1.3506, + "step": 1244500 + }, + { + "epoch": 0.75, + "learning_rate": 5.081507163282522e-05, + "loss": 1.3725, + "step": 1245000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0812971667264655e-05, + "loss": 1.3725, + "step": 1245500 + }, + { + "epoch": 0.75, + "learning_rate": 5.081087170170409e-05, + "loss": 1.3702, + "step": 1246000 + }, + { + "epoch": 0.75, + "learning_rate": 5.080877593607465e-05, + "loss": 1.3648, + "step": 1246500 + }, + { + "epoch": 0.75, + "learning_rate": 5.080667597051409e-05, + "loss": 1.3978, + "step": 1247000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0804576004953516e-05, + "loss": 1.3837, + "step": 1247500 + }, + { + "epoch": 0.75, + "learning_rate": 5.080247603939295e-05, + "loss": 1.3811, + "step": 1248000 + }, + { + "epoch": 0.75, + "learning_rate": 5.080037607383239e-05, + "loss": 1.3716, + "step": 1248500 + }, + { + "epoch": 0.75, + "learning_rate": 5.079827610827182e-05, + "loss": 1.3669, + "step": 1249000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0796176142711256e-05, + "loss": 1.3718, + "step": 1249500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0794076177150696e-05, + "loss": 1.3555, + "step": 1250000 + }, + { + "epoch": 0.75, + "learning_rate": 5.079198041152125e-05, + "loss": 1.3691, + "step": 1250500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0789880445960683e-05, + "loss": 1.3493, + "step": 1251000 + }, + { + "epoch": 0.75, + "learning_rate": 5.078778048040012e-05, + "loss": 1.4041, + "step": 1251500 + }, + { + "epoch": 0.75, + "learning_rate": 5.078568471477068e-05, + "loss": 1.4076, + "step": 1252000 + }, + { + "epoch": 0.75, + "learning_rate": 5.078358474921011e-05, + "loss": 1.3565, + "step": 1252500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0781484783649544e-05, + "loss": 1.3829, + "step": 1253000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0779384818088984e-05, + "loss": 1.3445, + "step": 1253500 + }, + { + "epoch": 0.75, + "learning_rate": 5.077728485252842e-05, + "loss": 1.3654, + "step": 1254000 + }, + { + "epoch": 0.75, + "learning_rate": 5.077518488696785e-05, + "loss": 1.3696, + "step": 1254500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0773089121338405e-05, + "loss": 1.3649, + "step": 1255000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0770989155777845e-05, + "loss": 1.3795, + "step": 1255500 + }, + { + "epoch": 0.75, + "learning_rate": 5.076888919021728e-05, + "loss": 1.3856, + "step": 1256000 + }, + { + "epoch": 0.75, + "learning_rate": 5.076678922465671e-05, + "loss": 1.3688, + "step": 1256500 + }, + { + "epoch": 0.75, + "learning_rate": 5.076468925909615e-05, + "loss": 1.346, + "step": 1257000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0762593493466706e-05, + "loss": 1.3966, + "step": 1257500 + }, + { + "epoch": 0.75, + "learning_rate": 5.076049352790614e-05, + "loss": 1.36, + "step": 1258000 + }, + { + "epoch": 0.75, + "learning_rate": 5.075839356234557e-05, + "loss": 1.3862, + "step": 1258500 + }, + { + "epoch": 0.75, + "learning_rate": 5.075629359678501e-05, + "loss": 1.3487, + "step": 1259000 + }, + { + "epoch": 0.76, + "learning_rate": 5.0754197831155567e-05, + "loss": 1.3504, + "step": 1259500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0752097865595e-05, + "loss": 1.4048, + "step": 1260000 + }, + { + "epoch": 0.76, + "learning_rate": 5.074999790003444e-05, + "loss": 1.3684, + "step": 1260500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0747902134405e-05, + "loss": 1.3706, + "step": 1261000 + }, + { + "epoch": 0.76, + "learning_rate": 5.0745802168844434e-05, + "loss": 1.327, + "step": 1261500 + }, + { + "epoch": 0.76, + "learning_rate": 5.074370220328386e-05, + "loss": 1.3419, + "step": 1262000 + }, + { + "epoch": 0.76, + "learning_rate": 5.07416022377233e-05, + "loss": 1.3675, + "step": 1262500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0739502272162734e-05, + "loss": 1.3986, + "step": 1263000 + }, + { + "epoch": 0.76, + "learning_rate": 5.073740230660217e-05, + "loss": 1.388, + "step": 1263500 + }, + { + "epoch": 0.76, + "learning_rate": 5.073530234104161e-05, + "loss": 1.3511, + "step": 1264000 + }, + { + "epoch": 0.76, + "learning_rate": 5.073320237548104e-05, + "loss": 1.3755, + "step": 1264500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0731102409920475e-05, + "loss": 1.3551, + "step": 1265000 + }, + { + "epoch": 0.76, + "learning_rate": 5.072900664429103e-05, + "loss": 1.358, + "step": 1265500 + }, + { + "epoch": 0.76, + "learning_rate": 5.072690667873047e-05, + "loss": 1.3489, + "step": 1266000 + }, + { + "epoch": 0.76, + "learning_rate": 5.07248067131699e-05, + "loss": 1.3711, + "step": 1266500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0722706747609336e-05, + "loss": 1.3771, + "step": 1267000 + }, + { + "epoch": 0.76, + "learning_rate": 5.0720606782048776e-05, + "loss": 1.351, + "step": 1267500 + }, + { + "epoch": 0.76, + "learning_rate": 5.071851101641933e-05, + "loss": 1.3494, + "step": 1268000 + }, + { + "epoch": 0.76, + "learning_rate": 5.071641525078989e-05, + "loss": 1.3627, + "step": 1268500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0714315285229317e-05, + "loss": 1.3813, + "step": 1269000 + }, + { + "epoch": 0.76, + "learning_rate": 5.071221531966876e-05, + "loss": 1.3699, + "step": 1269500 + }, + { + "epoch": 0.76, + "learning_rate": 5.071011535410819e-05, + "loss": 1.3926, + "step": 1270000 + }, + { + "epoch": 0.76, + "learning_rate": 5.0708015388547624e-05, + "loss": 1.3404, + "step": 1270500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0705915422987064e-05, + "loss": 1.3685, + "step": 1271000 + }, + { + "epoch": 0.76, + "learning_rate": 5.07038154574265e-05, + "loss": 1.359, + "step": 1271500 + }, + { + "epoch": 0.76, + "learning_rate": 5.070171549186593e-05, + "loss": 1.3457, + "step": 1272000 + }, + { + "epoch": 0.76, + "learning_rate": 5.069961552630537e-05, + "loss": 1.3798, + "step": 1272500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0697519760675925e-05, + "loss": 1.3773, + "step": 1273000 + }, + { + "epoch": 0.76, + "learning_rate": 5.069541979511536e-05, + "loss": 1.3593, + "step": 1273500 + }, + { + "epoch": 0.76, + "learning_rate": 5.069331982955479e-05, + "loss": 1.3903, + "step": 1274000 + }, + { + "epoch": 0.76, + "learning_rate": 5.069121986399423e-05, + "loss": 1.3781, + "step": 1274500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0689119898433665e-05, + "loss": 1.3771, + "step": 1275000 + }, + { + "epoch": 0.76, + "learning_rate": 5.06870199328731e-05, + "loss": 1.3793, + "step": 1275500 + }, + { + "epoch": 0.77, + "learning_rate": 5.068492416724366e-05, + "loss": 1.3676, + "step": 1276000 + }, + { + "epoch": 0.77, + "learning_rate": 5.068282420168309e-05, + "loss": 1.3566, + "step": 1276500 + }, + { + "epoch": 0.77, + "learning_rate": 5.0680724236122526e-05, + "loss": 1.3834, + "step": 1277000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0678632670424207e-05, + "loss": 1.3827, + "step": 1277500 + }, + { + "epoch": 0.77, + "learning_rate": 5.067653270486364e-05, + "loss": 1.3837, + "step": 1278000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0674432739303073e-05, + "loss": 1.3729, + "step": 1278500 + }, + { + "epoch": 0.77, + "learning_rate": 5.067233277374251e-05, + "loss": 1.3562, + "step": 1279000 + }, + { + "epoch": 0.77, + "learning_rate": 5.067023280818194e-05, + "loss": 1.3532, + "step": 1279500 + }, + { + "epoch": 0.77, + "learning_rate": 5.066813284262138e-05, + "loss": 1.3822, + "step": 1280000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0666032877060814e-05, + "loss": 1.3682, + "step": 1280500 + }, + { + "epoch": 0.77, + "learning_rate": 5.066393291150025e-05, + "loss": 1.3506, + "step": 1281000 + }, + { + "epoch": 0.77, + "learning_rate": 5.066183294593969e-05, + "loss": 1.3694, + "step": 1281500 + }, + { + "epoch": 0.77, + "learning_rate": 5.065973298037912e-05, + "loss": 1.3946, + "step": 1282000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0657633014818555e-05, + "loss": 1.3872, + "step": 1282500 + }, + { + "epoch": 0.77, + "learning_rate": 5.0655533049257995e-05, + "loss": 1.3448, + "step": 1283000 + }, + { + "epoch": 0.77, + "learning_rate": 5.065343728362855e-05, + "loss": 1.3682, + "step": 1283500 + }, + { + "epoch": 0.77, + "learning_rate": 5.065133731806798e-05, + "loss": 1.4006, + "step": 1284000 + }, + { + "epoch": 0.77, + "learning_rate": 5.064923735250742e-05, + "loss": 1.3558, + "step": 1284500 + }, + { + "epoch": 0.77, + "learning_rate": 5.0647137386946856e-05, + "loss": 1.3758, + "step": 1285000 + }, + { + "epoch": 0.77, + "learning_rate": 5.064503742138629e-05, + "loss": 1.3791, + "step": 1285500 + }, + { + "epoch": 0.77, + "learning_rate": 5.064293745582573e-05, + "loss": 1.3498, + "step": 1286000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0640837490265156e-05, + "loss": 1.3765, + "step": 1286500 + }, + { + "epoch": 0.77, + "learning_rate": 5.063873752470459e-05, + "loss": 1.3596, + "step": 1287000 + }, + { + "epoch": 0.77, + "learning_rate": 5.063664175907515e-05, + "loss": 1.3406, + "step": 1287500 + }, + { + "epoch": 0.77, + "learning_rate": 5.063454179351459e-05, + "loss": 1.3448, + "step": 1288000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0632441827954024e-05, + "loss": 1.3607, + "step": 1288500 + }, + { + "epoch": 0.77, + "learning_rate": 5.063034186239345e-05, + "loss": 1.3638, + "step": 1289000 + }, + { + "epoch": 0.77, + "learning_rate": 5.062824609676401e-05, + "loss": 1.3756, + "step": 1289500 + }, + { + "epoch": 0.77, + "learning_rate": 5.062614613120345e-05, + "loss": 1.3478, + "step": 1290000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0624050365574004e-05, + "loss": 1.3888, + "step": 1290500 + }, + { + "epoch": 0.77, + "learning_rate": 5.062195040001344e-05, + "loss": 1.3328, + "step": 1291000 + }, + { + "epoch": 0.77, + "learning_rate": 5.061985043445288e-05, + "loss": 1.3895, + "step": 1291500 + }, + { + "epoch": 0.77, + "learning_rate": 5.061775046889231e-05, + "loss": 1.3858, + "step": 1292000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0615650503331745e-05, + "loss": 1.3637, + "step": 1292500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0613550537771185e-05, + "loss": 1.3533, + "step": 1293000 + }, + { + "epoch": 0.78, + "learning_rate": 5.061145057221061e-05, + "loss": 1.3716, + "step": 1293500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0609350606650045e-05, + "loss": 1.3715, + "step": 1294000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0607254841020606e-05, + "loss": 1.3417, + "step": 1294500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0605154875460046e-05, + "loss": 1.3617, + "step": 1295000 + }, + { + "epoch": 0.78, + "learning_rate": 5.06030591098306e-05, + "loss": 1.358, + "step": 1295500 + }, + { + "epoch": 0.78, + "learning_rate": 5.060095914427003e-05, + "loss": 1.353, + "step": 1296000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0598859178709466e-05, + "loss": 1.3714, + "step": 1296500 + }, + { + "epoch": 0.78, + "learning_rate": 5.059675921314891e-05, + "loss": 1.3416, + "step": 1297000 + }, + { + "epoch": 0.78, + "learning_rate": 5.059465924758834e-05, + "loss": 1.3545, + "step": 1297500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0592563481958894e-05, + "loss": 1.3627, + "step": 1298000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0590463516398334e-05, + "loss": 1.3899, + "step": 1298500 + }, + { + "epoch": 0.78, + "learning_rate": 5.058836775076889e-05, + "loss": 1.3737, + "step": 1299000 + }, + { + "epoch": 0.78, + "learning_rate": 5.058626778520832e-05, + "loss": 1.3628, + "step": 1299500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0584167819647754e-05, + "loss": 1.3439, + "step": 1300000 + }, + { + "epoch": 0.78, + "eval_loss": 1.3037538528442383, + "eval_runtime": 1107.3433, + "eval_samples_per_second": 475.661, + "eval_steps_per_second": 79.277, + "step": 1300000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0582067854087195e-05, + "loss": 1.344, + "step": 1300500 + }, + { + "epoch": 0.78, + "learning_rate": 5.057996788852663e-05, + "loss": 1.3935, + "step": 1301000 + }, + { + "epoch": 0.78, + "learning_rate": 5.057786792296606e-05, + "loss": 1.3962, + "step": 1301500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0575772157336615e-05, + "loss": 1.3614, + "step": 1302000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0573672191776055e-05, + "loss": 1.3494, + "step": 1302500 + }, + { + "epoch": 0.78, + "learning_rate": 5.057157222621549e-05, + "loss": 1.3693, + "step": 1303000 + }, + { + "epoch": 0.78, + "learning_rate": 5.056947226065492e-05, + "loss": 1.3903, + "step": 1303500 + }, + { + "epoch": 0.78, + "learning_rate": 5.056737229509436e-05, + "loss": 1.3436, + "step": 1304000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0565272329533796e-05, + "loss": 1.3662, + "step": 1304500 + }, + { + "epoch": 0.78, + "learning_rate": 5.056317236397323e-05, + "loss": 1.3452, + "step": 1305000 + }, + { + "epoch": 0.78, + "learning_rate": 5.056107659834379e-05, + "loss": 1.3836, + "step": 1305500 + }, + { + "epoch": 0.78, + "learning_rate": 5.055897663278322e-05, + "loss": 1.3745, + "step": 1306000 + }, + { + "epoch": 0.78, + "learning_rate": 5.055687666722266e-05, + "loss": 1.3352, + "step": 1306500 + }, + { + "epoch": 0.78, + "learning_rate": 5.05547767016621e-05, + "loss": 1.3591, + "step": 1307000 + }, + { + "epoch": 0.78, + "learning_rate": 5.055267673610153e-05, + "loss": 1.3743, + "step": 1307500 + }, + { + "epoch": 0.78, + "learning_rate": 5.055057677054096e-05, + "loss": 1.3339, + "step": 1308000 + }, + { + "epoch": 0.78, + "learning_rate": 5.054848520484264e-05, + "loss": 1.363, + "step": 1308500 + }, + { + "epoch": 0.78, + "learning_rate": 5.054638523928207e-05, + "loss": 1.3596, + "step": 1309000 + }, + { + "epoch": 0.79, + "learning_rate": 5.054428527372151e-05, + "loss": 1.356, + "step": 1309500 + }, + { + "epoch": 0.79, + "learning_rate": 5.0542185308160945e-05, + "loss": 1.3246, + "step": 1310000 + }, + { + "epoch": 0.79, + "learning_rate": 5.054008534260038e-05, + "loss": 1.3558, + "step": 1310500 + }, + { + "epoch": 0.79, + "learning_rate": 5.053798537703982e-05, + "loss": 1.3723, + "step": 1311000 + }, + { + "epoch": 0.79, + "learning_rate": 5.053588541147925e-05, + "loss": 1.379, + "step": 1311500 + }, + { + "epoch": 0.79, + "learning_rate": 5.0533789645849805e-05, + "loss": 1.3505, + "step": 1312000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0531689680289246e-05, + "loss": 1.3125, + "step": 1312500 + }, + { + "epoch": 0.79, + "learning_rate": 5.052958971472868e-05, + "loss": 1.3397, + "step": 1313000 + }, + { + "epoch": 0.79, + "learning_rate": 5.052748974916811e-05, + "loss": 1.3812, + "step": 1313500 + }, + { + "epoch": 0.79, + "learning_rate": 5.052538978360755e-05, + "loss": 1.3588, + "step": 1314000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0523289818046986e-05, + "loss": 1.3531, + "step": 1314500 + }, + { + "epoch": 0.79, + "learning_rate": 5.052118985248641e-05, + "loss": 1.3294, + "step": 1315000 + }, + { + "epoch": 0.79, + "learning_rate": 5.051908988692585e-05, + "loss": 1.3517, + "step": 1315500 + }, + { + "epoch": 0.79, + "learning_rate": 5.051698992136529e-05, + "loss": 1.3449, + "step": 1316000 + }, + { + "epoch": 0.79, + "learning_rate": 5.051488995580472e-05, + "loss": 1.3732, + "step": 1316500 + }, + { + "epoch": 0.79, + "learning_rate": 5.051278999024416e-05, + "loss": 1.3561, + "step": 1317000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0510690024683594e-05, + "loss": 1.3669, + "step": 1317500 + }, + { + "epoch": 0.79, + "learning_rate": 5.050859005912303e-05, + "loss": 1.3783, + "step": 1318000 + }, + { + "epoch": 0.79, + "learning_rate": 5.050649009356247e-05, + "loss": 1.3262, + "step": 1318500 + }, + { + "epoch": 0.79, + "learning_rate": 5.05043901280019e-05, + "loss": 1.3714, + "step": 1319000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0502294362372455e-05, + "loss": 1.3824, + "step": 1319500 + }, + { + "epoch": 0.79, + "learning_rate": 5.0500194396811895e-05, + "loss": 1.3397, + "step": 1320000 + }, + { + "epoch": 0.79, + "learning_rate": 5.049809443125133e-05, + "loss": 1.373, + "step": 1320500 + }, + { + "epoch": 0.79, + "learning_rate": 5.049599446569076e-05, + "loss": 1.3412, + "step": 1321000 + }, + { + "epoch": 0.79, + "learning_rate": 5.04938945001302e-05, + "loss": 1.3345, + "step": 1321500 + }, + { + "epoch": 0.79, + "learning_rate": 5.0491794534569635e-05, + "loss": 1.3698, + "step": 1322000 + }, + { + "epoch": 0.79, + "learning_rate": 5.048969456900907e-05, + "loss": 1.3573, + "step": 1322500 + }, + { + "epoch": 0.79, + "learning_rate": 5.04875946034485e-05, + "loss": 1.3603, + "step": 1323000 + }, + { + "epoch": 0.79, + "learning_rate": 5.048549883781906e-05, + "loss": 1.3738, + "step": 1323500 + }, + { + "epoch": 0.79, + "learning_rate": 5.0483403072189616e-05, + "loss": 1.3813, + "step": 1324000 + }, + { + "epoch": 0.79, + "learning_rate": 5.048130310662905e-05, + "loss": 1.3621, + "step": 1324500 + }, + { + "epoch": 0.79, + "learning_rate": 5.047920314106848e-05, + "loss": 1.3689, + "step": 1325000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0477103175507923e-05, + "loss": 1.3878, + "step": 1325500 + }, + { + "epoch": 0.79, + "learning_rate": 5.047500740987848e-05, + "loss": 1.3583, + "step": 1326000 + }, + { + "epoch": 0.8, + "learning_rate": 5.047290744431791e-05, + "loss": 1.3273, + "step": 1326500 + }, + { + "epoch": 0.8, + "learning_rate": 5.047080747875735e-05, + "loss": 1.3863, + "step": 1327000 + }, + { + "epoch": 0.8, + "learning_rate": 5.0468707513196784e-05, + "loss": 1.3773, + "step": 1327500 + }, + { + "epoch": 0.8, + "learning_rate": 5.046660754763622e-05, + "loss": 1.3776, + "step": 1328000 + }, + { + "epoch": 0.8, + "learning_rate": 5.046450758207566e-05, + "loss": 1.3504, + "step": 1328500 + }, + { + "epoch": 0.8, + "learning_rate": 5.046240761651509e-05, + "loss": 1.3939, + "step": 1329000 + }, + { + "epoch": 0.8, + "learning_rate": 5.0460307650954525e-05, + "loss": 1.3695, + "step": 1329500 + }, + { + "epoch": 0.8, + "learning_rate": 5.045820768539396e-05, + "loss": 1.3504, + "step": 1330000 + }, + { + "epoch": 0.8, + "learning_rate": 5.045610771983339e-05, + "loss": 1.3887, + "step": 1330500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0454007754272825e-05, + "loss": 1.3703, + "step": 1331000 + }, + { + "epoch": 0.8, + "learning_rate": 5.0451907788712265e-05, + "loss": 1.3466, + "step": 1331500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0449812023082826e-05, + "loss": 1.3766, + "step": 1332000 + }, + { + "epoch": 0.8, + "learning_rate": 5.044771205752225e-05, + "loss": 1.3679, + "step": 1332500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0445612091961686e-05, + "loss": 1.3516, + "step": 1333000 + }, + { + "epoch": 0.8, + "learning_rate": 5.0443516326332246e-05, + "loss": 1.3815, + "step": 1333500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0441416360771686e-05, + "loss": 1.4009, + "step": 1334000 + }, + { + "epoch": 0.8, + "learning_rate": 5.043931639521112e-05, + "loss": 1.3667, + "step": 1334500 + }, + { + "epoch": 0.8, + "learning_rate": 5.043721642965055e-05, + "loss": 1.3997, + "step": 1335000 + }, + { + "epoch": 0.8, + "learning_rate": 5.043511646408999e-05, + "loss": 1.3618, + "step": 1335500 + }, + { + "epoch": 0.8, + "learning_rate": 5.043301649852942e-05, + "loss": 1.3986, + "step": 1336000 + }, + { + "epoch": 0.8, + "learning_rate": 5.043092073289998e-05, + "loss": 1.3771, + "step": 1336500 + }, + { + "epoch": 0.8, + "learning_rate": 5.042882076733942e-05, + "loss": 1.3695, + "step": 1337000 + }, + { + "epoch": 0.8, + "learning_rate": 5.0426725001709974e-05, + "loss": 1.3269, + "step": 1337500 + }, + { + "epoch": 0.8, + "learning_rate": 5.042462503614941e-05, + "loss": 1.3586, + "step": 1338000 + }, + { + "epoch": 0.8, + "learning_rate": 5.042252507058884e-05, + "loss": 1.357, + "step": 1338500 + }, + { + "epoch": 0.8, + "learning_rate": 5.042042510502828e-05, + "loss": 1.3575, + "step": 1339000 + }, + { + "epoch": 0.8, + "learning_rate": 5.041832513946771e-05, + "loss": 1.3306, + "step": 1339500 + }, + { + "epoch": 0.8, + "learning_rate": 5.041622517390714e-05, + "loss": 1.3897, + "step": 1340000 + }, + { + "epoch": 0.8, + "learning_rate": 5.041412520834658e-05, + "loss": 1.3571, + "step": 1340500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0412025242786015e-05, + "loss": 1.3571, + "step": 1341000 + }, + { + "epoch": 0.8, + "learning_rate": 5.040992527722545e-05, + "loss": 1.3547, + "step": 1341500 + }, + { + "epoch": 0.8, + "learning_rate": 5.040782531166489e-05, + "loss": 1.382, + "step": 1342000 + }, + { + "epoch": 0.8, + "learning_rate": 5.040572534610432e-05, + "loss": 1.3588, + "step": 1342500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0403629580474876e-05, + "loss": 1.3556, + "step": 1343000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0401529614914316e-05, + "loss": 1.3782, + "step": 1343500 + }, + { + "epoch": 0.81, + "learning_rate": 5.039942964935375e-05, + "loss": 1.3421, + "step": 1344000 + }, + { + "epoch": 0.81, + "learning_rate": 5.039732968379318e-05, + "loss": 1.368, + "step": 1344500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0395229718232624e-05, + "loss": 1.3513, + "step": 1345000 + }, + { + "epoch": 0.81, + "learning_rate": 5.039312975267206e-05, + "loss": 1.3517, + "step": 1345500 + }, + { + "epoch": 0.81, + "learning_rate": 5.039102978711149e-05, + "loss": 1.3703, + "step": 1346000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0388934021482044e-05, + "loss": 1.3803, + "step": 1346500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0386834055921484e-05, + "loss": 1.3719, + "step": 1347000 + }, + { + "epoch": 0.81, + "learning_rate": 5.038473409036092e-05, + "loss": 1.3595, + "step": 1347500 + }, + { + "epoch": 0.81, + "learning_rate": 5.038263832473147e-05, + "loss": 1.3555, + "step": 1348000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0380538359170905e-05, + "loss": 1.368, + "step": 1348500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0378438393610345e-05, + "loss": 1.3555, + "step": 1349000 + }, + { + "epoch": 0.81, + "learning_rate": 5.037633842804978e-05, + "loss": 1.3669, + "step": 1349500 + }, + { + "epoch": 0.81, + "learning_rate": 5.037423846248921e-05, + "loss": 1.3711, + "step": 1350000 + }, + { + "epoch": 0.81, + "learning_rate": 5.037213849692865e-05, + "loss": 1.3683, + "step": 1350500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0370038531368086e-05, + "loss": 1.3621, + "step": 1351000 + }, + { + "epoch": 0.81, + "learning_rate": 5.036793856580752e-05, + "loss": 1.3727, + "step": 1351500 + }, + { + "epoch": 0.81, + "learning_rate": 5.036584280017808e-05, + "loss": 1.3427, + "step": 1352000 + }, + { + "epoch": 0.81, + "learning_rate": 5.036374283461751e-05, + "loss": 1.3463, + "step": 1352500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0361642869056946e-05, + "loss": 1.3728, + "step": 1353000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0359542903496387e-05, + "loss": 1.3597, + "step": 1353500 + }, + { + "epoch": 0.81, + "learning_rate": 5.035744713786694e-05, + "loss": 1.3237, + "step": 1354000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0355347172306374e-05, + "loss": 1.3856, + "step": 1354500 + }, + { + "epoch": 0.81, + "learning_rate": 5.035324720674581e-05, + "loss": 1.3451, + "step": 1355000 + }, + { + "epoch": 0.81, + "learning_rate": 5.035114724118525e-05, + "loss": 1.3661, + "step": 1355500 + }, + { + "epoch": 0.81, + "learning_rate": 5.034904727562468e-05, + "loss": 1.3572, + "step": 1356000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0346947310064114e-05, + "loss": 1.3456, + "step": 1356500 + }, + { + "epoch": 0.81, + "learning_rate": 5.034484734450355e-05, + "loss": 1.3603, + "step": 1357000 + }, + { + "epoch": 0.81, + "learning_rate": 5.034274737894298e-05, + "loss": 1.3491, + "step": 1357500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0340647413382415e-05, + "loss": 1.3883, + "step": 1358000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0338551647752975e-05, + "loss": 1.3518, + "step": 1358500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0336451682192415e-05, + "loss": 1.3455, + "step": 1359000 + }, + { + "epoch": 0.82, + "learning_rate": 5.033435171663184e-05, + "loss": 1.3467, + "step": 1359500 + }, + { + "epoch": 0.82, + "learning_rate": 5.033225175107128e-05, + "loss": 1.3773, + "step": 1360000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0330151785510716e-05, + "loss": 1.3622, + "step": 1360500 + }, + { + "epoch": 0.82, + "learning_rate": 5.032805181995015e-05, + "loss": 1.3367, + "step": 1361000 + }, + { + "epoch": 0.82, + "learning_rate": 5.032595185438959e-05, + "loss": 1.364, + "step": 1361500 + }, + { + "epoch": 0.82, + "learning_rate": 5.032385608876014e-05, + "loss": 1.3671, + "step": 1362000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0321756123199576e-05, + "loss": 1.3471, + "step": 1362500 + }, + { + "epoch": 0.82, + "learning_rate": 5.031966035757014e-05, + "loss": 1.3458, + "step": 1363000 + }, + { + "epoch": 0.82, + "learning_rate": 5.031756039200957e-05, + "loss": 1.3724, + "step": 1363500 + }, + { + "epoch": 0.82, + "learning_rate": 5.031546042644901e-05, + "loss": 1.3923, + "step": 1364000 + }, + { + "epoch": 0.82, + "learning_rate": 5.031336046088844e-05, + "loss": 1.3735, + "step": 1364500 + }, + { + "epoch": 0.82, + "learning_rate": 5.031126049532787e-05, + "loss": 1.366, + "step": 1365000 + }, + { + "epoch": 0.82, + "learning_rate": 5.030916052976731e-05, + "loss": 1.3841, + "step": 1365500 + }, + { + "epoch": 0.82, + "learning_rate": 5.0307060564206744e-05, + "loss": 1.3393, + "step": 1366000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0304960598646184e-05, + "loss": 1.3327, + "step": 1366500 + }, + { + "epoch": 0.82, + "learning_rate": 5.030286063308562e-05, + "loss": 1.3566, + "step": 1367000 + }, + { + "epoch": 0.82, + "learning_rate": 5.030076066752505e-05, + "loss": 1.3514, + "step": 1367500 + }, + { + "epoch": 0.82, + "learning_rate": 5.029866070196449e-05, + "loss": 1.3473, + "step": 1368000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0296560736403925e-05, + "loss": 1.3618, + "step": 1368500 + }, + { + "epoch": 0.82, + "learning_rate": 5.029446497077448e-05, + "loss": 1.3708, + "step": 1369000 + }, + { + "epoch": 0.82, + "learning_rate": 5.029236920514503e-05, + "loss": 1.37, + "step": 1369500 + }, + { + "epoch": 0.82, + "learning_rate": 5.029027343951559e-05, + "loss": 1.3517, + "step": 1370000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0288173473955026e-05, + "loss": 1.3619, + "step": 1370500 + }, + { + "epoch": 0.82, + "learning_rate": 5.0286073508394466e-05, + "loss": 1.3541, + "step": 1371000 + }, + { + "epoch": 0.82, + "learning_rate": 5.028397354283389e-05, + "loss": 1.339, + "step": 1371500 + }, + { + "epoch": 0.82, + "learning_rate": 5.028187357727333e-05, + "loss": 1.3613, + "step": 1372000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0279773611712767e-05, + "loss": 1.3839, + "step": 1372500 + }, + { + "epoch": 0.82, + "learning_rate": 5.02776736461522e-05, + "loss": 1.3925, + "step": 1373000 + }, + { + "epoch": 0.82, + "learning_rate": 5.027557368059164e-05, + "loss": 1.372, + "step": 1373500 + }, + { + "epoch": 0.82, + "learning_rate": 5.0273473715031074e-05, + "loss": 1.3491, + "step": 1374000 + }, + { + "epoch": 0.82, + "learning_rate": 5.027137374947051e-05, + "loss": 1.3841, + "step": 1374500 + }, + { + "epoch": 0.82, + "learning_rate": 5.026927798384106e-05, + "loss": 1.3851, + "step": 1375000 + }, + { + "epoch": 0.82, + "learning_rate": 5.02671780182805e-05, + "loss": 1.3629, + "step": 1375500 + }, + { + "epoch": 0.82, + "learning_rate": 5.0265078052719934e-05, + "loss": 1.3333, + "step": 1376000 + }, + { + "epoch": 0.83, + "learning_rate": 5.026297808715937e-05, + "loss": 1.3728, + "step": 1376500 + }, + { + "epoch": 0.83, + "learning_rate": 5.026087812159881e-05, + "loss": 1.359, + "step": 1377000 + }, + { + "epoch": 0.83, + "learning_rate": 5.025877815603824e-05, + "loss": 1.3648, + "step": 1377500 + }, + { + "epoch": 0.83, + "learning_rate": 5.0256678190477675e-05, + "loss": 1.3634, + "step": 1378000 + }, + { + "epoch": 0.83, + "learning_rate": 5.0254578224917115e-05, + "loss": 1.3242, + "step": 1378500 + }, + { + "epoch": 0.83, + "learning_rate": 5.025248245928767e-05, + "loss": 1.3445, + "step": 1379000 + }, + { + "epoch": 0.83, + "learning_rate": 5.02503824937271e-05, + "loss": 1.3264, + "step": 1379500 + }, + { + "epoch": 0.83, + "learning_rate": 5.0248282528166536e-05, + "loss": 1.3571, + "step": 1380000 + }, + { + "epoch": 0.83, + "learning_rate": 5.0246182562605976e-05, + "loss": 1.3501, + "step": 1380500 + }, + { + "epoch": 0.83, + "learning_rate": 5.024408259704541e-05, + "loss": 1.3979, + "step": 1381000 + }, + { + "epoch": 0.83, + "learning_rate": 5.024198683141596e-05, + "loss": 1.3461, + "step": 1381500 + }, + { + "epoch": 0.83, + "learning_rate": 5.02398868658554e-05, + "loss": 1.3789, + "step": 1382000 + }, + { + "epoch": 0.83, + "learning_rate": 5.023778690029484e-05, + "loss": 1.365, + "step": 1382500 + }, + { + "epoch": 0.83, + "learning_rate": 5.023568693473427e-05, + "loss": 1.3791, + "step": 1383000 + }, + { + "epoch": 0.83, + "learning_rate": 5.023358696917371e-05, + "loss": 1.3781, + "step": 1383500 + }, + { + "epoch": 0.83, + "learning_rate": 5.023148700361314e-05, + "loss": 1.3572, + "step": 1384000 + }, + { + "epoch": 0.83, + "learning_rate": 5.022938703805257e-05, + "loss": 1.3567, + "step": 1384500 + }, + { + "epoch": 0.83, + "learning_rate": 5.022728707249201e-05, + "loss": 1.3838, + "step": 1385000 + }, + { + "epoch": 0.83, + "learning_rate": 5.022519130686257e-05, + "loss": 1.3742, + "step": 1385500 + }, + { + "epoch": 0.83, + "learning_rate": 5.0223095541233125e-05, + "loss": 1.3452, + "step": 1386000 + }, + { + "epoch": 0.83, + "learning_rate": 5.022099557567256e-05, + "loss": 1.3815, + "step": 1386500 + }, + { + "epoch": 0.83, + "learning_rate": 5.021889981004311e-05, + "loss": 1.3345, + "step": 1387000 + }, + { + "epoch": 0.83, + "learning_rate": 5.021679984448255e-05, + "loss": 1.3558, + "step": 1387500 + }, + { + "epoch": 0.83, + "learning_rate": 5.0214699878921985e-05, + "loss": 1.3787, + "step": 1388000 + }, + { + "epoch": 0.83, + "learning_rate": 5.021259991336142e-05, + "loss": 1.3451, + "step": 1388500 + }, + { + "epoch": 0.83, + "learning_rate": 5.021049994780086e-05, + "loss": 1.3576, + "step": 1389000 + }, + { + "epoch": 0.83, + "learning_rate": 5.020840418217141e-05, + "loss": 1.3378, + "step": 1389500 + }, + { + "epoch": 0.83, + "learning_rate": 5.0206304216610846e-05, + "loss": 1.3333, + "step": 1390000 + }, + { + "epoch": 0.83, + "learning_rate": 5.020420425105028e-05, + "loss": 1.3448, + "step": 1390500 + }, + { + "epoch": 0.83, + "learning_rate": 5.020210428548972e-05, + "loss": 1.3811, + "step": 1391000 + }, + { + "epoch": 0.83, + "learning_rate": 5.020000431992915e-05, + "loss": 1.3399, + "step": 1391500 + }, + { + "epoch": 0.83, + "learning_rate": 5.019790855429971e-05, + "loss": 1.3733, + "step": 1392000 + }, + { + "epoch": 0.83, + "learning_rate": 5.019580858873914e-05, + "loss": 1.3372, + "step": 1392500 + }, + { + "epoch": 0.84, + "learning_rate": 5.019370862317858e-05, + "loss": 1.3775, + "step": 1393000 + }, + { + "epoch": 0.84, + "learning_rate": 5.0191608657618014e-05, + "loss": 1.3937, + "step": 1393500 + }, + { + "epoch": 0.84, + "learning_rate": 5.018951289198857e-05, + "loss": 1.3529, + "step": 1394000 + }, + { + "epoch": 0.84, + "learning_rate": 5.018741292642801e-05, + "loss": 1.338, + "step": 1394500 + }, + { + "epoch": 0.84, + "learning_rate": 5.018531296086744e-05, + "loss": 1.3768, + "step": 1395000 + }, + { + "epoch": 0.84, + "learning_rate": 5.0183212995306875e-05, + "loss": 1.3556, + "step": 1395500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0181113029746315e-05, + "loss": 1.345, + "step": 1396000 + }, + { + "epoch": 0.84, + "learning_rate": 5.017901306418575e-05, + "loss": 1.3234, + "step": 1396500 + }, + { + "epoch": 0.84, + "learning_rate": 5.017691309862518e-05, + "loss": 1.3617, + "step": 1397000 + }, + { + "epoch": 0.84, + "learning_rate": 5.017481313306462e-05, + "loss": 1.3563, + "step": 1397500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0172713167504056e-05, + "loss": 1.3239, + "step": 1398000 + }, + { + "epoch": 0.84, + "learning_rate": 5.017061320194348e-05, + "loss": 1.3378, + "step": 1398500 + }, + { + "epoch": 0.84, + "learning_rate": 5.016851743631404e-05, + "loss": 1.354, + "step": 1399000 + }, + { + "epoch": 0.84, + "learning_rate": 5.016641747075348e-05, + "loss": 1.3327, + "step": 1399500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0164317505192916e-05, + "loss": 1.3511, + "step": 1400000 + }, + { + "epoch": 0.84, + "eval_loss": 1.2957533597946167, + "eval_runtime": 1117.7468, + "eval_samples_per_second": 471.234, + "eval_steps_per_second": 78.539, + "step": 1400000 + }, + { + "epoch": 0.84, + "learning_rate": 5.016221753963235e-05, + "loss": 1.3559, + "step": 1400500 + }, + { + "epoch": 0.84, + "learning_rate": 5.016011757407178e-05, + "loss": 1.3687, + "step": 1401000 + }, + { + "epoch": 0.84, + "learning_rate": 5.015801760851122e-05, + "loss": 1.3554, + "step": 1401500 + }, + { + "epoch": 0.84, + "learning_rate": 5.015592184288178e-05, + "loss": 1.36, + "step": 1402000 + }, + { + "epoch": 0.84, + "learning_rate": 5.015382187732121e-05, + "loss": 1.3392, + "step": 1402500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0151721911760644e-05, + "loss": 1.3502, + "step": 1403000 + }, + { + "epoch": 0.84, + "learning_rate": 5.014962194620008e-05, + "loss": 1.3557, + "step": 1403500 + }, + { + "epoch": 0.84, + "learning_rate": 5.014752198063952e-05, + "loss": 1.3468, + "step": 1404000 + }, + { + "epoch": 0.84, + "learning_rate": 5.014542201507895e-05, + "loss": 1.3427, + "step": 1404500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0143322049518385e-05, + "loss": 1.3872, + "step": 1405000 + }, + { + "epoch": 0.84, + "learning_rate": 5.0141222083957825e-05, + "loss": 1.3528, + "step": 1405500 + }, + { + "epoch": 0.84, + "learning_rate": 5.013912631832838e-05, + "loss": 1.3865, + "step": 1406000 + }, + { + "epoch": 0.84, + "learning_rate": 5.013702635276781e-05, + "loss": 1.3521, + "step": 1406500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0134926387207245e-05, + "loss": 1.369, + "step": 1407000 + }, + { + "epoch": 0.84, + "learning_rate": 5.0132826421646686e-05, + "loss": 1.3716, + "step": 1407500 + }, + { + "epoch": 0.84, + "learning_rate": 5.013072645608612e-05, + "loss": 1.3633, + "step": 1408000 + }, + { + "epoch": 0.84, + "learning_rate": 5.012862649052555e-05, + "loss": 1.3631, + "step": 1408500 + }, + { + "epoch": 0.84, + "learning_rate": 5.012652652496499e-05, + "loss": 1.367, + "step": 1409000 + }, + { + "epoch": 0.85, + "learning_rate": 5.0124426559404426e-05, + "loss": 1.3253, + "step": 1409500 + }, + { + "epoch": 0.85, + "learning_rate": 5.012232659384386e-05, + "loss": 1.3425, + "step": 1410000 + }, + { + "epoch": 0.85, + "learning_rate": 5.01202266282833e-05, + "loss": 1.3829, + "step": 1410500 + }, + { + "epoch": 0.85, + "learning_rate": 5.011812666272273e-05, + "loss": 1.3671, + "step": 1411000 + }, + { + "epoch": 0.85, + "learning_rate": 5.011602669716216e-05, + "loss": 1.3428, + "step": 1411500 + }, + { + "epoch": 0.85, + "learning_rate": 5.011393093153273e-05, + "loss": 1.356, + "step": 1412000 + }, + { + "epoch": 0.85, + "learning_rate": 5.011183096597216e-05, + "loss": 1.3526, + "step": 1412500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0109731000411594e-05, + "loss": 1.3665, + "step": 1413000 + }, + { + "epoch": 0.85, + "learning_rate": 5.010763103485103e-05, + "loss": 1.3537, + "step": 1413500 + }, + { + "epoch": 0.85, + "learning_rate": 5.010553106929046e-05, + "loss": 1.3535, + "step": 1414000 + }, + { + "epoch": 0.85, + "learning_rate": 5.0103431103729894e-05, + "loss": 1.3784, + "step": 1414500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0101335338100455e-05, + "loss": 1.3379, + "step": 1415000 + }, + { + "epoch": 0.85, + "learning_rate": 5.0099235372539895e-05, + "loss": 1.3678, + "step": 1415500 + }, + { + "epoch": 0.85, + "learning_rate": 5.009713540697932e-05, + "loss": 1.3478, + "step": 1416000 + }, + { + "epoch": 0.85, + "learning_rate": 5.0095035441418755e-05, + "loss": 1.3404, + "step": 1416500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0092935475858195e-05, + "loss": 1.3638, + "step": 1417000 + }, + { + "epoch": 0.85, + "learning_rate": 5.009083551029763e-05, + "loss": 1.3543, + "step": 1417500 + }, + { + "epoch": 0.85, + "learning_rate": 5.008873974466818e-05, + "loss": 1.3728, + "step": 1418000 + }, + { + "epoch": 0.85, + "learning_rate": 5.008663977910762e-05, + "loss": 1.3341, + "step": 1418500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0084539813547056e-05, + "loss": 1.3762, + "step": 1419000 + }, + { + "epoch": 0.85, + "learning_rate": 5.008243984798649e-05, + "loss": 1.3458, + "step": 1419500 + }, + { + "epoch": 0.85, + "learning_rate": 5.008034408235705e-05, + "loss": 1.3447, + "step": 1420000 + }, + { + "epoch": 0.85, + "learning_rate": 5.0078244116796483e-05, + "loss": 1.3587, + "step": 1420500 + }, + { + "epoch": 0.85, + "learning_rate": 5.007614415123592e-05, + "loss": 1.3616, + "step": 1421000 + }, + { + "epoch": 0.85, + "learning_rate": 5.007404418567535e-05, + "loss": 1.3597, + "step": 1421500 + }, + { + "epoch": 0.85, + "learning_rate": 5.007194842004591e-05, + "loss": 1.3673, + "step": 1422000 + }, + { + "epoch": 0.85, + "learning_rate": 5.0069852654416464e-05, + "loss": 1.3474, + "step": 1422500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0067752688855905e-05, + "loss": 1.3718, + "step": 1423000 + }, + { + "epoch": 0.85, + "learning_rate": 5.006565272329534e-05, + "loss": 1.3493, + "step": 1423500 + }, + { + "epoch": 0.85, + "learning_rate": 5.006355275773477e-05, + "loss": 1.3535, + "step": 1424000 + }, + { + "epoch": 0.85, + "learning_rate": 5.006145279217421e-05, + "loss": 1.353, + "step": 1424500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0059352826613645e-05, + "loss": 1.3666, + "step": 1425000 + }, + { + "epoch": 0.85, + "learning_rate": 5.005725286105308e-05, + "loss": 1.3343, + "step": 1425500 + }, + { + "epoch": 0.85, + "learning_rate": 5.005515709542364e-05, + "loss": 1.368, + "step": 1426000 + }, + { + "epoch": 0.86, + "learning_rate": 5.005305712986307e-05, + "loss": 1.3494, + "step": 1426500 + }, + { + "epoch": 0.86, + "learning_rate": 5.0050957164302506e-05, + "loss": 1.3604, + "step": 1427000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0048857198741946e-05, + "loss": 1.3449, + "step": 1427500 + }, + { + "epoch": 0.86, + "learning_rate": 5.004675723318137e-05, + "loss": 1.3752, + "step": 1428000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0044657267620806e-05, + "loss": 1.3549, + "step": 1428500 + }, + { + "epoch": 0.86, + "learning_rate": 5.0042557302060246e-05, + "loss": 1.3495, + "step": 1429000 + }, + { + "epoch": 0.86, + "learning_rate": 5.004045733649968e-05, + "loss": 1.3743, + "step": 1429500 + }, + { + "epoch": 0.86, + "learning_rate": 5.0038357370939113e-05, + "loss": 1.3473, + "step": 1430000 + }, + { + "epoch": 0.86, + "learning_rate": 5.003626160530967e-05, + "loss": 1.3312, + "step": 1430500 + }, + { + "epoch": 0.86, + "learning_rate": 5.003416163974911e-05, + "loss": 1.3626, + "step": 1431000 + }, + { + "epoch": 0.86, + "learning_rate": 5.003206167418854e-05, + "loss": 1.3733, + "step": 1431500 + }, + { + "epoch": 0.86, + "learning_rate": 5.0029961708627974e-05, + "loss": 1.3673, + "step": 1432000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0027865942998534e-05, + "loss": 1.3973, + "step": 1432500 + }, + { + "epoch": 0.86, + "learning_rate": 5.002576597743797e-05, + "loss": 1.3579, + "step": 1433000 + }, + { + "epoch": 0.86, + "learning_rate": 5.00236660118774e-05, + "loss": 1.3774, + "step": 1433500 + }, + { + "epoch": 0.86, + "learning_rate": 5.002157024624796e-05, + "loss": 1.3346, + "step": 1434000 + }, + { + "epoch": 0.86, + "learning_rate": 5.00194702806874e-05, + "loss": 1.3529, + "step": 1434500 + }, + { + "epoch": 0.86, + "learning_rate": 5.001737031512683e-05, + "loss": 1.3542, + "step": 1435000 + }, + { + "epoch": 0.86, + "learning_rate": 5.001527034956626e-05, + "loss": 1.3794, + "step": 1435500 + }, + { + "epoch": 0.86, + "learning_rate": 5.00131703840057e-05, + "loss": 1.3467, + "step": 1436000 + }, + { + "epoch": 0.86, + "learning_rate": 5.001107461837626e-05, + "loss": 1.3688, + "step": 1436500 + }, + { + "epoch": 0.86, + "learning_rate": 5.0008974652815696e-05, + "loss": 1.3703, + "step": 1437000 + }, + { + "epoch": 0.86, + "learning_rate": 5.000687468725512e-05, + "loss": 1.3655, + "step": 1437500 + }, + { + "epoch": 0.86, + "learning_rate": 5.000477472169456e-05, + "loss": 1.3403, + "step": 1438000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0002674756133997e-05, + "loss": 1.3419, + "step": 1438500 + }, + { + "epoch": 0.86, + "learning_rate": 5.000057479057343e-05, + "loss": 1.3477, + "step": 1439000 + }, + { + "epoch": 0.86, + "learning_rate": 4.999847902494399e-05, + "loss": 1.3568, + "step": 1439500 + }, + { + "epoch": 0.86, + "learning_rate": 4.9996379059383424e-05, + "loss": 1.3574, + "step": 1440000 + }, + { + "epoch": 0.86, + "learning_rate": 4.999427909382286e-05, + "loss": 1.3569, + "step": 1440500 + }, + { + "epoch": 0.86, + "learning_rate": 4.99921791282623e-05, + "loss": 1.3307, + "step": 1441000 + }, + { + "epoch": 0.86, + "learning_rate": 4.999007916270173e-05, + "loss": 1.3464, + "step": 1441500 + }, + { + "epoch": 0.86, + "learning_rate": 4.9987983397072285e-05, + "loss": 1.3494, + "step": 1442000 + }, + { + "epoch": 0.86, + "learning_rate": 4.998588343151172e-05, + "loss": 1.3825, + "step": 1442500 + }, + { + "epoch": 0.87, + "learning_rate": 4.998378346595116e-05, + "loss": 1.3466, + "step": 1443000 + }, + { + "epoch": 0.87, + "learning_rate": 4.998168350039059e-05, + "loss": 1.3645, + "step": 1443500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9979583534830025e-05, + "loss": 1.3432, + "step": 1444000 + }, + { + "epoch": 0.87, + "learning_rate": 4.9977483569269465e-05, + "loss": 1.3897, + "step": 1444500 + }, + { + "epoch": 0.87, + "learning_rate": 4.99753836037089e-05, + "loss": 1.3479, + "step": 1445000 + }, + { + "epoch": 0.87, + "learning_rate": 4.997328363814833e-05, + "loss": 1.3598, + "step": 1445500 + }, + { + "epoch": 0.87, + "learning_rate": 4.997118367258777e-05, + "loss": 1.3676, + "step": 1446000 + }, + { + "epoch": 0.87, + "learning_rate": 4.9969087906958326e-05, + "loss": 1.3521, + "step": 1446500 + }, + { + "epoch": 0.87, + "learning_rate": 4.996698794139776e-05, + "loss": 1.3406, + "step": 1447000 + }, + { + "epoch": 0.87, + "learning_rate": 4.996488797583719e-05, + "loss": 1.3737, + "step": 1447500 + }, + { + "epoch": 0.87, + "learning_rate": 4.996278801027663e-05, + "loss": 1.3665, + "step": 1448000 + }, + { + "epoch": 0.87, + "learning_rate": 4.996069224464719e-05, + "loss": 1.3931, + "step": 1448500 + }, + { + "epoch": 0.87, + "learning_rate": 4.995859227908662e-05, + "loss": 1.34, + "step": 1449000 + }, + { + "epoch": 0.87, + "learning_rate": 4.995649231352606e-05, + "loss": 1.3524, + "step": 1449500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9954392347965494e-05, + "loss": 1.3403, + "step": 1450000 + }, + { + "epoch": 0.87, + "learning_rate": 4.995229238240493e-05, + "loss": 1.3441, + "step": 1450500 + }, + { + "epoch": 0.87, + "learning_rate": 4.995019241684437e-05, + "loss": 1.3675, + "step": 1451000 + }, + { + "epoch": 0.87, + "learning_rate": 4.99480924512838e-05, + "loss": 1.3558, + "step": 1451500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9945992485723235e-05, + "loss": 1.3145, + "step": 1452000 + }, + { + "epoch": 0.87, + "learning_rate": 4.994389252016267e-05, + "loss": 1.3376, + "step": 1452500 + }, + { + "epoch": 0.87, + "learning_rate": 4.99417925546021e-05, + "loss": 1.3487, + "step": 1453000 + }, + { + "epoch": 0.87, + "learning_rate": 4.9939692589041535e-05, + "loss": 1.3964, + "step": 1453500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9937596823412095e-05, + "loss": 1.384, + "step": 1454000 + }, + { + "epoch": 0.87, + "learning_rate": 4.993550105778265e-05, + "loss": 1.3288, + "step": 1454500 + }, + { + "epoch": 0.87, + "learning_rate": 4.993340109222209e-05, + "loss": 1.3658, + "step": 1455000 + }, + { + "epoch": 0.87, + "learning_rate": 4.993130112666152e-05, + "loss": 1.3944, + "step": 1455500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9929201161100956e-05, + "loss": 1.3843, + "step": 1456000 + }, + { + "epoch": 0.87, + "learning_rate": 4.9927101195540396e-05, + "loss": 1.3401, + "step": 1456500 + }, + { + "epoch": 0.87, + "learning_rate": 4.992500122997982e-05, + "loss": 1.3714, + "step": 1457000 + }, + { + "epoch": 0.87, + "learning_rate": 4.992290126441926e-05, + "loss": 1.3549, + "step": 1457500 + }, + { + "epoch": 0.87, + "learning_rate": 4.99208012988587e-05, + "loss": 1.3391, + "step": 1458000 + }, + { + "epoch": 0.87, + "learning_rate": 4.991870133329813e-05, + "loss": 1.3782, + "step": 1458500 + }, + { + "epoch": 0.87, + "learning_rate": 4.991660136773757e-05, + "loss": 1.3308, + "step": 1459000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9914501402177004e-05, + "loss": 1.3384, + "step": 1459500 + }, + { + "epoch": 0.88, + "learning_rate": 4.991240143661644e-05, + "loss": 1.3612, + "step": 1460000 + }, + { + "epoch": 0.88, + "learning_rate": 4.991030567098699e-05, + "loss": 1.3616, + "step": 1460500 + }, + { + "epoch": 0.88, + "learning_rate": 4.990820570542643e-05, + "loss": 1.3731, + "step": 1461000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9906105739865865e-05, + "loss": 1.3501, + "step": 1461500 + }, + { + "epoch": 0.88, + "learning_rate": 4.99040057743053e-05, + "loss": 1.3622, + "step": 1462000 + }, + { + "epoch": 0.88, + "learning_rate": 4.990191000867585e-05, + "loss": 1.3907, + "step": 1462500 + }, + { + "epoch": 0.88, + "learning_rate": 4.989981424304641e-05, + "loss": 1.3486, + "step": 1463000 + }, + { + "epoch": 0.88, + "learning_rate": 4.989771427748585e-05, + "loss": 1.3768, + "step": 1463500 + }, + { + "epoch": 0.88, + "learning_rate": 4.9895614311925286e-05, + "loss": 1.3447, + "step": 1464000 + }, + { + "epoch": 0.88, + "learning_rate": 4.989351434636472e-05, + "loss": 1.3832, + "step": 1464500 + }, + { + "epoch": 0.88, + "learning_rate": 4.989141438080415e-05, + "loss": 1.3687, + "step": 1465000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9889314415243586e-05, + "loss": 1.3254, + "step": 1465500 + }, + { + "epoch": 0.88, + "learning_rate": 4.9887218649614146e-05, + "loss": 1.3592, + "step": 1466000 + }, + { + "epoch": 0.88, + "learning_rate": 4.988511868405358e-05, + "loss": 1.3441, + "step": 1466500 + }, + { + "epoch": 0.88, + "learning_rate": 4.988301871849301e-05, + "loss": 1.3329, + "step": 1467000 + }, + { + "epoch": 0.88, + "learning_rate": 4.988091875293245e-05, + "loss": 1.327, + "step": 1467500 + }, + { + "epoch": 0.88, + "learning_rate": 4.987882298730301e-05, + "loss": 1.3498, + "step": 1468000 + }, + { + "epoch": 0.88, + "learning_rate": 4.987672302174245e-05, + "loss": 1.3766, + "step": 1468500 + }, + { + "epoch": 0.88, + "learning_rate": 4.9874623056181874e-05, + "loss": 1.3593, + "step": 1469000 + }, + { + "epoch": 0.88, + "learning_rate": 4.987252309062131e-05, + "loss": 1.3689, + "step": 1469500 + }, + { + "epoch": 0.88, + "learning_rate": 4.987042312506075e-05, + "loss": 1.3375, + "step": 1470000 + }, + { + "epoch": 0.88, + "learning_rate": 4.986832315950018e-05, + "loss": 1.3634, + "step": 1470500 + }, + { + "epoch": 0.88, + "learning_rate": 4.986622319393962e-05, + "loss": 1.3612, + "step": 1471000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9864123228379055e-05, + "loss": 1.3619, + "step": 1471500 + }, + { + "epoch": 0.88, + "learning_rate": 4.986202746274961e-05, + "loss": 1.386, + "step": 1472000 + }, + { + "epoch": 0.88, + "learning_rate": 4.985992749718904e-05, + "loss": 1.343, + "step": 1472500 + }, + { + "epoch": 0.88, + "learning_rate": 4.985782753162848e-05, + "loss": 1.3386, + "step": 1473000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9855727566067916e-05, + "loss": 1.3211, + "step": 1473500 + }, + { + "epoch": 0.88, + "learning_rate": 4.985363180043847e-05, + "loss": 1.365, + "step": 1474000 + }, + { + "epoch": 0.88, + "learning_rate": 4.98515318348779e-05, + "loss": 1.3443, + "step": 1474500 + }, + { + "epoch": 0.88, + "learning_rate": 4.984943186931734e-05, + "loss": 1.3525, + "step": 1475000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9847331903756776e-05, + "loss": 1.3659, + "step": 1475500 + }, + { + "epoch": 0.88, + "learning_rate": 4.984523613812733e-05, + "loss": 1.3623, + "step": 1476000 + }, + { + "epoch": 0.89, + "learning_rate": 4.984314037249789e-05, + "loss": 1.3571, + "step": 1476500 + }, + { + "epoch": 0.89, + "learning_rate": 4.9841040406937324e-05, + "loss": 1.3557, + "step": 1477000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9838940441376764e-05, + "loss": 1.3724, + "step": 1477500 + }, + { + "epoch": 0.89, + "learning_rate": 4.98368404758162e-05, + "loss": 1.3603, + "step": 1478000 + }, + { + "epoch": 0.89, + "learning_rate": 4.983474051025563e-05, + "loss": 1.3528, + "step": 1478500 + }, + { + "epoch": 0.89, + "learning_rate": 4.983264474462619e-05, + "loss": 1.35, + "step": 1479000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9830544779065625e-05, + "loss": 1.3552, + "step": 1479500 + }, + { + "epoch": 0.89, + "learning_rate": 4.982844481350506e-05, + "loss": 1.3568, + "step": 1480000 + }, + { + "epoch": 0.89, + "learning_rate": 4.98263448479445e-05, + "loss": 1.3534, + "step": 1480500 + }, + { + "epoch": 0.89, + "learning_rate": 4.982424908231505e-05, + "loss": 1.327, + "step": 1481000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9822149116754485e-05, + "loss": 1.357, + "step": 1481500 + }, + { + "epoch": 0.89, + "learning_rate": 4.982005335112504e-05, + "loss": 1.3553, + "step": 1482000 + }, + { + "epoch": 0.89, + "learning_rate": 4.981795338556447e-05, + "loss": 1.3514, + "step": 1482500 + }, + { + "epoch": 0.89, + "learning_rate": 4.981585342000391e-05, + "loss": 1.3264, + "step": 1483000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9813753454443346e-05, + "loss": 1.3313, + "step": 1483500 + }, + { + "epoch": 0.89, + "learning_rate": 4.9811653488882786e-05, + "loss": 1.3577, + "step": 1484000 + }, + { + "epoch": 0.89, + "learning_rate": 4.980955352332222e-05, + "loss": 1.3812, + "step": 1484500 + }, + { + "epoch": 0.89, + "learning_rate": 4.980745355776165e-05, + "loss": 1.3652, + "step": 1485000 + }, + { + "epoch": 0.89, + "learning_rate": 4.980535359220109e-05, + "loss": 1.3653, + "step": 1485500 + }, + { + "epoch": 0.89, + "learning_rate": 4.980325362664052e-05, + "loss": 1.335, + "step": 1486000 + }, + { + "epoch": 0.89, + "learning_rate": 4.980115786101108e-05, + "loss": 1.3733, + "step": 1486500 + }, + { + "epoch": 0.89, + "learning_rate": 4.9799062095381634e-05, + "loss": 1.3455, + "step": 1487000 + }, + { + "epoch": 0.89, + "learning_rate": 4.979696212982107e-05, + "loss": 1.3338, + "step": 1487500 + }, + { + "epoch": 0.89, + "learning_rate": 4.979486216426051e-05, + "loss": 1.3578, + "step": 1488000 + }, + { + "epoch": 0.89, + "learning_rate": 4.979276219869994e-05, + "loss": 1.3312, + "step": 1488500 + }, + { + "epoch": 0.89, + "learning_rate": 4.9790662233139375e-05, + "loss": 1.3559, + "step": 1489000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9788562267578815e-05, + "loss": 1.3392, + "step": 1489500 + }, + { + "epoch": 0.89, + "learning_rate": 4.978646230201825e-05, + "loss": 1.3373, + "step": 1490000 + }, + { + "epoch": 0.89, + "learning_rate": 4.978436233645768e-05, + "loss": 1.3669, + "step": 1490500 + }, + { + "epoch": 0.89, + "learning_rate": 4.9782262370897115e-05, + "loss": 1.3521, + "step": 1491000 + }, + { + "epoch": 0.89, + "learning_rate": 4.978016240533655e-05, + "loss": 1.3375, + "step": 1491500 + }, + { + "epoch": 0.89, + "learning_rate": 4.977806243977599e-05, + "loss": 1.324, + "step": 1492000 + }, + { + "epoch": 0.89, + "learning_rate": 4.977596667414655e-05, + "loss": 1.3571, + "step": 1492500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9773866708585976e-05, + "loss": 1.3361, + "step": 1493000 + }, + { + "epoch": 0.9, + "learning_rate": 4.977176674302541e-05, + "loss": 1.3423, + "step": 1493500 + }, + { + "epoch": 0.9, + "learning_rate": 4.976966677746485e-05, + "loss": 1.3543, + "step": 1494000 + }, + { + "epoch": 0.9, + "learning_rate": 4.976756681190428e-05, + "loss": 1.359, + "step": 1494500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9765471046274844e-05, + "loss": 1.3461, + "step": 1495000 + }, + { + "epoch": 0.9, + "learning_rate": 4.976337108071427e-05, + "loss": 1.3232, + "step": 1495500 + }, + { + "epoch": 0.9, + "learning_rate": 4.976127111515371e-05, + "loss": 1.3619, + "step": 1496000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9759171149593144e-05, + "loss": 1.3703, + "step": 1496500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9757075383963704e-05, + "loss": 1.3747, + "step": 1497000 + }, + { + "epoch": 0.9, + "learning_rate": 4.975497541840314e-05, + "loss": 1.36, + "step": 1497500 + }, + { + "epoch": 0.9, + "learning_rate": 4.975287545284257e-05, + "loss": 1.3668, + "step": 1498000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9750775487282005e-05, + "loss": 1.3609, + "step": 1498500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9748675521721445e-05, + "loss": 1.3692, + "step": 1499000 + }, + { + "epoch": 0.9, + "learning_rate": 4.974657555616088e-05, + "loss": 1.3724, + "step": 1499500 + }, + { + "epoch": 0.9, + "learning_rate": 4.974447559060031e-05, + "loss": 1.3617, + "step": 1500000 + }, + { + "epoch": 0.9, + "eval_loss": 1.2882256507873535, + "eval_runtime": 1113.7701, + "eval_samples_per_second": 472.916, + "eval_steps_per_second": 78.82, + "step": 1500000 + }, + { + "epoch": 0.9, + "learning_rate": 4.974237562503975e-05, + "loss": 1.348, + "step": 1500500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9740275659479186e-05, + "loss": 1.3631, + "step": 1501000 + }, + { + "epoch": 0.9, + "learning_rate": 4.973817989384974e-05, + "loss": 1.328, + "step": 1501500 + }, + { + "epoch": 0.9, + "learning_rate": 4.973607992828917e-05, + "loss": 1.3218, + "step": 1502000 + }, + { + "epoch": 0.9, + "learning_rate": 4.973397996272861e-05, + "loss": 1.3427, + "step": 1502500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9731879997168046e-05, + "loss": 1.3348, + "step": 1503000 + }, + { + "epoch": 0.9, + "learning_rate": 4.972978003160748e-05, + "loss": 1.3697, + "step": 1503500 + }, + { + "epoch": 0.9, + "learning_rate": 4.972768006604692e-05, + "loss": 1.3246, + "step": 1504000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9725580100486353e-05, + "loss": 1.3394, + "step": 1504500 + }, + { + "epoch": 0.9, + "learning_rate": 4.972348013492579e-05, + "loss": 1.3589, + "step": 1505000 + }, + { + "epoch": 0.9, + "learning_rate": 4.972138436929634e-05, + "loss": 1.3467, + "step": 1505500 + }, + { + "epoch": 0.9, + "learning_rate": 4.971928440373578e-05, + "loss": 1.3528, + "step": 1506000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9717188638106334e-05, + "loss": 1.3551, + "step": 1506500 + }, + { + "epoch": 0.9, + "learning_rate": 4.971508867254577e-05, + "loss": 1.3595, + "step": 1507000 + }, + { + "epoch": 0.9, + "learning_rate": 4.971298870698521e-05, + "loss": 1.3974, + "step": 1507500 + }, + { + "epoch": 0.9, + "learning_rate": 4.971088874142464e-05, + "loss": 1.3466, + "step": 1508000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9708792975795195e-05, + "loss": 1.3667, + "step": 1508500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9706697210165755e-05, + "loss": 1.3634, + "step": 1509000 + }, + { + "epoch": 0.91, + "learning_rate": 4.970459724460518e-05, + "loss": 1.3597, + "step": 1509500 + }, + { + "epoch": 0.91, + "learning_rate": 4.970249727904462e-05, + "loss": 1.354, + "step": 1510000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9700397313484056e-05, + "loss": 1.3445, + "step": 1510500 + }, + { + "epoch": 0.91, + "learning_rate": 4.969829734792349e-05, + "loss": 1.3389, + "step": 1511000 + }, + { + "epoch": 0.91, + "learning_rate": 4.969619738236293e-05, + "loss": 1.3428, + "step": 1511500 + }, + { + "epoch": 0.91, + "learning_rate": 4.969409741680236e-05, + "loss": 1.3533, + "step": 1512000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9691997451241796e-05, + "loss": 1.3077, + "step": 1512500 + }, + { + "epoch": 0.91, + "learning_rate": 4.968990168561236e-05, + "loss": 1.3885, + "step": 1513000 + }, + { + "epoch": 0.91, + "learning_rate": 4.968780172005179e-05, + "loss": 1.3356, + "step": 1513500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9685701754491224e-05, + "loss": 1.3411, + "step": 1514000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9683601788930664e-05, + "loss": 1.3309, + "step": 1514500 + }, + { + "epoch": 0.91, + "learning_rate": 4.96815018233701e-05, + "loss": 1.3334, + "step": 1515000 + }, + { + "epoch": 0.91, + "learning_rate": 4.967940185780953e-05, + "loss": 1.3377, + "step": 1515500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9677306092180084e-05, + "loss": 1.3242, + "step": 1516000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9675206126619525e-05, + "loss": 1.3217, + "step": 1516500 + }, + { + "epoch": 0.91, + "learning_rate": 4.967310616105896e-05, + "loss": 1.3435, + "step": 1517000 + }, + { + "epoch": 0.91, + "learning_rate": 4.967100619549839e-05, + "loss": 1.3899, + "step": 1517500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9668910429868945e-05, + "loss": 1.3423, + "step": 1518000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9666810464308385e-05, + "loss": 1.3375, + "step": 1518500 + }, + { + "epoch": 0.91, + "learning_rate": 4.966471469867894e-05, + "loss": 1.3677, + "step": 1519000 + }, + { + "epoch": 0.91, + "learning_rate": 4.966261473311837e-05, + "loss": 1.3255, + "step": 1519500 + }, + { + "epoch": 0.91, + "learning_rate": 4.966051476755781e-05, + "loss": 1.343, + "step": 1520000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9658414801997246e-05, + "loss": 1.3466, + "step": 1520500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9656319036367806e-05, + "loss": 1.3477, + "step": 1521000 + }, + { + "epoch": 0.91, + "learning_rate": 4.965421907080723e-05, + "loss": 1.3337, + "step": 1521500 + }, + { + "epoch": 0.91, + "learning_rate": 4.965211910524667e-05, + "loss": 1.3597, + "step": 1522000 + }, + { + "epoch": 0.91, + "learning_rate": 4.965001913968611e-05, + "loss": 1.3646, + "step": 1522500 + }, + { + "epoch": 0.91, + "learning_rate": 4.964791917412554e-05, + "loss": 1.3481, + "step": 1523000 + }, + { + "epoch": 0.91, + "learning_rate": 4.964581920856498e-05, + "loss": 1.3579, + "step": 1523500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9643719243004414e-05, + "loss": 1.363, + "step": 1524000 + }, + { + "epoch": 0.91, + "learning_rate": 4.964161927744385e-05, + "loss": 1.336, + "step": 1524500 + }, + { + "epoch": 0.91, + "learning_rate": 4.963951931188329e-05, + "loss": 1.3627, + "step": 1525000 + }, + { + "epoch": 0.91, + "learning_rate": 4.963741934632272e-05, + "loss": 1.3363, + "step": 1525500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9635319380762155e-05, + "loss": 1.3587, + "step": 1526000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9633219415201595e-05, + "loss": 1.3497, + "step": 1526500 + }, + { + "epoch": 0.92, + "learning_rate": 4.963112364957215e-05, + "loss": 1.335, + "step": 1527000 + }, + { + "epoch": 0.92, + "learning_rate": 4.962902368401158e-05, + "loss": 1.3441, + "step": 1527500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9626923718451015e-05, + "loss": 1.3459, + "step": 1528000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9624823752890455e-05, + "loss": 1.3846, + "step": 1528500 + }, + { + "epoch": 0.92, + "learning_rate": 4.962273218719213e-05, + "loss": 1.351, + "step": 1529000 + }, + { + "epoch": 0.92, + "learning_rate": 4.962063222163156e-05, + "loss": 1.3635, + "step": 1529500 + }, + { + "epoch": 0.92, + "learning_rate": 4.961853645600212e-05, + "loss": 1.359, + "step": 1530000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9616436490441556e-05, + "loss": 1.3364, + "step": 1530500 + }, + { + "epoch": 0.92, + "learning_rate": 4.961433652488099e-05, + "loss": 1.3353, + "step": 1531000 + }, + { + "epoch": 0.92, + "learning_rate": 4.961223655932042e-05, + "loss": 1.3298, + "step": 1531500 + }, + { + "epoch": 0.92, + "learning_rate": 4.961013659375986e-05, + "loss": 1.3628, + "step": 1532000 + }, + { + "epoch": 0.92, + "learning_rate": 4.960804082813042e-05, + "loss": 1.3335, + "step": 1532500 + }, + { + "epoch": 0.92, + "learning_rate": 4.960594086256986e-05, + "loss": 1.3212, + "step": 1533000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9603840897009284e-05, + "loss": 1.3389, + "step": 1533500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9601740931448724e-05, + "loss": 1.3497, + "step": 1534000 + }, + { + "epoch": 0.92, + "learning_rate": 4.959964096588816e-05, + "loss": 1.3317, + "step": 1534500 + }, + { + "epoch": 0.92, + "learning_rate": 4.959754100032759e-05, + "loss": 1.3454, + "step": 1535000 + }, + { + "epoch": 0.92, + "learning_rate": 4.959544103476703e-05, + "loss": 1.3644, + "step": 1535500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9593341069206465e-05, + "loss": 1.3565, + "step": 1536000 + }, + { + "epoch": 0.92, + "learning_rate": 4.959124530357702e-05, + "loss": 1.3489, + "step": 1536500 + }, + { + "epoch": 0.92, + "learning_rate": 4.958914953794758e-05, + "loss": 1.3489, + "step": 1537000 + }, + { + "epoch": 0.92, + "learning_rate": 4.958704957238701e-05, + "loss": 1.37, + "step": 1537500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9584949606826446e-05, + "loss": 1.3483, + "step": 1538000 + }, + { + "epoch": 0.92, + "learning_rate": 4.958284964126588e-05, + "loss": 1.3569, + "step": 1538500 + }, + { + "epoch": 0.92, + "learning_rate": 4.958074967570531e-05, + "loss": 1.3278, + "step": 1539000 + }, + { + "epoch": 0.92, + "learning_rate": 4.957864971014475e-05, + "loss": 1.3169, + "step": 1539500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9576549744584186e-05, + "loss": 1.3644, + "step": 1540000 + }, + { + "epoch": 0.92, + "learning_rate": 4.957444977902362e-05, + "loss": 1.3393, + "step": 1540500 + }, + { + "epoch": 0.92, + "learning_rate": 4.957234981346306e-05, + "loss": 1.3195, + "step": 1541000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9570254047833614e-05, + "loss": 1.3636, + "step": 1541500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9568158282204174e-05, + "loss": 1.3549, + "step": 1542000 + }, + { + "epoch": 0.92, + "learning_rate": 4.956605831664361e-05, + "loss": 1.3589, + "step": 1542500 + }, + { + "epoch": 0.93, + "learning_rate": 4.956395835108304e-05, + "loss": 1.3544, + "step": 1543000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9561858385522474e-05, + "loss": 1.36, + "step": 1543500 + }, + { + "epoch": 0.93, + "learning_rate": 4.955975841996191e-05, + "loss": 1.328, + "step": 1544000 + }, + { + "epoch": 0.93, + "learning_rate": 4.955765845440135e-05, + "loss": 1.3684, + "step": 1544500 + }, + { + "epoch": 0.93, + "learning_rate": 4.955555848884078e-05, + "loss": 1.3337, + "step": 1545000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9553458523280215e-05, + "loss": 1.3604, + "step": 1545500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9551358557719655e-05, + "loss": 1.3611, + "step": 1546000 + }, + { + "epoch": 0.93, + "learning_rate": 4.954926279209021e-05, + "loss": 1.3561, + "step": 1546500 + }, + { + "epoch": 0.93, + "learning_rate": 4.954716282652964e-05, + "loss": 1.3712, + "step": 1547000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9545062860969076e-05, + "loss": 1.3572, + "step": 1547500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9542962895408516e-05, + "loss": 1.3385, + "step": 1548000 + }, + { + "epoch": 0.93, + "learning_rate": 4.954086292984795e-05, + "loss": 1.33, + "step": 1548500 + }, + { + "epoch": 0.93, + "learning_rate": 4.953876296428739e-05, + "loss": 1.3408, + "step": 1549000 + }, + { + "epoch": 0.93, + "learning_rate": 4.953666719865794e-05, + "loss": 1.3574, + "step": 1549500 + }, + { + "epoch": 0.93, + "learning_rate": 4.953456723309738e-05, + "loss": 1.3448, + "step": 1550000 + }, + { + "epoch": 0.93, + "learning_rate": 4.953246726753681e-05, + "loss": 1.324, + "step": 1550500 + }, + { + "epoch": 0.93, + "learning_rate": 4.953036730197625e-05, + "loss": 1.3288, + "step": 1551000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9528267336415684e-05, + "loss": 1.3402, + "step": 1551500 + }, + { + "epoch": 0.93, + "learning_rate": 4.952617157078624e-05, + "loss": 1.3622, + "step": 1552000 + }, + { + "epoch": 0.93, + "learning_rate": 4.952407160522567e-05, + "loss": 1.3224, + "step": 1552500 + }, + { + "epoch": 0.93, + "learning_rate": 4.952197163966511e-05, + "loss": 1.3556, + "step": 1553000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9519871674104545e-05, + "loss": 1.34, + "step": 1553500 + }, + { + "epoch": 0.93, + "learning_rate": 4.95177759084751e-05, + "loss": 1.3238, + "step": 1554000 + }, + { + "epoch": 0.93, + "learning_rate": 4.951567594291453e-05, + "loss": 1.3197, + "step": 1554500 + }, + { + "epoch": 0.93, + "learning_rate": 4.951357597735397e-05, + "loss": 1.3351, + "step": 1555000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9511476011793405e-05, + "loss": 1.3388, + "step": 1555500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9509376046232846e-05, + "loss": 1.3504, + "step": 1556000 + }, + { + "epoch": 0.93, + "learning_rate": 4.950727608067228e-05, + "loss": 1.354, + "step": 1556500 + }, + { + "epoch": 0.93, + "learning_rate": 4.950517611511171e-05, + "loss": 1.3417, + "step": 1557000 + }, + { + "epoch": 0.93, + "learning_rate": 4.950307614955115e-05, + "loss": 1.3471, + "step": 1557500 + }, + { + "epoch": 0.93, + "learning_rate": 4.950097618399058e-05, + "loss": 1.3371, + "step": 1558000 + }, + { + "epoch": 0.93, + "learning_rate": 4.949888041836114e-05, + "loss": 1.4033, + "step": 1558500 + }, + { + "epoch": 0.93, + "learning_rate": 4.949678045280057e-05, + "loss": 1.3568, + "step": 1559000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9494680487240013e-05, + "loss": 1.3091, + "step": 1559500 + }, + { + "epoch": 0.94, + "learning_rate": 4.949258052167945e-05, + "loss": 1.3257, + "step": 1560000 + }, + { + "epoch": 0.94, + "learning_rate": 4.949048475605e-05, + "loss": 1.3642, + "step": 1560500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9488384790489434e-05, + "loss": 1.3463, + "step": 1561000 + }, + { + "epoch": 0.94, + "learning_rate": 4.9486284824928874e-05, + "loss": 1.352, + "step": 1561500 + }, + { + "epoch": 0.94, + "learning_rate": 4.948418485936831e-05, + "loss": 1.3293, + "step": 1562000 + }, + { + "epoch": 0.94, + "learning_rate": 4.948208489380774e-05, + "loss": 1.3767, + "step": 1562500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9479984928247175e-05, + "loss": 1.3366, + "step": 1563000 + }, + { + "epoch": 0.94, + "learning_rate": 4.947788496268661e-05, + "loss": 1.3837, + "step": 1563500 + }, + { + "epoch": 0.94, + "learning_rate": 4.947578919705717e-05, + "loss": 1.3572, + "step": 1564000 + }, + { + "epoch": 0.94, + "learning_rate": 4.947368923149661e-05, + "loss": 1.3573, + "step": 1564500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9471589265936035e-05, + "loss": 1.3428, + "step": 1565000 + }, + { + "epoch": 0.94, + "learning_rate": 4.946948930037547e-05, + "loss": 1.3316, + "step": 1565500 + }, + { + "epoch": 0.94, + "learning_rate": 4.946739353474603e-05, + "loss": 1.3365, + "step": 1566000 + }, + { + "epoch": 0.94, + "learning_rate": 4.946529356918547e-05, + "loss": 1.322, + "step": 1566500 + }, + { + "epoch": 0.94, + "learning_rate": 4.94631936036249e-05, + "loss": 1.3555, + "step": 1567000 + }, + { + "epoch": 0.94, + "learning_rate": 4.946109363806433e-05, + "loss": 1.3733, + "step": 1567500 + }, + { + "epoch": 0.94, + "learning_rate": 4.945899367250377e-05, + "loss": 1.3221, + "step": 1568000 + }, + { + "epoch": 0.94, + "learning_rate": 4.945689790687433e-05, + "loss": 1.3194, + "step": 1568500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9454802141244884e-05, + "loss": 1.3709, + "step": 1569000 + }, + { + "epoch": 0.94, + "learning_rate": 4.945270217568432e-05, + "loss": 1.3719, + "step": 1569500 + }, + { + "epoch": 0.94, + "learning_rate": 4.945060221012376e-05, + "loss": 1.3441, + "step": 1570000 + }, + { + "epoch": 0.94, + "learning_rate": 4.944850224456319e-05, + "loss": 1.3369, + "step": 1570500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9446402279002624e-05, + "loss": 1.3299, + "step": 1571000 + }, + { + "epoch": 0.94, + "learning_rate": 4.9444302313442064e-05, + "loss": 1.3348, + "step": 1571500 + }, + { + "epoch": 0.94, + "learning_rate": 4.94422023478815e-05, + "loss": 1.3646, + "step": 1572000 + }, + { + "epoch": 0.94, + "learning_rate": 4.9440102382320925e-05, + "loss": 1.3545, + "step": 1572500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9438006616691485e-05, + "loss": 1.3508, + "step": 1573000 + }, + { + "epoch": 0.94, + "learning_rate": 4.9435906651130925e-05, + "loss": 1.3335, + "step": 1573500 + }, + { + "epoch": 0.94, + "learning_rate": 4.943380668557036e-05, + "loss": 1.3367, + "step": 1574000 + }, + { + "epoch": 0.94, + "learning_rate": 4.9431706720009785e-05, + "loss": 1.3435, + "step": 1574500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9429606754449226e-05, + "loss": 1.3548, + "step": 1575000 + }, + { + "epoch": 0.94, + "learning_rate": 4.9427510988819786e-05, + "loss": 1.3785, + "step": 1575500 + }, + { + "epoch": 0.94, + "learning_rate": 4.942541102325922e-05, + "loss": 1.3359, + "step": 1576000 + }, + { + "epoch": 0.95, + "learning_rate": 4.942331525762977e-05, + "loss": 1.3524, + "step": 1576500 + }, + { + "epoch": 0.95, + "learning_rate": 4.942121529206921e-05, + "loss": 1.3399, + "step": 1577000 + }, + { + "epoch": 0.95, + "learning_rate": 4.941911532650865e-05, + "loss": 1.3354, + "step": 1577500 + }, + { + "epoch": 0.95, + "learning_rate": 4.941701536094808e-05, + "loss": 1.3445, + "step": 1578000 + }, + { + "epoch": 0.95, + "learning_rate": 4.941491539538752e-05, + "loss": 1.366, + "step": 1578500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9412815429826954e-05, + "loss": 1.3466, + "step": 1579000 + }, + { + "epoch": 0.95, + "learning_rate": 4.941071546426638e-05, + "loss": 1.347, + "step": 1579500 + }, + { + "epoch": 0.95, + "learning_rate": 4.940861969863694e-05, + "loss": 1.3362, + "step": 1580000 + }, + { + "epoch": 0.95, + "learning_rate": 4.940651973307638e-05, + "loss": 1.3347, + "step": 1580500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9404419767515815e-05, + "loss": 1.3625, + "step": 1581000 + }, + { + "epoch": 0.95, + "learning_rate": 4.940231980195525e-05, + "loss": 1.3505, + "step": 1581500 + }, + { + "epoch": 0.95, + "learning_rate": 4.940021983639468e-05, + "loss": 1.3196, + "step": 1582000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9398119870834115e-05, + "loss": 1.3699, + "step": 1582500 + }, + { + "epoch": 0.95, + "learning_rate": 4.939601990527355e-05, + "loss": 1.3295, + "step": 1583000 + }, + { + "epoch": 0.95, + "learning_rate": 4.939391993971299e-05, + "loss": 1.3681, + "step": 1583500 + }, + { + "epoch": 0.95, + "learning_rate": 4.939181997415242e-05, + "loss": 1.3205, + "step": 1584000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9389724208522976e-05, + "loss": 1.3392, + "step": 1584500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9387624242962416e-05, + "loss": 1.3744, + "step": 1585000 + }, + { + "epoch": 0.95, + "learning_rate": 4.938552427740185e-05, + "loss": 1.3297, + "step": 1585500 + }, + { + "epoch": 0.95, + "learning_rate": 4.938342431184128e-05, + "loss": 1.3485, + "step": 1586000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9381328546211836e-05, + "loss": 1.3344, + "step": 1586500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9379228580651277e-05, + "loss": 1.3471, + "step": 1587000 + }, + { + "epoch": 0.95, + "learning_rate": 4.937712861509071e-05, + "loss": 1.3561, + "step": 1587500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9375028649530143e-05, + "loss": 1.3412, + "step": 1588000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9372928683969584e-05, + "loss": 1.3467, + "step": 1588500 + }, + { + "epoch": 0.95, + "learning_rate": 4.937083291834014e-05, + "loss": 1.3465, + "step": 1589000 + }, + { + "epoch": 0.95, + "learning_rate": 4.936873295277957e-05, + "loss": 1.3542, + "step": 1589500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9366632987219004e-05, + "loss": 1.3381, + "step": 1590000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9364533021658444e-05, + "loss": 1.3452, + "step": 1590500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9362437256029005e-05, + "loss": 1.3143, + "step": 1591000 + }, + { + "epoch": 0.95, + "learning_rate": 4.936033729046843e-05, + "loss": 1.3282, + "step": 1591500 + }, + { + "epoch": 0.95, + "learning_rate": 4.935823732490787e-05, + "loss": 1.3443, + "step": 1592000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9356137359347305e-05, + "loss": 1.3675, + "step": 1592500 + }, + { + "epoch": 0.96, + "learning_rate": 4.935403739378674e-05, + "loss": 1.3367, + "step": 1593000 + }, + { + "epoch": 0.96, + "learning_rate": 4.935194162815729e-05, + "loss": 1.3559, + "step": 1593500 + }, + { + "epoch": 0.96, + "learning_rate": 4.934984166259673e-05, + "loss": 1.3653, + "step": 1594000 + }, + { + "epoch": 0.96, + "learning_rate": 4.934774589696729e-05, + "loss": 1.3417, + "step": 1594500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9345645931406726e-05, + "loss": 1.3747, + "step": 1595000 + }, + { + "epoch": 0.96, + "learning_rate": 4.934354596584616e-05, + "loss": 1.3429, + "step": 1595500 + }, + { + "epoch": 0.96, + "learning_rate": 4.934144600028559e-05, + "loss": 1.3333, + "step": 1596000 + }, + { + "epoch": 0.96, + "learning_rate": 4.933934603472503e-05, + "loss": 1.3325, + "step": 1596500 + }, + { + "epoch": 0.96, + "learning_rate": 4.933724606916446e-05, + "loss": 1.3429, + "step": 1597000 + }, + { + "epoch": 0.96, + "learning_rate": 4.93351461036039e-05, + "loss": 1.3447, + "step": 1597500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9333046138043334e-05, + "loss": 1.3466, + "step": 1598000 + }, + { + "epoch": 0.96, + "learning_rate": 4.933094617248277e-05, + "loss": 1.3154, + "step": 1598500 + }, + { + "epoch": 0.96, + "learning_rate": 4.932885040685333e-05, + "loss": 1.3398, + "step": 1599000 + }, + { + "epoch": 0.96, + "learning_rate": 4.932675044129276e-05, + "loss": 1.3711, + "step": 1599500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9324650475732195e-05, + "loss": 1.3525, + "step": 1600000 + }, + { + "epoch": 0.96, + "eval_loss": 1.2816615104675293, + "eval_runtime": 1114.3312, + "eval_samples_per_second": 472.678, + "eval_steps_per_second": 78.78, + "step": 1600000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9322554710102755e-05, + "loss": 1.3228, + "step": 1600500 + }, + { + "epoch": 0.96, + "learning_rate": 4.932045474454219e-05, + "loss": 1.3275, + "step": 1601000 + }, + { + "epoch": 0.96, + "learning_rate": 4.931835477898162e-05, + "loss": 1.3305, + "step": 1601500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9316254813421055e-05, + "loss": 1.3366, + "step": 1602000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9314154847860495e-05, + "loss": 1.3326, + "step": 1602500 + }, + { + "epoch": 0.96, + "learning_rate": 4.931205488229993e-05, + "loss": 1.351, + "step": 1603000 + }, + { + "epoch": 0.96, + "learning_rate": 4.930995491673936e-05, + "loss": 1.3251, + "step": 1603500 + }, + { + "epoch": 0.96, + "learning_rate": 4.93078549511788e-05, + "loss": 1.3241, + "step": 1604000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9305754985618236e-05, + "loss": 1.3535, + "step": 1604500 + }, + { + "epoch": 0.96, + "learning_rate": 4.930365502005767e-05, + "loss": 1.3476, + "step": 1605000 + }, + { + "epoch": 0.96, + "learning_rate": 4.930155505449711e-05, + "loss": 1.3506, + "step": 1605500 + }, + { + "epoch": 0.96, + "learning_rate": 4.929945508893654e-05, + "loss": 1.3257, + "step": 1606000 + }, + { + "epoch": 0.96, + "learning_rate": 4.929736352323822e-05, + "loss": 1.3537, + "step": 1606500 + }, + { + "epoch": 0.96, + "learning_rate": 4.929526355767765e-05, + "loss": 1.3489, + "step": 1607000 + }, + { + "epoch": 0.96, + "learning_rate": 4.929316359211709e-05, + "loss": 1.3398, + "step": 1607500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9291063626556524e-05, + "loss": 1.3953, + "step": 1608000 + }, + { + "epoch": 0.96, + "learning_rate": 4.928896786092708e-05, + "loss": 1.38, + "step": 1608500 + }, + { + "epoch": 0.96, + "learning_rate": 4.928686789536651e-05, + "loss": 1.3583, + "step": 1609000 + }, + { + "epoch": 0.96, + "learning_rate": 4.928476792980595e-05, + "loss": 1.3318, + "step": 1609500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9282667964245385e-05, + "loss": 1.3325, + "step": 1610000 + }, + { + "epoch": 0.97, + "learning_rate": 4.928056799868482e-05, + "loss": 1.3764, + "step": 1610500 + }, + { + "epoch": 0.97, + "learning_rate": 4.927847223305537e-05, + "loss": 1.3424, + "step": 1611000 + }, + { + "epoch": 0.97, + "learning_rate": 4.927637226749481e-05, + "loss": 1.3613, + "step": 1611500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9274272301934246e-05, + "loss": 1.3303, + "step": 1612000 + }, + { + "epoch": 0.97, + "learning_rate": 4.927217233637368e-05, + "loss": 1.342, + "step": 1612500 + }, + { + "epoch": 0.97, + "learning_rate": 4.927007237081312e-05, + "loss": 1.3667, + "step": 1613000 + }, + { + "epoch": 0.97, + "learning_rate": 4.926797240525255e-05, + "loss": 1.3277, + "step": 1613500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9265876639623106e-05, + "loss": 1.3334, + "step": 1614000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9263776674062547e-05, + "loss": 1.3126, + "step": 1614500 + }, + { + "epoch": 0.97, + "learning_rate": 4.926167670850198e-05, + "loss": 1.3503, + "step": 1615000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9259580942872534e-05, + "loss": 1.3597, + "step": 1615500 + }, + { + "epoch": 0.97, + "learning_rate": 4.925748097731197e-05, + "loss": 1.3893, + "step": 1616000 + }, + { + "epoch": 0.97, + "learning_rate": 4.925538101175141e-05, + "loss": 1.3699, + "step": 1616500 + }, + { + "epoch": 0.97, + "learning_rate": 4.925328104619084e-05, + "loss": 1.3627, + "step": 1617000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9251181080630274e-05, + "loss": 1.3352, + "step": 1617500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9249081115069714e-05, + "loss": 1.3576, + "step": 1618000 + }, + { + "epoch": 0.97, + "learning_rate": 4.924698534944027e-05, + "loss": 1.3476, + "step": 1618500 + }, + { + "epoch": 0.97, + "learning_rate": 4.92448853838797e-05, + "loss": 1.3203, + "step": 1619000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9242785418319135e-05, + "loss": 1.3482, + "step": 1619500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9240685452758575e-05, + "loss": 1.3479, + "step": 1620000 + }, + { + "epoch": 0.97, + "learning_rate": 4.923858968712913e-05, + "loss": 1.3514, + "step": 1620500 + }, + { + "epoch": 0.97, + "learning_rate": 4.923648972156856e-05, + "loss": 1.3628, + "step": 1621000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9234389756008e-05, + "loss": 1.3282, + "step": 1621500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9232289790447436e-05, + "loss": 1.3338, + "step": 1622000 + }, + { + "epoch": 0.97, + "learning_rate": 4.923018982488687e-05, + "loss": 1.3206, + "step": 1622500 + }, + { + "epoch": 0.97, + "learning_rate": 4.922808985932631e-05, + "loss": 1.3188, + "step": 1623000 + }, + { + "epoch": 0.97, + "learning_rate": 4.922598989376574e-05, + "loss": 1.3437, + "step": 1623500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9223889928205176e-05, + "loss": 1.3719, + "step": 1624000 + }, + { + "epoch": 0.97, + "learning_rate": 4.922178996264462e-05, + "loss": 1.3251, + "step": 1624500 + }, + { + "epoch": 0.97, + "learning_rate": 4.921968999708405e-05, + "loss": 1.3605, + "step": 1625000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9217594231454604e-05, + "loss": 1.3332, + "step": 1625500 + }, + { + "epoch": 0.97, + "learning_rate": 4.921549426589404e-05, + "loss": 1.3278, + "step": 1626000 + }, + { + "epoch": 0.98, + "learning_rate": 4.921339430033348e-05, + "loss": 1.3342, + "step": 1626500 + }, + { + "epoch": 0.98, + "learning_rate": 4.921129433477291e-05, + "loss": 1.3519, + "step": 1627000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9209194369212344e-05, + "loss": 1.3322, + "step": 1627500 + }, + { + "epoch": 0.98, + "learning_rate": 4.920709440365178e-05, + "loss": 1.3127, + "step": 1628000 + }, + { + "epoch": 0.98, + "learning_rate": 4.920499443809121e-05, + "loss": 1.3316, + "step": 1628500 + }, + { + "epoch": 0.98, + "learning_rate": 4.920289447253065e-05, + "loss": 1.3169, + "step": 1629000 + }, + { + "epoch": 0.98, + "learning_rate": 4.920079870690121e-05, + "loss": 1.3298, + "step": 1629500 + }, + { + "epoch": 0.98, + "learning_rate": 4.919869874134064e-05, + "loss": 1.327, + "step": 1630000 + }, + { + "epoch": 0.98, + "learning_rate": 4.919659877578007e-05, + "loss": 1.3527, + "step": 1630500 + }, + { + "epoch": 0.98, + "learning_rate": 4.919449881021951e-05, + "loss": 1.3395, + "step": 1631000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9192398844658946e-05, + "loss": 1.3324, + "step": 1631500 + }, + { + "epoch": 0.98, + "learning_rate": 4.919029887909838e-05, + "loss": 1.3712, + "step": 1632000 + }, + { + "epoch": 0.98, + "learning_rate": 4.918819891353782e-05, + "loss": 1.3527, + "step": 1632500 + }, + { + "epoch": 0.98, + "learning_rate": 4.918609894797725e-05, + "loss": 1.3549, + "step": 1633000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9184003182347806e-05, + "loss": 1.3562, + "step": 1633500 + }, + { + "epoch": 0.98, + "learning_rate": 4.918190321678724e-05, + "loss": 1.3464, + "step": 1634000 + }, + { + "epoch": 0.98, + "learning_rate": 4.91798074511578e-05, + "loss": 1.3637, + "step": 1634500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9177707485597234e-05, + "loss": 1.3466, + "step": 1635000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9175611719967794e-05, + "loss": 1.3584, + "step": 1635500 + }, + { + "epoch": 0.98, + "learning_rate": 4.917351175440723e-05, + "loss": 1.3563, + "step": 1636000 + }, + { + "epoch": 0.98, + "learning_rate": 4.917141178884667e-05, + "loss": 1.3437, + "step": 1636500 + }, + { + "epoch": 0.98, + "learning_rate": 4.91693118232861e-05, + "loss": 1.3621, + "step": 1637000 + }, + { + "epoch": 0.98, + "learning_rate": 4.916721185772553e-05, + "loss": 1.3542, + "step": 1637500 + }, + { + "epoch": 0.98, + "learning_rate": 4.916511189216497e-05, + "loss": 1.3303, + "step": 1638000 + }, + { + "epoch": 0.98, + "learning_rate": 4.916301612653553e-05, + "loss": 1.3375, + "step": 1638500 + }, + { + "epoch": 0.98, + "learning_rate": 4.916091616097496e-05, + "loss": 1.3842, + "step": 1639000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9158816195414395e-05, + "loss": 1.3391, + "step": 1639500 + }, + { + "epoch": 0.98, + "learning_rate": 4.915671622985383e-05, + "loss": 1.3402, + "step": 1640000 + }, + { + "epoch": 0.98, + "learning_rate": 4.915461626429326e-05, + "loss": 1.3568, + "step": 1640500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9152516298732696e-05, + "loss": 1.3437, + "step": 1641000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9150416333172136e-05, + "loss": 1.3553, + "step": 1641500 + }, + { + "epoch": 0.98, + "learning_rate": 4.914832056754269e-05, + "loss": 1.3354, + "step": 1642000 + }, + { + "epoch": 0.98, + "learning_rate": 4.914622060198212e-05, + "loss": 1.3628, + "step": 1642500 + }, + { + "epoch": 0.99, + "learning_rate": 4.914412063642156e-05, + "loss": 1.3646, + "step": 1643000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9142020670861e-05, + "loss": 1.3636, + "step": 1643500 + }, + { + "epoch": 0.99, + "learning_rate": 4.913992070530043e-05, + "loss": 1.3389, + "step": 1644000 + }, + { + "epoch": 0.99, + "learning_rate": 4.913782073973987e-05, + "loss": 1.3594, + "step": 1644500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9135720774179304e-05, + "loss": 1.3287, + "step": 1645000 + }, + { + "epoch": 0.99, + "learning_rate": 4.913362500854986e-05, + "loss": 1.3302, + "step": 1645500 + }, + { + "epoch": 0.99, + "learning_rate": 4.913152504298929e-05, + "loss": 1.3227, + "step": 1646000 + }, + { + "epoch": 0.99, + "learning_rate": 4.912942507742873e-05, + "loss": 1.3276, + "step": 1646500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9127325111868165e-05, + "loss": 1.3495, + "step": 1647000 + }, + { + "epoch": 0.99, + "learning_rate": 4.91252251463076e-05, + "loss": 1.3695, + "step": 1647500 + }, + { + "epoch": 0.99, + "learning_rate": 4.912312518074704e-05, + "loss": 1.3119, + "step": 1648000 + }, + { + "epoch": 0.99, + "learning_rate": 4.912102521518647e-05, + "loss": 1.3442, + "step": 1648500 + }, + { + "epoch": 0.99, + "learning_rate": 4.911893364948815e-05, + "loss": 1.3458, + "step": 1649000 + }, + { + "epoch": 0.99, + "learning_rate": 4.911683368392758e-05, + "loss": 1.3335, + "step": 1649500 + }, + { + "epoch": 0.99, + "learning_rate": 4.911473371836702e-05, + "loss": 1.3144, + "step": 1650000 + }, + { + "epoch": 0.99, + "learning_rate": 4.911263375280645e-05, + "loss": 1.3231, + "step": 1650500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9110533787245886e-05, + "loss": 1.3279, + "step": 1651000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9108433821685326e-05, + "loss": 1.3703, + "step": 1651500 + }, + { + "epoch": 0.99, + "learning_rate": 4.910633385612476e-05, + "loss": 1.3416, + "step": 1652000 + }, + { + "epoch": 0.99, + "learning_rate": 4.910423389056419e-05, + "loss": 1.314, + "step": 1652500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9102133925003633e-05, + "loss": 1.3466, + "step": 1653000 + }, + { + "epoch": 0.99, + "learning_rate": 4.910003395944307e-05, + "loss": 1.374, + "step": 1653500 + }, + { + "epoch": 0.99, + "learning_rate": 4.90979339938825e-05, + "loss": 1.3301, + "step": 1654000 + }, + { + "epoch": 0.99, + "learning_rate": 4.909583402832194e-05, + "loss": 1.3464, + "step": 1654500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9093738262692494e-05, + "loss": 1.3181, + "step": 1655000 + }, + { + "epoch": 0.99, + "learning_rate": 4.909163829713193e-05, + "loss": 1.3446, + "step": 1655500 + }, + { + "epoch": 0.99, + "learning_rate": 4.908953833157136e-05, + "loss": 1.3494, + "step": 1656000 + }, + { + "epoch": 0.99, + "learning_rate": 4.90874383660108e-05, + "loss": 1.3476, + "step": 1656500 + }, + { + "epoch": 0.99, + "learning_rate": 4.908533840045023e-05, + "loss": 1.3353, + "step": 1657000 + }, + { + "epoch": 0.99, + "learning_rate": 4.908323843488966e-05, + "loss": 1.3224, + "step": 1657500 + }, + { + "epoch": 0.99, + "learning_rate": 4.90811384693291e-05, + "loss": 1.3577, + "step": 1658000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9079038503768535e-05, + "loss": 1.3249, + "step": 1658500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9076942738139096e-05, + "loss": 1.3217, + "step": 1659000 + }, + { + "epoch": 0.99, + "learning_rate": 4.907484277257853e-05, + "loss": 1.3337, + "step": 1659500 + }, + { + "epoch": 1.0, + "learning_rate": 4.907274280701796e-05, + "loss": 1.3134, + "step": 1660000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9070642841457396e-05, + "loss": 1.329, + "step": 1660500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9068542875896836e-05, + "loss": 1.3373, + "step": 1661000 + }, + { + "epoch": 1.0, + "learning_rate": 4.906644291033627e-05, + "loss": 1.3225, + "step": 1661500 + }, + { + "epoch": 1.0, + "learning_rate": 4.906434714470682e-05, + "loss": 1.3423, + "step": 1662000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9062247179146257e-05, + "loss": 1.3482, + "step": 1662500 + }, + { + "epoch": 1.0, + "learning_rate": 4.90601472135857e-05, + "loss": 1.3441, + "step": 1663000 + }, + { + "epoch": 1.0, + "learning_rate": 4.905804724802513e-05, + "loss": 1.3595, + "step": 1663500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9055947282464564e-05, + "loss": 1.3429, + "step": 1664000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9053847316904004e-05, + "loss": 1.3229, + "step": 1664500 + }, + { + "epoch": 1.0, + "learning_rate": 4.905174735134344e-05, + "loss": 1.3678, + "step": 1665000 + }, + { + "epoch": 1.0, + "learning_rate": 4.904965158571399e-05, + "loss": 1.3735, + "step": 1665500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9047551620153424e-05, + "loss": 1.3369, + "step": 1666000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9045451654592865e-05, + "loss": 1.3198, + "step": 1666500 + }, + { + "epoch": 1.0, + "learning_rate": 4.90433516890323e-05, + "loss": 1.3055, + "step": 1667000 + }, + { + "epoch": 1.0, + "learning_rate": 4.904125172347174e-05, + "loss": 1.3209, + "step": 1667500 + }, + { + "epoch": 1.0, + "learning_rate": 4.903915175791117e-05, + "loss": 1.3425, + "step": 1668000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9037055992281725e-05, + "loss": 1.2968, + "step": 1668500 + }, + { + "epoch": 1.0, + "learning_rate": 4.903495602672116e-05, + "loss": 1.3499, + "step": 1669000 + }, + { + "epoch": 1.0, + "learning_rate": 4.90328560611606e-05, + "loss": 1.3247, + "step": 1669500 + }, + { + "epoch": 1.0, + "learning_rate": 4.903075609560003e-05, + "loss": 1.3059, + "step": 1670000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9028656130039466e-05, + "loss": 1.3355, + "step": 1670500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9026556164478906e-05, + "loss": 1.3115, + "step": 1671000 + }, + { + "epoch": 1.0, + "learning_rate": 4.902445619891834e-05, + "loss": 1.2839, + "step": 1671500 + }, + { + "epoch": 1.0, + "learning_rate": 4.902236043328889e-05, + "loss": 1.2893, + "step": 1672000 + }, + { + "epoch": 1.0, + "learning_rate": 4.902026046772833e-05, + "loss": 1.3132, + "step": 1672500 + }, + { + "epoch": 1.0, + "learning_rate": 4.901816050216777e-05, + "loss": 1.3137, + "step": 1673000 + }, + { + "epoch": 1.0, + "learning_rate": 4.90160605366072e-05, + "loss": 1.3299, + "step": 1673500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9013960571046634e-05, + "loss": 1.3091, + "step": 1674000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9011864805417194e-05, + "loss": 1.3247, + "step": 1674500 + }, + { + "epoch": 1.0, + "learning_rate": 4.900976483985663e-05, + "loss": 1.3192, + "step": 1675000 + }, + { + "epoch": 1.0, + "learning_rate": 4.900766487429606e-05, + "loss": 1.3255, + "step": 1675500 + }, + { + "epoch": 1.0, + "learning_rate": 4.90055649087355e-05, + "loss": 1.2757, + "step": 1676000 + }, + { + "epoch": 1.01, + "learning_rate": 4.9003464943174935e-05, + "loss": 1.3088, + "step": 1676500 + }, + { + "epoch": 1.01, + "learning_rate": 4.900136497761436e-05, + "loss": 1.3, + "step": 1677000 + }, + { + "epoch": 1.01, + "learning_rate": 4.899926921198492e-05, + "loss": 1.333, + "step": 1677500 + }, + { + "epoch": 1.01, + "learning_rate": 4.899716924642436e-05, + "loss": 1.3413, + "step": 1678000 + }, + { + "epoch": 1.01, + "learning_rate": 4.8995069280863796e-05, + "loss": 1.3359, + "step": 1678500 + }, + { + "epoch": 1.01, + "learning_rate": 4.899296931530323e-05, + "loss": 1.3033, + "step": 1679000 + }, + { + "epoch": 1.01, + "learning_rate": 4.899086934974266e-05, + "loss": 1.3078, + "step": 1679500 + }, + { + "epoch": 1.01, + "learning_rate": 4.898877358411322e-05, + "loss": 1.3102, + "step": 1680000 + }, + { + "epoch": 1.01, + "learning_rate": 4.8986673618552656e-05, + "loss": 1.3324, + "step": 1680500 + }, + { + "epoch": 1.01, + "learning_rate": 4.898457785292321e-05, + "loss": 1.3398, + "step": 1681000 + }, + { + "epoch": 1.01, + "learning_rate": 4.898247788736265e-05, + "loss": 1.2893, + "step": 1681500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8980377921802084e-05, + "loss": 1.2809, + "step": 1682000 + }, + { + "epoch": 1.01, + "learning_rate": 4.897827795624152e-05, + "loss": 1.319, + "step": 1682500 + }, + { + "epoch": 1.01, + "learning_rate": 4.897617799068096e-05, + "loss": 1.3058, + "step": 1683000 + }, + { + "epoch": 1.01, + "learning_rate": 4.897407802512039e-05, + "loss": 1.313, + "step": 1683500 + }, + { + "epoch": 1.01, + "learning_rate": 4.897197805955982e-05, + "loss": 1.2913, + "step": 1684000 + }, + { + "epoch": 1.01, + "learning_rate": 4.896987809399926e-05, + "loss": 1.2862, + "step": 1684500 + }, + { + "epoch": 1.01, + "learning_rate": 4.896777812843869e-05, + "loss": 1.3271, + "step": 1685000 + }, + { + "epoch": 1.01, + "learning_rate": 4.896568236280925e-05, + "loss": 1.3049, + "step": 1685500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8963582397248685e-05, + "loss": 1.3139, + "step": 1686000 + }, + { + "epoch": 1.01, + "learning_rate": 4.896148663161924e-05, + "loss": 1.3118, + "step": 1686500 + }, + { + "epoch": 1.01, + "learning_rate": 4.895938666605868e-05, + "loss": 1.3058, + "step": 1687000 + }, + { + "epoch": 1.01, + "learning_rate": 4.895728670049811e-05, + "loss": 1.3056, + "step": 1687500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8955186734937546e-05, + "loss": 1.2781, + "step": 1688000 + }, + { + "epoch": 1.01, + "learning_rate": 4.8953086769376986e-05, + "loss": 1.3212, + "step": 1688500 + }, + { + "epoch": 1.01, + "learning_rate": 4.895098680381641e-05, + "loss": 1.3027, + "step": 1689000 + }, + { + "epoch": 1.01, + "learning_rate": 4.894888683825585e-05, + "loss": 1.3256, + "step": 1689500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8946786872695286e-05, + "loss": 1.2971, + "step": 1690000 + }, + { + "epoch": 1.01, + "learning_rate": 4.894468690713472e-05, + "loss": 1.3134, + "step": 1690500 + }, + { + "epoch": 1.01, + "learning_rate": 4.894258694157416e-05, + "loss": 1.3294, + "step": 1691000 + }, + { + "epoch": 1.01, + "learning_rate": 4.8940486976013593e-05, + "loss": 1.2934, + "step": 1691500 + }, + { + "epoch": 1.01, + "learning_rate": 4.893839121038415e-05, + "loss": 1.2831, + "step": 1692000 + }, + { + "epoch": 1.01, + "learning_rate": 4.893629124482358e-05, + "loss": 1.344, + "step": 1692500 + }, + { + "epoch": 1.02, + "learning_rate": 4.893419127926302e-05, + "loss": 1.3321, + "step": 1693000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8932091313702454e-05, + "loss": 1.3235, + "step": 1693500 + }, + { + "epoch": 1.02, + "learning_rate": 4.892999134814189e-05, + "loss": 1.3072, + "step": 1694000 + }, + { + "epoch": 1.02, + "learning_rate": 4.892789558251244e-05, + "loss": 1.2983, + "step": 1694500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8925799816883e-05, + "loss": 1.3203, + "step": 1695000 + }, + { + "epoch": 1.02, + "learning_rate": 4.892369985132244e-05, + "loss": 1.3262, + "step": 1695500 + }, + { + "epoch": 1.02, + "learning_rate": 4.892159988576187e-05, + "loss": 1.3245, + "step": 1696000 + }, + { + "epoch": 1.02, + "learning_rate": 4.891949992020131e-05, + "loss": 1.3063, + "step": 1696500 + }, + { + "epoch": 1.02, + "learning_rate": 4.891739995464074e-05, + "loss": 1.3251, + "step": 1697000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8915299989080176e-05, + "loss": 1.2963, + "step": 1697500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8913200023519616e-05, + "loss": 1.315, + "step": 1698000 + }, + { + "epoch": 1.02, + "learning_rate": 4.891110005795905e-05, + "loss": 1.2855, + "step": 1698500 + }, + { + "epoch": 1.02, + "learning_rate": 4.890900009239848e-05, + "loss": 1.3112, + "step": 1699000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8906904326769036e-05, + "loss": 1.3363, + "step": 1699500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8904804361208477e-05, + "loss": 1.3065, + "step": 1700000 + }, + { + "epoch": 1.02, + "eval_loss": 1.2773289680480957, + "eval_runtime": 1103.5769, + "eval_samples_per_second": 477.284, + "eval_steps_per_second": 79.548, + "step": 1700000 + }, + { + "epoch": 1.02, + "learning_rate": 4.890270439564791e-05, + "loss": 1.3451, + "step": 1700500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8900604430087344e-05, + "loss": 1.3006, + "step": 1701000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8898504464526784e-05, + "loss": 1.2997, + "step": 1701500 + }, + { + "epoch": 1.02, + "learning_rate": 4.889640449896622e-05, + "loss": 1.3124, + "step": 1702000 + }, + { + "epoch": 1.02, + "learning_rate": 4.889430453340565e-05, + "loss": 1.2988, + "step": 1702500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8892208767776204e-05, + "loss": 1.3237, + "step": 1703000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8890108802215644e-05, + "loss": 1.3079, + "step": 1703500 + }, + { + "epoch": 1.02, + "learning_rate": 4.888800883665508e-05, + "loss": 1.3231, + "step": 1704000 + }, + { + "epoch": 1.02, + "learning_rate": 4.888590887109451e-05, + "loss": 1.3401, + "step": 1704500 + }, + { + "epoch": 1.02, + "learning_rate": 4.888380890553395e-05, + "loss": 1.2761, + "step": 1705000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8881708939973385e-05, + "loss": 1.2937, + "step": 1705500 + }, + { + "epoch": 1.02, + "learning_rate": 4.887960897441282e-05, + "loss": 1.32, + "step": 1706000 + }, + { + "epoch": 1.02, + "learning_rate": 4.887751320878338e-05, + "loss": 1.3185, + "step": 1706500 + }, + { + "epoch": 1.02, + "learning_rate": 4.887541324322281e-05, + "loss": 1.3029, + "step": 1707000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8873313277662246e-05, + "loss": 1.3474, + "step": 1707500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8871213312101686e-05, + "loss": 1.302, + "step": 1708000 + }, + { + "epoch": 1.02, + "learning_rate": 4.886911754647224e-05, + "loss": 1.2993, + "step": 1708500 + }, + { + "epoch": 1.02, + "learning_rate": 4.886701758091167e-05, + "loss": 1.2953, + "step": 1709000 + }, + { + "epoch": 1.02, + "learning_rate": 4.886492181528223e-05, + "loss": 1.3249, + "step": 1709500 + }, + { + "epoch": 1.03, + "learning_rate": 4.886282184972166e-05, + "loss": 1.3487, + "step": 1710000 + }, + { + "epoch": 1.03, + "learning_rate": 4.88607218841611e-05, + "loss": 1.3395, + "step": 1710500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8858621918600534e-05, + "loss": 1.3049, + "step": 1711000 + }, + { + "epoch": 1.03, + "learning_rate": 4.885652195303997e-05, + "loss": 1.3145, + "step": 1711500 + }, + { + "epoch": 1.03, + "learning_rate": 4.885442198747941e-05, + "loss": 1.3196, + "step": 1712000 + }, + { + "epoch": 1.03, + "learning_rate": 4.885232202191884e-05, + "loss": 1.3258, + "step": 1712500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8850222056358274e-05, + "loss": 1.2732, + "step": 1713000 + }, + { + "epoch": 1.03, + "learning_rate": 4.884812209079771e-05, + "loss": 1.3186, + "step": 1713500 + }, + { + "epoch": 1.03, + "learning_rate": 4.884602212523714e-05, + "loss": 1.3046, + "step": 1714000 + }, + { + "epoch": 1.03, + "learning_rate": 4.884392215967658e-05, + "loss": 1.3048, + "step": 1714500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8841822194116015e-05, + "loss": 1.3228, + "step": 1715000 + }, + { + "epoch": 1.03, + "learning_rate": 4.883972222855545e-05, + "loss": 1.3315, + "step": 1715500 + }, + { + "epoch": 1.03, + "learning_rate": 4.883762226299489e-05, + "loss": 1.3386, + "step": 1716000 + }, + { + "epoch": 1.03, + "learning_rate": 4.883552229743432e-05, + "loss": 1.3365, + "step": 1716500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8833422331873756e-05, + "loss": 1.2803, + "step": 1717000 + }, + { + "epoch": 1.03, + "learning_rate": 4.8831322366313196e-05, + "loss": 1.2987, + "step": 1717500 + }, + { + "epoch": 1.03, + "learning_rate": 4.882923080061487e-05, + "loss": 1.3279, + "step": 1718000 + }, + { + "epoch": 1.03, + "learning_rate": 4.88271308350543e-05, + "loss": 1.3276, + "step": 1718500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8825030869493737e-05, + "loss": 1.2691, + "step": 1719000 + }, + { + "epoch": 1.03, + "learning_rate": 4.882293090393317e-05, + "loss": 1.3368, + "step": 1719500 + }, + { + "epoch": 1.03, + "learning_rate": 4.882083093837261e-05, + "loss": 1.3238, + "step": 1720000 + }, + { + "epoch": 1.03, + "learning_rate": 4.8818735172743164e-05, + "loss": 1.335, + "step": 1720500 + }, + { + "epoch": 1.03, + "learning_rate": 4.88166352071826e-05, + "loss": 1.2921, + "step": 1721000 + }, + { + "epoch": 1.03, + "learning_rate": 4.881453524162204e-05, + "loss": 1.3196, + "step": 1721500 + }, + { + "epoch": 1.03, + "learning_rate": 4.881243527606147e-05, + "loss": 1.2938, + "step": 1722000 + }, + { + "epoch": 1.03, + "learning_rate": 4.8810335310500904e-05, + "loss": 1.3096, + "step": 1722500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8808235344940345e-05, + "loss": 1.2825, + "step": 1723000 + }, + { + "epoch": 1.03, + "learning_rate": 4.88061395793109e-05, + "loss": 1.3051, + "step": 1723500 + }, + { + "epoch": 1.03, + "learning_rate": 4.880403961375033e-05, + "loss": 1.3161, + "step": 1724000 + }, + { + "epoch": 1.03, + "learning_rate": 4.8801939648189765e-05, + "loss": 1.2923, + "step": 1724500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8799843882560325e-05, + "loss": 1.3401, + "step": 1725000 + }, + { + "epoch": 1.03, + "learning_rate": 4.879774391699976e-05, + "loss": 1.3084, + "step": 1725500 + }, + { + "epoch": 1.03, + "learning_rate": 4.879564395143919e-05, + "loss": 1.316, + "step": 1726000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8793543985878626e-05, + "loss": 1.3236, + "step": 1726500 + }, + { + "epoch": 1.04, + "learning_rate": 4.8791444020318066e-05, + "loss": 1.3192, + "step": 1727000 + }, + { + "epoch": 1.04, + "learning_rate": 4.87893440547575e-05, + "loss": 1.298, + "step": 1727500 + }, + { + "epoch": 1.04, + "learning_rate": 4.878724828912805e-05, + "loss": 1.2932, + "step": 1728000 + }, + { + "epoch": 1.04, + "learning_rate": 4.878514832356749e-05, + "loss": 1.3344, + "step": 1728500 + }, + { + "epoch": 1.04, + "learning_rate": 4.878304835800693e-05, + "loss": 1.3266, + "step": 1729000 + }, + { + "epoch": 1.04, + "learning_rate": 4.878094839244636e-05, + "loss": 1.3025, + "step": 1729500 + }, + { + "epoch": 1.04, + "learning_rate": 4.87788484268858e-05, + "loss": 1.3232, + "step": 1730000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8776752661256354e-05, + "loss": 1.3291, + "step": 1730500 + }, + { + "epoch": 1.04, + "learning_rate": 4.877465269569579e-05, + "loss": 1.3149, + "step": 1731000 + }, + { + "epoch": 1.04, + "learning_rate": 4.877255273013522e-05, + "loss": 1.3003, + "step": 1731500 + }, + { + "epoch": 1.04, + "learning_rate": 4.877045276457466e-05, + "loss": 1.3323, + "step": 1732000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8768352799014095e-05, + "loss": 1.3121, + "step": 1732500 + }, + { + "epoch": 1.04, + "learning_rate": 4.876625283345353e-05, + "loss": 1.2915, + "step": 1733000 + }, + { + "epoch": 1.04, + "learning_rate": 4.876415286789297e-05, + "loss": 1.2812, + "step": 1733500 + }, + { + "epoch": 1.04, + "learning_rate": 4.87620529023324e-05, + "loss": 1.2925, + "step": 1734000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8759952936771835e-05, + "loss": 1.2923, + "step": 1734500 + }, + { + "epoch": 1.04, + "learning_rate": 4.8757852971211276e-05, + "loss": 1.3062, + "step": 1735000 + }, + { + "epoch": 1.04, + "learning_rate": 4.875575720558183e-05, + "loss": 1.308, + "step": 1735500 + }, + { + "epoch": 1.04, + "learning_rate": 4.875365724002126e-05, + "loss": 1.2999, + "step": 1736000 + }, + { + "epoch": 1.04, + "learning_rate": 4.87515572744607e-05, + "loss": 1.3146, + "step": 1736500 + }, + { + "epoch": 1.04, + "learning_rate": 4.8749457308900136e-05, + "loss": 1.3186, + "step": 1737000 + }, + { + "epoch": 1.04, + "learning_rate": 4.874735734333957e-05, + "loss": 1.3461, + "step": 1737500 + }, + { + "epoch": 1.04, + "learning_rate": 4.874526157771012e-05, + "loss": 1.2961, + "step": 1738000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8743161612149564e-05, + "loss": 1.2979, + "step": 1738500 + }, + { + "epoch": 1.04, + "learning_rate": 4.874106584652012e-05, + "loss": 1.3106, + "step": 1739000 + }, + { + "epoch": 1.04, + "learning_rate": 4.873896588095955e-05, + "loss": 1.3428, + "step": 1739500 + }, + { + "epoch": 1.04, + "learning_rate": 4.8736865915398984e-05, + "loss": 1.3389, + "step": 1740000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8734765949838424e-05, + "loss": 1.3272, + "step": 1740500 + }, + { + "epoch": 1.04, + "learning_rate": 4.873266598427786e-05, + "loss": 1.3012, + "step": 1741000 + }, + { + "epoch": 1.04, + "learning_rate": 4.873056601871729e-05, + "loss": 1.3113, + "step": 1741500 + }, + { + "epoch": 1.04, + "learning_rate": 4.872846605315673e-05, + "loss": 1.3202, + "step": 1742000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8726366087596165e-05, + "loss": 1.32, + "step": 1742500 + }, + { + "epoch": 1.04, + "learning_rate": 4.87242661220356e-05, + "loss": 1.303, + "step": 1743000 + }, + { + "epoch": 1.05, + "learning_rate": 4.872217035640616e-05, + "loss": 1.3142, + "step": 1743500 + }, + { + "epoch": 1.05, + "learning_rate": 4.872007039084559e-05, + "loss": 1.297, + "step": 1744000 + }, + { + "epoch": 1.05, + "learning_rate": 4.8717970425285026e-05, + "loss": 1.3163, + "step": 1744500 + }, + { + "epoch": 1.05, + "learning_rate": 4.8715870459724466e-05, + "loss": 1.3082, + "step": 1745000 + }, + { + "epoch": 1.05, + "learning_rate": 4.871377049416389e-05, + "loss": 1.2977, + "step": 1745500 + }, + { + "epoch": 1.05, + "learning_rate": 4.8711670528603326e-05, + "loss": 1.3109, + "step": 1746000 + }, + { + "epoch": 1.05, + "learning_rate": 4.8709574762973886e-05, + "loss": 1.309, + "step": 1746500 + }, + { + "epoch": 1.05, + "learning_rate": 4.8707474797413327e-05, + "loss": 1.2974, + "step": 1747000 + }, + { + "epoch": 1.05, + "learning_rate": 4.870537483185275e-05, + "loss": 1.3414, + "step": 1747500 + }, + { + "epoch": 1.05, + "learning_rate": 4.870327486629219e-05, + "loss": 1.341, + "step": 1748000 + }, + { + "epoch": 1.05, + "learning_rate": 4.870117490073163e-05, + "loss": 1.3161, + "step": 1748500 + }, + { + "epoch": 1.05, + "learning_rate": 4.869907493517106e-05, + "loss": 1.324, + "step": 1749000 + }, + { + "epoch": 1.05, + "learning_rate": 4.869697916954162e-05, + "loss": 1.3014, + "step": 1749500 + }, + { + "epoch": 1.05, + "learning_rate": 4.8694879203981054e-05, + "loss": 1.3037, + "step": 1750000 + }, + { + "epoch": 1.05, + "learning_rate": 4.869277923842049e-05, + "loss": 1.3504, + "step": 1750500 + }, + { + "epoch": 1.05, + "learning_rate": 4.869067927285992e-05, + "loss": 1.3312, + "step": 1751000 + }, + { + "epoch": 1.05, + "learning_rate": 4.868857930729936e-05, + "loss": 1.3136, + "step": 1751500 + }, + { + "epoch": 1.05, + "learning_rate": 4.868648354166992e-05, + "loss": 1.3492, + "step": 1752000 + }, + { + "epoch": 1.05, + "learning_rate": 4.868438357610935e-05, + "loss": 1.2909, + "step": 1752500 + }, + { + "epoch": 1.05, + "learning_rate": 4.868228361054878e-05, + "loss": 1.3058, + "step": 1753000 + }, + { + "epoch": 1.05, + "learning_rate": 4.868018364498822e-05, + "loss": 1.3117, + "step": 1753500 + }, + { + "epoch": 1.05, + "learning_rate": 4.867808787935878e-05, + "loss": 1.2739, + "step": 1754000 + }, + { + "epoch": 1.05, + "learning_rate": 4.8675987913798216e-05, + "loss": 1.3095, + "step": 1754500 + }, + { + "epoch": 1.05, + "learning_rate": 4.867388794823764e-05, + "loss": 1.308, + "step": 1755000 + }, + { + "epoch": 1.05, + "learning_rate": 4.867178798267708e-05, + "loss": 1.3201, + "step": 1755500 + }, + { + "epoch": 1.05, + "learning_rate": 4.8669688017116516e-05, + "loss": 1.2708, + "step": 1756000 + }, + { + "epoch": 1.05, + "learning_rate": 4.866759225148708e-05, + "loss": 1.3014, + "step": 1756500 + }, + { + "epoch": 1.05, + "learning_rate": 4.866549228592651e-05, + "loss": 1.3001, + "step": 1757000 + }, + { + "epoch": 1.05, + "learning_rate": 4.8663392320365944e-05, + "loss": 1.3107, + "step": 1757500 + }, + { + "epoch": 1.05, + "learning_rate": 4.866129235480538e-05, + "loss": 1.3307, + "step": 1758000 + }, + { + "epoch": 1.05, + "learning_rate": 4.865919238924482e-05, + "loss": 1.2898, + "step": 1758500 + }, + { + "epoch": 1.05, + "learning_rate": 4.865709242368425e-05, + "loss": 1.3414, + "step": 1759000 + }, + { + "epoch": 1.05, + "learning_rate": 4.8654996658054804e-05, + "loss": 1.3133, + "step": 1759500 + }, + { + "epoch": 1.06, + "learning_rate": 4.865289669249424e-05, + "loss": 1.3219, + "step": 1760000 + }, + { + "epoch": 1.06, + "learning_rate": 4.86508009268648e-05, + "loss": 1.3287, + "step": 1760500 + }, + { + "epoch": 1.06, + "learning_rate": 4.864870096130424e-05, + "loss": 1.3176, + "step": 1761000 + }, + { + "epoch": 1.06, + "learning_rate": 4.864660099574367e-05, + "loss": 1.3227, + "step": 1761500 + }, + { + "epoch": 1.06, + "learning_rate": 4.86445010301831e-05, + "loss": 1.3296, + "step": 1762000 + }, + { + "epoch": 1.06, + "learning_rate": 4.864240106462254e-05, + "loss": 1.3105, + "step": 1762500 + }, + { + "epoch": 1.06, + "learning_rate": 4.864030109906197e-05, + "loss": 1.3107, + "step": 1763000 + }, + { + "epoch": 1.06, + "learning_rate": 4.863820533343253e-05, + "loss": 1.3732, + "step": 1763500 + }, + { + "epoch": 1.06, + "learning_rate": 4.8636105367871966e-05, + "loss": 1.3197, + "step": 1764000 + }, + { + "epoch": 1.06, + "learning_rate": 4.86340054023114e-05, + "loss": 1.3488, + "step": 1764500 + }, + { + "epoch": 1.06, + "learning_rate": 4.863190543675083e-05, + "loss": 1.3445, + "step": 1765000 + }, + { + "epoch": 1.06, + "learning_rate": 4.862980547119027e-05, + "loss": 1.3561, + "step": 1765500 + }, + { + "epoch": 1.06, + "learning_rate": 4.862771390549195e-05, + "loss": 1.3153, + "step": 1766000 + }, + { + "epoch": 1.06, + "learning_rate": 4.862561393993139e-05, + "loss": 1.3101, + "step": 1766500 + }, + { + "epoch": 1.06, + "learning_rate": 4.862351397437082e-05, + "loss": 1.3025, + "step": 1767000 + }, + { + "epoch": 1.06, + "learning_rate": 4.8621414008810254e-05, + "loss": 1.3141, + "step": 1767500 + }, + { + "epoch": 1.06, + "learning_rate": 4.8619314043249694e-05, + "loss": 1.3163, + "step": 1768000 + }, + { + "epoch": 1.06, + "learning_rate": 4.861721407768913e-05, + "loss": 1.3264, + "step": 1768500 + }, + { + "epoch": 1.06, + "learning_rate": 4.8615114112128554e-05, + "loss": 1.2973, + "step": 1769000 + }, + { + "epoch": 1.06, + "learning_rate": 4.8613014146567995e-05, + "loss": 1.3279, + "step": 1769500 + }, + { + "epoch": 1.06, + "learning_rate": 4.8610918380938555e-05, + "loss": 1.3032, + "step": 1770000 + }, + { + "epoch": 1.06, + "learning_rate": 4.860881841537799e-05, + "loss": 1.3418, + "step": 1770500 + }, + { + "epoch": 1.06, + "learning_rate": 4.860671844981742e-05, + "loss": 1.3011, + "step": 1771000 + }, + { + "epoch": 1.06, + "learning_rate": 4.860462268418798e-05, + "loss": 1.3262, + "step": 1771500 + }, + { + "epoch": 1.06, + "learning_rate": 4.8602522718627416e-05, + "loss": 1.3351, + "step": 1772000 + }, + { + "epoch": 1.06, + "learning_rate": 4.860042275306685e-05, + "loss": 1.3056, + "step": 1772500 + }, + { + "epoch": 1.06, + "learning_rate": 4.859832278750629e-05, + "loss": 1.3281, + "step": 1773000 + }, + { + "epoch": 1.06, + "learning_rate": 4.859622282194572e-05, + "loss": 1.2955, + "step": 1773500 + }, + { + "epoch": 1.06, + "learning_rate": 4.859412285638515e-05, + "loss": 1.3369, + "step": 1774000 + }, + { + "epoch": 1.06, + "learning_rate": 4.859202289082459e-05, + "loss": 1.3167, + "step": 1774500 + }, + { + "epoch": 1.06, + "learning_rate": 4.858992292526402e-05, + "loss": 1.3314, + "step": 1775000 + }, + { + "epoch": 1.06, + "learning_rate": 4.858782295970346e-05, + "loss": 1.3354, + "step": 1775500 + }, + { + "epoch": 1.06, + "learning_rate": 4.85857229941429e-05, + "loss": 1.3033, + "step": 1776000 + }, + { + "epoch": 1.07, + "learning_rate": 4.858362302858233e-05, + "loss": 1.3206, + "step": 1776500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8581523063021764e-05, + "loss": 1.348, + "step": 1777000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8579423097461204e-05, + "loss": 1.3127, + "step": 1777500 + }, + { + "epoch": 1.07, + "learning_rate": 4.857732733183176e-05, + "loss": 1.3249, + "step": 1778000 + }, + { + "epoch": 1.07, + "learning_rate": 4.857523156620231e-05, + "loss": 1.2975, + "step": 1778500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8573131600641745e-05, + "loss": 1.3334, + "step": 1779000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8571031635081185e-05, + "loss": 1.3137, + "step": 1779500 + }, + { + "epoch": 1.07, + "learning_rate": 4.856893166952062e-05, + "loss": 1.3106, + "step": 1780000 + }, + { + "epoch": 1.07, + "learning_rate": 4.856683170396005e-05, + "loss": 1.3027, + "step": 1780500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8564735938330605e-05, + "loss": 1.2983, + "step": 1781000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8562635972770046e-05, + "loss": 1.2961, + "step": 1781500 + }, + { + "epoch": 1.07, + "learning_rate": 4.856053600720948e-05, + "loss": 1.3247, + "step": 1782000 + }, + { + "epoch": 1.07, + "learning_rate": 4.855843604164891e-05, + "loss": 1.3572, + "step": 1782500 + }, + { + "epoch": 1.07, + "learning_rate": 4.855633607608835e-05, + "loss": 1.3089, + "step": 1783000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8554236110527786e-05, + "loss": 1.3045, + "step": 1783500 + }, + { + "epoch": 1.07, + "learning_rate": 4.855213614496722e-05, + "loss": 1.3004, + "step": 1784000 + }, + { + "epoch": 1.07, + "learning_rate": 4.855003617940666e-05, + "loss": 1.3203, + "step": 1784500 + }, + { + "epoch": 1.07, + "learning_rate": 4.854793621384609e-05, + "loss": 1.3277, + "step": 1785000 + }, + { + "epoch": 1.07, + "learning_rate": 4.854583624828553e-05, + "loss": 1.343, + "step": 1785500 + }, + { + "epoch": 1.07, + "learning_rate": 4.854373628272497e-05, + "loss": 1.3062, + "step": 1786000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8541636317164394e-05, + "loss": 1.312, + "step": 1786500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8539540551534954e-05, + "loss": 1.3076, + "step": 1787000 + }, + { + "epoch": 1.07, + "learning_rate": 4.853744478590551e-05, + "loss": 1.3162, + "step": 1787500 + }, + { + "epoch": 1.07, + "learning_rate": 4.853534482034495e-05, + "loss": 1.3062, + "step": 1788000 + }, + { + "epoch": 1.07, + "learning_rate": 4.853324485478438e-05, + "loss": 1.3056, + "step": 1788500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8531144889223815e-05, + "loss": 1.3177, + "step": 1789000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8529044923663255e-05, + "loss": 1.3211, + "step": 1789500 + }, + { + "epoch": 1.07, + "learning_rate": 4.852694495810269e-05, + "loss": 1.3162, + "step": 1790000 + }, + { + "epoch": 1.07, + "learning_rate": 4.852484499254212e-05, + "loss": 1.3199, + "step": 1790500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8522749226912676e-05, + "loss": 1.3287, + "step": 1791000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8520649261352116e-05, + "loss": 1.3033, + "step": 1791500 + }, + { + "epoch": 1.07, + "learning_rate": 4.851854929579155e-05, + "loss": 1.3347, + "step": 1792000 + }, + { + "epoch": 1.07, + "learning_rate": 4.851644933023098e-05, + "loss": 1.3048, + "step": 1792500 + }, + { + "epoch": 1.07, + "learning_rate": 4.851434936467042e-05, + "loss": 1.3333, + "step": 1793000 + }, + { + "epoch": 1.08, + "learning_rate": 4.851224939910985e-05, + "loss": 1.2907, + "step": 1793500 + }, + { + "epoch": 1.08, + "learning_rate": 4.851014943354929e-05, + "loss": 1.3106, + "step": 1794000 + }, + { + "epoch": 1.08, + "learning_rate": 4.850804946798872e-05, + "loss": 1.3237, + "step": 1794500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8505953702359284e-05, + "loss": 1.3053, + "step": 1795000 + }, + { + "epoch": 1.08, + "learning_rate": 4.850385373679872e-05, + "loss": 1.3079, + "step": 1795500 + }, + { + "epoch": 1.08, + "learning_rate": 4.850175797116927e-05, + "loss": 1.3212, + "step": 1796000 + }, + { + "epoch": 1.08, + "learning_rate": 4.849965800560871e-05, + "loss": 1.3209, + "step": 1796500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8497558040048144e-05, + "loss": 1.3323, + "step": 1797000 + }, + { + "epoch": 1.08, + "learning_rate": 4.849545807448758e-05, + "loss": 1.3424, + "step": 1797500 + }, + { + "epoch": 1.08, + "learning_rate": 4.849335810892702e-05, + "loss": 1.3093, + "step": 1798000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8491258143366445e-05, + "loss": 1.2874, + "step": 1798500 + }, + { + "epoch": 1.08, + "learning_rate": 4.848915817780588e-05, + "loss": 1.338, + "step": 1799000 + }, + { + "epoch": 1.08, + "learning_rate": 4.848705821224532e-05, + "loss": 1.3179, + "step": 1799500 + }, + { + "epoch": 1.08, + "learning_rate": 4.848496244661588e-05, + "loss": 1.3104, + "step": 1800000 + }, + { + "epoch": 1.08, + "eval_loss": 1.270274043083191, + "eval_runtime": 1102.2881, + "eval_samples_per_second": 477.842, + "eval_steps_per_second": 79.641, + "step": 1800000 + }, + { + "epoch": 1.08, + "learning_rate": 4.848286668098643e-05, + "loss": 1.2991, + "step": 1800500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8480766715425866e-05, + "loss": 1.3134, + "step": 1801000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8478666749865306e-05, + "loss": 1.311, + "step": 1801500 + }, + { + "epoch": 1.08, + "learning_rate": 4.847656678430474e-05, + "loss": 1.3283, + "step": 1802000 + }, + { + "epoch": 1.08, + "learning_rate": 4.847447101867529e-05, + "loss": 1.3142, + "step": 1802500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8472371053114727e-05, + "loss": 1.3167, + "step": 1803000 + }, + { + "epoch": 1.08, + "learning_rate": 4.847027108755417e-05, + "loss": 1.287, + "step": 1803500 + }, + { + "epoch": 1.08, + "learning_rate": 4.84681711219936e-05, + "loss": 1.3142, + "step": 1804000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8466071156433034e-05, + "loss": 1.3273, + "step": 1804500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8463971190872474e-05, + "loss": 1.2969, + "step": 1805000 + }, + { + "epoch": 1.08, + "learning_rate": 4.846187542524303e-05, + "loss": 1.2837, + "step": 1805500 + }, + { + "epoch": 1.08, + "learning_rate": 4.845977545968246e-05, + "loss": 1.3223, + "step": 1806000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8457675494121894e-05, + "loss": 1.3271, + "step": 1806500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8455575528561335e-05, + "loss": 1.3276, + "step": 1807000 + }, + { + "epoch": 1.08, + "learning_rate": 4.845347556300077e-05, + "loss": 1.3072, + "step": 1807500 + }, + { + "epoch": 1.08, + "learning_rate": 4.84513755974402e-05, + "loss": 1.2955, + "step": 1808000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8449275631879635e-05, + "loss": 1.3455, + "step": 1808500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8447179866250195e-05, + "loss": 1.3101, + "step": 1809000 + }, + { + "epoch": 1.08, + "learning_rate": 4.844507990068963e-05, + "loss": 1.3247, + "step": 1809500 + }, + { + "epoch": 1.09, + "learning_rate": 4.844297993512907e-05, + "loss": 1.3299, + "step": 1810000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8440879969568496e-05, + "loss": 1.2931, + "step": 1810500 + }, + { + "epoch": 1.09, + "learning_rate": 4.843878000400793e-05, + "loss": 1.3211, + "step": 1811000 + }, + { + "epoch": 1.09, + "learning_rate": 4.843668423837849e-05, + "loss": 1.3119, + "step": 1811500 + }, + { + "epoch": 1.09, + "learning_rate": 4.843458427281793e-05, + "loss": 1.3255, + "step": 1812000 + }, + { + "epoch": 1.09, + "learning_rate": 4.843248430725736e-05, + "loss": 1.2941, + "step": 1812500 + }, + { + "epoch": 1.09, + "learning_rate": 4.843038434169679e-05, + "loss": 1.2795, + "step": 1813000 + }, + { + "epoch": 1.09, + "learning_rate": 4.842828437613623e-05, + "loss": 1.3158, + "step": 1813500 + }, + { + "epoch": 1.09, + "learning_rate": 4.842618861050679e-05, + "loss": 1.3327, + "step": 1814000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8424088644946224e-05, + "loss": 1.2865, + "step": 1814500 + }, + { + "epoch": 1.09, + "learning_rate": 4.842198867938566e-05, + "loss": 1.343, + "step": 1815000 + }, + { + "epoch": 1.09, + "learning_rate": 4.841988871382509e-05, + "loss": 1.3282, + "step": 1815500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8417788748264524e-05, + "loss": 1.3172, + "step": 1816000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8415688782703965e-05, + "loss": 1.3135, + "step": 1816500 + }, + { + "epoch": 1.09, + "learning_rate": 4.84135888171434e-05, + "loss": 1.3162, + "step": 1817000 + }, + { + "epoch": 1.09, + "learning_rate": 4.841149305151395e-05, + "loss": 1.2869, + "step": 1817500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8409393085953385e-05, + "loss": 1.3241, + "step": 1818000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8407293120392825e-05, + "loss": 1.3076, + "step": 1818500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8405197354763386e-05, + "loss": 1.3266, + "step": 1819000 + }, + { + "epoch": 1.09, + "learning_rate": 4.840309738920282e-05, + "loss": 1.3018, + "step": 1819500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8400997423642246e-05, + "loss": 1.3167, + "step": 1820000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8398897458081686e-05, + "loss": 1.328, + "step": 1820500 + }, + { + "epoch": 1.09, + "learning_rate": 4.839679749252112e-05, + "loss": 1.3197, + "step": 1821000 + }, + { + "epoch": 1.09, + "learning_rate": 4.839469752696055e-05, + "loss": 1.3096, + "step": 1821500 + }, + { + "epoch": 1.09, + "learning_rate": 4.839259756139999e-05, + "loss": 1.3213, + "step": 1822000 + }, + { + "epoch": 1.09, + "learning_rate": 4.839049759583943e-05, + "loss": 1.3035, + "step": 1822500 + }, + { + "epoch": 1.09, + "learning_rate": 4.838839763027886e-05, + "loss": 1.3237, + "step": 1823000 + }, + { + "epoch": 1.09, + "learning_rate": 4.83862976647183e-05, + "loss": 1.3145, + "step": 1823500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8384201899088854e-05, + "loss": 1.342, + "step": 1824000 + }, + { + "epoch": 1.09, + "learning_rate": 4.838210193352829e-05, + "loss": 1.3149, + "step": 1824500 + }, + { + "epoch": 1.09, + "learning_rate": 4.838000196796773e-05, + "loss": 1.3287, + "step": 1825000 + }, + { + "epoch": 1.09, + "learning_rate": 4.837790200240716e-05, + "loss": 1.3375, + "step": 1825500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8375802036846595e-05, + "loss": 1.3295, + "step": 1826000 + }, + { + "epoch": 1.1, + "learning_rate": 4.837370627121715e-05, + "loss": 1.3244, + "step": 1826500 + }, + { + "epoch": 1.1, + "learning_rate": 4.837160630565659e-05, + "loss": 1.3153, + "step": 1827000 + }, + { + "epoch": 1.1, + "learning_rate": 4.836950634009602e-05, + "loss": 1.3397, + "step": 1827500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8367406374535455e-05, + "loss": 1.2851, + "step": 1828000 + }, + { + "epoch": 1.1, + "learning_rate": 4.8365306408974896e-05, + "loss": 1.3149, + "step": 1828500 + }, + { + "epoch": 1.1, + "learning_rate": 4.836321064334545e-05, + "loss": 1.3114, + "step": 1829000 + }, + { + "epoch": 1.1, + "learning_rate": 4.836111067778488e-05, + "loss": 1.3317, + "step": 1829500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8359010712224316e-05, + "loss": 1.3038, + "step": 1830000 + }, + { + "epoch": 1.1, + "learning_rate": 4.8356910746663756e-05, + "loss": 1.3123, + "step": 1830500 + }, + { + "epoch": 1.1, + "learning_rate": 4.835481078110319e-05, + "loss": 1.3107, + "step": 1831000 + }, + { + "epoch": 1.1, + "learning_rate": 4.835271081554262e-05, + "loss": 1.2752, + "step": 1831500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8350610849982063e-05, + "loss": 1.2903, + "step": 1832000 + }, + { + "epoch": 1.1, + "learning_rate": 4.834851088442149e-05, + "loss": 1.2883, + "step": 1832500 + }, + { + "epoch": 1.1, + "learning_rate": 4.834641511879205e-05, + "loss": 1.3159, + "step": 1833000 + }, + { + "epoch": 1.1, + "learning_rate": 4.834431515323149e-05, + "loss": 1.3215, + "step": 1833500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8342215187670924e-05, + "loss": 1.3161, + "step": 1834000 + }, + { + "epoch": 1.1, + "learning_rate": 4.834011522211036e-05, + "loss": 1.3116, + "step": 1834500 + }, + { + "epoch": 1.1, + "learning_rate": 4.833801525654979e-05, + "loss": 1.3314, + "step": 1835000 + }, + { + "epoch": 1.1, + "learning_rate": 4.833591949092035e-05, + "loss": 1.2894, + "step": 1835500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8333819525359785e-05, + "loss": 1.3186, + "step": 1836000 + }, + { + "epoch": 1.1, + "learning_rate": 4.833171955979922e-05, + "loss": 1.3133, + "step": 1836500 + }, + { + "epoch": 1.1, + "learning_rate": 4.832961959423866e-05, + "loss": 1.2812, + "step": 1837000 + }, + { + "epoch": 1.1, + "learning_rate": 4.832752802854033e-05, + "loss": 1.3165, + "step": 1837500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8325428062979766e-05, + "loss": 1.3302, + "step": 1838000 + }, + { + "epoch": 1.1, + "learning_rate": 4.83233280974192e-05, + "loss": 1.2881, + "step": 1838500 + }, + { + "epoch": 1.1, + "learning_rate": 4.832122813185864e-05, + "loss": 1.2915, + "step": 1839000 + }, + { + "epoch": 1.1, + "learning_rate": 4.831912816629807e-05, + "loss": 1.3224, + "step": 1839500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8317028200737506e-05, + "loss": 1.3222, + "step": 1840000 + }, + { + "epoch": 1.1, + "learning_rate": 4.8314928235176947e-05, + "loss": 1.3011, + "step": 1840500 + }, + { + "epoch": 1.1, + "learning_rate": 4.831282826961638e-05, + "loss": 1.2879, + "step": 1841000 + }, + { + "epoch": 1.1, + "learning_rate": 4.8310732503986934e-05, + "loss": 1.2982, + "step": 1841500 + }, + { + "epoch": 1.1, + "learning_rate": 4.830863253842637e-05, + "loss": 1.3242, + "step": 1842000 + }, + { + "epoch": 1.1, + "learning_rate": 4.830653677279692e-05, + "loss": 1.3228, + "step": 1842500 + }, + { + "epoch": 1.1, + "learning_rate": 4.830443680723636e-05, + "loss": 1.3108, + "step": 1843000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8302336841675794e-05, + "loss": 1.3055, + "step": 1843500 + }, + { + "epoch": 1.11, + "learning_rate": 4.830024107604635e-05, + "loss": 1.2989, + "step": 1844000 + }, + { + "epoch": 1.11, + "learning_rate": 4.829814111048579e-05, + "loss": 1.3046, + "step": 1844500 + }, + { + "epoch": 1.11, + "learning_rate": 4.829604114492522e-05, + "loss": 1.3159, + "step": 1845000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8293941179364655e-05, + "loss": 1.3203, + "step": 1845500 + }, + { + "epoch": 1.11, + "learning_rate": 4.8291841213804095e-05, + "loss": 1.3156, + "step": 1846000 + }, + { + "epoch": 1.11, + "learning_rate": 4.828974124824353e-05, + "loss": 1.3104, + "step": 1846500 + }, + { + "epoch": 1.11, + "learning_rate": 4.828764128268296e-05, + "loss": 1.3332, + "step": 1847000 + }, + { + "epoch": 1.11, + "learning_rate": 4.82855413171224e-05, + "loss": 1.3068, + "step": 1847500 + }, + { + "epoch": 1.11, + "learning_rate": 4.8283441351561836e-05, + "loss": 1.3318, + "step": 1848000 + }, + { + "epoch": 1.11, + "learning_rate": 4.828134138600127e-05, + "loss": 1.327, + "step": 1848500 + }, + { + "epoch": 1.11, + "learning_rate": 4.827924142044071e-05, + "loss": 1.3263, + "step": 1849000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8277141454880136e-05, + "loss": 1.2962, + "step": 1849500 + }, + { + "epoch": 1.11, + "learning_rate": 4.82750456892507e-05, + "loss": 1.2958, + "step": 1850000 + }, + { + "epoch": 1.11, + "learning_rate": 4.827294992362125e-05, + "loss": 1.307, + "step": 1850500 + }, + { + "epoch": 1.11, + "learning_rate": 4.8270854157991804e-05, + "loss": 1.3069, + "step": 1851000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8268754192431244e-05, + "loss": 1.3428, + "step": 1851500 + }, + { + "epoch": 1.11, + "learning_rate": 4.826665422687068e-05, + "loss": 1.3033, + "step": 1852000 + }, + { + "epoch": 1.11, + "learning_rate": 4.826455426131011e-05, + "loss": 1.3086, + "step": 1852500 + }, + { + "epoch": 1.11, + "learning_rate": 4.826245429574955e-05, + "loss": 1.2995, + "step": 1853000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8260354330188985e-05, + "loss": 1.3417, + "step": 1853500 + }, + { + "epoch": 1.11, + "learning_rate": 4.825825436462842e-05, + "loss": 1.3133, + "step": 1854000 + }, + { + "epoch": 1.11, + "learning_rate": 4.825615439906786e-05, + "loss": 1.2966, + "step": 1854500 + }, + { + "epoch": 1.11, + "learning_rate": 4.825405863343841e-05, + "loss": 1.2957, + "step": 1855000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8251958667877845e-05, + "loss": 1.3265, + "step": 1855500 + }, + { + "epoch": 1.11, + "learning_rate": 4.824985870231728e-05, + "loss": 1.325, + "step": 1856000 + }, + { + "epoch": 1.11, + "learning_rate": 4.824775873675672e-05, + "loss": 1.3063, + "step": 1856500 + }, + { + "epoch": 1.11, + "learning_rate": 4.824565877119615e-05, + "loss": 1.3177, + "step": 1857000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8243558805635586e-05, + "loss": 1.3145, + "step": 1857500 + }, + { + "epoch": 1.11, + "learning_rate": 4.8241458840075026e-05, + "loss": 1.3118, + "step": 1858000 + }, + { + "epoch": 1.11, + "learning_rate": 4.823936307444558e-05, + "loss": 1.3287, + "step": 1858500 + }, + { + "epoch": 1.11, + "learning_rate": 4.823726310888501e-05, + "loss": 1.3177, + "step": 1859000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8235163143324453e-05, + "loss": 1.338, + "step": 1859500 + }, + { + "epoch": 1.12, + "learning_rate": 4.823306317776389e-05, + "loss": 1.297, + "step": 1860000 + }, + { + "epoch": 1.12, + "learning_rate": 4.823096321220332e-05, + "loss": 1.3539, + "step": 1860500 + }, + { + "epoch": 1.12, + "learning_rate": 4.8228863246642754e-05, + "loss": 1.3059, + "step": 1861000 + }, + { + "epoch": 1.12, + "learning_rate": 4.822676328108219e-05, + "loss": 1.2969, + "step": 1861500 + }, + { + "epoch": 1.12, + "learning_rate": 4.822466331552162e-05, + "loss": 1.2838, + "step": 1862000 + }, + { + "epoch": 1.12, + "learning_rate": 4.822256334996106e-05, + "loss": 1.282, + "step": 1862500 + }, + { + "epoch": 1.12, + "learning_rate": 4.822046758433162e-05, + "loss": 1.3411, + "step": 1863000 + }, + { + "epoch": 1.12, + "learning_rate": 4.821836761877105e-05, + "loss": 1.3203, + "step": 1863500 + }, + { + "epoch": 1.12, + "learning_rate": 4.821626765321048e-05, + "loss": 1.3352, + "step": 1864000 + }, + { + "epoch": 1.12, + "learning_rate": 4.821416768764992e-05, + "loss": 1.3241, + "step": 1864500 + }, + { + "epoch": 1.12, + "learning_rate": 4.8212067722089355e-05, + "loss": 1.3355, + "step": 1865000 + }, + { + "epoch": 1.12, + "learning_rate": 4.820996775652879e-05, + "loss": 1.3013, + "step": 1865500 + }, + { + "epoch": 1.12, + "learning_rate": 4.820786779096823e-05, + "loss": 1.3189, + "step": 1866000 + }, + { + "epoch": 1.12, + "learning_rate": 4.820577202533878e-05, + "loss": 1.2824, + "step": 1866500 + }, + { + "epoch": 1.12, + "learning_rate": 4.820367625970934e-05, + "loss": 1.3141, + "step": 1867000 + }, + { + "epoch": 1.12, + "learning_rate": 4.8201576294148776e-05, + "loss": 1.3096, + "step": 1867500 + }, + { + "epoch": 1.12, + "learning_rate": 4.8199476328588217e-05, + "loss": 1.2998, + "step": 1868000 + }, + { + "epoch": 1.12, + "learning_rate": 4.819737636302764e-05, + "loss": 1.3126, + "step": 1868500 + }, + { + "epoch": 1.12, + "learning_rate": 4.819527639746708e-05, + "loss": 1.3126, + "step": 1869000 + }, + { + "epoch": 1.12, + "learning_rate": 4.819317643190652e-05, + "loss": 1.3258, + "step": 1869500 + }, + { + "epoch": 1.12, + "learning_rate": 4.819107646634595e-05, + "loss": 1.2946, + "step": 1870000 + }, + { + "epoch": 1.12, + "learning_rate": 4.8188976500785384e-05, + "loss": 1.3413, + "step": 1870500 + }, + { + "epoch": 1.12, + "learning_rate": 4.8186876535224824e-05, + "loss": 1.3415, + "step": 1871000 + }, + { + "epoch": 1.12, + "learning_rate": 4.818477656966426e-05, + "loss": 1.3305, + "step": 1871500 + }, + { + "epoch": 1.12, + "learning_rate": 4.818267660410369e-05, + "loss": 1.3167, + "step": 1872000 + }, + { + "epoch": 1.12, + "learning_rate": 4.8180580838474245e-05, + "loss": 1.2958, + "step": 1872500 + }, + { + "epoch": 1.12, + "learning_rate": 4.8178480872913685e-05, + "loss": 1.3007, + "step": 1873000 + }, + { + "epoch": 1.12, + "learning_rate": 4.817638090735312e-05, + "loss": 1.2843, + "step": 1873500 + }, + { + "epoch": 1.12, + "learning_rate": 4.817428094179255e-05, + "loss": 1.3086, + "step": 1874000 + }, + { + "epoch": 1.12, + "learning_rate": 4.817218097623199e-05, + "loss": 1.2905, + "step": 1874500 + }, + { + "epoch": 1.12, + "learning_rate": 4.8170081010671425e-05, + "loss": 1.3166, + "step": 1875000 + }, + { + "epoch": 1.12, + "learning_rate": 4.816798104511086e-05, + "loss": 1.2994, + "step": 1875500 + }, + { + "epoch": 1.12, + "learning_rate": 4.816588107955029e-05, + "loss": 1.3281, + "step": 1876000 + }, + { + "epoch": 1.13, + "learning_rate": 4.816378531392085e-05, + "loss": 1.2857, + "step": 1876500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8161689548291406e-05, + "loss": 1.3176, + "step": 1877000 + }, + { + "epoch": 1.13, + "learning_rate": 4.815958958273084e-05, + "loss": 1.2986, + "step": 1877500 + }, + { + "epoch": 1.13, + "learning_rate": 4.815748961717028e-05, + "loss": 1.3129, + "step": 1878000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8155393851540834e-05, + "loss": 1.3136, + "step": 1878500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8153298085911394e-05, + "loss": 1.3019, + "step": 1879000 + }, + { + "epoch": 1.13, + "learning_rate": 4.815119812035083e-05, + "loss": 1.3076, + "step": 1879500 + }, + { + "epoch": 1.13, + "learning_rate": 4.814909815479026e-05, + "loss": 1.297, + "step": 1880000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8146998189229694e-05, + "loss": 1.3049, + "step": 1880500 + }, + { + "epoch": 1.13, + "learning_rate": 4.814489822366913e-05, + "loss": 1.3001, + "step": 1881000 + }, + { + "epoch": 1.13, + "learning_rate": 4.814279825810857e-05, + "loss": 1.3266, + "step": 1881500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8140698292548e-05, + "loss": 1.3026, + "step": 1882000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8138598326987435e-05, + "loss": 1.3462, + "step": 1882500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8136498361426875e-05, + "loss": 1.326, + "step": 1883000 + }, + { + "epoch": 1.13, + "learning_rate": 4.813439839586631e-05, + "loss": 1.3078, + "step": 1883500 + }, + { + "epoch": 1.13, + "learning_rate": 4.813229843030574e-05, + "loss": 1.3208, + "step": 1884000 + }, + { + "epoch": 1.13, + "learning_rate": 4.813019846474518e-05, + "loss": 1.3004, + "step": 1884500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8128098499184616e-05, + "loss": 1.316, + "step": 1885000 + }, + { + "epoch": 1.13, + "learning_rate": 4.812599853362405e-05, + "loss": 1.2925, + "step": 1885500 + }, + { + "epoch": 1.13, + "learning_rate": 4.812389856806348e-05, + "loss": 1.3095, + "step": 1886000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8121798602502916e-05, + "loss": 1.3264, + "step": 1886500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8119702836873476e-05, + "loss": 1.2915, + "step": 1887000 + }, + { + "epoch": 1.13, + "learning_rate": 4.811760707124403e-05, + "loss": 1.3195, + "step": 1887500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8115511305614584e-05, + "loss": 1.2971, + "step": 1888000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8113411340054024e-05, + "loss": 1.2841, + "step": 1888500 + }, + { + "epoch": 1.13, + "learning_rate": 4.811131137449346e-05, + "loss": 1.329, + "step": 1889000 + }, + { + "epoch": 1.13, + "learning_rate": 4.810921140893289e-05, + "loss": 1.2787, + "step": 1889500 + }, + { + "epoch": 1.13, + "learning_rate": 4.810711144337233e-05, + "loss": 1.3185, + "step": 1890000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8105015677742885e-05, + "loss": 1.3155, + "step": 1890500 + }, + { + "epoch": 1.13, + "learning_rate": 4.810291571218232e-05, + "loss": 1.3171, + "step": 1891000 + }, + { + "epoch": 1.13, + "learning_rate": 4.810081574662175e-05, + "loss": 1.2975, + "step": 1891500 + }, + { + "epoch": 1.13, + "learning_rate": 4.809871578106119e-05, + "loss": 1.3036, + "step": 1892000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8096615815500625e-05, + "loss": 1.3092, + "step": 1892500 + }, + { + "epoch": 1.13, + "learning_rate": 4.809452004987118e-05, + "loss": 1.3449, + "step": 1893000 + }, + { + "epoch": 1.14, + "learning_rate": 4.809242428424174e-05, + "loss": 1.3265, + "step": 1893500 + }, + { + "epoch": 1.14, + "learning_rate": 4.809032431868117e-05, + "loss": 1.3099, + "step": 1894000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8088224353120606e-05, + "loss": 1.3159, + "step": 1894500 + }, + { + "epoch": 1.14, + "learning_rate": 4.808612438756004e-05, + "loss": 1.3047, + "step": 1895000 + }, + { + "epoch": 1.14, + "learning_rate": 4.808402442199948e-05, + "loss": 1.3174, + "step": 1895500 + }, + { + "epoch": 1.14, + "learning_rate": 4.808192445643891e-05, + "loss": 1.3332, + "step": 1896000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8079824490878347e-05, + "loss": 1.2987, + "step": 1896500 + }, + { + "epoch": 1.14, + "learning_rate": 4.807772452531779e-05, + "loss": 1.2993, + "step": 1897000 + }, + { + "epoch": 1.14, + "learning_rate": 4.807562455975722e-05, + "loss": 1.3207, + "step": 1897500 + }, + { + "epoch": 1.14, + "learning_rate": 4.8073524594196654e-05, + "loss": 1.275, + "step": 1898000 + }, + { + "epoch": 1.14, + "learning_rate": 4.807142882856721e-05, + "loss": 1.3002, + "step": 1898500 + }, + { + "epoch": 1.14, + "learning_rate": 4.806932886300665e-05, + "loss": 1.3351, + "step": 1899000 + }, + { + "epoch": 1.14, + "learning_rate": 4.806722889744608e-05, + "loss": 1.2986, + "step": 1899500 + }, + { + "epoch": 1.14, + "learning_rate": 4.8065128931885514e-05, + "loss": 1.3284, + "step": 1900000 + }, + { + "epoch": 1.14, + "eval_loss": 1.2703109979629517, + "eval_runtime": 1101.196, + "eval_samples_per_second": 478.316, + "eval_steps_per_second": 79.72, + "step": 1900000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8063028966324955e-05, + "loss": 1.2887, + "step": 1900500 + }, + { + "epoch": 1.14, + "learning_rate": 4.806092900076439e-05, + "loss": 1.3282, + "step": 1901000 + }, + { + "epoch": 1.14, + "learning_rate": 4.805882903520382e-05, + "loss": 1.3289, + "step": 1901500 + }, + { + "epoch": 1.14, + "learning_rate": 4.8056733269574375e-05, + "loss": 1.3267, + "step": 1902000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8054633304013815e-05, + "loss": 1.3294, + "step": 1902500 + }, + { + "epoch": 1.14, + "learning_rate": 4.805253333845325e-05, + "loss": 1.2941, + "step": 1903000 + }, + { + "epoch": 1.14, + "learning_rate": 4.805043337289268e-05, + "loss": 1.2935, + "step": 1903500 + }, + { + "epoch": 1.14, + "learning_rate": 4.804833340733212e-05, + "loss": 1.3163, + "step": 1904000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8046233441771556e-05, + "loss": 1.3318, + "step": 1904500 + }, + { + "epoch": 1.14, + "learning_rate": 4.804413767614211e-05, + "loss": 1.3048, + "step": 1905000 + }, + { + "epoch": 1.14, + "learning_rate": 4.804203771058155e-05, + "loss": 1.334, + "step": 1905500 + }, + { + "epoch": 1.14, + "learning_rate": 4.803993774502098e-05, + "loss": 1.2887, + "step": 1906000 + }, + { + "epoch": 1.14, + "learning_rate": 4.803783777946042e-05, + "loss": 1.292, + "step": 1906500 + }, + { + "epoch": 1.14, + "learning_rate": 4.803573781389985e-05, + "loss": 1.3307, + "step": 1907000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8033637848339284e-05, + "loss": 1.2953, + "step": 1907500 + }, + { + "epoch": 1.14, + "learning_rate": 4.803153788277872e-05, + "loss": 1.2881, + "step": 1908000 + }, + { + "epoch": 1.14, + "learning_rate": 4.802944211714928e-05, + "loss": 1.3179, + "step": 1908500 + }, + { + "epoch": 1.14, + "learning_rate": 4.802734635151983e-05, + "loss": 1.3172, + "step": 1909000 + }, + { + "epoch": 1.14, + "learning_rate": 4.802524638595927e-05, + "loss": 1.3109, + "step": 1909500 + }, + { + "epoch": 1.15, + "learning_rate": 4.8023146420398705e-05, + "loss": 1.289, + "step": 1910000 + }, + { + "epoch": 1.15, + "learning_rate": 4.8021046454838145e-05, + "loss": 1.2974, + "step": 1910500 + }, + { + "epoch": 1.15, + "learning_rate": 4.801894648927758e-05, + "loss": 1.3036, + "step": 1911000 + }, + { + "epoch": 1.15, + "learning_rate": 4.801684652371701e-05, + "loss": 1.2854, + "step": 1911500 + }, + { + "epoch": 1.15, + "learning_rate": 4.8014746558156445e-05, + "loss": 1.2929, + "step": 1912000 + }, + { + "epoch": 1.15, + "learning_rate": 4.801264659259588e-05, + "loss": 1.3062, + "step": 1912500 + }, + { + "epoch": 1.15, + "learning_rate": 4.801054662703531e-05, + "loss": 1.3065, + "step": 1913000 + }, + { + "epoch": 1.15, + "learning_rate": 4.800844666147475e-05, + "loss": 1.2574, + "step": 1913500 + }, + { + "epoch": 1.15, + "learning_rate": 4.800635089584531e-05, + "loss": 1.3001, + "step": 1914000 + }, + { + "epoch": 1.15, + "learning_rate": 4.800425093028474e-05, + "loss": 1.2838, + "step": 1914500 + }, + { + "epoch": 1.15, + "learning_rate": 4.800215096472417e-05, + "loss": 1.306, + "step": 1915000 + }, + { + "epoch": 1.15, + "learning_rate": 4.800005099916361e-05, + "loss": 1.3196, + "step": 1915500 + }, + { + "epoch": 1.15, + "learning_rate": 4.799795103360305e-05, + "loss": 1.3106, + "step": 1916000 + }, + { + "epoch": 1.15, + "learning_rate": 4.799585106804248e-05, + "loss": 1.3306, + "step": 1916500 + }, + { + "epoch": 1.15, + "learning_rate": 4.799375110248192e-05, + "loss": 1.3111, + "step": 1917000 + }, + { + "epoch": 1.15, + "learning_rate": 4.7991651136921354e-05, + "loss": 1.3301, + "step": 1917500 + }, + { + "epoch": 1.15, + "learning_rate": 4.798955117136079e-05, + "loss": 1.2936, + "step": 1918000 + }, + { + "epoch": 1.15, + "learning_rate": 4.798745120580023e-05, + "loss": 1.3296, + "step": 1918500 + }, + { + "epoch": 1.15, + "learning_rate": 4.798535124023966e-05, + "loss": 1.3108, + "step": 1919000 + }, + { + "epoch": 1.15, + "learning_rate": 4.7983251274679095e-05, + "loss": 1.3224, + "step": 1919500 + }, + { + "epoch": 1.15, + "learning_rate": 4.7981155509049655e-05, + "loss": 1.2901, + "step": 1920000 + }, + { + "epoch": 1.15, + "learning_rate": 4.797905554348909e-05, + "loss": 1.3028, + "step": 1920500 + }, + { + "epoch": 1.15, + "learning_rate": 4.797695977785964e-05, + "loss": 1.3334, + "step": 1921000 + }, + { + "epoch": 1.15, + "learning_rate": 4.7974859812299075e-05, + "loss": 1.3049, + "step": 1921500 + }, + { + "epoch": 1.15, + "learning_rate": 4.7972759846738516e-05, + "loss": 1.2857, + "step": 1922000 + }, + { + "epoch": 1.15, + "learning_rate": 4.797065988117795e-05, + "loss": 1.3086, + "step": 1922500 + }, + { + "epoch": 1.15, + "learning_rate": 4.796855991561738e-05, + "loss": 1.3116, + "step": 1923000 + }, + { + "epoch": 1.15, + "learning_rate": 4.796645995005682e-05, + "loss": 1.3488, + "step": 1923500 + }, + { + "epoch": 1.15, + "learning_rate": 4.7964359984496256e-05, + "loss": 1.3126, + "step": 1924000 + }, + { + "epoch": 1.15, + "learning_rate": 4.796226421886681e-05, + "loss": 1.3128, + "step": 1924500 + }, + { + "epoch": 1.15, + "learning_rate": 4.796016425330624e-05, + "loss": 1.3525, + "step": 1925000 + }, + { + "epoch": 1.15, + "learning_rate": 4.7958064287745683e-05, + "loss": 1.3269, + "step": 1925500 + }, + { + "epoch": 1.15, + "learning_rate": 4.795596852211624e-05, + "loss": 1.3106, + "step": 1926000 + }, + { + "epoch": 1.16, + "learning_rate": 4.795386855655567e-05, + "loss": 1.3043, + "step": 1926500 + }, + { + "epoch": 1.16, + "learning_rate": 4.795176859099511e-05, + "loss": 1.3259, + "step": 1927000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7949668625434544e-05, + "loss": 1.3182, + "step": 1927500 + }, + { + "epoch": 1.16, + "learning_rate": 4.794756865987398e-05, + "loss": 1.3094, + "step": 1928000 + }, + { + "epoch": 1.16, + "learning_rate": 4.794547289424453e-05, + "loss": 1.3304, + "step": 1928500 + }, + { + "epoch": 1.16, + "learning_rate": 4.794337292868397e-05, + "loss": 1.3227, + "step": 1929000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7941272963123405e-05, + "loss": 1.3197, + "step": 1929500 + }, + { + "epoch": 1.16, + "learning_rate": 4.793917719749396e-05, + "loss": 1.3179, + "step": 1930000 + }, + { + "epoch": 1.16, + "learning_rate": 4.793707723193339e-05, + "loss": 1.3055, + "step": 1930500 + }, + { + "epoch": 1.16, + "learning_rate": 4.793497726637283e-05, + "loss": 1.3084, + "step": 1931000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7932877300812266e-05, + "loss": 1.2987, + "step": 1931500 + }, + { + "epoch": 1.16, + "learning_rate": 4.79307773352517e-05, + "loss": 1.3032, + "step": 1932000 + }, + { + "epoch": 1.16, + "learning_rate": 4.792867736969114e-05, + "loss": 1.3378, + "step": 1932500 + }, + { + "epoch": 1.16, + "learning_rate": 4.792657740413057e-05, + "loss": 1.3174, + "step": 1933000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7924481638501126e-05, + "loss": 1.3101, + "step": 1933500 + }, + { + "epoch": 1.16, + "learning_rate": 4.792238167294057e-05, + "loss": 1.3142, + "step": 1934000 + }, + { + "epoch": 1.16, + "learning_rate": 4.792028170738e-05, + "loss": 1.3066, + "step": 1934500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7918181741819434e-05, + "loss": 1.3203, + "step": 1935000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7916081776258874e-05, + "loss": 1.3089, + "step": 1935500 + }, + { + "epoch": 1.16, + "learning_rate": 4.791398181069831e-05, + "loss": 1.3238, + "step": 1936000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7911881845137734e-05, + "loss": 1.3104, + "step": 1936500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7909781879577174e-05, + "loss": 1.3061, + "step": 1937000 + }, + { + "epoch": 1.16, + "learning_rate": 4.790768191401661e-05, + "loss": 1.3394, + "step": 1937500 + }, + { + "epoch": 1.16, + "learning_rate": 4.790558194845604e-05, + "loss": 1.3253, + "step": 1938000 + }, + { + "epoch": 1.16, + "learning_rate": 4.790348198289548e-05, + "loss": 1.3198, + "step": 1938500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7901386217266035e-05, + "loss": 1.307, + "step": 1939000 + }, + { + "epoch": 1.16, + "learning_rate": 4.789928625170547e-05, + "loss": 1.2937, + "step": 1939500 + }, + { + "epoch": 1.16, + "learning_rate": 4.78971862861449e-05, + "loss": 1.2981, + "step": 1940000 + }, + { + "epoch": 1.16, + "learning_rate": 4.789508632058434e-05, + "loss": 1.3396, + "step": 1940500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7892986355023775e-05, + "loss": 1.3233, + "step": 1941000 + }, + { + "epoch": 1.16, + "learning_rate": 4.789089058939433e-05, + "loss": 1.2959, + "step": 1941500 + }, + { + "epoch": 1.16, + "learning_rate": 4.788879062383377e-05, + "loss": 1.3036, + "step": 1942000 + }, + { + "epoch": 1.16, + "learning_rate": 4.78866906582732e-05, + "loss": 1.319, + "step": 1942500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7884590692712636e-05, + "loss": 1.2911, + "step": 1943000 + }, + { + "epoch": 1.17, + "learning_rate": 4.7882490727152076e-05, + "loss": 1.3313, + "step": 1943500 + }, + { + "epoch": 1.17, + "learning_rate": 4.788039076159151e-05, + "loss": 1.308, + "step": 1944000 + }, + { + "epoch": 1.17, + "learning_rate": 4.787829079603094e-05, + "loss": 1.3128, + "step": 1944500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7876190830470384e-05, + "loss": 1.2849, + "step": 1945000 + }, + { + "epoch": 1.17, + "learning_rate": 4.787409506484094e-05, + "loss": 1.3079, + "step": 1945500 + }, + { + "epoch": 1.17, + "learning_rate": 4.787199509928037e-05, + "loss": 1.2978, + "step": 1946000 + }, + { + "epoch": 1.17, + "learning_rate": 4.7869895133719804e-05, + "loss": 1.3392, + "step": 1946500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7867795168159244e-05, + "loss": 1.303, + "step": 1947000 + }, + { + "epoch": 1.17, + "learning_rate": 4.786569520259868e-05, + "loss": 1.2798, + "step": 1947500 + }, + { + "epoch": 1.17, + "learning_rate": 4.786359523703811e-05, + "loss": 1.3084, + "step": 1948000 + }, + { + "epoch": 1.17, + "learning_rate": 4.786149527147755e-05, + "loss": 1.3264, + "step": 1948500 + }, + { + "epoch": 1.17, + "learning_rate": 4.785939530591698e-05, + "loss": 1.2991, + "step": 1949000 + }, + { + "epoch": 1.17, + "learning_rate": 4.785729954028754e-05, + "loss": 1.3265, + "step": 1949500 + }, + { + "epoch": 1.17, + "learning_rate": 4.785519957472697e-05, + "loss": 1.2822, + "step": 1950000 + }, + { + "epoch": 1.17, + "learning_rate": 4.785309960916641e-05, + "loss": 1.3209, + "step": 1950500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7851003843536966e-05, + "loss": 1.3073, + "step": 1951000 + }, + { + "epoch": 1.17, + "learning_rate": 4.78489038779764e-05, + "loss": 1.3184, + "step": 1951500 + }, + { + "epoch": 1.17, + "learning_rate": 4.784680391241584e-05, + "loss": 1.2972, + "step": 1952000 + }, + { + "epoch": 1.17, + "learning_rate": 4.784470394685527e-05, + "loss": 1.3109, + "step": 1952500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7842603981294706e-05, + "loss": 1.3136, + "step": 1953000 + }, + { + "epoch": 1.17, + "learning_rate": 4.784050401573415e-05, + "loss": 1.2922, + "step": 1953500 + }, + { + "epoch": 1.17, + "learning_rate": 4.78384082501047e-05, + "loss": 1.326, + "step": 1954000 + }, + { + "epoch": 1.17, + "learning_rate": 4.7836308284544134e-05, + "loss": 1.3251, + "step": 1954500 + }, + { + "epoch": 1.17, + "learning_rate": 4.783420831898357e-05, + "loss": 1.3091, + "step": 1955000 + }, + { + "epoch": 1.17, + "learning_rate": 4.783210835342301e-05, + "loss": 1.2829, + "step": 1955500 + }, + { + "epoch": 1.17, + "learning_rate": 4.783001258779356e-05, + "loss": 1.3083, + "step": 1956000 + }, + { + "epoch": 1.17, + "learning_rate": 4.7827912622232994e-05, + "loss": 1.2976, + "step": 1956500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7825812656672435e-05, + "loss": 1.3159, + "step": 1957000 + }, + { + "epoch": 1.17, + "learning_rate": 4.782371269111187e-05, + "loss": 1.318, + "step": 1957500 + }, + { + "epoch": 1.17, + "learning_rate": 4.78216127255513e-05, + "loss": 1.3248, + "step": 1958000 + }, + { + "epoch": 1.17, + "learning_rate": 4.781951275999074e-05, + "loss": 1.3238, + "step": 1958500 + }, + { + "epoch": 1.17, + "learning_rate": 4.781741279443017e-05, + "loss": 1.31, + "step": 1959000 + }, + { + "epoch": 1.17, + "learning_rate": 4.781531702880073e-05, + "loss": 1.2805, + "step": 1959500 + }, + { + "epoch": 1.18, + "learning_rate": 4.781321706324016e-05, + "loss": 1.3078, + "step": 1960000 + }, + { + "epoch": 1.18, + "learning_rate": 4.78111170976796e-05, + "loss": 1.3041, + "step": 1960500 + }, + { + "epoch": 1.18, + "learning_rate": 4.780901713211903e-05, + "loss": 1.3363, + "step": 1961000 + }, + { + "epoch": 1.18, + "learning_rate": 4.780691716655846e-05, + "loss": 1.3228, + "step": 1961500 + }, + { + "epoch": 1.18, + "learning_rate": 4.78048172009979e-05, + "loss": 1.302, + "step": 1962000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7802717235437336e-05, + "loss": 1.3133, + "step": 1962500 + }, + { + "epoch": 1.18, + "learning_rate": 4.780061726987677e-05, + "loss": 1.3215, + "step": 1963000 + }, + { + "epoch": 1.18, + "learning_rate": 4.779851730431621e-05, + "loss": 1.3024, + "step": 1963500 + }, + { + "epoch": 1.18, + "learning_rate": 4.7796421538686764e-05, + "loss": 1.3371, + "step": 1964000 + }, + { + "epoch": 1.18, + "learning_rate": 4.77943215731262e-05, + "loss": 1.3041, + "step": 1964500 + }, + { + "epoch": 1.18, + "learning_rate": 4.779222160756564e-05, + "loss": 1.3172, + "step": 1965000 + }, + { + "epoch": 1.18, + "learning_rate": 4.779012164200507e-05, + "loss": 1.3548, + "step": 1965500 + }, + { + "epoch": 1.18, + "learning_rate": 4.7788021676444504e-05, + "loss": 1.3079, + "step": 1966000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7785921710883944e-05, + "loss": 1.3098, + "step": 1966500 + }, + { + "epoch": 1.18, + "learning_rate": 4.77838259452545e-05, + "loss": 1.2921, + "step": 1967000 + }, + { + "epoch": 1.18, + "learning_rate": 4.778172597969393e-05, + "loss": 1.2957, + "step": 1967500 + }, + { + "epoch": 1.18, + "learning_rate": 4.7779626014133365e-05, + "loss": 1.3378, + "step": 1968000 + }, + { + "epoch": 1.18, + "learning_rate": 4.777753024850392e-05, + "loss": 1.306, + "step": 1968500 + }, + { + "epoch": 1.18, + "learning_rate": 4.777543028294336e-05, + "loss": 1.3239, + "step": 1969000 + }, + { + "epoch": 1.18, + "learning_rate": 4.777333451731392e-05, + "loss": 1.3367, + "step": 1969500 + }, + { + "epoch": 1.18, + "learning_rate": 4.777123455175335e-05, + "loss": 1.3068, + "step": 1970000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7769134586192786e-05, + "loss": 1.3171, + "step": 1970500 + }, + { + "epoch": 1.18, + "learning_rate": 4.776703462063222e-05, + "loss": 1.2998, + "step": 1971000 + }, + { + "epoch": 1.18, + "learning_rate": 4.776493465507165e-05, + "loss": 1.3171, + "step": 1971500 + }, + { + "epoch": 1.18, + "learning_rate": 4.776283468951109e-05, + "loss": 1.31, + "step": 1972000 + }, + { + "epoch": 1.18, + "learning_rate": 4.776073472395053e-05, + "loss": 1.2945, + "step": 1972500 + }, + { + "epoch": 1.18, + "learning_rate": 4.775863475838996e-05, + "loss": 1.336, + "step": 1973000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7756538992760514e-05, + "loss": 1.2934, + "step": 1973500 + }, + { + "epoch": 1.18, + "learning_rate": 4.7754439027199954e-05, + "loss": 1.2931, + "step": 1974000 + }, + { + "epoch": 1.18, + "learning_rate": 4.775233906163939e-05, + "loss": 1.2957, + "step": 1974500 + }, + { + "epoch": 1.18, + "learning_rate": 4.775023909607882e-05, + "loss": 1.2815, + "step": 1975000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7748143330449374e-05, + "loss": 1.3078, + "step": 1975500 + }, + { + "epoch": 1.18, + "learning_rate": 4.7746043364888815e-05, + "loss": 1.3086, + "step": 1976000 + }, + { + "epoch": 1.18, + "learning_rate": 4.774394339932825e-05, + "loss": 1.3303, + "step": 1976500 + }, + { + "epoch": 1.19, + "learning_rate": 4.774184343376768e-05, + "loss": 1.2859, + "step": 1977000 + }, + { + "epoch": 1.19, + "learning_rate": 4.773974346820712e-05, + "loss": 1.3045, + "step": 1977500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7737643502646555e-05, + "loss": 1.2856, + "step": 1978000 + }, + { + "epoch": 1.19, + "learning_rate": 4.773554353708599e-05, + "loss": 1.3258, + "step": 1978500 + }, + { + "epoch": 1.19, + "learning_rate": 4.773344357152543e-05, + "loss": 1.3055, + "step": 1979000 + }, + { + "epoch": 1.19, + "learning_rate": 4.773134360596486e-05, + "loss": 1.3127, + "step": 1979500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7729243640404296e-05, + "loss": 1.3333, + "step": 1980000 + }, + { + "epoch": 1.19, + "learning_rate": 4.7727143674843736e-05, + "loss": 1.3097, + "step": 1980500 + }, + { + "epoch": 1.19, + "learning_rate": 4.772504370928316e-05, + "loss": 1.3109, + "step": 1981000 + }, + { + "epoch": 1.19, + "learning_rate": 4.772294794365372e-05, + "loss": 1.3105, + "step": 1981500 + }, + { + "epoch": 1.19, + "learning_rate": 4.772084797809316e-05, + "loss": 1.3171, + "step": 1982000 + }, + { + "epoch": 1.19, + "learning_rate": 4.77187480125326e-05, + "loss": 1.3272, + "step": 1982500 + }, + { + "epoch": 1.19, + "learning_rate": 4.771664804697203e-05, + "loss": 1.3086, + "step": 1983000 + }, + { + "epoch": 1.19, + "learning_rate": 4.7714548081411464e-05, + "loss": 1.3165, + "step": 1983500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7712452315782024e-05, + "loss": 1.298, + "step": 1984000 + }, + { + "epoch": 1.19, + "learning_rate": 4.771035235022146e-05, + "loss": 1.3231, + "step": 1984500 + }, + { + "epoch": 1.19, + "learning_rate": 4.770825238466089e-05, + "loss": 1.3212, + "step": 1985000 + }, + { + "epoch": 1.19, + "learning_rate": 4.7706152419100324e-05, + "loss": 1.2968, + "step": 1985500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7704056653470885e-05, + "loss": 1.3224, + "step": 1986000 + }, + { + "epoch": 1.19, + "learning_rate": 4.770195668791032e-05, + "loss": 1.3082, + "step": 1986500 + }, + { + "epoch": 1.19, + "learning_rate": 4.769985672234975e-05, + "loss": 1.2747, + "step": 1987000 + }, + { + "epoch": 1.19, + "learning_rate": 4.769776095672031e-05, + "loss": 1.2951, + "step": 1987500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7695660991159746e-05, + "loss": 1.2899, + "step": 1988000 + }, + { + "epoch": 1.19, + "learning_rate": 4.769356102559918e-05, + "loss": 1.3238, + "step": 1988500 + }, + { + "epoch": 1.19, + "learning_rate": 4.769146106003862e-05, + "loss": 1.3103, + "step": 1989000 + }, + { + "epoch": 1.19, + "learning_rate": 4.768936109447805e-05, + "loss": 1.3129, + "step": 1989500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7687261128917486e-05, + "loss": 1.3034, + "step": 1990000 + }, + { + "epoch": 1.19, + "learning_rate": 4.768516116335692e-05, + "loss": 1.3063, + "step": 1990500 + }, + { + "epoch": 1.19, + "learning_rate": 4.768306119779635e-05, + "loss": 1.2932, + "step": 1991000 + }, + { + "epoch": 1.19, + "learning_rate": 4.7680961232235787e-05, + "loss": 1.3289, + "step": 1991500 + }, + { + "epoch": 1.19, + "learning_rate": 4.767886126667523e-05, + "loss": 1.3119, + "step": 1992000 + }, + { + "epoch": 1.19, + "learning_rate": 4.767676130111466e-05, + "loss": 1.2969, + "step": 1992500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7674665535485214e-05, + "loss": 1.3076, + "step": 1993000 + }, + { + "epoch": 1.2, + "learning_rate": 4.767256556992465e-05, + "loss": 1.3159, + "step": 1993500 + }, + { + "epoch": 1.2, + "learning_rate": 4.767046980429521e-05, + "loss": 1.2829, + "step": 1994000 + }, + { + "epoch": 1.2, + "learning_rate": 4.766836983873465e-05, + "loss": 1.2993, + "step": 1994500 + }, + { + "epoch": 1.2, + "learning_rate": 4.766626987317408e-05, + "loss": 1.3246, + "step": 1995000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7664169907613515e-05, + "loss": 1.2838, + "step": 1995500 + }, + { + "epoch": 1.2, + "learning_rate": 4.766206994205295e-05, + "loss": 1.3062, + "step": 1996000 + }, + { + "epoch": 1.2, + "learning_rate": 4.765997417642351e-05, + "loss": 1.3042, + "step": 1996500 + }, + { + "epoch": 1.2, + "learning_rate": 4.765787421086294e-05, + "loss": 1.3223, + "step": 1997000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7655774245302376e-05, + "loss": 1.2662, + "step": 1997500 + }, + { + "epoch": 1.2, + "learning_rate": 4.765367427974181e-05, + "loss": 1.3111, + "step": 1998000 + }, + { + "epoch": 1.2, + "learning_rate": 4.765157431418124e-05, + "loss": 1.3282, + "step": 1998500 + }, + { + "epoch": 1.2, + "learning_rate": 4.764947434862068e-05, + "loss": 1.2805, + "step": 1999000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7647374383060116e-05, + "loss": 1.2951, + "step": 1999500 + }, + { + "epoch": 1.2, + "learning_rate": 4.764527441749955e-05, + "loss": 1.3019, + "step": 2000000 + }, + { + "epoch": 1.2, + "eval_loss": 1.2635042667388916, + "eval_runtime": 1110.2941, + "eval_samples_per_second": 474.397, + "eval_steps_per_second": 79.066, + "step": 2000000 + }, + { + "epoch": 1.2, + "learning_rate": 4.764317445193899e-05, + "loss": 1.3184, + "step": 2000500 + }, + { + "epoch": 1.2, + "learning_rate": 4.764107868630954e-05, + "loss": 1.3177, + "step": 2001000 + }, + { + "epoch": 1.2, + "learning_rate": 4.763897872074898e-05, + "loss": 1.2882, + "step": 2001500 + }, + { + "epoch": 1.2, + "learning_rate": 4.763687875518841e-05, + "loss": 1.2976, + "step": 2002000 + }, + { + "epoch": 1.2, + "learning_rate": 4.763477878962785e-05, + "loss": 1.3246, + "step": 2002500 + }, + { + "epoch": 1.2, + "learning_rate": 4.7632678824067284e-05, + "loss": 1.3032, + "step": 2003000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7630578858506724e-05, + "loss": 1.3143, + "step": 2003500 + }, + { + "epoch": 1.2, + "learning_rate": 4.762847889294616e-05, + "loss": 1.3001, + "step": 2004000 + }, + { + "epoch": 1.2, + "learning_rate": 4.762637892738559e-05, + "loss": 1.3211, + "step": 2004500 + }, + { + "epoch": 1.2, + "learning_rate": 4.7624283161756145e-05, + "loss": 1.3073, + "step": 2005000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7622183196195585e-05, + "loss": 1.309, + "step": 2005500 + }, + { + "epoch": 1.2, + "learning_rate": 4.762008743056614e-05, + "loss": 1.3084, + "step": 2006000 + }, + { + "epoch": 1.2, + "learning_rate": 4.761798746500557e-05, + "loss": 1.3333, + "step": 2006500 + }, + { + "epoch": 1.2, + "learning_rate": 4.7615887499445005e-05, + "loss": 1.2914, + "step": 2007000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7613787533884446e-05, + "loss": 1.2925, + "step": 2007500 + }, + { + "epoch": 1.2, + "learning_rate": 4.761168756832388e-05, + "loss": 1.309, + "step": 2008000 + }, + { + "epoch": 1.2, + "learning_rate": 4.760958760276331e-05, + "loss": 1.3079, + "step": 2008500 + }, + { + "epoch": 1.2, + "learning_rate": 4.760748763720275e-05, + "loss": 1.3071, + "step": 2009000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7605391871573306e-05, + "loss": 1.3132, + "step": 2009500 + }, + { + "epoch": 1.21, + "learning_rate": 4.760329610594386e-05, + "loss": 1.2999, + "step": 2010000 + }, + { + "epoch": 1.21, + "learning_rate": 4.7601196140383293e-05, + "loss": 1.3272, + "step": 2010500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7599096174822734e-05, + "loss": 1.3097, + "step": 2011000 + }, + { + "epoch": 1.21, + "learning_rate": 4.759699620926217e-05, + "loss": 1.2843, + "step": 2011500 + }, + { + "epoch": 1.21, + "learning_rate": 4.75948962437016e-05, + "loss": 1.3241, + "step": 2012000 + }, + { + "epoch": 1.21, + "learning_rate": 4.759279627814104e-05, + "loss": 1.3017, + "step": 2012500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7590696312580474e-05, + "loss": 1.3028, + "step": 2013000 + }, + { + "epoch": 1.21, + "learning_rate": 4.758859634701991e-05, + "loss": 1.2988, + "step": 2013500 + }, + { + "epoch": 1.21, + "learning_rate": 4.758649638145935e-05, + "loss": 1.3152, + "step": 2014000 + }, + { + "epoch": 1.21, + "learning_rate": 4.75844006158299e-05, + "loss": 1.3362, + "step": 2014500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7582300650269335e-05, + "loss": 1.3022, + "step": 2015000 + }, + { + "epoch": 1.21, + "learning_rate": 4.758020068470877e-05, + "loss": 1.2848, + "step": 2015500 + }, + { + "epoch": 1.21, + "learning_rate": 4.757810071914821e-05, + "loss": 1.3017, + "step": 2016000 + }, + { + "epoch": 1.21, + "learning_rate": 4.757600075358764e-05, + "loss": 1.3187, + "step": 2016500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7573900788027076e-05, + "loss": 1.2904, + "step": 2017000 + }, + { + "epoch": 1.21, + "learning_rate": 4.757180082246651e-05, + "loss": 1.3214, + "step": 2017500 + }, + { + "epoch": 1.21, + "learning_rate": 4.756970085690594e-05, + "loss": 1.2859, + "step": 2018000 + }, + { + "epoch": 1.21, + "learning_rate": 4.756760929120762e-05, + "loss": 1.3173, + "step": 2018500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7565509325647056e-05, + "loss": 1.3088, + "step": 2019000 + }, + { + "epoch": 1.21, + "learning_rate": 4.75634093600865e-05, + "loss": 1.3237, + "step": 2019500 + }, + { + "epoch": 1.21, + "learning_rate": 4.756130939452593e-05, + "loss": 1.3135, + "step": 2020000 + }, + { + "epoch": 1.21, + "learning_rate": 4.7559209428965364e-05, + "loss": 1.315, + "step": 2020500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7557109463404804e-05, + "loss": 1.3039, + "step": 2021000 + }, + { + "epoch": 1.21, + "learning_rate": 4.755500949784424e-05, + "loss": 1.3214, + "step": 2021500 + }, + { + "epoch": 1.21, + "learning_rate": 4.755291373221479e-05, + "loss": 1.2855, + "step": 2022000 + }, + { + "epoch": 1.21, + "learning_rate": 4.7550813766654224e-05, + "loss": 1.3199, + "step": 2022500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7548713801093665e-05, + "loss": 1.3314, + "step": 2023000 + }, + { + "epoch": 1.21, + "learning_rate": 4.75466138355331e-05, + "loss": 1.3271, + "step": 2023500 + }, + { + "epoch": 1.21, + "learning_rate": 4.754451386997253e-05, + "loss": 1.3423, + "step": 2024000 + }, + { + "epoch": 1.21, + "learning_rate": 4.7542413904411965e-05, + "loss": 1.323, + "step": 2024500 + }, + { + "epoch": 1.21, + "learning_rate": 4.75403139388514e-05, + "loss": 1.3235, + "step": 2025000 + }, + { + "epoch": 1.21, + "learning_rate": 4.753821397329084e-05, + "loss": 1.3155, + "step": 2025500 + }, + { + "epoch": 1.21, + "learning_rate": 4.75361182076614e-05, + "loss": 1.293, + "step": 2026000 + }, + { + "epoch": 1.21, + "learning_rate": 4.753402244203195e-05, + "loss": 1.3104, + "step": 2026500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7531922476471386e-05, + "loss": 1.3364, + "step": 2027000 + }, + { + "epoch": 1.22, + "learning_rate": 4.752982251091082e-05, + "loss": 1.279, + "step": 2027500 + }, + { + "epoch": 1.22, + "learning_rate": 4.752772254535026e-05, + "loss": 1.3082, + "step": 2028000 + }, + { + "epoch": 1.22, + "learning_rate": 4.752562257978969e-05, + "loss": 1.3133, + "step": 2028500 + }, + { + "epoch": 1.22, + "learning_rate": 4.752353101409137e-05, + "loss": 1.2863, + "step": 2029000 + }, + { + "epoch": 1.22, + "learning_rate": 4.75214310485308e-05, + "loss": 1.2735, + "step": 2029500 + }, + { + "epoch": 1.22, + "learning_rate": 4.751933108297024e-05, + "loss": 1.3269, + "step": 2030000 + }, + { + "epoch": 1.22, + "learning_rate": 4.7517231117409674e-05, + "loss": 1.3441, + "step": 2030500 + }, + { + "epoch": 1.22, + "learning_rate": 4.751513115184911e-05, + "loss": 1.3173, + "step": 2031000 + }, + { + "epoch": 1.22, + "learning_rate": 4.751303118628855e-05, + "loss": 1.328, + "step": 2031500 + }, + { + "epoch": 1.22, + "learning_rate": 4.751093122072798e-05, + "loss": 1.2935, + "step": 2032000 + }, + { + "epoch": 1.22, + "learning_rate": 4.7508831255167415e-05, + "loss": 1.2998, + "step": 2032500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7506731289606855e-05, + "loss": 1.2958, + "step": 2033000 + }, + { + "epoch": 1.22, + "learning_rate": 4.750463132404629e-05, + "loss": 1.293, + "step": 2033500 + }, + { + "epoch": 1.22, + "learning_rate": 4.750253975834796e-05, + "loss": 1.3184, + "step": 2034000 + }, + { + "epoch": 1.22, + "learning_rate": 4.7500439792787396e-05, + "loss": 1.3178, + "step": 2034500 + }, + { + "epoch": 1.22, + "learning_rate": 4.749833982722683e-05, + "loss": 1.2973, + "step": 2035000 + }, + { + "epoch": 1.22, + "learning_rate": 4.749623986166627e-05, + "loss": 1.3073, + "step": 2035500 + }, + { + "epoch": 1.22, + "learning_rate": 4.74941398961057e-05, + "loss": 1.3038, + "step": 2036000 + }, + { + "epoch": 1.22, + "learning_rate": 4.7492039930545136e-05, + "loss": 1.3156, + "step": 2036500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7489939964984576e-05, + "loss": 1.3153, + "step": 2037000 + }, + { + "epoch": 1.22, + "learning_rate": 4.748783999942401e-05, + "loss": 1.2881, + "step": 2037500 + }, + { + "epoch": 1.22, + "learning_rate": 4.748574003386344e-05, + "loss": 1.3233, + "step": 2038000 + }, + { + "epoch": 1.22, + "learning_rate": 4.7483640068302884e-05, + "loss": 1.3069, + "step": 2038500 + }, + { + "epoch": 1.22, + "learning_rate": 4.748154010274231e-05, + "loss": 1.2989, + "step": 2039000 + }, + { + "epoch": 1.22, + "learning_rate": 4.747944013718175e-05, + "loss": 1.3174, + "step": 2039500 + }, + { + "epoch": 1.22, + "learning_rate": 4.747734437155231e-05, + "loss": 1.2975, + "step": 2040000 + }, + { + "epoch": 1.22, + "learning_rate": 4.7475244405991744e-05, + "loss": 1.3095, + "step": 2040500 + }, + { + "epoch": 1.22, + "learning_rate": 4.747314444043118e-05, + "loss": 1.296, + "step": 2041000 + }, + { + "epoch": 1.22, + "learning_rate": 4.747104867480173e-05, + "loss": 1.3202, + "step": 2041500 + }, + { + "epoch": 1.22, + "learning_rate": 4.746894870924117e-05, + "loss": 1.3006, + "step": 2042000 + }, + { + "epoch": 1.22, + "learning_rate": 4.7466848743680605e-05, + "loss": 1.3178, + "step": 2042500 + }, + { + "epoch": 1.22, + "learning_rate": 4.746474877812004e-05, + "loss": 1.3315, + "step": 2043000 + }, + { + "epoch": 1.23, + "learning_rate": 4.746264881255947e-05, + "loss": 1.326, + "step": 2043500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7460548846998905e-05, + "loss": 1.3472, + "step": 2044000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7458453081369466e-05, + "loss": 1.2894, + "step": 2044500 + }, + { + "epoch": 1.23, + "learning_rate": 4.74563531158089e-05, + "loss": 1.3077, + "step": 2045000 + }, + { + "epoch": 1.23, + "learning_rate": 4.745425315024834e-05, + "loss": 1.3049, + "step": 2045500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7452153184687766e-05, + "loss": 1.3121, + "step": 2046000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7450053219127206e-05, + "loss": 1.3135, + "step": 2046500 + }, + { + "epoch": 1.23, + "learning_rate": 4.744795325356664e-05, + "loss": 1.2927, + "step": 2047000 + }, + { + "epoch": 1.23, + "learning_rate": 4.744585328800607e-05, + "loss": 1.3167, + "step": 2047500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7443757522376634e-05, + "loss": 1.3012, + "step": 2048000 + }, + { + "epoch": 1.23, + "learning_rate": 4.744165755681607e-05, + "loss": 1.2855, + "step": 2048500 + }, + { + "epoch": 1.23, + "learning_rate": 4.74395575912555e-05, + "loss": 1.3016, + "step": 2049000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7437457625694934e-05, + "loss": 1.2813, + "step": 2049500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7435357660134374e-05, + "loss": 1.304, + "step": 2050000 + }, + { + "epoch": 1.23, + "learning_rate": 4.743325769457381e-05, + "loss": 1.3497, + "step": 2050500 + }, + { + "epoch": 1.23, + "learning_rate": 4.743116192894436e-05, + "loss": 1.3306, + "step": 2051000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7429061963383795e-05, + "loss": 1.3091, + "step": 2051500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7426961997823235e-05, + "loss": 1.3144, + "step": 2052000 + }, + { + "epoch": 1.23, + "learning_rate": 4.742486203226267e-05, + "loss": 1.303, + "step": 2052500 + }, + { + "epoch": 1.23, + "learning_rate": 4.74227620667021e-05, + "loss": 1.3204, + "step": 2053000 + }, + { + "epoch": 1.23, + "learning_rate": 4.742066210114154e-05, + "loss": 1.2898, + "step": 2053500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7418562135580976e-05, + "loss": 1.3483, + "step": 2054000 + }, + { + "epoch": 1.23, + "learning_rate": 4.741646217002041e-05, + "loss": 1.2907, + "step": 2054500 + }, + { + "epoch": 1.23, + "learning_rate": 4.741436640439097e-05, + "loss": 1.3236, + "step": 2055000 + }, + { + "epoch": 1.23, + "learning_rate": 4.74122664388304e-05, + "loss": 1.3411, + "step": 2055500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7410166473269836e-05, + "loss": 1.3246, + "step": 2056000 + }, + { + "epoch": 1.23, + "learning_rate": 4.740807070764039e-05, + "loss": 1.2847, + "step": 2056500 + }, + { + "epoch": 1.23, + "learning_rate": 4.740597074207983e-05, + "loss": 1.3262, + "step": 2057000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7403870776519264e-05, + "loss": 1.2978, + "step": 2057500 + }, + { + "epoch": 1.23, + "learning_rate": 4.74017708109587e-05, + "loss": 1.3048, + "step": 2058000 + }, + { + "epoch": 1.23, + "learning_rate": 4.739967084539814e-05, + "loss": 1.3204, + "step": 2058500 + }, + { + "epoch": 1.23, + "learning_rate": 4.739757507976869e-05, + "loss": 1.3146, + "step": 2059000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7395475114208124e-05, + "loss": 1.2996, + "step": 2059500 + }, + { + "epoch": 1.24, + "learning_rate": 4.739337514864756e-05, + "loss": 1.3284, + "step": 2060000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7391275183087e-05, + "loss": 1.3154, + "step": 2060500 + }, + { + "epoch": 1.24, + "learning_rate": 4.738917941745755e-05, + "loss": 1.2837, + "step": 2061000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7387079451896985e-05, + "loss": 1.2931, + "step": 2061500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7384979486336425e-05, + "loss": 1.285, + "step": 2062000 + }, + { + "epoch": 1.24, + "learning_rate": 4.738287952077586e-05, + "loss": 1.2819, + "step": 2062500 + }, + { + "epoch": 1.24, + "learning_rate": 4.738077955521529e-05, + "loss": 1.328, + "step": 2063000 + }, + { + "epoch": 1.24, + "learning_rate": 4.737867958965473e-05, + "loss": 1.3267, + "step": 2063500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7376579624094166e-05, + "loss": 1.3084, + "step": 2064000 + }, + { + "epoch": 1.24, + "learning_rate": 4.73744796585336e-05, + "loss": 1.3122, + "step": 2064500 + }, + { + "epoch": 1.24, + "learning_rate": 4.737237969297304e-05, + "loss": 1.3371, + "step": 2065000 + }, + { + "epoch": 1.24, + "learning_rate": 4.737028392734359e-05, + "loss": 1.3154, + "step": 2065500 + }, + { + "epoch": 1.24, + "learning_rate": 4.736818816171415e-05, + "loss": 1.3268, + "step": 2066000 + }, + { + "epoch": 1.24, + "learning_rate": 4.736608819615358e-05, + "loss": 1.2935, + "step": 2066500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7363988230593014e-05, + "loss": 1.3219, + "step": 2067000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7361888265032454e-05, + "loss": 1.3166, + "step": 2067500 + }, + { + "epoch": 1.24, + "learning_rate": 4.735979249940301e-05, + "loss": 1.3136, + "step": 2068000 + }, + { + "epoch": 1.24, + "learning_rate": 4.735769253384244e-05, + "loss": 1.2952, + "step": 2068500 + }, + { + "epoch": 1.24, + "learning_rate": 4.735559256828188e-05, + "loss": 1.2956, + "step": 2069000 + }, + { + "epoch": 1.24, + "learning_rate": 4.735349680265244e-05, + "loss": 1.3205, + "step": 2069500 + }, + { + "epoch": 1.24, + "learning_rate": 4.735139683709187e-05, + "loss": 1.3191, + "step": 2070000 + }, + { + "epoch": 1.24, + "learning_rate": 4.73492968715313e-05, + "loss": 1.2867, + "step": 2070500 + }, + { + "epoch": 1.24, + "learning_rate": 4.734719690597074e-05, + "loss": 1.3288, + "step": 2071000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7345096940410175e-05, + "loss": 1.2941, + "step": 2071500 + }, + { + "epoch": 1.24, + "learning_rate": 4.734299697484961e-05, + "loss": 1.2976, + "step": 2072000 + }, + { + "epoch": 1.24, + "learning_rate": 4.734089700928905e-05, + "loss": 1.3322, + "step": 2072500 + }, + { + "epoch": 1.24, + "learning_rate": 4.733879704372848e-05, + "loss": 1.3032, + "step": 2073000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7336697078167916e-05, + "loss": 1.3135, + "step": 2073500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7334597112607356e-05, + "loss": 1.2917, + "step": 2074000 + }, + { + "epoch": 1.24, + "learning_rate": 4.733249714704679e-05, + "loss": 1.3004, + "step": 2074500 + }, + { + "epoch": 1.24, + "learning_rate": 4.733039718148622e-05, + "loss": 1.3116, + "step": 2075000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7328301415856783e-05, + "loss": 1.2761, + "step": 2075500 + }, + { + "epoch": 1.24, + "learning_rate": 4.732620145029622e-05, + "loss": 1.343, + "step": 2076000 + }, + { + "epoch": 1.24, + "learning_rate": 4.732410148473565e-05, + "loss": 1.3075, + "step": 2076500 + }, + { + "epoch": 1.25, + "learning_rate": 4.732200151917509e-05, + "loss": 1.2946, + "step": 2077000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7319901553614524e-05, + "loss": 1.3313, + "step": 2077500 + }, + { + "epoch": 1.25, + "learning_rate": 4.731780158805395e-05, + "loss": 1.3118, + "step": 2078000 + }, + { + "epoch": 1.25, + "learning_rate": 4.731570162249339e-05, + "loss": 1.307, + "step": 2078500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7313601656932824e-05, + "loss": 1.2916, + "step": 2079000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7311510091234505e-05, + "loss": 1.3316, + "step": 2079500 + }, + { + "epoch": 1.25, + "learning_rate": 4.730941012567394e-05, + "loss": 1.2948, + "step": 2080000 + }, + { + "epoch": 1.25, + "learning_rate": 4.730731016011337e-05, + "loss": 1.2968, + "step": 2080500 + }, + { + "epoch": 1.25, + "learning_rate": 4.730521019455281e-05, + "loss": 1.2793, + "step": 2081000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7303110228992245e-05, + "loss": 1.3118, + "step": 2081500 + }, + { + "epoch": 1.25, + "learning_rate": 4.730101026343168e-05, + "loss": 1.2868, + "step": 2082000 + }, + { + "epoch": 1.25, + "learning_rate": 4.729891029787111e-05, + "loss": 1.2819, + "step": 2082500 + }, + { + "epoch": 1.25, + "learning_rate": 4.729681453224167e-05, + "loss": 1.2951, + "step": 2083000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7294714566681106e-05, + "loss": 1.2996, + "step": 2083500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7292614601120546e-05, + "loss": 1.2857, + "step": 2084000 + }, + { + "epoch": 1.25, + "learning_rate": 4.729051463555998e-05, + "loss": 1.2822, + "step": 2084500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7288414669999407e-05, + "loss": 1.3085, + "step": 2085000 + }, + { + "epoch": 1.25, + "learning_rate": 4.728631470443885e-05, + "loss": 1.2799, + "step": 2085500 + }, + { + "epoch": 1.25, + "learning_rate": 4.728421473887828e-05, + "loss": 1.3053, + "step": 2086000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7282114773317714e-05, + "loss": 1.2769, + "step": 2086500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7280019007688274e-05, + "loss": 1.3155, + "step": 2087000 + }, + { + "epoch": 1.25, + "learning_rate": 4.727791904212771e-05, + "loss": 1.2979, + "step": 2087500 + }, + { + "epoch": 1.25, + "learning_rate": 4.727581907656714e-05, + "loss": 1.3011, + "step": 2088000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7273719111006574e-05, + "loss": 1.2925, + "step": 2088500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7271627545308255e-05, + "loss": 1.3319, + "step": 2089000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7269527579747695e-05, + "loss": 1.3107, + "step": 2089500 + }, + { + "epoch": 1.25, + "learning_rate": 4.726742761418713e-05, + "loss": 1.3185, + "step": 2090000 + }, + { + "epoch": 1.25, + "learning_rate": 4.726532764862656e-05, + "loss": 1.2748, + "step": 2090500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7263227683066e-05, + "loss": 1.3045, + "step": 2091000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7261127717505436e-05, + "loss": 1.2925, + "step": 2091500 + }, + { + "epoch": 1.25, + "learning_rate": 4.725902775194486e-05, + "loss": 1.3007, + "step": 2092000 + }, + { + "epoch": 1.25, + "learning_rate": 4.725693198631542e-05, + "loss": 1.3188, + "step": 2092500 + }, + { + "epoch": 1.25, + "learning_rate": 4.725483202075486e-05, + "loss": 1.314, + "step": 2093000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7252732055194296e-05, + "loss": 1.3045, + "step": 2093500 + }, + { + "epoch": 1.26, + "learning_rate": 4.725063208963373e-05, + "loss": 1.2829, + "step": 2094000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7248536324004284e-05, + "loss": 1.2943, + "step": 2094500 + }, + { + "epoch": 1.26, + "learning_rate": 4.7246436358443724e-05, + "loss": 1.2947, + "step": 2095000 + }, + { + "epoch": 1.26, + "learning_rate": 4.724433639288316e-05, + "loss": 1.306, + "step": 2095500 + }, + { + "epoch": 1.26, + "learning_rate": 4.724223642732259e-05, + "loss": 1.2993, + "step": 2096000 + }, + { + "epoch": 1.26, + "learning_rate": 4.724013646176203e-05, + "loss": 1.3149, + "step": 2096500 + }, + { + "epoch": 1.26, + "learning_rate": 4.723803649620146e-05, + "loss": 1.2976, + "step": 2097000 + }, + { + "epoch": 1.26, + "learning_rate": 4.723594073057202e-05, + "loss": 1.3189, + "step": 2097500 + }, + { + "epoch": 1.26, + "learning_rate": 4.723384076501146e-05, + "loss": 1.3227, + "step": 2098000 + }, + { + "epoch": 1.26, + "learning_rate": 4.723174079945089e-05, + "loss": 1.2949, + "step": 2098500 + }, + { + "epoch": 1.26, + "learning_rate": 4.7229640833890325e-05, + "loss": 1.2889, + "step": 2099000 + }, + { + "epoch": 1.26, + "learning_rate": 4.722754086832976e-05, + "loss": 1.3133, + "step": 2099500 + }, + { + "epoch": 1.26, + "learning_rate": 4.722544090276919e-05, + "loss": 1.2928, + "step": 2100000 + }, + { + "epoch": 1.26, + "eval_loss": 1.2566254138946533, + "eval_runtime": 1098.8033, + "eval_samples_per_second": 479.358, + "eval_steps_per_second": 79.893, + "step": 2100000 + }, + { + "epoch": 1.26, + "learning_rate": 4.722334513713975e-05, + "loss": 1.3077, + "step": 2100500 + }, + { + "epoch": 1.26, + "learning_rate": 4.7221245171579186e-05, + "loss": 1.2959, + "step": 2101000 + }, + { + "epoch": 1.26, + "learning_rate": 4.721914520601862e-05, + "loss": 1.3162, + "step": 2101500 + }, + { + "epoch": 1.26, + "learning_rate": 4.721704524045805e-05, + "loss": 1.2964, + "step": 2102000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7214945274897486e-05, + "loss": 1.2677, + "step": 2102500 + }, + { + "epoch": 1.26, + "learning_rate": 4.7212845309336926e-05, + "loss": 1.3008, + "step": 2103000 + }, + { + "epoch": 1.26, + "learning_rate": 4.721074954370749e-05, + "loss": 1.2803, + "step": 2103500 + }, + { + "epoch": 1.26, + "learning_rate": 4.7208649578146913e-05, + "loss": 1.3284, + "step": 2104000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7206549612586354e-05, + "loss": 1.3021, + "step": 2104500 + }, + { + "epoch": 1.26, + "learning_rate": 4.720444964702579e-05, + "loss": 1.319, + "step": 2105000 + }, + { + "epoch": 1.26, + "learning_rate": 4.720234968146522e-05, + "loss": 1.3188, + "step": 2105500 + }, + { + "epoch": 1.26, + "learning_rate": 4.720025391583578e-05, + "loss": 1.299, + "step": 2106000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7198153950275214e-05, + "loss": 1.292, + "step": 2106500 + }, + { + "epoch": 1.26, + "learning_rate": 4.719605398471465e-05, + "loss": 1.2733, + "step": 2107000 + }, + { + "epoch": 1.26, + "learning_rate": 4.719395401915408e-05, + "loss": 1.2849, + "step": 2107500 + }, + { + "epoch": 1.26, + "learning_rate": 4.719185405359352e-05, + "loss": 1.3065, + "step": 2108000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7189754088032955e-05, + "loss": 1.2831, + "step": 2108500 + }, + { + "epoch": 1.26, + "learning_rate": 4.718765412247239e-05, + "loss": 1.2901, + "step": 2109000 + }, + { + "epoch": 1.26, + "learning_rate": 4.718555415691183e-05, + "loss": 1.2861, + "step": 2109500 + }, + { + "epoch": 1.27, + "learning_rate": 4.71834625912135e-05, + "loss": 1.308, + "step": 2110000 + }, + { + "epoch": 1.27, + "learning_rate": 4.718136262565294e-05, + "loss": 1.308, + "step": 2110500 + }, + { + "epoch": 1.27, + "learning_rate": 4.717926266009237e-05, + "loss": 1.3069, + "step": 2111000 + }, + { + "epoch": 1.27, + "learning_rate": 4.717716269453181e-05, + "loss": 1.2779, + "step": 2111500 + }, + { + "epoch": 1.27, + "learning_rate": 4.717506272897124e-05, + "loss": 1.2905, + "step": 2112000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7172962763410677e-05, + "loss": 1.3082, + "step": 2112500 + }, + { + "epoch": 1.27, + "learning_rate": 4.717086279785012e-05, + "loss": 1.3273, + "step": 2113000 + }, + { + "epoch": 1.27, + "learning_rate": 4.716876283228955e-05, + "loss": 1.3164, + "step": 2113500 + }, + { + "epoch": 1.27, + "learning_rate": 4.7166662866728984e-05, + "loss": 1.31, + "step": 2114000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7164562901168424e-05, + "loss": 1.2888, + "step": 2114500 + }, + { + "epoch": 1.27, + "learning_rate": 4.716246713553898e-05, + "loss": 1.3108, + "step": 2115000 + }, + { + "epoch": 1.27, + "learning_rate": 4.716036716997841e-05, + "loss": 1.2832, + "step": 2115500 + }, + { + "epoch": 1.27, + "learning_rate": 4.7158267204417844e-05, + "loss": 1.2932, + "step": 2116000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7156167238857285e-05, + "loss": 1.3313, + "step": 2116500 + }, + { + "epoch": 1.27, + "learning_rate": 4.715406727329672e-05, + "loss": 1.3278, + "step": 2117000 + }, + { + "epoch": 1.27, + "learning_rate": 4.715196730773615e-05, + "loss": 1.304, + "step": 2117500 + }, + { + "epoch": 1.27, + "learning_rate": 4.714986734217559e-05, + "loss": 1.313, + "step": 2118000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7147767376615025e-05, + "loss": 1.2911, + "step": 2118500 + }, + { + "epoch": 1.27, + "learning_rate": 4.714567161098558e-05, + "loss": 1.3017, + "step": 2119000 + }, + { + "epoch": 1.27, + "learning_rate": 4.714357164542501e-05, + "loss": 1.3371, + "step": 2119500 + }, + { + "epoch": 1.27, + "learning_rate": 4.714147167986445e-05, + "loss": 1.289, + "step": 2120000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7139371714303886e-05, + "loss": 1.3024, + "step": 2120500 + }, + { + "epoch": 1.27, + "learning_rate": 4.713727174874332e-05, + "loss": 1.3215, + "step": 2121000 + }, + { + "epoch": 1.27, + "learning_rate": 4.713517178318275e-05, + "loss": 1.2975, + "step": 2121500 + }, + { + "epoch": 1.27, + "learning_rate": 4.7133071817622186e-05, + "loss": 1.299, + "step": 2122000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7130971852061627e-05, + "loss": 1.291, + "step": 2122500 + }, + { + "epoch": 1.27, + "learning_rate": 4.71288802863633e-05, + "loss": 1.3185, + "step": 2123000 + }, + { + "epoch": 1.27, + "learning_rate": 4.712678032080274e-05, + "loss": 1.3085, + "step": 2123500 + }, + { + "epoch": 1.27, + "learning_rate": 4.7124680355242174e-05, + "loss": 1.2923, + "step": 2124000 + }, + { + "epoch": 1.27, + "learning_rate": 4.712258038968161e-05, + "loss": 1.2792, + "step": 2124500 + }, + { + "epoch": 1.27, + "learning_rate": 4.712048042412105e-05, + "loss": 1.307, + "step": 2125000 + }, + { + "epoch": 1.27, + "learning_rate": 4.711838045856048e-05, + "loss": 1.2888, + "step": 2125500 + }, + { + "epoch": 1.27, + "learning_rate": 4.711628049299991e-05, + "loss": 1.3012, + "step": 2126000 + }, + { + "epoch": 1.27, + "learning_rate": 4.711418052743935e-05, + "loss": 1.303, + "step": 2126500 + }, + { + "epoch": 1.28, + "learning_rate": 4.711208056187878e-05, + "loss": 1.3162, + "step": 2127000 + }, + { + "epoch": 1.28, + "learning_rate": 4.710998479624934e-05, + "loss": 1.2963, + "step": 2127500 + }, + { + "epoch": 1.28, + "learning_rate": 4.710788483068878e-05, + "loss": 1.2893, + "step": 2128000 + }, + { + "epoch": 1.28, + "learning_rate": 4.710578486512821e-05, + "loss": 1.2947, + "step": 2128500 + }, + { + "epoch": 1.28, + "learning_rate": 4.710368489956764e-05, + "loss": 1.3196, + "step": 2129000 + }, + { + "epoch": 1.28, + "learning_rate": 4.71015891339382e-05, + "loss": 1.3049, + "step": 2129500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7099493368308756e-05, + "loss": 1.3145, + "step": 2130000 + }, + { + "epoch": 1.28, + "learning_rate": 4.7097393402748196e-05, + "loss": 1.2462, + "step": 2130500 + }, + { + "epoch": 1.28, + "learning_rate": 4.709529343718763e-05, + "loss": 1.3073, + "step": 2131000 + }, + { + "epoch": 1.28, + "learning_rate": 4.709319347162706e-05, + "loss": 1.3123, + "step": 2131500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7091093506066504e-05, + "loss": 1.3297, + "step": 2132000 + }, + { + "epoch": 1.28, + "learning_rate": 4.708899354050594e-05, + "loss": 1.2877, + "step": 2132500 + }, + { + "epoch": 1.28, + "learning_rate": 4.708689357494537e-05, + "loss": 1.3054, + "step": 2133000 + }, + { + "epoch": 1.28, + "learning_rate": 4.7084793609384804e-05, + "loss": 1.3155, + "step": 2133500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7082697843755364e-05, + "loss": 1.2698, + "step": 2134000 + }, + { + "epoch": 1.28, + "learning_rate": 4.70805978781948e-05, + "loss": 1.2962, + "step": 2134500 + }, + { + "epoch": 1.28, + "learning_rate": 4.707850631249647e-05, + "loss": 1.3119, + "step": 2135000 + }, + { + "epoch": 1.28, + "learning_rate": 4.7076406346935905e-05, + "loss": 1.3021, + "step": 2135500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7074306381375345e-05, + "loss": 1.2672, + "step": 2136000 + }, + { + "epoch": 1.28, + "learning_rate": 4.707220641581478e-05, + "loss": 1.3078, + "step": 2136500 + }, + { + "epoch": 1.28, + "learning_rate": 4.707010645025421e-05, + "loss": 1.2866, + "step": 2137000 + }, + { + "epoch": 1.28, + "learning_rate": 4.706800648469365e-05, + "loss": 1.3061, + "step": 2137500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7065906519133086e-05, + "loss": 1.3172, + "step": 2138000 + }, + { + "epoch": 1.28, + "learning_rate": 4.706380655357252e-05, + "loss": 1.3106, + "step": 2138500 + }, + { + "epoch": 1.28, + "learning_rate": 4.706170658801196e-05, + "loss": 1.2807, + "step": 2139000 + }, + { + "epoch": 1.28, + "learning_rate": 4.705960662245139e-05, + "loss": 1.3079, + "step": 2139500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7057506656890826e-05, + "loss": 1.2682, + "step": 2140000 + }, + { + "epoch": 1.28, + "learning_rate": 4.705540669133026e-05, + "loss": 1.2924, + "step": 2140500 + }, + { + "epoch": 1.28, + "learning_rate": 4.705331092570082e-05, + "loss": 1.3214, + "step": 2141000 + }, + { + "epoch": 1.28, + "learning_rate": 4.7051210960140254e-05, + "loss": 1.2939, + "step": 2141500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7049110994579694e-05, + "loss": 1.2926, + "step": 2142000 + }, + { + "epoch": 1.28, + "learning_rate": 4.704701942888136e-05, + "loss": 1.2906, + "step": 2142500 + }, + { + "epoch": 1.28, + "learning_rate": 4.70449194633208e-05, + "loss": 1.3183, + "step": 2143000 + }, + { + "epoch": 1.29, + "learning_rate": 4.7042819497760234e-05, + "loss": 1.2804, + "step": 2143500 + }, + { + "epoch": 1.29, + "learning_rate": 4.704071953219967e-05, + "loss": 1.3177, + "step": 2144000 + }, + { + "epoch": 1.29, + "learning_rate": 4.703861956663911e-05, + "loss": 1.2715, + "step": 2144500 + }, + { + "epoch": 1.29, + "learning_rate": 4.703651960107854e-05, + "loss": 1.2879, + "step": 2145000 + }, + { + "epoch": 1.29, + "learning_rate": 4.7034423835449095e-05, + "loss": 1.3203, + "step": 2145500 + }, + { + "epoch": 1.29, + "learning_rate": 4.703232386988853e-05, + "loss": 1.2726, + "step": 2146000 + }, + { + "epoch": 1.29, + "learning_rate": 4.703022390432797e-05, + "loss": 1.3169, + "step": 2146500 + }, + { + "epoch": 1.29, + "learning_rate": 4.70281239387674e-05, + "loss": 1.3098, + "step": 2147000 + }, + { + "epoch": 1.29, + "learning_rate": 4.702602397320684e-05, + "loss": 1.2992, + "step": 2147500 + }, + { + "epoch": 1.29, + "learning_rate": 4.7023924007646276e-05, + "loss": 1.2911, + "step": 2148000 + }, + { + "epoch": 1.29, + "learning_rate": 4.702182404208571e-05, + "loss": 1.2933, + "step": 2148500 + }, + { + "epoch": 1.29, + "learning_rate": 4.701972407652515e-05, + "loss": 1.2892, + "step": 2149000 + }, + { + "epoch": 1.29, + "learning_rate": 4.701762411096458e-05, + "loss": 1.3169, + "step": 2149500 + }, + { + "epoch": 1.29, + "learning_rate": 4.701552834533514e-05, + "loss": 1.2979, + "step": 2150000 + }, + { + "epoch": 1.29, + "learning_rate": 4.701342837977457e-05, + "loss": 1.3174, + "step": 2150500 + }, + { + "epoch": 1.29, + "learning_rate": 4.701132841421401e-05, + "loss": 1.295, + "step": 2151000 + }, + { + "epoch": 1.29, + "learning_rate": 4.7009228448653444e-05, + "loss": 1.2988, + "step": 2151500 + }, + { + "epoch": 1.29, + "learning_rate": 4.700712848309288e-05, + "loss": 1.2717, + "step": 2152000 + }, + { + "epoch": 1.29, + "learning_rate": 4.700502851753231e-05, + "loss": 1.3021, + "step": 2152500 + }, + { + "epoch": 1.29, + "learning_rate": 4.7002928551971744e-05, + "loss": 1.2963, + "step": 2153000 + }, + { + "epoch": 1.29, + "learning_rate": 4.7000832786342305e-05, + "loss": 1.3104, + "step": 2153500 + }, + { + "epoch": 1.29, + "learning_rate": 4.699873282078174e-05, + "loss": 1.2788, + "step": 2154000 + }, + { + "epoch": 1.29, + "learning_rate": 4.699663285522118e-05, + "loss": 1.3219, + "step": 2154500 + }, + { + "epoch": 1.29, + "learning_rate": 4.6994532889660605e-05, + "loss": 1.3133, + "step": 2155000 + }, + { + "epoch": 1.29, + "learning_rate": 4.6992432924100045e-05, + "loss": 1.3099, + "step": 2155500 + }, + { + "epoch": 1.29, + "learning_rate": 4.699033295853948e-05, + "loss": 1.31, + "step": 2156000 + }, + { + "epoch": 1.29, + "learning_rate": 4.698823299297891e-05, + "loss": 1.3022, + "step": 2156500 + }, + { + "epoch": 1.29, + "learning_rate": 4.6986137227349466e-05, + "loss": 1.2902, + "step": 2157000 + }, + { + "epoch": 1.29, + "learning_rate": 4.6984037261788906e-05, + "loss": 1.3182, + "step": 2157500 + }, + { + "epoch": 1.29, + "learning_rate": 4.698193729622834e-05, + "loss": 1.2903, + "step": 2158000 + }, + { + "epoch": 1.29, + "learning_rate": 4.697983733066777e-05, + "loss": 1.3087, + "step": 2158500 + }, + { + "epoch": 1.29, + "learning_rate": 4.697773736510721e-05, + "loss": 1.3112, + "step": 2159000 + }, + { + "epoch": 1.29, + "learning_rate": 4.6975637399546647e-05, + "loss": 1.3376, + "step": 2159500 + }, + { + "epoch": 1.3, + "learning_rate": 4.69735416339172e-05, + "loss": 1.2639, + "step": 2160000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6971441668356634e-05, + "loss": 1.3126, + "step": 2160500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6969341702796074e-05, + "loss": 1.3024, + "step": 2161000 + }, + { + "epoch": 1.3, + "learning_rate": 4.696724173723551e-05, + "loss": 1.2967, + "step": 2161500 + }, + { + "epoch": 1.3, + "learning_rate": 4.696514597160606e-05, + "loss": 1.2999, + "step": 2162000 + }, + { + "epoch": 1.3, + "learning_rate": 4.696305020597662e-05, + "loss": 1.2965, + "step": 2162500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6960954440347175e-05, + "loss": 1.2943, + "step": 2163000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6958854474786615e-05, + "loss": 1.2958, + "step": 2163500 + }, + { + "epoch": 1.3, + "learning_rate": 4.695675450922605e-05, + "loss": 1.2983, + "step": 2164000 + }, + { + "epoch": 1.3, + "learning_rate": 4.695465454366548e-05, + "loss": 1.2821, + "step": 2164500 + }, + { + "epoch": 1.3, + "learning_rate": 4.695255457810492e-05, + "loss": 1.2944, + "step": 2165000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6950454612544356e-05, + "loss": 1.2745, + "step": 2165500 + }, + { + "epoch": 1.3, + "learning_rate": 4.694835464698379e-05, + "loss": 1.324, + "step": 2166000 + }, + { + "epoch": 1.3, + "learning_rate": 4.694625468142322e-05, + "loss": 1.3051, + "step": 2166500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6944154715862656e-05, + "loss": 1.3191, + "step": 2167000 + }, + { + "epoch": 1.3, + "learning_rate": 4.694205475030209e-05, + "loss": 1.3141, + "step": 2167500 + }, + { + "epoch": 1.3, + "learning_rate": 4.693995478474153e-05, + "loss": 1.3012, + "step": 2168000 + }, + { + "epoch": 1.3, + "learning_rate": 4.693785481918096e-05, + "loss": 1.3137, + "step": 2168500 + }, + { + "epoch": 1.3, + "learning_rate": 4.69357548536204e-05, + "loss": 1.2997, + "step": 2169000 + }, + { + "epoch": 1.3, + "learning_rate": 4.693365488805984e-05, + "loss": 1.2951, + "step": 2169500 + }, + { + "epoch": 1.3, + "learning_rate": 4.693155912243039e-05, + "loss": 1.2867, + "step": 2170000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6929459156869824e-05, + "loss": 1.2777, + "step": 2170500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6927359191309264e-05, + "loss": 1.3149, + "step": 2171000 + }, + { + "epoch": 1.3, + "learning_rate": 4.69252592257487e-05, + "loss": 1.2897, + "step": 2171500 + }, + { + "epoch": 1.3, + "learning_rate": 4.692316346011925e-05, + "loss": 1.3101, + "step": 2172000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6921063494558685e-05, + "loss": 1.3044, + "step": 2172500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6918963528998125e-05, + "loss": 1.3205, + "step": 2173000 + }, + { + "epoch": 1.3, + "learning_rate": 4.691686356343756e-05, + "loss": 1.3072, + "step": 2173500 + }, + { + "epoch": 1.3, + "learning_rate": 4.691476359787699e-05, + "loss": 1.3045, + "step": 2174000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6912667832247545e-05, + "loss": 1.2946, + "step": 2174500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6910567866686986e-05, + "loss": 1.3025, + "step": 2175000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6908472101057546e-05, + "loss": 1.2733, + "step": 2175500 + }, + { + "epoch": 1.3, + "learning_rate": 4.690637213549698e-05, + "loss": 1.3078, + "step": 2176000 + }, + { + "epoch": 1.3, + "learning_rate": 4.690427216993641e-05, + "loss": 1.27, + "step": 2176500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6902172204375846e-05, + "loss": 1.2954, + "step": 2177000 + }, + { + "epoch": 1.31, + "learning_rate": 4.690007223881528e-05, + "loss": 1.3015, + "step": 2177500 + }, + { + "epoch": 1.31, + "learning_rate": 4.689797227325472e-05, + "loss": 1.3106, + "step": 2178000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6895872307694153e-05, + "loss": 1.3247, + "step": 2178500 + }, + { + "epoch": 1.31, + "learning_rate": 4.689377234213359e-05, + "loss": 1.3034, + "step": 2179000 + }, + { + "epoch": 1.31, + "learning_rate": 4.689168077643527e-05, + "loss": 1.2953, + "step": 2179500 + }, + { + "epoch": 1.31, + "learning_rate": 4.68895808108747e-05, + "loss": 1.3089, + "step": 2180000 + }, + { + "epoch": 1.31, + "learning_rate": 4.688748084531414e-05, + "loss": 1.2996, + "step": 2180500 + }, + { + "epoch": 1.31, + "learning_rate": 4.688538087975357e-05, + "loss": 1.3184, + "step": 2181000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6883280914193e-05, + "loss": 1.3064, + "step": 2181500 + }, + { + "epoch": 1.31, + "learning_rate": 4.688118094863244e-05, + "loss": 1.2797, + "step": 2182000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6879080983071875e-05, + "loss": 1.2958, + "step": 2182500 + }, + { + "epoch": 1.31, + "learning_rate": 4.687698101751131e-05, + "loss": 1.2903, + "step": 2183000 + }, + { + "epoch": 1.31, + "learning_rate": 4.687488105195075e-05, + "loss": 1.2999, + "step": 2183500 + }, + { + "epoch": 1.31, + "learning_rate": 4.687278108639018e-05, + "loss": 1.2913, + "step": 2184000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6870681120829616e-05, + "loss": 1.2997, + "step": 2184500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6868581155269056e-05, + "loss": 1.2939, + "step": 2185000 + }, + { + "epoch": 1.31, + "learning_rate": 4.686648118970849e-05, + "loss": 1.3071, + "step": 2185500 + }, + { + "epoch": 1.31, + "learning_rate": 4.686438962401016e-05, + "loss": 1.3262, + "step": 2186000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6862289658449596e-05, + "loss": 1.3023, + "step": 2186500 + }, + { + "epoch": 1.31, + "learning_rate": 4.686018969288904e-05, + "loss": 1.2758, + "step": 2187000 + }, + { + "epoch": 1.31, + "learning_rate": 4.685808972732847e-05, + "loss": 1.3133, + "step": 2187500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6855989761767904e-05, + "loss": 1.2848, + "step": 2188000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6853889796207344e-05, + "loss": 1.3079, + "step": 2188500 + }, + { + "epoch": 1.31, + "learning_rate": 4.685178983064678e-05, + "loss": 1.2885, + "step": 2189000 + }, + { + "epoch": 1.31, + "learning_rate": 4.684968986508621e-05, + "loss": 1.3003, + "step": 2189500 + }, + { + "epoch": 1.31, + "learning_rate": 4.684758989952565e-05, + "loss": 1.2812, + "step": 2190000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6845489933965084e-05, + "loss": 1.3098, + "step": 2190500 + }, + { + "epoch": 1.31, + "learning_rate": 4.684338996840452e-05, + "loss": 1.2912, + "step": 2191000 + }, + { + "epoch": 1.31, + "learning_rate": 4.684129000284395e-05, + "loss": 1.3051, + "step": 2191500 + }, + { + "epoch": 1.31, + "learning_rate": 4.683919423721451e-05, + "loss": 1.2818, + "step": 2192000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6837094271653945e-05, + "loss": 1.2831, + "step": 2192500 + }, + { + "epoch": 1.31, + "learning_rate": 4.683499430609338e-05, + "loss": 1.3283, + "step": 2193000 + }, + { + "epoch": 1.32, + "learning_rate": 4.683289434053281e-05, + "loss": 1.2793, + "step": 2193500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6830794374972245e-05, + "loss": 1.2845, + "step": 2194000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6828694409411686e-05, + "loss": 1.2864, + "step": 2194500 + }, + { + "epoch": 1.32, + "learning_rate": 4.682659444385112e-05, + "loss": 1.3281, + "step": 2195000 + }, + { + "epoch": 1.32, + "learning_rate": 4.682449447829055e-05, + "loss": 1.2927, + "step": 2195500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6822398712661106e-05, + "loss": 1.2917, + "step": 2196000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6820298747100546e-05, + "loss": 1.2737, + "step": 2196500 + }, + { + "epoch": 1.32, + "learning_rate": 4.681820298147111e-05, + "loss": 1.2818, + "step": 2197000 + }, + { + "epoch": 1.32, + "learning_rate": 4.681610301591054e-05, + "loss": 1.2887, + "step": 2197500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6814003050349974e-05, + "loss": 1.3135, + "step": 2198000 + }, + { + "epoch": 1.32, + "learning_rate": 4.681190308478941e-05, + "loss": 1.3057, + "step": 2198500 + }, + { + "epoch": 1.32, + "learning_rate": 4.680980311922884e-05, + "loss": 1.3412, + "step": 2199000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6807703153668274e-05, + "loss": 1.31, + "step": 2199500 + }, + { + "epoch": 1.32, + "learning_rate": 4.680560738803884e-05, + "loss": 1.2881, + "step": 2200000 + }, + { + "epoch": 1.32, + "eval_loss": 1.2527239322662354, + "eval_runtime": 1100.921, + "eval_samples_per_second": 478.436, + "eval_steps_per_second": 79.74, + "step": 2200000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6803507422478275e-05, + "loss": 1.2734, + "step": 2200500 + }, + { + "epoch": 1.32, + "learning_rate": 4.68014074569177e-05, + "loss": 1.3167, + "step": 2201000 + }, + { + "epoch": 1.32, + "learning_rate": 4.679930749135714e-05, + "loss": 1.295, + "step": 2201500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6797207525796575e-05, + "loss": 1.2774, + "step": 2202000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6795111760167135e-05, + "loss": 1.3323, + "step": 2202500 + }, + { + "epoch": 1.32, + "learning_rate": 4.679301179460656e-05, + "loss": 1.2898, + "step": 2203000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6790911829046e-05, + "loss": 1.301, + "step": 2203500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6788811863485436e-05, + "loss": 1.3103, + "step": 2204000 + }, + { + "epoch": 1.32, + "learning_rate": 4.678671189792487e-05, + "loss": 1.2631, + "step": 2204500 + }, + { + "epoch": 1.32, + "learning_rate": 4.678461193236431e-05, + "loss": 1.2943, + "step": 2205000 + }, + { + "epoch": 1.32, + "learning_rate": 4.678251616673486e-05, + "loss": 1.2944, + "step": 2205500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6780420401105423e-05, + "loss": 1.2924, + "step": 2206000 + }, + { + "epoch": 1.32, + "learning_rate": 4.677832043554486e-05, + "loss": 1.2832, + "step": 2206500 + }, + { + "epoch": 1.32, + "learning_rate": 4.67762204699843e-05, + "loss": 1.3144, + "step": 2207000 + }, + { + "epoch": 1.32, + "learning_rate": 4.677412050442373e-05, + "loss": 1.3082, + "step": 2207500 + }, + { + "epoch": 1.32, + "learning_rate": 4.677202053886316e-05, + "loss": 1.3081, + "step": 2208000 + }, + { + "epoch": 1.32, + "learning_rate": 4.67699205733026e-05, + "loss": 1.302, + "step": 2208500 + }, + { + "epoch": 1.32, + "learning_rate": 4.676782060774203e-05, + "loss": 1.314, + "step": 2209000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6765720642181464e-05, + "loss": 1.2998, + "step": 2209500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6763624876552025e-05, + "loss": 1.294, + "step": 2210000 + }, + { + "epoch": 1.33, + "learning_rate": 4.676152911092258e-05, + "loss": 1.3207, + "step": 2210500 + }, + { + "epoch": 1.33, + "learning_rate": 4.675942914536202e-05, + "loss": 1.2986, + "step": 2211000 + }, + { + "epoch": 1.33, + "learning_rate": 4.675732917980145e-05, + "loss": 1.306, + "step": 2211500 + }, + { + "epoch": 1.33, + "learning_rate": 4.6755229214240885e-05, + "loss": 1.2873, + "step": 2212000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6753129248680326e-05, + "loss": 1.276, + "step": 2212500 + }, + { + "epoch": 1.33, + "learning_rate": 4.675102928311975e-05, + "loss": 1.3286, + "step": 2213000 + }, + { + "epoch": 1.33, + "learning_rate": 4.674892931755919e-05, + "loss": 1.3012, + "step": 2213500 + }, + { + "epoch": 1.33, + "learning_rate": 4.674683355192975e-05, + "loss": 1.2965, + "step": 2214000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6744733586369186e-05, + "loss": 1.2722, + "step": 2214500 + }, + { + "epoch": 1.33, + "learning_rate": 4.674263362080861e-05, + "loss": 1.2788, + "step": 2215000 + }, + { + "epoch": 1.33, + "learning_rate": 4.674053365524805e-05, + "loss": 1.2766, + "step": 2215500 + }, + { + "epoch": 1.33, + "learning_rate": 4.673843368968749e-05, + "loss": 1.3094, + "step": 2216000 + }, + { + "epoch": 1.33, + "learning_rate": 4.673633372412692e-05, + "loss": 1.292, + "step": 2216500 + }, + { + "epoch": 1.33, + "learning_rate": 4.673423375856636e-05, + "loss": 1.2821, + "step": 2217000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6732133793005794e-05, + "loss": 1.3001, + "step": 2217500 + }, + { + "epoch": 1.33, + "learning_rate": 4.673003382744523e-05, + "loss": 1.307, + "step": 2218000 + }, + { + "epoch": 1.33, + "learning_rate": 4.672793806181578e-05, + "loss": 1.3112, + "step": 2218500 + }, + { + "epoch": 1.33, + "learning_rate": 4.672583809625522e-05, + "loss": 1.3469, + "step": 2219000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6723738130694655e-05, + "loss": 1.3156, + "step": 2219500 + }, + { + "epoch": 1.33, + "learning_rate": 4.672163816513409e-05, + "loss": 1.2869, + "step": 2220000 + }, + { + "epoch": 1.33, + "learning_rate": 4.671954239950465e-05, + "loss": 1.301, + "step": 2220500 + }, + { + "epoch": 1.33, + "learning_rate": 4.671744243394408e-05, + "loss": 1.2725, + "step": 2221000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6715342468383515e-05, + "loss": 1.2962, + "step": 2221500 + }, + { + "epoch": 1.33, + "learning_rate": 4.6713246702754076e-05, + "loss": 1.2724, + "step": 2222000 + }, + { + "epoch": 1.33, + "learning_rate": 4.671114673719351e-05, + "loss": 1.3134, + "step": 2222500 + }, + { + "epoch": 1.33, + "learning_rate": 4.670904677163294e-05, + "loss": 1.3162, + "step": 2223000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6706946806072376e-05, + "loss": 1.2715, + "step": 2223500 + }, + { + "epoch": 1.33, + "learning_rate": 4.6704846840511816e-05, + "loss": 1.2806, + "step": 2224000 + }, + { + "epoch": 1.33, + "learning_rate": 4.670274687495125e-05, + "loss": 1.2666, + "step": 2224500 + }, + { + "epoch": 1.33, + "learning_rate": 4.670064690939068e-05, + "loss": 1.3119, + "step": 2225000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6698546943830124e-05, + "loss": 1.2865, + "step": 2225500 + }, + { + "epoch": 1.33, + "learning_rate": 4.669645117820068e-05, + "loss": 1.3324, + "step": 2226000 + }, + { + "epoch": 1.33, + "learning_rate": 4.669435121264011e-05, + "loss": 1.2923, + "step": 2226500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6692251247079544e-05, + "loss": 1.3143, + "step": 2227000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6690151281518984e-05, + "loss": 1.2843, + "step": 2227500 + }, + { + "epoch": 1.34, + "learning_rate": 4.668805131595842e-05, + "loss": 1.3038, + "step": 2228000 + }, + { + "epoch": 1.34, + "learning_rate": 4.668595135039785e-05, + "loss": 1.322, + "step": 2228500 + }, + { + "epoch": 1.34, + "learning_rate": 4.668385138483729e-05, + "loss": 1.286, + "step": 2229000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6681751419276725e-05, + "loss": 1.3026, + "step": 2229500 + }, + { + "epoch": 1.34, + "learning_rate": 4.667965565364728e-05, + "loss": 1.3323, + "step": 2230000 + }, + { + "epoch": 1.34, + "learning_rate": 4.667755568808672e-05, + "loss": 1.3133, + "step": 2230500 + }, + { + "epoch": 1.34, + "learning_rate": 4.667545572252615e-05, + "loss": 1.3191, + "step": 2231000 + }, + { + "epoch": 1.34, + "learning_rate": 4.667336415682783e-05, + "loss": 1.2958, + "step": 2231500 + }, + { + "epoch": 1.34, + "learning_rate": 4.667126419126726e-05, + "loss": 1.3135, + "step": 2232000 + }, + { + "epoch": 1.34, + "learning_rate": 4.666916422570669e-05, + "loss": 1.2853, + "step": 2232500 + }, + { + "epoch": 1.34, + "learning_rate": 4.666706426014613e-05, + "loss": 1.2745, + "step": 2233000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6664964294585566e-05, + "loss": 1.2897, + "step": 2233500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6662864329025e-05, + "loss": 1.296, + "step": 2234000 + }, + { + "epoch": 1.34, + "learning_rate": 4.666076436346444e-05, + "loss": 1.3027, + "step": 2234500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6658664397903874e-05, + "loss": 1.319, + "step": 2235000 + }, + { + "epoch": 1.34, + "learning_rate": 4.665656443234331e-05, + "loss": 1.2971, + "step": 2235500 + }, + { + "epoch": 1.34, + "learning_rate": 4.665446446678275e-05, + "loss": 1.2938, + "step": 2236000 + }, + { + "epoch": 1.34, + "learning_rate": 4.665236450122218e-05, + "loss": 1.3074, + "step": 2236500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6650268735592734e-05, + "loss": 1.2965, + "step": 2237000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6648168770032175e-05, + "loss": 1.2943, + "step": 2237500 + }, + { + "epoch": 1.34, + "learning_rate": 4.664606880447161e-05, + "loss": 1.2951, + "step": 2238000 + }, + { + "epoch": 1.34, + "learning_rate": 4.664396883891104e-05, + "loss": 1.2736, + "step": 2238500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6641873073281595e-05, + "loss": 1.3118, + "step": 2239000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6639773107721035e-05, + "loss": 1.299, + "step": 2239500 + }, + { + "epoch": 1.34, + "learning_rate": 4.663767734209159e-05, + "loss": 1.3115, + "step": 2240000 + }, + { + "epoch": 1.34, + "learning_rate": 4.663557737653102e-05, + "loss": 1.2997, + "step": 2240500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6633477410970456e-05, + "loss": 1.3099, + "step": 2241000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6631377445409896e-05, + "loss": 1.2724, + "step": 2241500 + }, + { + "epoch": 1.34, + "learning_rate": 4.662927747984933e-05, + "loss": 1.3063, + "step": 2242000 + }, + { + "epoch": 1.34, + "learning_rate": 4.662717751428876e-05, + "loss": 1.2978, + "step": 2242500 + }, + { + "epoch": 1.34, + "learning_rate": 4.66250775487282e-05, + "loss": 1.2564, + "step": 2243000 + }, + { + "epoch": 1.35, + "learning_rate": 4.662297758316764e-05, + "loss": 1.3206, + "step": 2243500 + }, + { + "epoch": 1.35, + "learning_rate": 4.662088181753819e-05, + "loss": 1.2842, + "step": 2244000 + }, + { + "epoch": 1.35, + "learning_rate": 4.661878185197763e-05, + "loss": 1.3035, + "step": 2244500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6616681886417064e-05, + "loss": 1.3145, + "step": 2245000 + }, + { + "epoch": 1.35, + "learning_rate": 4.66145819208565e-05, + "loss": 1.3327, + "step": 2245500 + }, + { + "epoch": 1.35, + "learning_rate": 4.661248195529594e-05, + "loss": 1.2972, + "step": 2246000 + }, + { + "epoch": 1.35, + "learning_rate": 4.661038198973537e-05, + "loss": 1.3036, + "step": 2246500 + }, + { + "epoch": 1.35, + "learning_rate": 4.66082820241748e-05, + "loss": 1.2864, + "step": 2247000 + }, + { + "epoch": 1.35, + "learning_rate": 4.660618205861424e-05, + "loss": 1.3089, + "step": 2247500 + }, + { + "epoch": 1.35, + "learning_rate": 4.66040862929848e-05, + "loss": 1.2754, + "step": 2248000 + }, + { + "epoch": 1.35, + "learning_rate": 4.660198632742423e-05, + "loss": 1.3042, + "step": 2248500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6599886361863665e-05, + "loss": 1.292, + "step": 2249000 + }, + { + "epoch": 1.35, + "learning_rate": 4.65977863963031e-05, + "loss": 1.2758, + "step": 2249500 + }, + { + "epoch": 1.35, + "learning_rate": 4.659568643074253e-05, + "loss": 1.3118, + "step": 2250000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6593586465181966e-05, + "loss": 1.3109, + "step": 2250500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6591486499621406e-05, + "loss": 1.3248, + "step": 2251000 + }, + { + "epoch": 1.35, + "learning_rate": 4.658939073399196e-05, + "loss": 1.2969, + "step": 2251500 + }, + { + "epoch": 1.35, + "learning_rate": 4.658729076843139e-05, + "loss": 1.2684, + "step": 2252000 + }, + { + "epoch": 1.35, + "learning_rate": 4.658519500280195e-05, + "loss": 1.3061, + "step": 2252500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6583095037241393e-05, + "loss": 1.3306, + "step": 2253000 + }, + { + "epoch": 1.35, + "learning_rate": 4.658099507168083e-05, + "loss": 1.2987, + "step": 2253500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6578895106120254e-05, + "loss": 1.3017, + "step": 2254000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6576795140559694e-05, + "loss": 1.2799, + "step": 2254500 + }, + { + "epoch": 1.35, + "learning_rate": 4.657469517499913e-05, + "loss": 1.2884, + "step": 2255000 + }, + { + "epoch": 1.35, + "learning_rate": 4.657259520943856e-05, + "loss": 1.3019, + "step": 2255500 + }, + { + "epoch": 1.35, + "learning_rate": 4.657049944380912e-05, + "loss": 1.2894, + "step": 2256000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6568399478248555e-05, + "loss": 1.2969, + "step": 2256500 + }, + { + "epoch": 1.35, + "learning_rate": 4.656629951268799e-05, + "loss": 1.2884, + "step": 2257000 + }, + { + "epoch": 1.35, + "learning_rate": 4.656419954712742e-05, + "loss": 1.2966, + "step": 2257500 + }, + { + "epoch": 1.35, + "learning_rate": 4.656209958156686e-05, + "loss": 1.2782, + "step": 2258000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6559999616006295e-05, + "loss": 1.3302, + "step": 2258500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6557899650445735e-05, + "loss": 1.2937, + "step": 2259000 + }, + { + "epoch": 1.35, + "learning_rate": 4.655579968488517e-05, + "loss": 1.2745, + "step": 2259500 + }, + { + "epoch": 1.35, + "learning_rate": 4.65536997193246e-05, + "loss": 1.3018, + "step": 2260000 + }, + { + "epoch": 1.36, + "learning_rate": 4.6551603953695156e-05, + "loss": 1.3177, + "step": 2260500 + }, + { + "epoch": 1.36, + "learning_rate": 4.6549503988134596e-05, + "loss": 1.2754, + "step": 2261000 + }, + { + "epoch": 1.36, + "learning_rate": 4.654740402257403e-05, + "loss": 1.3223, + "step": 2261500 + }, + { + "epoch": 1.36, + "learning_rate": 4.654530405701346e-05, + "loss": 1.2784, + "step": 2262000 + }, + { + "epoch": 1.36, + "learning_rate": 4.65432040914529e-05, + "loss": 1.2722, + "step": 2262500 + }, + { + "epoch": 1.36, + "learning_rate": 4.654110412589234e-05, + "loss": 1.3009, + "step": 2263000 + }, + { + "epoch": 1.36, + "learning_rate": 4.653900836026289e-05, + "loss": 1.3102, + "step": 2263500 + }, + { + "epoch": 1.36, + "learning_rate": 4.6536908394702324e-05, + "loss": 1.3325, + "step": 2264000 + }, + { + "epoch": 1.36, + "learning_rate": 4.6534808429141764e-05, + "loss": 1.2946, + "step": 2264500 + }, + { + "epoch": 1.36, + "learning_rate": 4.65327084635812e-05, + "loss": 1.284, + "step": 2265000 + }, + { + "epoch": 1.36, + "learning_rate": 4.653060849802063e-05, + "loss": 1.3078, + "step": 2265500 + }, + { + "epoch": 1.36, + "learning_rate": 4.652850853246007e-05, + "loss": 1.2931, + "step": 2266000 + }, + { + "epoch": 1.36, + "learning_rate": 4.65264085668995e-05, + "loss": 1.2773, + "step": 2266500 + }, + { + "epoch": 1.36, + "learning_rate": 4.652430860133894e-05, + "loss": 1.2688, + "step": 2267000 + }, + { + "epoch": 1.36, + "learning_rate": 4.65222128357095e-05, + "loss": 1.3088, + "step": 2267500 + }, + { + "epoch": 1.36, + "learning_rate": 4.652011287014893e-05, + "loss": 1.2823, + "step": 2268000 + }, + { + "epoch": 1.36, + "learning_rate": 4.6518017104519486e-05, + "loss": 1.2822, + "step": 2268500 + }, + { + "epoch": 1.36, + "learning_rate": 4.651591713895892e-05, + "loss": 1.302, + "step": 2269000 + }, + { + "epoch": 1.36, + "learning_rate": 4.651381717339836e-05, + "loss": 1.3335, + "step": 2269500 + }, + { + "epoch": 1.36, + "learning_rate": 4.651171720783779e-05, + "loss": 1.2964, + "step": 2270000 + }, + { + "epoch": 1.36, + "learning_rate": 4.6509621442208346e-05, + "loss": 1.3176, + "step": 2270500 + }, + { + "epoch": 1.36, + "learning_rate": 4.650752147664778e-05, + "loss": 1.3005, + "step": 2271000 + }, + { + "epoch": 1.36, + "learning_rate": 4.650542151108722e-05, + "loss": 1.2932, + "step": 2271500 + }, + { + "epoch": 1.36, + "learning_rate": 4.650332154552665e-05, + "loss": 1.2737, + "step": 2272000 + }, + { + "epoch": 1.36, + "learning_rate": 4.650122157996609e-05, + "loss": 1.2979, + "step": 2272500 + }, + { + "epoch": 1.36, + "learning_rate": 4.649912161440553e-05, + "loss": 1.3064, + "step": 2273000 + }, + { + "epoch": 1.36, + "learning_rate": 4.649702164884496e-05, + "loss": 1.3267, + "step": 2273500 + }, + { + "epoch": 1.36, + "learning_rate": 4.6494921683284394e-05, + "loss": 1.3039, + "step": 2274000 + }, + { + "epoch": 1.36, + "learning_rate": 4.6492825917654954e-05, + "loss": 1.2954, + "step": 2274500 + }, + { + "epoch": 1.36, + "learning_rate": 4.649072595209439e-05, + "loss": 1.2922, + "step": 2275000 + }, + { + "epoch": 1.36, + "learning_rate": 4.648863018646494e-05, + "loss": 1.3065, + "step": 2275500 + }, + { + "epoch": 1.36, + "learning_rate": 4.6486534420835495e-05, + "loss": 1.3031, + "step": 2276000 + }, + { + "epoch": 1.36, + "learning_rate": 4.648443445527493e-05, + "loss": 1.2907, + "step": 2276500 + }, + { + "epoch": 1.37, + "learning_rate": 4.648233448971437e-05, + "loss": 1.2877, + "step": 2277000 + }, + { + "epoch": 1.37, + "learning_rate": 4.64802345241538e-05, + "loss": 1.2736, + "step": 2277500 + }, + { + "epoch": 1.37, + "learning_rate": 4.6478134558593236e-05, + "loss": 1.2866, + "step": 2278000 + }, + { + "epoch": 1.37, + "learning_rate": 4.6476034593032676e-05, + "loss": 1.2804, + "step": 2278500 + }, + { + "epoch": 1.37, + "learning_rate": 4.647393462747211e-05, + "loss": 1.3138, + "step": 2279000 + }, + { + "epoch": 1.37, + "learning_rate": 4.647183466191154e-05, + "loss": 1.2649, + "step": 2279500 + }, + { + "epoch": 1.37, + "learning_rate": 4.646973469635098e-05, + "loss": 1.3112, + "step": 2280000 + }, + { + "epoch": 1.37, + "learning_rate": 4.6467634730790416e-05, + "loss": 1.3056, + "step": 2280500 + }, + { + "epoch": 1.37, + "learning_rate": 4.646553476522985e-05, + "loss": 1.3003, + "step": 2281000 + }, + { + "epoch": 1.37, + "learning_rate": 4.646343479966928e-05, + "loss": 1.2893, + "step": 2281500 + }, + { + "epoch": 1.37, + "learning_rate": 4.6461339034039844e-05, + "loss": 1.2977, + "step": 2282000 + }, + { + "epoch": 1.37, + "learning_rate": 4.64592432684104e-05, + "loss": 1.2905, + "step": 2282500 + }, + { + "epoch": 1.37, + "learning_rate": 4.645714330284983e-05, + "loss": 1.3007, + "step": 2283000 + }, + { + "epoch": 1.37, + "learning_rate": 4.6455047537220384e-05, + "loss": 1.2901, + "step": 2283500 + }, + { + "epoch": 1.37, + "learning_rate": 4.6452947571659825e-05, + "loss": 1.2815, + "step": 2284000 + }, + { + "epoch": 1.37, + "learning_rate": 4.645084760609926e-05, + "loss": 1.3198, + "step": 2284500 + }, + { + "epoch": 1.37, + "learning_rate": 4.644874764053869e-05, + "loss": 1.2933, + "step": 2285000 + }, + { + "epoch": 1.37, + "learning_rate": 4.644664767497813e-05, + "loss": 1.2763, + "step": 2285500 + }, + { + "epoch": 1.37, + "learning_rate": 4.6444547709417565e-05, + "loss": 1.2977, + "step": 2286000 + }, + { + "epoch": 1.37, + "learning_rate": 4.644245194378812e-05, + "loss": 1.3025, + "step": 2286500 + }, + { + "epoch": 1.37, + "learning_rate": 4.644035197822756e-05, + "loss": 1.2982, + "step": 2287000 + }, + { + "epoch": 1.37, + "learning_rate": 4.643825201266699e-05, + "loss": 1.2912, + "step": 2287500 + }, + { + "epoch": 1.37, + "learning_rate": 4.6436152047106426e-05, + "loss": 1.2818, + "step": 2288000 + }, + { + "epoch": 1.37, + "learning_rate": 4.6434052081545866e-05, + "loss": 1.3461, + "step": 2288500 + }, + { + "epoch": 1.37, + "learning_rate": 4.64319521159853e-05, + "loss": 1.2826, + "step": 2289000 + }, + { + "epoch": 1.37, + "learning_rate": 4.642985215042473e-05, + "loss": 1.3084, + "step": 2289500 + }, + { + "epoch": 1.37, + "learning_rate": 4.642775218486417e-05, + "loss": 1.2781, + "step": 2290000 + }, + { + "epoch": 1.37, + "learning_rate": 4.64256522193036e-05, + "loss": 1.2973, + "step": 2290500 + }, + { + "epoch": 1.37, + "learning_rate": 4.642355645367416e-05, + "loss": 1.2925, + "step": 2291000 + }, + { + "epoch": 1.37, + "learning_rate": 4.6421456488113594e-05, + "loss": 1.2886, + "step": 2291500 + }, + { + "epoch": 1.37, + "learning_rate": 4.641936072248415e-05, + "loss": 1.2997, + "step": 2292000 + }, + { + "epoch": 1.37, + "learning_rate": 4.641726075692359e-05, + "loss": 1.2762, + "step": 2292500 + }, + { + "epoch": 1.37, + "learning_rate": 4.641516079136302e-05, + "loss": 1.28, + "step": 2293000 + }, + { + "epoch": 1.38, + "learning_rate": 4.6413060825802454e-05, + "loss": 1.3279, + "step": 2293500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6410965060173015e-05, + "loss": 1.2638, + "step": 2294000 + }, + { + "epoch": 1.38, + "learning_rate": 4.640886509461245e-05, + "loss": 1.2722, + "step": 2294500 + }, + { + "epoch": 1.38, + "learning_rate": 4.640676512905188e-05, + "loss": 1.3138, + "step": 2295000 + }, + { + "epoch": 1.38, + "learning_rate": 4.640466516349132e-05, + "loss": 1.2903, + "step": 2295500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6402565197930755e-05, + "loss": 1.2959, + "step": 2296000 + }, + { + "epoch": 1.38, + "learning_rate": 4.640046523237019e-05, + "loss": 1.2991, + "step": 2296500 + }, + { + "epoch": 1.38, + "learning_rate": 4.639836526680963e-05, + "loss": 1.2732, + "step": 2297000 + }, + { + "epoch": 1.38, + "learning_rate": 4.6396265301249056e-05, + "loss": 1.3255, + "step": 2297500 + }, + { + "epoch": 1.38, + "learning_rate": 4.639416533568849e-05, + "loss": 1.3055, + "step": 2298000 + }, + { + "epoch": 1.38, + "learning_rate": 4.639206537012793e-05, + "loss": 1.3088, + "step": 2298500 + }, + { + "epoch": 1.38, + "learning_rate": 4.638996540456736e-05, + "loss": 1.2932, + "step": 2299000 + }, + { + "epoch": 1.38, + "learning_rate": 4.6387865439006796e-05, + "loss": 1.2821, + "step": 2299500 + }, + { + "epoch": 1.38, + "learning_rate": 4.638576547344624e-05, + "loss": 1.288, + "step": 2300000 + }, + { + "epoch": 1.38, + "eval_loss": 1.2455518245697021, + "eval_runtime": 1106.2825, + "eval_samples_per_second": 476.117, + "eval_steps_per_second": 79.353, + "step": 2300000 + }, + { + "epoch": 1.38, + "learning_rate": 4.638366970781679e-05, + "loss": 1.2798, + "step": 2300500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6381569742256224e-05, + "loss": 1.3179, + "step": 2301000 + }, + { + "epoch": 1.38, + "learning_rate": 4.637946977669566e-05, + "loss": 1.3051, + "step": 2301500 + }, + { + "epoch": 1.38, + "learning_rate": 4.63773698111351e-05, + "loss": 1.2896, + "step": 2302000 + }, + { + "epoch": 1.38, + "learning_rate": 4.637526984557453e-05, + "loss": 1.2863, + "step": 2302500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6373169880013964e-05, + "loss": 1.3223, + "step": 2303000 + }, + { + "epoch": 1.38, + "learning_rate": 4.6371069914453405e-05, + "loss": 1.3026, + "step": 2303500 + }, + { + "epoch": 1.38, + "learning_rate": 4.636897414882396e-05, + "loss": 1.2974, + "step": 2304000 + }, + { + "epoch": 1.38, + "learning_rate": 4.636687418326339e-05, + "loss": 1.302, + "step": 2304500 + }, + { + "epoch": 1.38, + "learning_rate": 4.636477421770283e-05, + "loss": 1.2879, + "step": 2305000 + }, + { + "epoch": 1.38, + "learning_rate": 4.6362674252142265e-05, + "loss": 1.2889, + "step": 2305500 + }, + { + "epoch": 1.38, + "learning_rate": 4.636057848651282e-05, + "loss": 1.2862, + "step": 2306000 + }, + { + "epoch": 1.38, + "learning_rate": 4.635847852095225e-05, + "loss": 1.2743, + "step": 2306500 + }, + { + "epoch": 1.38, + "learning_rate": 4.635637855539169e-05, + "loss": 1.2755, + "step": 2307000 + }, + { + "epoch": 1.38, + "learning_rate": 4.6354278589831126e-05, + "loss": 1.2925, + "step": 2307500 + }, + { + "epoch": 1.38, + "learning_rate": 4.635217862427056e-05, + "loss": 1.2772, + "step": 2308000 + }, + { + "epoch": 1.38, + "learning_rate": 4.635007865871e-05, + "loss": 1.2943, + "step": 2308500 + }, + { + "epoch": 1.38, + "learning_rate": 4.634797869314943e-05, + "loss": 1.3097, + "step": 2309000 + }, + { + "epoch": 1.38, + "learning_rate": 4.634587872758887e-05, + "loss": 1.2934, + "step": 2309500 + }, + { + "epoch": 1.38, + "learning_rate": 4.634377876202831e-05, + "loss": 1.2788, + "step": 2310000 + }, + { + "epoch": 1.39, + "learning_rate": 4.634168299639886e-05, + "loss": 1.304, + "step": 2310500 + }, + { + "epoch": 1.39, + "learning_rate": 4.6339587230769414e-05, + "loss": 1.2732, + "step": 2311000 + }, + { + "epoch": 1.39, + "learning_rate": 4.633748726520885e-05, + "loss": 1.3261, + "step": 2311500 + }, + { + "epoch": 1.39, + "learning_rate": 4.633538729964829e-05, + "loss": 1.3063, + "step": 2312000 + }, + { + "epoch": 1.39, + "learning_rate": 4.633328733408772e-05, + "loss": 1.3185, + "step": 2312500 + }, + { + "epoch": 1.39, + "learning_rate": 4.6331187368527155e-05, + "loss": 1.2942, + "step": 2313000 + }, + { + "epoch": 1.39, + "learning_rate": 4.6329087402966595e-05, + "loss": 1.2738, + "step": 2313500 + }, + { + "epoch": 1.39, + "learning_rate": 4.632698743740603e-05, + "loss": 1.3169, + "step": 2314000 + }, + { + "epoch": 1.39, + "learning_rate": 4.632488747184546e-05, + "loss": 1.2872, + "step": 2314500 + }, + { + "epoch": 1.39, + "learning_rate": 4.6322791706216015e-05, + "loss": 1.3158, + "step": 2315000 + }, + { + "epoch": 1.39, + "learning_rate": 4.6320691740655456e-05, + "loss": 1.2949, + "step": 2315500 + }, + { + "epoch": 1.39, + "learning_rate": 4.631859177509489e-05, + "loss": 1.2925, + "step": 2316000 + }, + { + "epoch": 1.39, + "learning_rate": 4.631649180953432e-05, + "loss": 1.3328, + "step": 2316500 + }, + { + "epoch": 1.39, + "learning_rate": 4.631439184397376e-05, + "loss": 1.296, + "step": 2317000 + }, + { + "epoch": 1.39, + "learning_rate": 4.631229187841319e-05, + "loss": 1.2819, + "step": 2317500 + }, + { + "epoch": 1.39, + "learning_rate": 4.631019191285262e-05, + "loss": 1.276, + "step": 2318000 + }, + { + "epoch": 1.39, + "learning_rate": 4.630809194729206e-05, + "loss": 1.2908, + "step": 2318500 + }, + { + "epoch": 1.39, + "learning_rate": 4.6305996181662623e-05, + "loss": 1.3047, + "step": 2319000 + }, + { + "epoch": 1.39, + "learning_rate": 4.630389621610206e-05, + "loss": 1.2927, + "step": 2319500 + }, + { + "epoch": 1.39, + "learning_rate": 4.630180045047261e-05, + "loss": 1.2969, + "step": 2320000 + }, + { + "epoch": 1.39, + "learning_rate": 4.629970048491205e-05, + "loss": 1.3405, + "step": 2320500 + }, + { + "epoch": 1.39, + "learning_rate": 4.6297600519351484e-05, + "loss": 1.3179, + "step": 2321000 + }, + { + "epoch": 1.39, + "learning_rate": 4.629550055379092e-05, + "loss": 1.3056, + "step": 2321500 + }, + { + "epoch": 1.39, + "learning_rate": 4.629340058823036e-05, + "loss": 1.2859, + "step": 2322000 + }, + { + "epoch": 1.39, + "learning_rate": 4.6291300622669785e-05, + "loss": 1.2527, + "step": 2322500 + }, + { + "epoch": 1.39, + "learning_rate": 4.628920065710922e-05, + "loss": 1.2885, + "step": 2323000 + }, + { + "epoch": 1.39, + "learning_rate": 4.628710069154866e-05, + "loss": 1.2838, + "step": 2323500 + }, + { + "epoch": 1.39, + "learning_rate": 4.628500072598809e-05, + "loss": 1.3033, + "step": 2324000 + }, + { + "epoch": 1.39, + "learning_rate": 4.6282900760427525e-05, + "loss": 1.2698, + "step": 2324500 + }, + { + "epoch": 1.39, + "learning_rate": 4.6280800794866965e-05, + "loss": 1.3107, + "step": 2325000 + }, + { + "epoch": 1.39, + "learning_rate": 4.62787008293064e-05, + "loss": 1.2818, + "step": 2325500 + }, + { + "epoch": 1.39, + "learning_rate": 4.627660506367695e-05, + "loss": 1.2776, + "step": 2326000 + }, + { + "epoch": 1.39, + "learning_rate": 4.627450509811639e-05, + "loss": 1.3122, + "step": 2326500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6272405132555826e-05, + "loss": 1.2634, + "step": 2327000 + }, + { + "epoch": 1.4, + "learning_rate": 4.627030516699526e-05, + "loss": 1.2735, + "step": 2327500 + }, + { + "epoch": 1.4, + "learning_rate": 4.626820940136581e-05, + "loss": 1.3148, + "step": 2328000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6266109435805253e-05, + "loss": 1.3328, + "step": 2328500 + }, + { + "epoch": 1.4, + "learning_rate": 4.626400947024469e-05, + "loss": 1.2974, + "step": 2329000 + }, + { + "epoch": 1.4, + "learning_rate": 4.626190950468412e-05, + "loss": 1.2861, + "step": 2329500 + }, + { + "epoch": 1.4, + "learning_rate": 4.625980953912356e-05, + "loss": 1.3123, + "step": 2330000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6257709573562994e-05, + "loss": 1.2748, + "step": 2330500 + }, + { + "epoch": 1.4, + "learning_rate": 4.625560960800243e-05, + "loss": 1.2806, + "step": 2331000 + }, + { + "epoch": 1.4, + "learning_rate": 4.625350964244187e-05, + "loss": 1.3202, + "step": 2331500 + }, + { + "epoch": 1.4, + "learning_rate": 4.625141387681242e-05, + "loss": 1.2773, + "step": 2332000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6249313911251855e-05, + "loss": 1.3239, + "step": 2332500 + }, + { + "epoch": 1.4, + "learning_rate": 4.624721394569129e-05, + "loss": 1.29, + "step": 2333000 + }, + { + "epoch": 1.4, + "learning_rate": 4.624511818006185e-05, + "loss": 1.3223, + "step": 2333500 + }, + { + "epoch": 1.4, + "learning_rate": 4.624301821450128e-05, + "loss": 1.3051, + "step": 2334000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6240918248940715e-05, + "loss": 1.306, + "step": 2334500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6238818283380156e-05, + "loss": 1.3051, + "step": 2335000 + }, + { + "epoch": 1.4, + "learning_rate": 4.623671831781959e-05, + "loss": 1.2673, + "step": 2335500 + }, + { + "epoch": 1.4, + "learning_rate": 4.623461835225902e-05, + "loss": 1.3023, + "step": 2336000 + }, + { + "epoch": 1.4, + "learning_rate": 4.623251838669846e-05, + "loss": 1.2617, + "step": 2336500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6230418421137896e-05, + "loss": 1.285, + "step": 2337000 + }, + { + "epoch": 1.4, + "learning_rate": 4.622831845557732e-05, + "loss": 1.2997, + "step": 2337500 + }, + { + "epoch": 1.4, + "learning_rate": 4.622621849001676e-05, + "loss": 1.2867, + "step": 2338000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6224122724387324e-05, + "loss": 1.3164, + "step": 2338500 + }, + { + "epoch": 1.4, + "learning_rate": 4.622202695875788e-05, + "loss": 1.2961, + "step": 2339000 + }, + { + "epoch": 1.4, + "learning_rate": 4.621992699319731e-05, + "loss": 1.323, + "step": 2339500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6217827027636744e-05, + "loss": 1.3008, + "step": 2340000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6215731262007304e-05, + "loss": 1.3047, + "step": 2340500 + }, + { + "epoch": 1.4, + "learning_rate": 4.621363129644674e-05, + "loss": 1.2989, + "step": 2341000 + }, + { + "epoch": 1.4, + "learning_rate": 4.621153133088617e-05, + "loss": 1.3063, + "step": 2341500 + }, + { + "epoch": 1.4, + "learning_rate": 4.620943136532561e-05, + "loss": 1.3167, + "step": 2342000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6207331399765045e-05, + "loss": 1.3272, + "step": 2342500 + }, + { + "epoch": 1.4, + "learning_rate": 4.620523143420448e-05, + "loss": 1.3168, + "step": 2343000 + }, + { + "epoch": 1.41, + "learning_rate": 4.620313566857503e-05, + "loss": 1.308, + "step": 2343500 + }, + { + "epoch": 1.41, + "learning_rate": 4.620103570301447e-05, + "loss": 1.3284, + "step": 2344000 + }, + { + "epoch": 1.41, + "learning_rate": 4.6198935737453906e-05, + "loss": 1.3037, + "step": 2344500 + }, + { + "epoch": 1.41, + "learning_rate": 4.619683997182446e-05, + "loss": 1.2934, + "step": 2345000 + }, + { + "epoch": 1.41, + "learning_rate": 4.619474000626389e-05, + "loss": 1.2741, + "step": 2345500 + }, + { + "epoch": 1.41, + "learning_rate": 4.619264004070333e-05, + "loss": 1.2789, + "step": 2346000 + }, + { + "epoch": 1.41, + "learning_rate": 4.6190540075142767e-05, + "loss": 1.3141, + "step": 2346500 + }, + { + "epoch": 1.41, + "learning_rate": 4.61884401095822e-05, + "loss": 1.3117, + "step": 2347000 + }, + { + "epoch": 1.41, + "learning_rate": 4.618634014402164e-05, + "loss": 1.2914, + "step": 2347500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6184240178461074e-05, + "loss": 1.2901, + "step": 2348000 + }, + { + "epoch": 1.41, + "learning_rate": 4.618214021290051e-05, + "loss": 1.2943, + "step": 2348500 + }, + { + "epoch": 1.41, + "learning_rate": 4.618004024733995e-05, + "loss": 1.2785, + "step": 2349000 + }, + { + "epoch": 1.41, + "learning_rate": 4.6177940281779374e-05, + "loss": 1.2823, + "step": 2349500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6175840316218814e-05, + "loss": 1.2861, + "step": 2350000 + }, + { + "epoch": 1.41, + "learning_rate": 4.617374035065825e-05, + "loss": 1.2859, + "step": 2350500 + }, + { + "epoch": 1.41, + "learning_rate": 4.617164038509768e-05, + "loss": 1.3077, + "step": 2351000 + }, + { + "epoch": 1.41, + "learning_rate": 4.616954041953712e-05, + "loss": 1.2524, + "step": 2351500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6167440453976555e-05, + "loss": 1.2938, + "step": 2352000 + }, + { + "epoch": 1.41, + "learning_rate": 4.616534048841599e-05, + "loss": 1.3115, + "step": 2352500 + }, + { + "epoch": 1.41, + "learning_rate": 4.616324052285543e-05, + "loss": 1.3106, + "step": 2353000 + }, + { + "epoch": 1.41, + "learning_rate": 4.616114475722598e-05, + "loss": 1.3199, + "step": 2353500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6159044791665416e-05, + "loss": 1.2777, + "step": 2354000 + }, + { + "epoch": 1.41, + "learning_rate": 4.615694482610485e-05, + "loss": 1.2608, + "step": 2354500 + }, + { + "epoch": 1.41, + "learning_rate": 4.615484486054429e-05, + "loss": 1.3005, + "step": 2355000 + }, + { + "epoch": 1.41, + "learning_rate": 4.615274909491484e-05, + "loss": 1.2807, + "step": 2355500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6150649129354276e-05, + "loss": 1.2883, + "step": 2356000 + }, + { + "epoch": 1.41, + "learning_rate": 4.614854916379371e-05, + "loss": 1.2886, + "step": 2356500 + }, + { + "epoch": 1.41, + "learning_rate": 4.614644919823315e-05, + "loss": 1.3124, + "step": 2357000 + }, + { + "epoch": 1.41, + "learning_rate": 4.6144353432603704e-05, + "loss": 1.2828, + "step": 2357500 + }, + { + "epoch": 1.41, + "learning_rate": 4.614225346704314e-05, + "loss": 1.253, + "step": 2358000 + }, + { + "epoch": 1.41, + "learning_rate": 4.614015350148258e-05, + "loss": 1.2813, + "step": 2358500 + }, + { + "epoch": 1.41, + "learning_rate": 4.613805353592201e-05, + "loss": 1.2631, + "step": 2359000 + }, + { + "epoch": 1.41, + "learning_rate": 4.6135953570361444e-05, + "loss": 1.2863, + "step": 2359500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6133857804732e-05, + "loss": 1.3079, + "step": 2360000 + }, + { + "epoch": 1.42, + "learning_rate": 4.613175783917144e-05, + "loss": 1.294, + "step": 2360500 + }, + { + "epoch": 1.42, + "learning_rate": 4.612965787361087e-05, + "loss": 1.2917, + "step": 2361000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6127557908050305e-05, + "loss": 1.289, + "step": 2361500 + }, + { + "epoch": 1.42, + "learning_rate": 4.6125457942489745e-05, + "loss": 1.2959, + "step": 2362000 + }, + { + "epoch": 1.42, + "learning_rate": 4.612335797692918e-05, + "loss": 1.3066, + "step": 2362500 + }, + { + "epoch": 1.42, + "learning_rate": 4.612125801136861e-05, + "loss": 1.3018, + "step": 2363000 + }, + { + "epoch": 1.42, + "learning_rate": 4.611915804580805e-05, + "loss": 1.2903, + "step": 2363500 + }, + { + "epoch": 1.42, + "learning_rate": 4.6117058080247486e-05, + "loss": 1.2832, + "step": 2364000 + }, + { + "epoch": 1.42, + "learning_rate": 4.611496231461804e-05, + "loss": 1.2831, + "step": 2364500 + }, + { + "epoch": 1.42, + "learning_rate": 4.611286234905748e-05, + "loss": 1.314, + "step": 2365000 + }, + { + "epoch": 1.42, + "learning_rate": 4.611076238349691e-05, + "loss": 1.2875, + "step": 2365500 + }, + { + "epoch": 1.42, + "learning_rate": 4.6108662417936347e-05, + "loss": 1.3031, + "step": 2366000 + }, + { + "epoch": 1.42, + "learning_rate": 4.610656245237578e-05, + "loss": 1.2956, + "step": 2366500 + }, + { + "epoch": 1.42, + "learning_rate": 4.6104462486815213e-05, + "loss": 1.3006, + "step": 2367000 + }, + { + "epoch": 1.42, + "learning_rate": 4.610236252125465e-05, + "loss": 1.2941, + "step": 2367500 + }, + { + "epoch": 1.42, + "learning_rate": 4.610026675562521e-05, + "loss": 1.3108, + "step": 2368000 + }, + { + "epoch": 1.42, + "learning_rate": 4.609816679006465e-05, + "loss": 1.2935, + "step": 2368500 + }, + { + "epoch": 1.42, + "learning_rate": 4.6096066824504074e-05, + "loss": 1.2873, + "step": 2369000 + }, + { + "epoch": 1.42, + "learning_rate": 4.609396685894351e-05, + "loss": 1.2912, + "step": 2369500 + }, + { + "epoch": 1.42, + "learning_rate": 4.609186689338295e-05, + "loss": 1.2927, + "step": 2370000 + }, + { + "epoch": 1.42, + "learning_rate": 4.608976692782238e-05, + "loss": 1.2987, + "step": 2370500 + }, + { + "epoch": 1.42, + "learning_rate": 4.608767116219294e-05, + "loss": 1.2951, + "step": 2371000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6085571196632375e-05, + "loss": 1.301, + "step": 2371500 + }, + { + "epoch": 1.42, + "learning_rate": 4.608347123107181e-05, + "loss": 1.2768, + "step": 2372000 + }, + { + "epoch": 1.42, + "learning_rate": 4.608137126551124e-05, + "loss": 1.31, + "step": 2372500 + }, + { + "epoch": 1.42, + "learning_rate": 4.60792754998818e-05, + "loss": 1.3129, + "step": 2373000 + }, + { + "epoch": 1.42, + "learning_rate": 4.607717553432124e-05, + "loss": 1.3254, + "step": 2373500 + }, + { + "epoch": 1.42, + "learning_rate": 4.6075079768691796e-05, + "loss": 1.3078, + "step": 2374000 + }, + { + "epoch": 1.42, + "learning_rate": 4.607297980313123e-05, + "loss": 1.2997, + "step": 2374500 + }, + { + "epoch": 1.42, + "learning_rate": 4.607087983757066e-05, + "loss": 1.3192, + "step": 2375000 + }, + { + "epoch": 1.42, + "learning_rate": 4.60687798720101e-05, + "loss": 1.3055, + "step": 2375500 + }, + { + "epoch": 1.42, + "learning_rate": 4.606667990644953e-05, + "loss": 1.2904, + "step": 2376000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6064579940888964e-05, + "loss": 1.2697, + "step": 2376500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6062479975328404e-05, + "loss": 1.2983, + "step": 2377000 + }, + { + "epoch": 1.43, + "learning_rate": 4.6060384209698964e-05, + "loss": 1.2853, + "step": 2377500 + }, + { + "epoch": 1.43, + "learning_rate": 4.60582842441384e-05, + "loss": 1.2846, + "step": 2378000 + }, + { + "epoch": 1.43, + "learning_rate": 4.605618427857783e-05, + "loss": 1.2802, + "step": 2378500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6054084313017264e-05, + "loss": 1.2968, + "step": 2379000 + }, + { + "epoch": 1.43, + "learning_rate": 4.60519843474567e-05, + "loss": 1.2991, + "step": 2379500 + }, + { + "epoch": 1.43, + "learning_rate": 4.604988438189614e-05, + "loss": 1.2938, + "step": 2380000 + }, + { + "epoch": 1.43, + "learning_rate": 4.604778441633557e-05, + "loss": 1.294, + "step": 2380500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6045684450775005e-05, + "loss": 1.3013, + "step": 2381000 + }, + { + "epoch": 1.43, + "learning_rate": 4.6043584485214445e-05, + "loss": 1.2893, + "step": 2381500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6041488719585e-05, + "loss": 1.2824, + "step": 2382000 + }, + { + "epoch": 1.43, + "learning_rate": 4.603939715388667e-05, + "loss": 1.2879, + "step": 2382500 + }, + { + "epoch": 1.43, + "learning_rate": 4.603729718832611e-05, + "loss": 1.2562, + "step": 2383000 + }, + { + "epoch": 1.43, + "learning_rate": 4.6035197222765546e-05, + "loss": 1.2687, + "step": 2383500 + }, + { + "epoch": 1.43, + "learning_rate": 4.603309725720498e-05, + "loss": 1.2971, + "step": 2384000 + }, + { + "epoch": 1.43, + "learning_rate": 4.603099729164442e-05, + "loss": 1.3108, + "step": 2384500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6028897326083853e-05, + "loss": 1.2912, + "step": 2385000 + }, + { + "epoch": 1.43, + "learning_rate": 4.602679736052329e-05, + "loss": 1.292, + "step": 2385500 + }, + { + "epoch": 1.43, + "learning_rate": 4.602469739496272e-05, + "loss": 1.2852, + "step": 2386000 + }, + { + "epoch": 1.43, + "learning_rate": 4.6022597429402154e-05, + "loss": 1.2846, + "step": 2386500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6020497463841594e-05, + "loss": 1.2607, + "step": 2387000 + }, + { + "epoch": 1.43, + "learning_rate": 4.601839749828103e-05, + "loss": 1.2958, + "step": 2387500 + }, + { + "epoch": 1.43, + "learning_rate": 4.601629753272046e-05, + "loss": 1.2838, + "step": 2388000 + }, + { + "epoch": 1.43, + "learning_rate": 4.60141975671599e-05, + "loss": 1.2743, + "step": 2388500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6012097601599335e-05, + "loss": 1.2688, + "step": 2389000 + }, + { + "epoch": 1.43, + "learning_rate": 4.600999763603877e-05, + "loss": 1.2871, + "step": 2389500 + }, + { + "epoch": 1.43, + "learning_rate": 4.600790187040932e-05, + "loss": 1.3003, + "step": 2390000 + }, + { + "epoch": 1.43, + "learning_rate": 4.600580190484876e-05, + "loss": 1.2955, + "step": 2390500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6003701939288195e-05, + "loss": 1.3004, + "step": 2391000 + }, + { + "epoch": 1.43, + "learning_rate": 4.600160197372763e-05, + "loss": 1.2977, + "step": 2391500 + }, + { + "epoch": 1.43, + "learning_rate": 4.599950200816707e-05, + "loss": 1.3495, + "step": 2392000 + }, + { + "epoch": 1.43, + "learning_rate": 4.59974020426065e-05, + "loss": 1.2895, + "step": 2392500 + }, + { + "epoch": 1.43, + "learning_rate": 4.5995302077045936e-05, + "loss": 1.2887, + "step": 2393000 + }, + { + "epoch": 1.44, + "learning_rate": 4.599320631141649e-05, + "loss": 1.2873, + "step": 2393500 + }, + { + "epoch": 1.44, + "learning_rate": 4.599110634585593e-05, + "loss": 1.2978, + "step": 2394000 + }, + { + "epoch": 1.44, + "learning_rate": 4.598900638029536e-05, + "loss": 1.2998, + "step": 2394500 + }, + { + "epoch": 1.44, + "learning_rate": 4.59869064147348e-05, + "loss": 1.2726, + "step": 2395000 + }, + { + "epoch": 1.44, + "learning_rate": 4.598480644917424e-05, + "loss": 1.3146, + "step": 2395500 + }, + { + "epoch": 1.44, + "learning_rate": 4.5982706483613664e-05, + "loss": 1.3099, + "step": 2396000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5980606518053104e-05, + "loss": 1.293, + "step": 2396500 + }, + { + "epoch": 1.44, + "learning_rate": 4.597850655249254e-05, + "loss": 1.2994, + "step": 2397000 + }, + { + "epoch": 1.44, + "learning_rate": 4.59764107868631e-05, + "loss": 1.2754, + "step": 2397500 + }, + { + "epoch": 1.44, + "learning_rate": 4.597431502123365e-05, + "loss": 1.3155, + "step": 2398000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5972215055673085e-05, + "loss": 1.2789, + "step": 2398500 + }, + { + "epoch": 1.44, + "learning_rate": 4.5970115090112525e-05, + "loss": 1.2833, + "step": 2399000 + }, + { + "epoch": 1.44, + "learning_rate": 4.596801512455196e-05, + "loss": 1.295, + "step": 2399500 + }, + { + "epoch": 1.44, + "learning_rate": 4.596591515899139e-05, + "loss": 1.2783, + "step": 2400000 + }, + { + "epoch": 1.44, + "eval_loss": 1.2449374198913574, + "eval_runtime": 1111.8994, + "eval_samples_per_second": 473.712, + "eval_steps_per_second": 78.952, + "step": 2400000 + }, + { + "epoch": 1.44, + "learning_rate": 4.596381519343083e-05, + "loss": 1.3102, + "step": 2400500 + }, + { + "epoch": 1.44, + "learning_rate": 4.5961719427801386e-05, + "loss": 1.279, + "step": 2401000 + }, + { + "epoch": 1.44, + "learning_rate": 4.595961946224082e-05, + "loss": 1.3264, + "step": 2401500 + }, + { + "epoch": 1.44, + "learning_rate": 4.595751949668025e-05, + "loss": 1.2797, + "step": 2402000 + }, + { + "epoch": 1.44, + "learning_rate": 4.595541953111969e-05, + "loss": 1.2995, + "step": 2402500 + }, + { + "epoch": 1.44, + "learning_rate": 4.595331956555912e-05, + "loss": 1.3128, + "step": 2403000 + }, + { + "epoch": 1.44, + "learning_rate": 4.595121959999856e-05, + "loss": 1.3285, + "step": 2403500 + }, + { + "epoch": 1.44, + "learning_rate": 4.594911963443799e-05, + "loss": 1.2796, + "step": 2404000 + }, + { + "epoch": 1.44, + "learning_rate": 4.594701966887743e-05, + "loss": 1.2816, + "step": 2404500 + }, + { + "epoch": 1.44, + "learning_rate": 4.594492390324799e-05, + "loss": 1.281, + "step": 2405000 + }, + { + "epoch": 1.44, + "learning_rate": 4.594282393768742e-05, + "loss": 1.2861, + "step": 2405500 + }, + { + "epoch": 1.44, + "learning_rate": 4.5940723972126854e-05, + "loss": 1.3202, + "step": 2406000 + }, + { + "epoch": 1.44, + "learning_rate": 4.593862400656629e-05, + "loss": 1.2889, + "step": 2406500 + }, + { + "epoch": 1.44, + "learning_rate": 4.593652404100573e-05, + "loss": 1.3099, + "step": 2407000 + }, + { + "epoch": 1.44, + "learning_rate": 4.593442827537629e-05, + "loss": 1.2828, + "step": 2407500 + }, + { + "epoch": 1.44, + "learning_rate": 4.593233250974684e-05, + "loss": 1.3041, + "step": 2408000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5930232544186275e-05, + "loss": 1.29, + "step": 2408500 + }, + { + "epoch": 1.44, + "learning_rate": 4.592813677855683e-05, + "loss": 1.2607, + "step": 2409000 + }, + { + "epoch": 1.44, + "learning_rate": 4.592603681299627e-05, + "loss": 1.3062, + "step": 2409500 + }, + { + "epoch": 1.44, + "learning_rate": 4.59239368474357e-05, + "loss": 1.3198, + "step": 2410000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5921836881875136e-05, + "loss": 1.3009, + "step": 2410500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5919736916314576e-05, + "loss": 1.2754, + "step": 2411000 + }, + { + "epoch": 1.45, + "learning_rate": 4.591763695075401e-05, + "loss": 1.2774, + "step": 2411500 + }, + { + "epoch": 1.45, + "learning_rate": 4.591553698519344e-05, + "loss": 1.2868, + "step": 2412000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5913437019632876e-05, + "loss": 1.2933, + "step": 2412500 + }, + { + "epoch": 1.45, + "learning_rate": 4.591133705407231e-05, + "loss": 1.2822, + "step": 2413000 + }, + { + "epoch": 1.45, + "learning_rate": 4.590924128844287e-05, + "loss": 1.3011, + "step": 2413500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5907141322882304e-05, + "loss": 1.2893, + "step": 2414000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5905041357321744e-05, + "loss": 1.3064, + "step": 2414500 + }, + { + "epoch": 1.45, + "learning_rate": 4.590294139176117e-05, + "loss": 1.2824, + "step": 2415000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5900841426200604e-05, + "loss": 1.2849, + "step": 2415500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5898741460640044e-05, + "loss": 1.3107, + "step": 2416000 + }, + { + "epoch": 1.45, + "learning_rate": 4.589664149507948e-05, + "loss": 1.2817, + "step": 2416500 + }, + { + "epoch": 1.45, + "learning_rate": 4.589454572945004e-05, + "loss": 1.328, + "step": 2417000 + }, + { + "epoch": 1.45, + "learning_rate": 4.589244576388947e-05, + "loss": 1.3029, + "step": 2417500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5890345798328905e-05, + "loss": 1.2561, + "step": 2418000 + }, + { + "epoch": 1.45, + "learning_rate": 4.588824583276834e-05, + "loss": 1.2795, + "step": 2418500 + }, + { + "epoch": 1.45, + "learning_rate": 4.588614586720778e-05, + "loss": 1.297, + "step": 2419000 + }, + { + "epoch": 1.45, + "learning_rate": 4.588404590164721e-05, + "loss": 1.3036, + "step": 2419500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5881950136017766e-05, + "loss": 1.3244, + "step": 2420000 + }, + { + "epoch": 1.45, + "learning_rate": 4.58798501704572e-05, + "loss": 1.2817, + "step": 2420500 + }, + { + "epoch": 1.45, + "learning_rate": 4.587775020489664e-05, + "loss": 1.2823, + "step": 2421000 + }, + { + "epoch": 1.45, + "learning_rate": 4.587565023933607e-05, + "loss": 1.2691, + "step": 2421500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5873550273775506e-05, + "loss": 1.3004, + "step": 2422000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5871450308214947e-05, + "loss": 1.2823, + "step": 2422500 + }, + { + "epoch": 1.45, + "learning_rate": 4.586935034265438e-05, + "loss": 1.2543, + "step": 2423000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5867250377093813e-05, + "loss": 1.2933, + "step": 2423500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5865154611464374e-05, + "loss": 1.279, + "step": 2424000 + }, + { + "epoch": 1.45, + "learning_rate": 4.586305884583493e-05, + "loss": 1.2722, + "step": 2424500 + }, + { + "epoch": 1.45, + "learning_rate": 4.58609672801366e-05, + "loss": 1.3164, + "step": 2425000 + }, + { + "epoch": 1.45, + "learning_rate": 4.585886731457604e-05, + "loss": 1.2859, + "step": 2425500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5856767349015475e-05, + "loss": 1.3089, + "step": 2426000 + }, + { + "epoch": 1.45, + "learning_rate": 4.585466738345491e-05, + "loss": 1.2864, + "step": 2426500 + }, + { + "epoch": 1.46, + "learning_rate": 4.585256741789435e-05, + "loss": 1.2942, + "step": 2427000 + }, + { + "epoch": 1.46, + "learning_rate": 4.585046745233378e-05, + "loss": 1.2887, + "step": 2427500 + }, + { + "epoch": 1.46, + "learning_rate": 4.5848367486773215e-05, + "loss": 1.3303, + "step": 2428000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5846267521212656e-05, + "loss": 1.2841, + "step": 2428500 + }, + { + "epoch": 1.46, + "learning_rate": 4.584416755565209e-05, + "loss": 1.285, + "step": 2429000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5842067590091516e-05, + "loss": 1.2766, + "step": 2429500 + }, + { + "epoch": 1.46, + "learning_rate": 4.5839967624530956e-05, + "loss": 1.2875, + "step": 2430000 + }, + { + "epoch": 1.46, + "learning_rate": 4.583786765897039e-05, + "loss": 1.2884, + "step": 2430500 + }, + { + "epoch": 1.46, + "learning_rate": 4.583576769340983e-05, + "loss": 1.2892, + "step": 2431000 + }, + { + "epoch": 1.46, + "learning_rate": 4.583366772784926e-05, + "loss": 1.2817, + "step": 2431500 + }, + { + "epoch": 1.46, + "learning_rate": 4.5831567762288697e-05, + "loss": 1.2835, + "step": 2432000 + }, + { + "epoch": 1.46, + "learning_rate": 4.582946779672814e-05, + "loss": 1.2694, + "step": 2432500 + }, + { + "epoch": 1.46, + "learning_rate": 4.582737203109869e-05, + "loss": 1.3126, + "step": 2433000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5825272065538124e-05, + "loss": 1.2653, + "step": 2433500 + }, + { + "epoch": 1.46, + "learning_rate": 4.582317209997756e-05, + "loss": 1.2622, + "step": 2434000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5821072134417e-05, + "loss": 1.2731, + "step": 2434500 + }, + { + "epoch": 1.46, + "learning_rate": 4.581897216885643e-05, + "loss": 1.2944, + "step": 2435000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5816872203295864e-05, + "loss": 1.2903, + "step": 2435500 + }, + { + "epoch": 1.46, + "learning_rate": 4.581477643766642e-05, + "loss": 1.2766, + "step": 2436000 + }, + { + "epoch": 1.46, + "learning_rate": 4.581268067203697e-05, + "loss": 1.2762, + "step": 2436500 + }, + { + "epoch": 1.46, + "learning_rate": 4.581058070647641e-05, + "loss": 1.3124, + "step": 2437000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5808480740915845e-05, + "loss": 1.2866, + "step": 2437500 + }, + { + "epoch": 1.46, + "learning_rate": 4.5806380775355286e-05, + "loss": 1.2678, + "step": 2438000 + }, + { + "epoch": 1.46, + "learning_rate": 4.580428080979472e-05, + "loss": 1.2629, + "step": 2438500 + }, + { + "epoch": 1.46, + "learning_rate": 4.580218084423415e-05, + "loss": 1.2938, + "step": 2439000 + }, + { + "epoch": 1.46, + "learning_rate": 4.580008087867359e-05, + "loss": 1.2974, + "step": 2439500 + }, + { + "epoch": 1.46, + "learning_rate": 4.5797980913113026e-05, + "loss": 1.271, + "step": 2440000 + }, + { + "epoch": 1.46, + "learning_rate": 4.579588094755246e-05, + "loss": 1.3093, + "step": 2440500 + }, + { + "epoch": 1.46, + "learning_rate": 4.579378518192301e-05, + "loss": 1.3068, + "step": 2441000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5791685216362453e-05, + "loss": 1.2741, + "step": 2441500 + }, + { + "epoch": 1.46, + "learning_rate": 4.578958525080189e-05, + "loss": 1.283, + "step": 2442000 + }, + { + "epoch": 1.46, + "learning_rate": 4.578748948517244e-05, + "loss": 1.2647, + "step": 2442500 + }, + { + "epoch": 1.46, + "learning_rate": 4.5785389519611874e-05, + "loss": 1.2893, + "step": 2443000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5783293753982434e-05, + "loss": 1.2831, + "step": 2443500 + }, + { + "epoch": 1.47, + "learning_rate": 4.578119378842187e-05, + "loss": 1.2566, + "step": 2444000 + }, + { + "epoch": 1.47, + "learning_rate": 4.57790938228613e-05, + "loss": 1.2799, + "step": 2444500 + }, + { + "epoch": 1.47, + "learning_rate": 4.577699385730074e-05, + "loss": 1.3023, + "step": 2445000 + }, + { + "epoch": 1.47, + "learning_rate": 4.5774893891740175e-05, + "loss": 1.3199, + "step": 2445500 + }, + { + "epoch": 1.47, + "learning_rate": 4.577279392617961e-05, + "loss": 1.2825, + "step": 2446000 + }, + { + "epoch": 1.47, + "learning_rate": 4.577069396061905e-05, + "loss": 1.2875, + "step": 2446500 + }, + { + "epoch": 1.47, + "learning_rate": 4.576859399505848e-05, + "loss": 1.289, + "step": 2447000 + }, + { + "epoch": 1.47, + "learning_rate": 4.5766494029497916e-05, + "loss": 1.3108, + "step": 2447500 + }, + { + "epoch": 1.47, + "learning_rate": 4.5764394063937356e-05, + "loss": 1.2929, + "step": 2448000 + }, + { + "epoch": 1.47, + "learning_rate": 4.576229409837679e-05, + "loss": 1.2773, + "step": 2448500 + }, + { + "epoch": 1.47, + "learning_rate": 4.5760194132816216e-05, + "loss": 1.2939, + "step": 2449000 + }, + { + "epoch": 1.47, + "learning_rate": 4.5758094167255656e-05, + "loss": 1.2857, + "step": 2449500 + }, + { + "epoch": 1.47, + "learning_rate": 4.575599420169509e-05, + "loss": 1.2969, + "step": 2450000 + }, + { + "epoch": 1.47, + "learning_rate": 4.575389423613452e-05, + "loss": 1.2902, + "step": 2450500 + }, + { + "epoch": 1.47, + "learning_rate": 4.575179427057396e-05, + "loss": 1.2707, + "step": 2451000 + }, + { + "epoch": 1.47, + "learning_rate": 4.574969850494452e-05, + "loss": 1.3025, + "step": 2451500 + }, + { + "epoch": 1.47, + "learning_rate": 4.574759853938395e-05, + "loss": 1.3052, + "step": 2452000 + }, + { + "epoch": 1.47, + "learning_rate": 4.574550277375451e-05, + "loss": 1.2797, + "step": 2452500 + }, + { + "epoch": 1.47, + "learning_rate": 4.5743402808193944e-05, + "loss": 1.2865, + "step": 2453000 + }, + { + "epoch": 1.47, + "learning_rate": 4.5741302842633384e-05, + "loss": 1.2825, + "step": 2453500 + }, + { + "epoch": 1.47, + "learning_rate": 4.573920287707281e-05, + "loss": 1.3003, + "step": 2454000 + }, + { + "epoch": 1.47, + "learning_rate": 4.573710711144337e-05, + "loss": 1.316, + "step": 2454500 + }, + { + "epoch": 1.47, + "learning_rate": 4.573500714588281e-05, + "loss": 1.3039, + "step": 2455000 + }, + { + "epoch": 1.47, + "learning_rate": 4.5732907180322245e-05, + "loss": 1.2965, + "step": 2455500 + }, + { + "epoch": 1.47, + "learning_rate": 4.573080721476168e-05, + "loss": 1.2978, + "step": 2456000 + }, + { + "epoch": 1.47, + "learning_rate": 4.572870724920111e-05, + "loss": 1.2954, + "step": 2456500 + }, + { + "epoch": 1.47, + "learning_rate": 4.5726607283640545e-05, + "loss": 1.2858, + "step": 2457000 + }, + { + "epoch": 1.47, + "learning_rate": 4.572450731807998e-05, + "loss": 1.2854, + "step": 2457500 + }, + { + "epoch": 1.47, + "learning_rate": 4.572240735251942e-05, + "loss": 1.2753, + "step": 2458000 + }, + { + "epoch": 1.47, + "learning_rate": 4.572030738695885e-05, + "loss": 1.2986, + "step": 2458500 + }, + { + "epoch": 1.47, + "learning_rate": 4.5718211621329406e-05, + "loss": 1.283, + "step": 2459000 + }, + { + "epoch": 1.47, + "learning_rate": 4.571611165576884e-05, + "loss": 1.2909, + "step": 2459500 + }, + { + "epoch": 1.47, + "learning_rate": 4.571401169020828e-05, + "loss": 1.2943, + "step": 2460000 + }, + { + "epoch": 1.48, + "learning_rate": 4.571191172464771e-05, + "loss": 1.2873, + "step": 2460500 + }, + { + "epoch": 1.48, + "learning_rate": 4.570981175908715e-05, + "loss": 1.2884, + "step": 2461000 + }, + { + "epoch": 1.48, + "learning_rate": 4.570771179352659e-05, + "loss": 1.3129, + "step": 2461500 + }, + { + "epoch": 1.48, + "learning_rate": 4.570561182796602e-05, + "loss": 1.2774, + "step": 2462000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5703511862405454e-05, + "loss": 1.2387, + "step": 2462500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5701416096776014e-05, + "loss": 1.2889, + "step": 2463000 + }, + { + "epoch": 1.48, + "learning_rate": 4.569932033114657e-05, + "loss": 1.2891, + "step": 2463500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5697220365586e-05, + "loss": 1.314, + "step": 2464000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5695120400025435e-05, + "loss": 1.2999, + "step": 2464500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5693020434464875e-05, + "loss": 1.2791, + "step": 2465000 + }, + { + "epoch": 1.48, + "learning_rate": 4.569092046890431e-05, + "loss": 1.2661, + "step": 2465500 + }, + { + "epoch": 1.48, + "learning_rate": 4.568882470327486e-05, + "loss": 1.2753, + "step": 2466000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5686724737714296e-05, + "loss": 1.2481, + "step": 2466500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5684624772153736e-05, + "loss": 1.3074, + "step": 2467000 + }, + { + "epoch": 1.48, + "learning_rate": 4.568252480659317e-05, + "loss": 1.2951, + "step": 2467500 + }, + { + "epoch": 1.48, + "learning_rate": 4.568042904096373e-05, + "loss": 1.2984, + "step": 2468000 + }, + { + "epoch": 1.48, + "learning_rate": 4.567832907540316e-05, + "loss": 1.2969, + "step": 2468500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5676229109842596e-05, + "loss": 1.285, + "step": 2469000 + }, + { + "epoch": 1.48, + "learning_rate": 4.567412914428203e-05, + "loss": 1.2838, + "step": 2469500 + }, + { + "epoch": 1.48, + "learning_rate": 4.567202917872147e-05, + "loss": 1.31, + "step": 2470000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5669929213160904e-05, + "loss": 1.3301, + "step": 2470500 + }, + { + "epoch": 1.48, + "learning_rate": 4.566782924760034e-05, + "loss": 1.3014, + "step": 2471000 + }, + { + "epoch": 1.48, + "learning_rate": 4.566572928203978e-05, + "loss": 1.2799, + "step": 2471500 + }, + { + "epoch": 1.48, + "learning_rate": 4.566362931647921e-05, + "loss": 1.3107, + "step": 2472000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5661529350918644e-05, + "loss": 1.3026, + "step": 2472500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5659429385358084e-05, + "loss": 1.2913, + "step": 2473000 + }, + { + "epoch": 1.48, + "learning_rate": 4.565732941979752e-05, + "loss": 1.3244, + "step": 2473500 + }, + { + "epoch": 1.48, + "learning_rate": 4.565523365416807e-05, + "loss": 1.3016, + "step": 2474000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5653133688607505e-05, + "loss": 1.3108, + "step": 2474500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5651033723046945e-05, + "loss": 1.2615, + "step": 2475000 + }, + { + "epoch": 1.48, + "learning_rate": 4.564893375748638e-05, + "loss": 1.3088, + "step": 2475500 + }, + { + "epoch": 1.48, + "learning_rate": 4.564683799185693e-05, + "loss": 1.302, + "step": 2476000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5644742226227486e-05, + "loss": 1.271, + "step": 2476500 + }, + { + "epoch": 1.49, + "learning_rate": 4.5642642260666926e-05, + "loss": 1.2775, + "step": 2477000 + }, + { + "epoch": 1.49, + "learning_rate": 4.564054229510636e-05, + "loss": 1.2906, + "step": 2477500 + }, + { + "epoch": 1.49, + "learning_rate": 4.563844232954579e-05, + "loss": 1.2826, + "step": 2478000 + }, + { + "epoch": 1.49, + "learning_rate": 4.563634236398523e-05, + "loss": 1.3045, + "step": 2478500 + }, + { + "epoch": 1.49, + "learning_rate": 4.563424659835579e-05, + "loss": 1.2764, + "step": 2479000 + }, + { + "epoch": 1.49, + "learning_rate": 4.563214663279522e-05, + "loss": 1.2918, + "step": 2479500 + }, + { + "epoch": 1.49, + "learning_rate": 4.5630046667234654e-05, + "loss": 1.2914, + "step": 2480000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5627946701674094e-05, + "loss": 1.2884, + "step": 2480500 + }, + { + "epoch": 1.49, + "learning_rate": 4.562584673611353e-05, + "loss": 1.3056, + "step": 2481000 + }, + { + "epoch": 1.49, + "learning_rate": 4.562374677055296e-05, + "loss": 1.2832, + "step": 2481500 + }, + { + "epoch": 1.49, + "learning_rate": 4.56216468049924e-05, + "loss": 1.2507, + "step": 2482000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5619546839431835e-05, + "loss": 1.2789, + "step": 2482500 + }, + { + "epoch": 1.49, + "learning_rate": 4.561745107380239e-05, + "loss": 1.2784, + "step": 2483000 + }, + { + "epoch": 1.49, + "learning_rate": 4.561535530817294e-05, + "loss": 1.2693, + "step": 2483500 + }, + { + "epoch": 1.49, + "learning_rate": 4.561325534261238e-05, + "loss": 1.3018, + "step": 2484000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5611155377051815e-05, + "loss": 1.2829, + "step": 2484500 + }, + { + "epoch": 1.49, + "learning_rate": 4.560905541149125e-05, + "loss": 1.2979, + "step": 2485000 + }, + { + "epoch": 1.49, + "learning_rate": 4.560695544593069e-05, + "loss": 1.2843, + "step": 2485500 + }, + { + "epoch": 1.49, + "learning_rate": 4.560485548037012e-05, + "loss": 1.2841, + "step": 2486000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5602759714740676e-05, + "loss": 1.2954, + "step": 2486500 + }, + { + "epoch": 1.49, + "learning_rate": 4.5600663949111236e-05, + "loss": 1.2777, + "step": 2487000 + }, + { + "epoch": 1.49, + "learning_rate": 4.559856398355066e-05, + "loss": 1.3195, + "step": 2487500 + }, + { + "epoch": 1.49, + "learning_rate": 4.5596464017990103e-05, + "loss": 1.2912, + "step": 2488000 + }, + { + "epoch": 1.49, + "learning_rate": 4.559436405242954e-05, + "loss": 1.2836, + "step": 2488500 + }, + { + "epoch": 1.49, + "learning_rate": 4.559226408686897e-05, + "loss": 1.2996, + "step": 2489000 + }, + { + "epoch": 1.49, + "learning_rate": 4.559016412130841e-05, + "loss": 1.2988, + "step": 2489500 + }, + { + "epoch": 1.49, + "learning_rate": 4.5588064155747844e-05, + "loss": 1.2854, + "step": 2490000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5585964190187284e-05, + "loss": 1.2872, + "step": 2490500 + }, + { + "epoch": 1.49, + "learning_rate": 4.558386842455784e-05, + "loss": 1.2494, + "step": 2491000 + }, + { + "epoch": 1.49, + "learning_rate": 4.558176845899727e-05, + "loss": 1.3069, + "step": 2491500 + }, + { + "epoch": 1.49, + "learning_rate": 4.5579668493436705e-05, + "loss": 1.2806, + "step": 2492000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5577568527876145e-05, + "loss": 1.2924, + "step": 2492500 + }, + { + "epoch": 1.49, + "learning_rate": 4.55754727622467e-05, + "loss": 1.2733, + "step": 2493000 + }, + { + "epoch": 1.49, + "learning_rate": 4.557337279668613e-05, + "loss": 1.2912, + "step": 2493500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5571272831125565e-05, + "loss": 1.3073, + "step": 2494000 + }, + { + "epoch": 1.5, + "learning_rate": 4.5569172865565006e-05, + "loss": 1.2972, + "step": 2494500 + }, + { + "epoch": 1.5, + "learning_rate": 4.556707290000444e-05, + "loss": 1.2806, + "step": 2495000 + }, + { + "epoch": 1.5, + "learning_rate": 4.556497293444387e-05, + "loss": 1.2528, + "step": 2495500 + }, + { + "epoch": 1.5, + "learning_rate": 4.556287296888331e-05, + "loss": 1.2791, + "step": 2496000 + }, + { + "epoch": 1.5, + "learning_rate": 4.5560773003322746e-05, + "loss": 1.2974, + "step": 2496500 + }, + { + "epoch": 1.5, + "learning_rate": 4.555867303776218e-05, + "loss": 1.2761, + "step": 2497000 + }, + { + "epoch": 1.5, + "learning_rate": 4.555657727213274e-05, + "loss": 1.2989, + "step": 2497500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5554477306572174e-05, + "loss": 1.288, + "step": 2498000 + }, + { + "epoch": 1.5, + "learning_rate": 4.555237734101161e-05, + "loss": 1.2736, + "step": 2498500 + }, + { + "epoch": 1.5, + "learning_rate": 4.555027737545105e-05, + "loss": 1.2777, + "step": 2499000 + }, + { + "epoch": 1.5, + "learning_rate": 4.55481816098216e-05, + "loss": 1.2958, + "step": 2499500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5546081644261034e-05, + "loss": 1.2887, + "step": 2500000 + }, + { + "epoch": 1.5, + "eval_loss": 1.2355479001998901, + "eval_runtime": 1109.1586, + "eval_samples_per_second": 474.883, + "eval_steps_per_second": 79.147, + "step": 2500000 + }, + { + "epoch": 1.5, + "learning_rate": 4.554398167870047e-05, + "loss": 1.2709, + "step": 2500500 + }, + { + "epoch": 1.5, + "learning_rate": 4.554188171313991e-05, + "loss": 1.2705, + "step": 2501000 + }, + { + "epoch": 1.5, + "learning_rate": 4.553978174757934e-05, + "loss": 1.2786, + "step": 2501500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5537681782018775e-05, + "loss": 1.2982, + "step": 2502000 + }, + { + "epoch": 1.5, + "learning_rate": 4.553558181645821e-05, + "loss": 1.2999, + "step": 2502500 + }, + { + "epoch": 1.5, + "learning_rate": 4.553348605082877e-05, + "loss": 1.2726, + "step": 2503000 + }, + { + "epoch": 1.5, + "learning_rate": 4.55313860852682e-05, + "loss": 1.2899, + "step": 2503500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5529286119707636e-05, + "loss": 1.2713, + "step": 2504000 + }, + { + "epoch": 1.5, + "learning_rate": 4.5527186154147076e-05, + "loss": 1.2629, + "step": 2504500 + }, + { + "epoch": 1.5, + "learning_rate": 4.55250861885865e-05, + "loss": 1.2921, + "step": 2505000 + }, + { + "epoch": 1.5, + "learning_rate": 4.552298622302594e-05, + "loss": 1.2762, + "step": 2505500 + }, + { + "epoch": 1.5, + "learning_rate": 4.55208904573965e-05, + "loss": 1.2605, + "step": 2506000 + }, + { + "epoch": 1.5, + "learning_rate": 4.551879889169817e-05, + "loss": 1.3046, + "step": 2506500 + }, + { + "epoch": 1.5, + "learning_rate": 4.551669892613761e-05, + "loss": 1.2998, + "step": 2507000 + }, + { + "epoch": 1.5, + "learning_rate": 4.5514598960577044e-05, + "loss": 1.3053, + "step": 2507500 + }, + { + "epoch": 1.5, + "learning_rate": 4.551249899501648e-05, + "loss": 1.2953, + "step": 2508000 + }, + { + "epoch": 1.5, + "learning_rate": 4.551039902945592e-05, + "loss": 1.2653, + "step": 2508500 + }, + { + "epoch": 1.5, + "learning_rate": 4.550829906389535e-05, + "loss": 1.301, + "step": 2509000 + }, + { + "epoch": 1.5, + "learning_rate": 4.5506199098334784e-05, + "loss": 1.2972, + "step": 2509500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5504099132774225e-05, + "loss": 1.2996, + "step": 2510000 + }, + { + "epoch": 1.51, + "learning_rate": 4.550199916721366e-05, + "loss": 1.2793, + "step": 2510500 + }, + { + "epoch": 1.51, + "learning_rate": 4.549989920165309e-05, + "loss": 1.3054, + "step": 2511000 + }, + { + "epoch": 1.51, + "learning_rate": 4.549779923609253e-05, + "loss": 1.2679, + "step": 2511500 + }, + { + "epoch": 1.51, + "learning_rate": 4.549569927053196e-05, + "loss": 1.3086, + "step": 2512000 + }, + { + "epoch": 1.51, + "learning_rate": 4.549360350490252e-05, + "loss": 1.2989, + "step": 2512500 + }, + { + "epoch": 1.51, + "learning_rate": 4.549150353934196e-05, + "loss": 1.3138, + "step": 2513000 + }, + { + "epoch": 1.51, + "learning_rate": 4.548940777371251e-05, + "loss": 1.2683, + "step": 2513500 + }, + { + "epoch": 1.51, + "learning_rate": 4.5487307808151946e-05, + "loss": 1.3015, + "step": 2514000 + }, + { + "epoch": 1.51, + "learning_rate": 4.548520784259138e-05, + "loss": 1.3071, + "step": 2514500 + }, + { + "epoch": 1.51, + "learning_rate": 4.548310787703082e-05, + "loss": 1.283, + "step": 2515000 + }, + { + "epoch": 1.51, + "learning_rate": 4.548100791147025e-05, + "loss": 1.2628, + "step": 2515500 + }, + { + "epoch": 1.51, + "learning_rate": 4.547890794590969e-05, + "loss": 1.2838, + "step": 2516000 + }, + { + "epoch": 1.51, + "learning_rate": 4.547680798034912e-05, + "loss": 1.2543, + "step": 2516500 + }, + { + "epoch": 1.51, + "learning_rate": 4.5474708014788554e-05, + "loss": 1.2399, + "step": 2517000 + }, + { + "epoch": 1.51, + "learning_rate": 4.547260804922799e-05, + "loss": 1.3154, + "step": 2517500 + }, + { + "epoch": 1.51, + "learning_rate": 4.547050808366743e-05, + "loss": 1.277, + "step": 2518000 + }, + { + "epoch": 1.51, + "learning_rate": 4.546841231803799e-05, + "loss": 1.2831, + "step": 2518500 + }, + { + "epoch": 1.51, + "learning_rate": 4.5466312352477414e-05, + "loss": 1.3303, + "step": 2519000 + }, + { + "epoch": 1.51, + "learning_rate": 4.5464212386916855e-05, + "loss": 1.2873, + "step": 2519500 + }, + { + "epoch": 1.51, + "learning_rate": 4.546211242135629e-05, + "loss": 1.2857, + "step": 2520000 + }, + { + "epoch": 1.51, + "learning_rate": 4.546001245579572e-05, + "loss": 1.283, + "step": 2520500 + }, + { + "epoch": 1.51, + "learning_rate": 4.545791249023516e-05, + "loss": 1.2871, + "step": 2521000 + }, + { + "epoch": 1.51, + "learning_rate": 4.5455816724605715e-05, + "loss": 1.2865, + "step": 2521500 + }, + { + "epoch": 1.51, + "learning_rate": 4.545371675904515e-05, + "loss": 1.268, + "step": 2522000 + }, + { + "epoch": 1.51, + "learning_rate": 4.545161679348458e-05, + "loss": 1.2916, + "step": 2522500 + }, + { + "epoch": 1.51, + "learning_rate": 4.544951682792402e-05, + "loss": 1.3, + "step": 2523000 + }, + { + "epoch": 1.51, + "learning_rate": 4.5447416862363456e-05, + "loss": 1.3141, + "step": 2523500 + }, + { + "epoch": 1.51, + "learning_rate": 4.544532109673401e-05, + "loss": 1.2835, + "step": 2524000 + }, + { + "epoch": 1.51, + "learning_rate": 4.544322113117344e-05, + "loss": 1.2896, + "step": 2524500 + }, + { + "epoch": 1.51, + "learning_rate": 4.544112116561288e-05, + "loss": 1.2659, + "step": 2525000 + }, + { + "epoch": 1.51, + "learning_rate": 4.543902120005232e-05, + "loss": 1.276, + "step": 2525500 + }, + { + "epoch": 1.51, + "learning_rate": 4.543692123449175e-05, + "loss": 1.2937, + "step": 2526000 + }, + { + "epoch": 1.51, + "learning_rate": 4.543482126893119e-05, + "loss": 1.2792, + "step": 2526500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5432721303370624e-05, + "loss": 1.2954, + "step": 2527000 + }, + { + "epoch": 1.52, + "learning_rate": 4.543062133781006e-05, + "loss": 1.2844, + "step": 2527500 + }, + { + "epoch": 1.52, + "learning_rate": 4.542852557218062e-05, + "loss": 1.2766, + "step": 2528000 + }, + { + "epoch": 1.52, + "learning_rate": 4.542642560662005e-05, + "loss": 1.3086, + "step": 2528500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5424325641059485e-05, + "loss": 1.2646, + "step": 2529000 + }, + { + "epoch": 1.52, + "learning_rate": 4.5422225675498925e-05, + "loss": 1.2701, + "step": 2529500 + }, + { + "epoch": 1.52, + "learning_rate": 4.542012990986948e-05, + "loss": 1.2832, + "step": 2530000 + }, + { + "epoch": 1.52, + "learning_rate": 4.541803414424004e-05, + "loss": 1.2783, + "step": 2530500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5415934178679465e-05, + "loss": 1.2968, + "step": 2531000 + }, + { + "epoch": 1.52, + "learning_rate": 4.54138342131189e-05, + "loss": 1.2805, + "step": 2531500 + }, + { + "epoch": 1.52, + "learning_rate": 4.541173424755834e-05, + "loss": 1.2604, + "step": 2532000 + }, + { + "epoch": 1.52, + "learning_rate": 4.540963428199777e-05, + "loss": 1.2809, + "step": 2532500 + }, + { + "epoch": 1.52, + "learning_rate": 4.540753851636833e-05, + "loss": 1.2952, + "step": 2533000 + }, + { + "epoch": 1.52, + "learning_rate": 4.5405438550807766e-05, + "loss": 1.3006, + "step": 2533500 + }, + { + "epoch": 1.52, + "learning_rate": 4.54033385852472e-05, + "loss": 1.2728, + "step": 2534000 + }, + { + "epoch": 1.52, + "learning_rate": 4.540123861968663e-05, + "loss": 1.2973, + "step": 2534500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5399138654126073e-05, + "loss": 1.28, + "step": 2535000 + }, + { + "epoch": 1.52, + "learning_rate": 4.539703868856551e-05, + "loss": 1.2993, + "step": 2535500 + }, + { + "epoch": 1.52, + "learning_rate": 4.539493872300494e-05, + "loss": 1.276, + "step": 2536000 + }, + { + "epoch": 1.52, + "learning_rate": 4.539283875744438e-05, + "loss": 1.2823, + "step": 2536500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5390742991814934e-05, + "loss": 1.262, + "step": 2537000 + }, + { + "epoch": 1.52, + "learning_rate": 4.538864302625437e-05, + "loss": 1.2664, + "step": 2537500 + }, + { + "epoch": 1.52, + "learning_rate": 4.53865430606938e-05, + "loss": 1.3091, + "step": 2538000 + }, + { + "epoch": 1.52, + "learning_rate": 4.538444309513324e-05, + "loss": 1.2859, + "step": 2538500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5382347329503795e-05, + "loss": 1.2933, + "step": 2539000 + }, + { + "epoch": 1.52, + "learning_rate": 4.538024736394323e-05, + "loss": 1.2675, + "step": 2539500 + }, + { + "epoch": 1.52, + "learning_rate": 4.537815159831379e-05, + "loss": 1.2796, + "step": 2540000 + }, + { + "epoch": 1.52, + "learning_rate": 4.537605163275322e-05, + "loss": 1.3124, + "step": 2540500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5373951667192656e-05, + "loss": 1.2767, + "step": 2541000 + }, + { + "epoch": 1.52, + "learning_rate": 4.537185170163209e-05, + "loss": 1.2893, + "step": 2541500 + }, + { + "epoch": 1.52, + "learning_rate": 4.536975173607153e-05, + "loss": 1.2997, + "step": 2542000 + }, + { + "epoch": 1.52, + "learning_rate": 4.536765597044209e-05, + "loss": 1.2888, + "step": 2542500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5365556004881516e-05, + "loss": 1.3313, + "step": 2543000 + }, + { + "epoch": 1.52, + "learning_rate": 4.536345603932095e-05, + "loss": 1.2738, + "step": 2543500 + }, + { + "epoch": 1.53, + "learning_rate": 4.536135607376039e-05, + "loss": 1.2988, + "step": 2544000 + }, + { + "epoch": 1.53, + "learning_rate": 4.535926030813095e-05, + "loss": 1.2847, + "step": 2544500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5357160342570384e-05, + "loss": 1.2844, + "step": 2545000 + }, + { + "epoch": 1.53, + "learning_rate": 4.535506037700981e-05, + "loss": 1.2749, + "step": 2545500 + }, + { + "epoch": 1.53, + "learning_rate": 4.535296041144925e-05, + "loss": 1.289, + "step": 2546000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5350860445888684e-05, + "loss": 1.2605, + "step": 2546500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5348768880190365e-05, + "loss": 1.324, + "step": 2547000 + }, + { + "epoch": 1.53, + "learning_rate": 4.53466689146298e-05, + "loss": 1.2942, + "step": 2547500 + }, + { + "epoch": 1.53, + "learning_rate": 4.534456894906924e-05, + "loss": 1.2843, + "step": 2548000 + }, + { + "epoch": 1.53, + "learning_rate": 4.534246898350867e-05, + "loss": 1.2994, + "step": 2548500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5340369017948105e-05, + "loss": 1.2747, + "step": 2549000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5338269052387546e-05, + "loss": 1.332, + "step": 2549500 + }, + { + "epoch": 1.53, + "learning_rate": 4.533616908682697e-05, + "loss": 1.2875, + "step": 2550000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5334069121266406e-05, + "loss": 1.2939, + "step": 2550500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5331969155705846e-05, + "loss": 1.2803, + "step": 2551000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5329873390076406e-05, + "loss": 1.3065, + "step": 2551500 + }, + { + "epoch": 1.53, + "learning_rate": 4.532777342451584e-05, + "loss": 1.3055, + "step": 2552000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5325673458955266e-05, + "loss": 1.2939, + "step": 2552500 + }, + { + "epoch": 1.53, + "learning_rate": 4.532357349339471e-05, + "loss": 1.3022, + "step": 2553000 + }, + { + "epoch": 1.53, + "learning_rate": 4.532147772776527e-05, + "loss": 1.2936, + "step": 2553500 + }, + { + "epoch": 1.53, + "learning_rate": 4.531938196213582e-05, + "loss": 1.2541, + "step": 2554000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5317281996575254e-05, + "loss": 1.2981, + "step": 2554500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5315182031014694e-05, + "loss": 1.2905, + "step": 2555000 + }, + { + "epoch": 1.53, + "learning_rate": 4.531308206545413e-05, + "loss": 1.3155, + "step": 2555500 + }, + { + "epoch": 1.53, + "learning_rate": 4.531098629982468e-05, + "loss": 1.2903, + "step": 2556000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5308886334264115e-05, + "loss": 1.2703, + "step": 2556500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5306786368703555e-05, + "loss": 1.2927, + "step": 2557000 + }, + { + "epoch": 1.53, + "learning_rate": 4.530468640314299e-05, + "loss": 1.2701, + "step": 2557500 + }, + { + "epoch": 1.53, + "learning_rate": 4.530258643758242e-05, + "loss": 1.289, + "step": 2558000 + }, + { + "epoch": 1.53, + "learning_rate": 4.530048647202186e-05, + "loss": 1.2629, + "step": 2558500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5298390706392416e-05, + "loss": 1.2639, + "step": 2559000 + }, + { + "epoch": 1.53, + "learning_rate": 4.529629074083185e-05, + "loss": 1.3199, + "step": 2559500 + }, + { + "epoch": 1.53, + "learning_rate": 4.529419077527128e-05, + "loss": 1.2753, + "step": 2560000 + }, + { + "epoch": 1.54, + "learning_rate": 4.529209080971072e-05, + "loss": 1.2861, + "step": 2560500 + }, + { + "epoch": 1.54, + "learning_rate": 4.5289990844150156e-05, + "loss": 1.2496, + "step": 2561000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5287890878589597e-05, + "loss": 1.2569, + "step": 2561500 + }, + { + "epoch": 1.54, + "learning_rate": 4.528579091302902e-05, + "loss": 1.3157, + "step": 2562000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5283695147399584e-05, + "loss": 1.2619, + "step": 2562500 + }, + { + "epoch": 1.54, + "learning_rate": 4.528159518183902e-05, + "loss": 1.3042, + "step": 2563000 + }, + { + "epoch": 1.54, + "learning_rate": 4.527949521627846e-05, + "loss": 1.2966, + "step": 2563500 + }, + { + "epoch": 1.54, + "learning_rate": 4.527739525071789e-05, + "loss": 1.3004, + "step": 2564000 + }, + { + "epoch": 1.54, + "learning_rate": 4.527529528515732e-05, + "loss": 1.3088, + "step": 2564500 + }, + { + "epoch": 1.54, + "learning_rate": 4.527319531959676e-05, + "loss": 1.3073, + "step": 2565000 + }, + { + "epoch": 1.54, + "learning_rate": 4.527109535403619e-05, + "loss": 1.2696, + "step": 2565500 + }, + { + "epoch": 1.54, + "learning_rate": 4.5268995388475625e-05, + "loss": 1.294, + "step": 2566000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5266895422915065e-05, + "loss": 1.2843, + "step": 2566500 + }, + { + "epoch": 1.54, + "learning_rate": 4.52647954573545e-05, + "loss": 1.2865, + "step": 2567000 + }, + { + "epoch": 1.54, + "learning_rate": 4.526269549179393e-05, + "loss": 1.2804, + "step": 2567500 + }, + { + "epoch": 1.54, + "learning_rate": 4.526059972616449e-05, + "loss": 1.2992, + "step": 2568000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5258499760603926e-05, + "loss": 1.3119, + "step": 2568500 + }, + { + "epoch": 1.54, + "learning_rate": 4.525639979504336e-05, + "loss": 1.2871, + "step": 2569000 + }, + { + "epoch": 1.54, + "learning_rate": 4.52542998294828e-05, + "loss": 1.2824, + "step": 2569500 + }, + { + "epoch": 1.54, + "learning_rate": 4.525219986392223e-05, + "loss": 1.281, + "step": 2570000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5250099898361666e-05, + "loss": 1.2778, + "step": 2570500 + }, + { + "epoch": 1.54, + "learning_rate": 4.5247999932801106e-05, + "loss": 1.2915, + "step": 2571000 + }, + { + "epoch": 1.54, + "learning_rate": 4.524589996724054e-05, + "loss": 1.2814, + "step": 2571500 + }, + { + "epoch": 1.54, + "learning_rate": 4.524380000167997e-05, + "loss": 1.2775, + "step": 2572000 + }, + { + "epoch": 1.54, + "learning_rate": 4.524170003611941e-05, + "loss": 1.2922, + "step": 2572500 + }, + { + "epoch": 1.54, + "learning_rate": 4.523960007055884e-05, + "loss": 1.302, + "step": 2573000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5237500104998274e-05, + "loss": 1.324, + "step": 2573500 + }, + { + "epoch": 1.54, + "learning_rate": 4.5235404339368834e-05, + "loss": 1.3116, + "step": 2574000 + }, + { + "epoch": 1.54, + "learning_rate": 4.523330437380827e-05, + "loss": 1.2843, + "step": 2574500 + }, + { + "epoch": 1.54, + "learning_rate": 4.52312044082477e-05, + "loss": 1.2948, + "step": 2575000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5229104442687134e-05, + "loss": 1.2783, + "step": 2575500 + }, + { + "epoch": 1.54, + "learning_rate": 4.5227004477126575e-05, + "loss": 1.2708, + "step": 2576000 + }, + { + "epoch": 1.54, + "learning_rate": 4.522490451156601e-05, + "loss": 1.2467, + "step": 2576500 + }, + { + "epoch": 1.55, + "learning_rate": 4.522280454600544e-05, + "loss": 1.2924, + "step": 2577000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5220708780376e-05, + "loss": 1.3003, + "step": 2577500 + }, + { + "epoch": 1.55, + "learning_rate": 4.521861301474656e-05, + "loss": 1.2745, + "step": 2578000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5216513049185996e-05, + "loss": 1.2873, + "step": 2578500 + }, + { + "epoch": 1.55, + "learning_rate": 4.521441308362543e-05, + "loss": 1.2833, + "step": 2579000 + }, + { + "epoch": 1.55, + "learning_rate": 4.521231311806486e-05, + "loss": 1.2841, + "step": 2579500 + }, + { + "epoch": 1.55, + "learning_rate": 4.5210213152504296e-05, + "loss": 1.3092, + "step": 2580000 + }, + { + "epoch": 1.55, + "learning_rate": 4.520811318694373e-05, + "loss": 1.2864, + "step": 2580500 + }, + { + "epoch": 1.55, + "learning_rate": 4.520601322138317e-05, + "loss": 1.2591, + "step": 2581000 + }, + { + "epoch": 1.55, + "learning_rate": 4.52039132558226e-05, + "loss": 1.2806, + "step": 2581500 + }, + { + "epoch": 1.55, + "learning_rate": 4.520181329026204e-05, + "loss": 1.2873, + "step": 2582000 + }, + { + "epoch": 1.55, + "learning_rate": 4.519971752463259e-05, + "loss": 1.2471, + "step": 2582500 + }, + { + "epoch": 1.55, + "learning_rate": 4.519761755907203e-05, + "loss": 1.2981, + "step": 2583000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5195517593511464e-05, + "loss": 1.2825, + "step": 2583500 + }, + { + "epoch": 1.55, + "learning_rate": 4.51934176279509e-05, + "loss": 1.2977, + "step": 2584000 + }, + { + "epoch": 1.55, + "learning_rate": 4.519131766239034e-05, + "loss": 1.2983, + "step": 2584500 + }, + { + "epoch": 1.55, + "learning_rate": 4.518921769682977e-05, + "loss": 1.2695, + "step": 2585000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5187117731269205e-05, + "loss": 1.2862, + "step": 2585500 + }, + { + "epoch": 1.55, + "learning_rate": 4.5185017765708645e-05, + "loss": 1.2896, + "step": 2586000 + }, + { + "epoch": 1.55, + "learning_rate": 4.51829220000792e-05, + "loss": 1.3089, + "step": 2586500 + }, + { + "epoch": 1.55, + "learning_rate": 4.518082203451863e-05, + "loss": 1.2799, + "step": 2587000 + }, + { + "epoch": 1.55, + "learning_rate": 4.517872206895807e-05, + "loss": 1.2714, + "step": 2587500 + }, + { + "epoch": 1.55, + "learning_rate": 4.5176622103397506e-05, + "loss": 1.2882, + "step": 2588000 + }, + { + "epoch": 1.55, + "learning_rate": 4.517452213783694e-05, + "loss": 1.3135, + "step": 2588500 + }, + { + "epoch": 1.55, + "learning_rate": 4.517242217227638e-05, + "loss": 1.2768, + "step": 2589000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5170322206715806e-05, + "loss": 1.2649, + "step": 2589500 + }, + { + "epoch": 1.55, + "learning_rate": 4.5168230641017486e-05, + "loss": 1.2943, + "step": 2590000 + }, + { + "epoch": 1.55, + "learning_rate": 4.516613067545692e-05, + "loss": 1.2447, + "step": 2590500 + }, + { + "epoch": 1.55, + "learning_rate": 4.516403070989635e-05, + "loss": 1.2678, + "step": 2591000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5161930744335794e-05, + "loss": 1.2842, + "step": 2591500 + }, + { + "epoch": 1.55, + "learning_rate": 4.515983077877523e-05, + "loss": 1.2881, + "step": 2592000 + }, + { + "epoch": 1.55, + "learning_rate": 4.515773081321466e-05, + "loss": 1.3077, + "step": 2592500 + }, + { + "epoch": 1.55, + "learning_rate": 4.51556308476541e-05, + "loss": 1.2986, + "step": 2593000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5153530882093534e-05, + "loss": 1.2869, + "step": 2593500 + }, + { + "epoch": 1.56, + "learning_rate": 4.515143091653297e-05, + "loss": 1.2916, + "step": 2594000 + }, + { + "epoch": 1.56, + "learning_rate": 4.51493309509724e-05, + "loss": 1.3003, + "step": 2594500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5147230985411835e-05, + "loss": 1.2656, + "step": 2595000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5145131019851275e-05, + "loss": 1.2334, + "step": 2595500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5143035254221835e-05, + "loss": 1.2851, + "step": 2596000 + }, + { + "epoch": 1.56, + "learning_rate": 4.514093528866127e-05, + "loss": 1.2723, + "step": 2596500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5138835323100695e-05, + "loss": 1.2863, + "step": 2597000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5136735357540136e-05, + "loss": 1.2901, + "step": 2597500 + }, + { + "epoch": 1.56, + "learning_rate": 4.513463539197957e-05, + "loss": 1.3008, + "step": 2598000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5132535426419e-05, + "loss": 1.3031, + "step": 2598500 + }, + { + "epoch": 1.56, + "learning_rate": 4.513043546085844e-05, + "loss": 1.2627, + "step": 2599000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5128335495297876e-05, + "loss": 1.2682, + "step": 2599500 + }, + { + "epoch": 1.56, + "learning_rate": 4.512623972966843e-05, + "loss": 1.3191, + "step": 2600000 + }, + { + "epoch": 1.56, + "eval_loss": 1.2294633388519287, + "eval_runtime": 1103.4979, + "eval_samples_per_second": 477.319, + "eval_steps_per_second": 79.553, + "step": 2600000 + }, + { + "epoch": 1.56, + "learning_rate": 4.512413976410786e-05, + "loss": 1.2858, + "step": 2600500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5122039798547303e-05, + "loss": 1.2558, + "step": 2601000 + }, + { + "epoch": 1.56, + "learning_rate": 4.511993983298674e-05, + "loss": 1.2744, + "step": 2601500 + }, + { + "epoch": 1.56, + "learning_rate": 4.511783986742618e-05, + "loss": 1.2931, + "step": 2602000 + }, + { + "epoch": 1.56, + "learning_rate": 4.511573990186561e-05, + "loss": 1.2964, + "step": 2602500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5113639936305044e-05, + "loss": 1.2838, + "step": 2603000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5111539970744484e-05, + "loss": 1.3014, + "step": 2603500 + }, + { + "epoch": 1.56, + "learning_rate": 4.510944420511504e-05, + "loss": 1.2888, + "step": 2604000 + }, + { + "epoch": 1.56, + "learning_rate": 4.510734843948559e-05, + "loss": 1.2512, + "step": 2604500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5105248473925025e-05, + "loss": 1.283, + "step": 2605000 + }, + { + "epoch": 1.56, + "learning_rate": 4.510314850836446e-05, + "loss": 1.3043, + "step": 2605500 + }, + { + "epoch": 1.56, + "learning_rate": 4.51010485428039e-05, + "loss": 1.2737, + "step": 2606000 + }, + { + "epoch": 1.56, + "learning_rate": 4.509895277717445e-05, + "loss": 1.277, + "step": 2606500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5096852811613886e-05, + "loss": 1.3011, + "step": 2607000 + }, + { + "epoch": 1.56, + "learning_rate": 4.509475284605332e-05, + "loss": 1.3037, + "step": 2607500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5092657080423886e-05, + "loss": 1.2544, + "step": 2608000 + }, + { + "epoch": 1.56, + "learning_rate": 4.509055711486332e-05, + "loss": 1.2733, + "step": 2608500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5088457149302746e-05, + "loss": 1.2428, + "step": 2609000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5086357183742187e-05, + "loss": 1.2869, + "step": 2609500 + }, + { + "epoch": 1.56, + "learning_rate": 4.508425721818162e-05, + "loss": 1.3026, + "step": 2610000 + }, + { + "epoch": 1.57, + "learning_rate": 4.5082157252621054e-05, + "loss": 1.2608, + "step": 2610500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5080057287060494e-05, + "loss": 1.3032, + "step": 2611000 + }, + { + "epoch": 1.57, + "learning_rate": 4.507795732149993e-05, + "loss": 1.2973, + "step": 2611500 + }, + { + "epoch": 1.57, + "learning_rate": 4.507585735593936e-05, + "loss": 1.3113, + "step": 2612000 + }, + { + "epoch": 1.57, + "learning_rate": 4.50737573903788e-05, + "loss": 1.2821, + "step": 2612500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5071661624749354e-05, + "loss": 1.2639, + "step": 2613000 + }, + { + "epoch": 1.57, + "learning_rate": 4.506956165918879e-05, + "loss": 1.2903, + "step": 2613500 + }, + { + "epoch": 1.57, + "learning_rate": 4.506746169362822e-05, + "loss": 1.2997, + "step": 2614000 + }, + { + "epoch": 1.57, + "learning_rate": 4.506536172806766e-05, + "loss": 1.2777, + "step": 2614500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5063265962438215e-05, + "loss": 1.2692, + "step": 2615000 + }, + { + "epoch": 1.57, + "learning_rate": 4.506116599687765e-05, + "loss": 1.2957, + "step": 2615500 + }, + { + "epoch": 1.57, + "learning_rate": 4.505906603131709e-05, + "loss": 1.2923, + "step": 2616000 + }, + { + "epoch": 1.57, + "learning_rate": 4.505696606575652e-05, + "loss": 1.2605, + "step": 2616500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5054866100195956e-05, + "loss": 1.3037, + "step": 2617000 + }, + { + "epoch": 1.57, + "learning_rate": 4.5052766134635396e-05, + "loss": 1.3016, + "step": 2617500 + }, + { + "epoch": 1.57, + "learning_rate": 4.505067036900595e-05, + "loss": 1.2866, + "step": 2618000 + }, + { + "epoch": 1.57, + "learning_rate": 4.504857040344538e-05, + "loss": 1.278, + "step": 2618500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5046470437884817e-05, + "loss": 1.2771, + "step": 2619000 + }, + { + "epoch": 1.57, + "learning_rate": 4.504437047232426e-05, + "loss": 1.2907, + "step": 2619500 + }, + { + "epoch": 1.57, + "learning_rate": 4.504227050676369e-05, + "loss": 1.2826, + "step": 2620000 + }, + { + "epoch": 1.57, + "learning_rate": 4.5040170541203124e-05, + "loss": 1.3166, + "step": 2620500 + }, + { + "epoch": 1.57, + "learning_rate": 4.503807477557368e-05, + "loss": 1.3075, + "step": 2621000 + }, + { + "epoch": 1.57, + "learning_rate": 4.503597481001312e-05, + "loss": 1.2728, + "step": 2621500 + }, + { + "epoch": 1.57, + "learning_rate": 4.503387484445255e-05, + "loss": 1.312, + "step": 2622000 + }, + { + "epoch": 1.57, + "learning_rate": 4.5031774878891984e-05, + "loss": 1.2831, + "step": 2622500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5029674913331425e-05, + "loss": 1.2878, + "step": 2623000 + }, + { + "epoch": 1.57, + "learning_rate": 4.502757494777086e-05, + "loss": 1.3032, + "step": 2623500 + }, + { + "epoch": 1.57, + "learning_rate": 4.502547498221029e-05, + "loss": 1.2846, + "step": 2624000 + }, + { + "epoch": 1.57, + "learning_rate": 4.502337921658085e-05, + "loss": 1.3033, + "step": 2624500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5021279251020285e-05, + "loss": 1.3147, + "step": 2625000 + }, + { + "epoch": 1.57, + "learning_rate": 4.501917928545972e-05, + "loss": 1.2934, + "step": 2625500 + }, + { + "epoch": 1.57, + "learning_rate": 4.501707931989915e-05, + "loss": 1.282, + "step": 2626000 + }, + { + "epoch": 1.57, + "learning_rate": 4.5014979354338586e-05, + "loss": 1.2801, + "step": 2626500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5012883588709146e-05, + "loss": 1.3081, + "step": 2627000 + }, + { + "epoch": 1.58, + "learning_rate": 4.50107878230797e-05, + "loss": 1.291, + "step": 2627500 + }, + { + "epoch": 1.58, + "learning_rate": 4.500868785751913e-05, + "loss": 1.2816, + "step": 2628000 + }, + { + "epoch": 1.58, + "learning_rate": 4.500658789195857e-05, + "loss": 1.3186, + "step": 2628500 + }, + { + "epoch": 1.58, + "learning_rate": 4.500448792639801e-05, + "loss": 1.2771, + "step": 2629000 + }, + { + "epoch": 1.58, + "learning_rate": 4.500238796083744e-05, + "loss": 1.281, + "step": 2629500 + }, + { + "epoch": 1.58, + "learning_rate": 4.500028799527688e-05, + "loss": 1.2775, + "step": 2630000 + }, + { + "epoch": 1.58, + "learning_rate": 4.4998188029716314e-05, + "loss": 1.2693, + "step": 2630500 + }, + { + "epoch": 1.58, + "learning_rate": 4.499608806415575e-05, + "loss": 1.2985, + "step": 2631000 + }, + { + "epoch": 1.58, + "learning_rate": 4.499398809859518e-05, + "loss": 1.2897, + "step": 2631500 + }, + { + "epoch": 1.58, + "learning_rate": 4.4991888133034614e-05, + "loss": 1.2609, + "step": 2632000 + }, + { + "epoch": 1.58, + "learning_rate": 4.4989788167474055e-05, + "loss": 1.2735, + "step": 2632500 + }, + { + "epoch": 1.58, + "learning_rate": 4.4987692401844615e-05, + "loss": 1.2951, + "step": 2633000 + }, + { + "epoch": 1.58, + "learning_rate": 4.498559243628404e-05, + "loss": 1.2729, + "step": 2633500 + }, + { + "epoch": 1.58, + "learning_rate": 4.49834966706546e-05, + "loss": 1.2747, + "step": 2634000 + }, + { + "epoch": 1.58, + "learning_rate": 4.4981396705094035e-05, + "loss": 1.2496, + "step": 2634500 + }, + { + "epoch": 1.58, + "learning_rate": 4.497930093946459e-05, + "loss": 1.2857, + "step": 2635000 + }, + { + "epoch": 1.58, + "learning_rate": 4.497720097390403e-05, + "loss": 1.252, + "step": 2635500 + }, + { + "epoch": 1.58, + "learning_rate": 4.497510100834346e-05, + "loss": 1.2492, + "step": 2636000 + }, + { + "epoch": 1.58, + "learning_rate": 4.4973001042782896e-05, + "loss": 1.2699, + "step": 2636500 + }, + { + "epoch": 1.58, + "learning_rate": 4.4970901077222336e-05, + "loss": 1.3128, + "step": 2637000 + }, + { + "epoch": 1.58, + "learning_rate": 4.496880111166177e-05, + "loss": 1.3217, + "step": 2637500 + }, + { + "epoch": 1.58, + "learning_rate": 4.49667011461012e-05, + "loss": 1.2634, + "step": 2638000 + }, + { + "epoch": 1.58, + "learning_rate": 4.496460118054064e-05, + "loss": 1.3003, + "step": 2638500 + }, + { + "epoch": 1.58, + "learning_rate": 4.49625054149112e-05, + "loss": 1.2762, + "step": 2639000 + }, + { + "epoch": 1.58, + "learning_rate": 4.496040544935063e-05, + "loss": 1.2938, + "step": 2639500 + }, + { + "epoch": 1.58, + "learning_rate": 4.495830548379007e-05, + "loss": 1.2752, + "step": 2640000 + }, + { + "epoch": 1.58, + "learning_rate": 4.49562055182295e-05, + "loss": 1.2598, + "step": 2640500 + }, + { + "epoch": 1.58, + "learning_rate": 4.495410555266893e-05, + "loss": 1.2733, + "step": 2641000 + }, + { + "epoch": 1.58, + "learning_rate": 4.495200558710837e-05, + "loss": 1.2814, + "step": 2641500 + }, + { + "epoch": 1.58, + "learning_rate": 4.4949905621547805e-05, + "loss": 1.2651, + "step": 2642000 + }, + { + "epoch": 1.58, + "learning_rate": 4.494780565598724e-05, + "loss": 1.2976, + "step": 2642500 + }, + { + "epoch": 1.58, + "learning_rate": 4.494570569042668e-05, + "loss": 1.2862, + "step": 2643000 + }, + { + "epoch": 1.58, + "learning_rate": 4.494360572486611e-05, + "loss": 1.3144, + "step": 2643500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4941509959236665e-05, + "loss": 1.2564, + "step": 2644000 + }, + { + "epoch": 1.59, + "learning_rate": 4.49394099936761e-05, + "loss": 1.2608, + "step": 2644500 + }, + { + "epoch": 1.59, + "learning_rate": 4.493731002811554e-05, + "loss": 1.2598, + "step": 2645000 + }, + { + "epoch": 1.59, + "learning_rate": 4.493521006255497e-05, + "loss": 1.2775, + "step": 2645500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4933110096994406e-05, + "loss": 1.2591, + "step": 2646000 + }, + { + "epoch": 1.59, + "learning_rate": 4.4931014331364966e-05, + "loss": 1.3068, + "step": 2646500 + }, + { + "epoch": 1.59, + "learning_rate": 4.49289143658044e-05, + "loss": 1.3016, + "step": 2647000 + }, + { + "epoch": 1.59, + "learning_rate": 4.492681440024383e-05, + "loss": 1.2448, + "step": 2647500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4924714434683274e-05, + "loss": 1.3006, + "step": 2648000 + }, + { + "epoch": 1.59, + "learning_rate": 4.492261446912271e-05, + "loss": 1.258, + "step": 2648500 + }, + { + "epoch": 1.59, + "learning_rate": 4.492051450356214e-05, + "loss": 1.2787, + "step": 2649000 + }, + { + "epoch": 1.59, + "learning_rate": 4.4918418737932694e-05, + "loss": 1.2741, + "step": 2649500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4916318772372134e-05, + "loss": 1.2789, + "step": 2650000 + }, + { + "epoch": 1.59, + "learning_rate": 4.491421880681157e-05, + "loss": 1.2427, + "step": 2650500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4912118841251e-05, + "loss": 1.2882, + "step": 2651000 + }, + { + "epoch": 1.59, + "learning_rate": 4.4910023075621555e-05, + "loss": 1.2815, + "step": 2651500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4907923110060995e-05, + "loss": 1.262, + "step": 2652000 + }, + { + "epoch": 1.59, + "learning_rate": 4.490582314450043e-05, + "loss": 1.3025, + "step": 2652500 + }, + { + "epoch": 1.59, + "learning_rate": 4.490372317893986e-05, + "loss": 1.2743, + "step": 2653000 + }, + { + "epoch": 1.59, + "learning_rate": 4.490162741331042e-05, + "loss": 1.3134, + "step": 2653500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4899527447749856e-05, + "loss": 1.2732, + "step": 2654000 + }, + { + "epoch": 1.59, + "learning_rate": 4.489742748218929e-05, + "loss": 1.2708, + "step": 2654500 + }, + { + "epoch": 1.59, + "learning_rate": 4.489532751662873e-05, + "loss": 1.2945, + "step": 2655000 + }, + { + "epoch": 1.59, + "learning_rate": 4.489322755106816e-05, + "loss": 1.2918, + "step": 2655500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4891127585507596e-05, + "loss": 1.2728, + "step": 2656000 + }, + { + "epoch": 1.59, + "learning_rate": 4.488903181987815e-05, + "loss": 1.2737, + "step": 2656500 + }, + { + "epoch": 1.59, + "learning_rate": 4.488693185431759e-05, + "loss": 1.2728, + "step": 2657000 + }, + { + "epoch": 1.59, + "learning_rate": 4.4884831888757024e-05, + "loss": 1.2816, + "step": 2657500 + }, + { + "epoch": 1.59, + "learning_rate": 4.488273612312758e-05, + "loss": 1.2608, + "step": 2658000 + }, + { + "epoch": 1.59, + "learning_rate": 4.488063615756701e-05, + "loss": 1.3047, + "step": 2658500 + }, + { + "epoch": 1.59, + "learning_rate": 4.487853619200645e-05, + "loss": 1.2897, + "step": 2659000 + }, + { + "epoch": 1.59, + "learning_rate": 4.4876436226445884e-05, + "loss": 1.2787, + "step": 2659500 + }, + { + "epoch": 1.59, + "learning_rate": 4.487433626088532e-05, + "loss": 1.2716, + "step": 2660000 + }, + { + "epoch": 1.6, + "learning_rate": 4.487223629532476e-05, + "loss": 1.296, + "step": 2660500 + }, + { + "epoch": 1.6, + "learning_rate": 4.487013632976419e-05, + "loss": 1.274, + "step": 2661000 + }, + { + "epoch": 1.6, + "learning_rate": 4.486803636420363e-05, + "loss": 1.2686, + "step": 2661500 + }, + { + "epoch": 1.6, + "learning_rate": 4.4865936398643065e-05, + "loss": 1.2757, + "step": 2662000 + }, + { + "epoch": 1.6, + "learning_rate": 4.486383643308249e-05, + "loss": 1.2919, + "step": 2662500 + }, + { + "epoch": 1.6, + "learning_rate": 4.486173646752193e-05, + "loss": 1.2609, + "step": 2663000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4859636501961366e-05, + "loss": 1.2737, + "step": 2663500 + }, + { + "epoch": 1.6, + "learning_rate": 4.4857544936263046e-05, + "loss": 1.2938, + "step": 2664000 + }, + { + "epoch": 1.6, + "learning_rate": 4.485544497070248e-05, + "loss": 1.2725, + "step": 2664500 + }, + { + "epoch": 1.6, + "learning_rate": 4.485334500514191e-05, + "loss": 1.3034, + "step": 2665000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4851249239512466e-05, + "loss": 1.256, + "step": 2665500 + }, + { + "epoch": 1.6, + "learning_rate": 4.484914927395191e-05, + "loss": 1.2894, + "step": 2666000 + }, + { + "epoch": 1.6, + "learning_rate": 4.484704930839134e-05, + "loss": 1.2734, + "step": 2666500 + }, + { + "epoch": 1.6, + "learning_rate": 4.484494934283078e-05, + "loss": 1.2838, + "step": 2667000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4842849377270214e-05, + "loss": 1.2808, + "step": 2667500 + }, + { + "epoch": 1.6, + "learning_rate": 4.484074941170965e-05, + "loss": 1.2721, + "step": 2668000 + }, + { + "epoch": 1.6, + "learning_rate": 4.483864944614909e-05, + "loss": 1.2806, + "step": 2668500 + }, + { + "epoch": 1.6, + "learning_rate": 4.483654948058852e-05, + "loss": 1.2849, + "step": 2669000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4834453714959075e-05, + "loss": 1.2884, + "step": 2669500 + }, + { + "epoch": 1.6, + "learning_rate": 4.483235374939851e-05, + "loss": 1.28, + "step": 2670000 + }, + { + "epoch": 1.6, + "learning_rate": 4.483025378383795e-05, + "loss": 1.2608, + "step": 2670500 + }, + { + "epoch": 1.6, + "learning_rate": 4.482815381827738e-05, + "loss": 1.2964, + "step": 2671000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4826053852716815e-05, + "loss": 1.2929, + "step": 2671500 + }, + { + "epoch": 1.6, + "learning_rate": 4.482395808708737e-05, + "loss": 1.2757, + "step": 2672000 + }, + { + "epoch": 1.6, + "learning_rate": 4.482185812152681e-05, + "loss": 1.2923, + "step": 2672500 + }, + { + "epoch": 1.6, + "learning_rate": 4.481975815596624e-05, + "loss": 1.2737, + "step": 2673000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4817658190405676e-05, + "loss": 1.2722, + "step": 2673500 + }, + { + "epoch": 1.6, + "learning_rate": 4.4815558224845116e-05, + "loss": 1.2952, + "step": 2674000 + }, + { + "epoch": 1.6, + "learning_rate": 4.481345825928454e-05, + "loss": 1.3059, + "step": 2674500 + }, + { + "epoch": 1.6, + "learning_rate": 4.481135829372398e-05, + "loss": 1.2653, + "step": 2675000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4809258328163417e-05, + "loss": 1.2836, + "step": 2675500 + }, + { + "epoch": 1.6, + "learning_rate": 4.480716256253398e-05, + "loss": 1.298, + "step": 2676000 + }, + { + "epoch": 1.6, + "learning_rate": 4.480506259697341e-05, + "loss": 1.2785, + "step": 2676500 + }, + { + "epoch": 1.6, + "learning_rate": 4.4802966831343964e-05, + "loss": 1.2634, + "step": 2677000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4800866865783404e-05, + "loss": 1.2827, + "step": 2677500 + }, + { + "epoch": 1.61, + "learning_rate": 4.479877110015396e-05, + "loss": 1.2763, + "step": 2678000 + }, + { + "epoch": 1.61, + "learning_rate": 4.479667113459339e-05, + "loss": 1.2864, + "step": 2678500 + }, + { + "epoch": 1.61, + "learning_rate": 4.4794571169032825e-05, + "loss": 1.2704, + "step": 2679000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4792471203472265e-05, + "loss": 1.2752, + "step": 2679500 + }, + { + "epoch": 1.61, + "learning_rate": 4.47903712379117e-05, + "loss": 1.2607, + "step": 2680000 + }, + { + "epoch": 1.61, + "learning_rate": 4.478827127235113e-05, + "loss": 1.3022, + "step": 2680500 + }, + { + "epoch": 1.61, + "learning_rate": 4.478617130679057e-05, + "loss": 1.2871, + "step": 2681000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4784071341230006e-05, + "loss": 1.2727, + "step": 2681500 + }, + { + "epoch": 1.61, + "learning_rate": 4.478197137566944e-05, + "loss": 1.276, + "step": 2682000 + }, + { + "epoch": 1.61, + "learning_rate": 4.477987141010887e-05, + "loss": 1.2688, + "step": 2682500 + }, + { + "epoch": 1.61, + "learning_rate": 4.477777564447943e-05, + "loss": 1.3122, + "step": 2683000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4775675678918866e-05, + "loss": 1.2712, + "step": 2683500 + }, + { + "epoch": 1.61, + "learning_rate": 4.47735757133583e-05, + "loss": 1.2611, + "step": 2684000 + }, + { + "epoch": 1.61, + "learning_rate": 4.477147574779773e-05, + "loss": 1.2871, + "step": 2684500 + }, + { + "epoch": 1.61, + "learning_rate": 4.476937578223717e-05, + "loss": 1.2672, + "step": 2685000 + }, + { + "epoch": 1.61, + "learning_rate": 4.476727581667661e-05, + "loss": 1.2632, + "step": 2685500 + }, + { + "epoch": 1.61, + "learning_rate": 4.476517585111604e-05, + "loss": 1.2822, + "step": 2686000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4763075885555474e-05, + "loss": 1.2799, + "step": 2686500 + }, + { + "epoch": 1.61, + "learning_rate": 4.476098011992603e-05, + "loss": 1.2917, + "step": 2687000 + }, + { + "epoch": 1.61, + "learning_rate": 4.475888015436547e-05, + "loss": 1.2349, + "step": 2687500 + }, + { + "epoch": 1.61, + "learning_rate": 4.47567801888049e-05, + "loss": 1.2994, + "step": 2688000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4754680223244335e-05, + "loss": 1.2875, + "step": 2688500 + }, + { + "epoch": 1.61, + "learning_rate": 4.4752584457614895e-05, + "loss": 1.2925, + "step": 2689000 + }, + { + "epoch": 1.61, + "learning_rate": 4.475048449205433e-05, + "loss": 1.3024, + "step": 2689500 + }, + { + "epoch": 1.61, + "learning_rate": 4.474838452649376e-05, + "loss": 1.2844, + "step": 2690000 + }, + { + "epoch": 1.61, + "learning_rate": 4.47462845609332e-05, + "loss": 1.2586, + "step": 2690500 + }, + { + "epoch": 1.61, + "learning_rate": 4.474418879530376e-05, + "loss": 1.2971, + "step": 2691000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4742093029674316e-05, + "loss": 1.2734, + "step": 2691500 + }, + { + "epoch": 1.61, + "learning_rate": 4.473999306411375e-05, + "loss": 1.2926, + "step": 2692000 + }, + { + "epoch": 1.61, + "learning_rate": 4.473789309855318e-05, + "loss": 1.2958, + "step": 2692500 + }, + { + "epoch": 1.61, + "learning_rate": 4.473579313299262e-05, + "loss": 1.2833, + "step": 2693000 + }, + { + "epoch": 1.61, + "learning_rate": 4.473369736736318e-05, + "loss": 1.2844, + "step": 2693500 + }, + { + "epoch": 1.62, + "learning_rate": 4.473159740180261e-05, + "loss": 1.2831, + "step": 2694000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4729497436242044e-05, + "loss": 1.2842, + "step": 2694500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4727397470681484e-05, + "loss": 1.2921, + "step": 2695000 + }, + { + "epoch": 1.62, + "learning_rate": 4.472529750512092e-05, + "loss": 1.2868, + "step": 2695500 + }, + { + "epoch": 1.62, + "learning_rate": 4.472319753956035e-05, + "loss": 1.3167, + "step": 2696000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4721097573999784e-05, + "loss": 1.2865, + "step": 2696500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4719001808370345e-05, + "loss": 1.2542, + "step": 2697000 + }, + { + "epoch": 1.62, + "learning_rate": 4.47169060427409e-05, + "loss": 1.2872, + "step": 2697500 + }, + { + "epoch": 1.62, + "learning_rate": 4.471480607718033e-05, + "loss": 1.2858, + "step": 2698000 + }, + { + "epoch": 1.62, + "learning_rate": 4.471270611161977e-05, + "loss": 1.2537, + "step": 2698500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4710606146059205e-05, + "loss": 1.2622, + "step": 2699000 + }, + { + "epoch": 1.62, + "learning_rate": 4.470850618049864e-05, + "loss": 1.2992, + "step": 2699500 + }, + { + "epoch": 1.62, + "learning_rate": 4.470640621493808e-05, + "loss": 1.2761, + "step": 2700000 + }, + { + "epoch": 1.62, + "eval_loss": 1.2280163764953613, + "eval_runtime": 1111.7976, + "eval_samples_per_second": 473.755, + "eval_steps_per_second": 78.96, + "step": 2700000 + }, + { + "epoch": 1.62, + "learning_rate": 4.470430624937751e-05, + "loss": 1.281, + "step": 2700500 + }, + { + "epoch": 1.62, + "learning_rate": 4.470220628381694e-05, + "loss": 1.2715, + "step": 2701000 + }, + { + "epoch": 1.62, + "learning_rate": 4.47001105181875e-05, + "loss": 1.2663, + "step": 2701500 + }, + { + "epoch": 1.62, + "learning_rate": 4.469801055262694e-05, + "loss": 1.2829, + "step": 2702000 + }, + { + "epoch": 1.62, + "learning_rate": 4.469591058706637e-05, + "loss": 1.3043, + "step": 2702500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4693810621505807e-05, + "loss": 1.3026, + "step": 2703000 + }, + { + "epoch": 1.62, + "learning_rate": 4.469171065594524e-05, + "loss": 1.288, + "step": 2703500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4689610690384674e-05, + "loss": 1.2662, + "step": 2704000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4687514924755234e-05, + "loss": 1.2436, + "step": 2704500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4685414959194674e-05, + "loss": 1.2721, + "step": 2705000 + }, + { + "epoch": 1.62, + "learning_rate": 4.46833149936341e-05, + "loss": 1.2826, + "step": 2705500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4681215028073534e-05, + "loss": 1.2634, + "step": 2706000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4679119262444095e-05, + "loss": 1.2816, + "step": 2706500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4677019296883535e-05, + "loss": 1.2865, + "step": 2707000 + }, + { + "epoch": 1.62, + "learning_rate": 4.467491933132297e-05, + "loss": 1.2966, + "step": 2707500 + }, + { + "epoch": 1.62, + "learning_rate": 4.467282356569352e-05, + "loss": 1.3025, + "step": 2708000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4670723600132955e-05, + "loss": 1.2434, + "step": 2708500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4668623634572396e-05, + "loss": 1.2705, + "step": 2709000 + }, + { + "epoch": 1.62, + "learning_rate": 4.466652366901183e-05, + "loss": 1.2885, + "step": 2709500 + }, + { + "epoch": 1.62, + "learning_rate": 4.466442370345126e-05, + "loss": 1.2539, + "step": 2710000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4662323737890696e-05, + "loss": 1.2856, + "step": 2710500 + }, + { + "epoch": 1.63, + "learning_rate": 4.466022377233013e-05, + "loss": 1.2649, + "step": 2711000 + }, + { + "epoch": 1.63, + "learning_rate": 4.465812380676957e-05, + "loss": 1.2792, + "step": 2711500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4656023841209e-05, + "loss": 1.2829, + "step": 2712000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4653928075579563e-05, + "loss": 1.2671, + "step": 2712500 + }, + { + "epoch": 1.63, + "learning_rate": 4.465182811001899e-05, + "loss": 1.2669, + "step": 2713000 + }, + { + "epoch": 1.63, + "learning_rate": 4.464972814445843e-05, + "loss": 1.2659, + "step": 2713500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4647628178897864e-05, + "loss": 1.2569, + "step": 2714000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4645532413268424e-05, + "loss": 1.2823, + "step": 2714500 + }, + { + "epoch": 1.63, + "learning_rate": 4.464343244770785e-05, + "loss": 1.2627, + "step": 2715000 + }, + { + "epoch": 1.63, + "learning_rate": 4.464133248214729e-05, + "loss": 1.2723, + "step": 2715500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4639232516586725e-05, + "loss": 1.282, + "step": 2716000 + }, + { + "epoch": 1.63, + "learning_rate": 4.463713255102616e-05, + "loss": 1.3103, + "step": 2716500 + }, + { + "epoch": 1.63, + "learning_rate": 4.463503678539672e-05, + "loss": 1.2907, + "step": 2717000 + }, + { + "epoch": 1.63, + "learning_rate": 4.463293681983615e-05, + "loss": 1.2643, + "step": 2717500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4630836854275585e-05, + "loss": 1.299, + "step": 2718000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4628736888715026e-05, + "loss": 1.2953, + "step": 2718500 + }, + { + "epoch": 1.63, + "learning_rate": 4.462663692315446e-05, + "loss": 1.2915, + "step": 2719000 + }, + { + "epoch": 1.63, + "learning_rate": 4.462453695759389e-05, + "loss": 1.2624, + "step": 2719500 + }, + { + "epoch": 1.63, + "learning_rate": 4.462243699203333e-05, + "loss": 1.2789, + "step": 2720000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4620337026472766e-05, + "loss": 1.283, + "step": 2720500 + }, + { + "epoch": 1.63, + "learning_rate": 4.46182370609122e-05, + "loss": 1.2595, + "step": 2721000 + }, + { + "epoch": 1.63, + "learning_rate": 4.461614129528275e-05, + "loss": 1.2692, + "step": 2721500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4614041329722193e-05, + "loss": 1.2892, + "step": 2722000 + }, + { + "epoch": 1.63, + "learning_rate": 4.461194556409275e-05, + "loss": 1.2747, + "step": 2722500 + }, + { + "epoch": 1.63, + "learning_rate": 4.460984559853218e-05, + "loss": 1.2646, + "step": 2723000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4607745632971614e-05, + "loss": 1.2666, + "step": 2723500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4605645667411054e-05, + "loss": 1.2622, + "step": 2724000 + }, + { + "epoch": 1.63, + "learning_rate": 4.460354570185049e-05, + "loss": 1.2708, + "step": 2724500 + }, + { + "epoch": 1.63, + "learning_rate": 4.460144993622104e-05, + "loss": 1.2832, + "step": 2725000 + }, + { + "epoch": 1.63, + "learning_rate": 4.459934997066048e-05, + "loss": 1.2835, + "step": 2725500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4597250005099915e-05, + "loss": 1.2776, + "step": 2726000 + }, + { + "epoch": 1.63, + "learning_rate": 4.459515003953935e-05, + "loss": 1.2853, + "step": 2726500 + }, + { + "epoch": 1.63, + "learning_rate": 4.459305007397879e-05, + "loss": 1.3128, + "step": 2727000 + }, + { + "epoch": 1.64, + "learning_rate": 4.459095010841822e-05, + "loss": 1.252, + "step": 2727500 + }, + { + "epoch": 1.64, + "learning_rate": 4.4588850142857655e-05, + "loss": 1.2898, + "step": 2728000 + }, + { + "epoch": 1.64, + "learning_rate": 4.458675437722821e-05, + "loss": 1.2854, + "step": 2728500 + }, + { + "epoch": 1.64, + "learning_rate": 4.458465441166765e-05, + "loss": 1.2467, + "step": 2729000 + }, + { + "epoch": 1.64, + "learning_rate": 4.458255444610708e-05, + "loss": 1.2758, + "step": 2729500 + }, + { + "epoch": 1.64, + "learning_rate": 4.4580454480546516e-05, + "loss": 1.294, + "step": 2730000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4578354514985956e-05, + "loss": 1.2922, + "step": 2730500 + }, + { + "epoch": 1.64, + "learning_rate": 4.457625454942539e-05, + "loss": 1.2706, + "step": 2731000 + }, + { + "epoch": 1.64, + "learning_rate": 4.457415458386482e-05, + "loss": 1.2872, + "step": 2731500 + }, + { + "epoch": 1.64, + "learning_rate": 4.457205881823538e-05, + "loss": 1.2639, + "step": 2732000 + }, + { + "epoch": 1.64, + "learning_rate": 4.456995885267482e-05, + "loss": 1.2723, + "step": 2732500 + }, + { + "epoch": 1.64, + "learning_rate": 4.456785888711425e-05, + "loss": 1.2789, + "step": 2733000 + }, + { + "epoch": 1.64, + "learning_rate": 4.456575892155369e-05, + "loss": 1.2482, + "step": 2733500 + }, + { + "epoch": 1.64, + "learning_rate": 4.4563663155924244e-05, + "loss": 1.2766, + "step": 2734000 + }, + { + "epoch": 1.64, + "learning_rate": 4.45615673902948e-05, + "loss": 1.2582, + "step": 2734500 + }, + { + "epoch": 1.64, + "learning_rate": 4.455946742473423e-05, + "loss": 1.2885, + "step": 2735000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4557367459173665e-05, + "loss": 1.2748, + "step": 2735500 + }, + { + "epoch": 1.64, + "learning_rate": 4.4555267493613105e-05, + "loss": 1.2998, + "step": 2736000 + }, + { + "epoch": 1.64, + "learning_rate": 4.455316752805254e-05, + "loss": 1.309, + "step": 2736500 + }, + { + "epoch": 1.64, + "learning_rate": 4.455106756249197e-05, + "loss": 1.2683, + "step": 2737000 + }, + { + "epoch": 1.64, + "learning_rate": 4.454896759693141e-05, + "loss": 1.2889, + "step": 2737500 + }, + { + "epoch": 1.64, + "learning_rate": 4.4546867631370846e-05, + "loss": 1.2577, + "step": 2738000 + }, + { + "epoch": 1.64, + "learning_rate": 4.454476766581028e-05, + "loss": 1.2825, + "step": 2738500 + }, + { + "epoch": 1.64, + "learning_rate": 4.454266770024972e-05, + "loss": 1.2675, + "step": 2739000 + }, + { + "epoch": 1.64, + "learning_rate": 4.454056773468915e-05, + "loss": 1.2951, + "step": 2739500 + }, + { + "epoch": 1.64, + "learning_rate": 4.4538467769128586e-05, + "loss": 1.2578, + "step": 2740000 + }, + { + "epoch": 1.64, + "learning_rate": 4.453637200349915e-05, + "loss": 1.2886, + "step": 2740500 + }, + { + "epoch": 1.64, + "learning_rate": 4.453427203793858e-05, + "loss": 1.2841, + "step": 2741000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4532176272309134e-05, + "loss": 1.2679, + "step": 2741500 + }, + { + "epoch": 1.64, + "learning_rate": 4.453007630674857e-05, + "loss": 1.2747, + "step": 2742000 + }, + { + "epoch": 1.64, + "learning_rate": 4.452798054111912e-05, + "loss": 1.2718, + "step": 2742500 + }, + { + "epoch": 1.64, + "learning_rate": 4.452588057555856e-05, + "loss": 1.2925, + "step": 2743000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4523780609997994e-05, + "loss": 1.2809, + "step": 2743500 + }, + { + "epoch": 1.65, + "learning_rate": 4.452168064443743e-05, + "loss": 1.2814, + "step": 2744000 + }, + { + "epoch": 1.65, + "learning_rate": 4.451958067887687e-05, + "loss": 1.2981, + "step": 2744500 + }, + { + "epoch": 1.65, + "learning_rate": 4.45174807133163e-05, + "loss": 1.2569, + "step": 2745000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4515380747755735e-05, + "loss": 1.2387, + "step": 2745500 + }, + { + "epoch": 1.65, + "learning_rate": 4.4513280782195175e-05, + "loss": 1.2727, + "step": 2746000 + }, + { + "epoch": 1.65, + "learning_rate": 4.451118081663461e-05, + "loss": 1.3173, + "step": 2746500 + }, + { + "epoch": 1.65, + "learning_rate": 4.450908085107404e-05, + "loss": 1.2991, + "step": 2747000 + }, + { + "epoch": 1.65, + "learning_rate": 4.45069850854446e-05, + "loss": 1.2762, + "step": 2747500 + }, + { + "epoch": 1.65, + "learning_rate": 4.4504885119884036e-05, + "loss": 1.2756, + "step": 2748000 + }, + { + "epoch": 1.65, + "learning_rate": 4.450278515432347e-05, + "loss": 1.2669, + "step": 2748500 + }, + { + "epoch": 1.65, + "learning_rate": 4.450068518876291e-05, + "loss": 1.2771, + "step": 2749000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4498585223202336e-05, + "loss": 1.3009, + "step": 2749500 + }, + { + "epoch": 1.65, + "learning_rate": 4.449648525764177e-05, + "loss": 1.283, + "step": 2750000 + }, + { + "epoch": 1.65, + "learning_rate": 4.449438529208121e-05, + "loss": 1.2454, + "step": 2750500 + }, + { + "epoch": 1.65, + "learning_rate": 4.449228952645177e-05, + "loss": 1.2566, + "step": 2751000 + }, + { + "epoch": 1.65, + "learning_rate": 4.44901895608912e-05, + "loss": 1.3038, + "step": 2751500 + }, + { + "epoch": 1.65, + "learning_rate": 4.448808959533063e-05, + "loss": 1.3018, + "step": 2752000 + }, + { + "epoch": 1.65, + "learning_rate": 4.448598962977007e-05, + "loss": 1.2697, + "step": 2752500 + }, + { + "epoch": 1.65, + "learning_rate": 4.448389386414063e-05, + "loss": 1.2759, + "step": 2753000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4481793898580065e-05, + "loss": 1.2569, + "step": 2753500 + }, + { + "epoch": 1.65, + "learning_rate": 4.44796939330195e-05, + "loss": 1.2954, + "step": 2754000 + }, + { + "epoch": 1.65, + "learning_rate": 4.447759396745893e-05, + "loss": 1.2901, + "step": 2754500 + }, + { + "epoch": 1.65, + "learning_rate": 4.4475494001898365e-05, + "loss": 1.2459, + "step": 2755000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4473394036337805e-05, + "loss": 1.2932, + "step": 2755500 + }, + { + "epoch": 1.65, + "learning_rate": 4.447129407077724e-05, + "loss": 1.3017, + "step": 2756000 + }, + { + "epoch": 1.65, + "learning_rate": 4.446919410521667e-05, + "loss": 1.2887, + "step": 2756500 + }, + { + "epoch": 1.65, + "learning_rate": 4.446710253951835e-05, + "loss": 1.2613, + "step": 2757000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4465006773888906e-05, + "loss": 1.2699, + "step": 2757500 + }, + { + "epoch": 1.65, + "learning_rate": 4.446290680832834e-05, + "loss": 1.2701, + "step": 2758000 + }, + { + "epoch": 1.65, + "learning_rate": 4.446080684276778e-05, + "loss": 1.2729, + "step": 2758500 + }, + { + "epoch": 1.65, + "learning_rate": 4.4458706877207213e-05, + "loss": 1.2623, + "step": 2759000 + }, + { + "epoch": 1.65, + "learning_rate": 4.445660691164665e-05, + "loss": 1.2595, + "step": 2759500 + }, + { + "epoch": 1.65, + "learning_rate": 4.445450694608609e-05, + "loss": 1.2573, + "step": 2760000 + }, + { + "epoch": 1.66, + "learning_rate": 4.445240698052552e-05, + "loss": 1.2304, + "step": 2760500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4450307014964954e-05, + "loss": 1.2761, + "step": 2761000 + }, + { + "epoch": 1.66, + "learning_rate": 4.444820704940439e-05, + "loss": 1.2885, + "step": 2761500 + }, + { + "epoch": 1.66, + "learning_rate": 4.444611128377495e-05, + "loss": 1.2958, + "step": 2762000 + }, + { + "epoch": 1.66, + "learning_rate": 4.444401131821438e-05, + "loss": 1.2791, + "step": 2762500 + }, + { + "epoch": 1.66, + "learning_rate": 4.444191135265382e-05, + "loss": 1.2702, + "step": 2763000 + }, + { + "epoch": 1.66, + "learning_rate": 4.443981138709325e-05, + "loss": 1.2463, + "step": 2763500 + }, + { + "epoch": 1.66, + "learning_rate": 4.443771142153268e-05, + "loss": 1.2878, + "step": 2764000 + }, + { + "epoch": 1.66, + "learning_rate": 4.443561145597212e-05, + "loss": 1.3098, + "step": 2764500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4433511490411555e-05, + "loss": 1.295, + "step": 2765000 + }, + { + "epoch": 1.66, + "learning_rate": 4.443141152485099e-05, + "loss": 1.2844, + "step": 2765500 + }, + { + "epoch": 1.66, + "learning_rate": 4.442931575922154e-05, + "loss": 1.2834, + "step": 2766000 + }, + { + "epoch": 1.66, + "learning_rate": 4.442721579366098e-05, + "loss": 1.3025, + "step": 2766500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4425115828100416e-05, + "loss": 1.2601, + "step": 2767000 + }, + { + "epoch": 1.66, + "learning_rate": 4.442301586253985e-05, + "loss": 1.2807, + "step": 2767500 + }, + { + "epoch": 1.66, + "learning_rate": 4.442092009691041e-05, + "loss": 1.2902, + "step": 2768000 + }, + { + "epoch": 1.66, + "learning_rate": 4.441882013134984e-05, + "loss": 1.2739, + "step": 2768500 + }, + { + "epoch": 1.66, + "learning_rate": 4.441672016578928e-05, + "loss": 1.249, + "step": 2769000 + }, + { + "epoch": 1.66, + "learning_rate": 4.441462020022872e-05, + "loss": 1.2955, + "step": 2769500 + }, + { + "epoch": 1.66, + "learning_rate": 4.441252443459928e-05, + "loss": 1.2952, + "step": 2770000 + }, + { + "epoch": 1.66, + "learning_rate": 4.4410424469038704e-05, + "loss": 1.2536, + "step": 2770500 + }, + { + "epoch": 1.66, + "learning_rate": 4.440832450347814e-05, + "loss": 1.284, + "step": 2771000 + }, + { + "epoch": 1.66, + "learning_rate": 4.440622453791758e-05, + "loss": 1.2813, + "step": 2771500 + }, + { + "epoch": 1.66, + "learning_rate": 4.440412457235701e-05, + "loss": 1.2933, + "step": 2772000 + }, + { + "epoch": 1.66, + "learning_rate": 4.4402024606796445e-05, + "loss": 1.2654, + "step": 2772500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4399924641235885e-05, + "loss": 1.2739, + "step": 2773000 + }, + { + "epoch": 1.66, + "learning_rate": 4.439782887560644e-05, + "loss": 1.2778, + "step": 2773500 + }, + { + "epoch": 1.66, + "learning_rate": 4.439572891004587e-05, + "loss": 1.2719, + "step": 2774000 + }, + { + "epoch": 1.66, + "learning_rate": 4.4393628944485305e-05, + "loss": 1.3077, + "step": 2774500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4391528978924746e-05, + "loss": 1.2711, + "step": 2775000 + }, + { + "epoch": 1.66, + "learning_rate": 4.438942901336418e-05, + "loss": 1.2713, + "step": 2775500 + }, + { + "epoch": 1.66, + "learning_rate": 4.438732904780361e-05, + "loss": 1.2693, + "step": 2776000 + }, + { + "epoch": 1.66, + "learning_rate": 4.438522908224305e-05, + "loss": 1.2937, + "step": 2776500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4383129116682486e-05, + "loss": 1.265, + "step": 2777000 + }, + { + "epoch": 1.67, + "learning_rate": 4.438103755098417e-05, + "loss": 1.2554, + "step": 2777500 + }, + { + "epoch": 1.67, + "learning_rate": 4.4378937585423593e-05, + "loss": 1.2583, + "step": 2778000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4376837619863034e-05, + "loss": 1.3052, + "step": 2778500 + }, + { + "epoch": 1.67, + "learning_rate": 4.437473765430247e-05, + "loss": 1.29, + "step": 2779000 + }, + { + "epoch": 1.67, + "learning_rate": 4.43726376887419e-05, + "loss": 1.2633, + "step": 2779500 + }, + { + "epoch": 1.67, + "learning_rate": 4.437054192311246e-05, + "loss": 1.2774, + "step": 2780000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4368441957551894e-05, + "loss": 1.2989, + "step": 2780500 + }, + { + "epoch": 1.67, + "learning_rate": 4.436634199199133e-05, + "loss": 1.2444, + "step": 2781000 + }, + { + "epoch": 1.67, + "learning_rate": 4.436424202643076e-05, + "loss": 1.2488, + "step": 2781500 + }, + { + "epoch": 1.67, + "learning_rate": 4.436214626080132e-05, + "loss": 1.2693, + "step": 2782000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4360046295240755e-05, + "loss": 1.2537, + "step": 2782500 + }, + { + "epoch": 1.67, + "learning_rate": 4.435794632968019e-05, + "loss": 1.2616, + "step": 2783000 + }, + { + "epoch": 1.67, + "learning_rate": 4.435584636411963e-05, + "loss": 1.2884, + "step": 2783500 + }, + { + "epoch": 1.67, + "learning_rate": 4.435374639855906e-05, + "loss": 1.2757, + "step": 2784000 + }, + { + "epoch": 1.67, + "learning_rate": 4.435165063292962e-05, + "loss": 1.2979, + "step": 2784500 + }, + { + "epoch": 1.67, + "learning_rate": 4.434955066736905e-05, + "loss": 1.2576, + "step": 2785000 + }, + { + "epoch": 1.67, + "learning_rate": 4.434745070180849e-05, + "loss": 1.3013, + "step": 2785500 + }, + { + "epoch": 1.67, + "learning_rate": 4.434535073624792e-05, + "loss": 1.2771, + "step": 2786000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4343250770687356e-05, + "loss": 1.2668, + "step": 2786500 + }, + { + "epoch": 1.67, + "learning_rate": 4.43411508051268e-05, + "loss": 1.2539, + "step": 2787000 + }, + { + "epoch": 1.67, + "learning_rate": 4.433905503949735e-05, + "loss": 1.3054, + "step": 2787500 + }, + { + "epoch": 1.67, + "learning_rate": 4.4336955073936784e-05, + "loss": 1.2678, + "step": 2788000 + }, + { + "epoch": 1.67, + "learning_rate": 4.433485510837622e-05, + "loss": 1.2617, + "step": 2788500 + }, + { + "epoch": 1.67, + "learning_rate": 4.433275514281566e-05, + "loss": 1.248, + "step": 2789000 + }, + { + "epoch": 1.67, + "learning_rate": 4.433065937718622e-05, + "loss": 1.268, + "step": 2789500 + }, + { + "epoch": 1.67, + "learning_rate": 4.432856361155677e-05, + "loss": 1.2703, + "step": 2790000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4326463645996205e-05, + "loss": 1.2807, + "step": 2790500 + }, + { + "epoch": 1.67, + "learning_rate": 4.4324363680435645e-05, + "loss": 1.2749, + "step": 2791000 + }, + { + "epoch": 1.67, + "learning_rate": 4.432226371487508e-05, + "loss": 1.262, + "step": 2791500 + }, + { + "epoch": 1.67, + "learning_rate": 4.4320163749314505e-05, + "loss": 1.2939, + "step": 2792000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4318063783753945e-05, + "loss": 1.2863, + "step": 2792500 + }, + { + "epoch": 1.67, + "learning_rate": 4.431596381819338e-05, + "loss": 1.2631, + "step": 2793000 + }, + { + "epoch": 1.67, + "learning_rate": 4.431386385263281e-05, + "loss": 1.292, + "step": 2793500 + }, + { + "epoch": 1.68, + "learning_rate": 4.431176388707225e-05, + "loss": 1.2809, + "step": 2794000 + }, + { + "epoch": 1.68, + "learning_rate": 4.4309663921511686e-05, + "loss": 1.2875, + "step": 2794500 + }, + { + "epoch": 1.68, + "learning_rate": 4.430756815588224e-05, + "loss": 1.2951, + "step": 2795000 + }, + { + "epoch": 1.68, + "learning_rate": 4.430546819032167e-05, + "loss": 1.2674, + "step": 2795500 + }, + { + "epoch": 1.68, + "learning_rate": 4.430336822476111e-05, + "loss": 1.2705, + "step": 2796000 + }, + { + "epoch": 1.68, + "learning_rate": 4.430126825920055e-05, + "loss": 1.2844, + "step": 2796500 + }, + { + "epoch": 1.68, + "learning_rate": 4.42991724935711e-05, + "loss": 1.2606, + "step": 2797000 + }, + { + "epoch": 1.68, + "learning_rate": 4.429707252801054e-05, + "loss": 1.2584, + "step": 2797500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4294972562449974e-05, + "loss": 1.2507, + "step": 2798000 + }, + { + "epoch": 1.68, + "learning_rate": 4.429287259688941e-05, + "loss": 1.2705, + "step": 2798500 + }, + { + "epoch": 1.68, + "learning_rate": 4.429077263132885e-05, + "loss": 1.2818, + "step": 2799000 + }, + { + "epoch": 1.68, + "learning_rate": 4.42886768656994e-05, + "loss": 1.2817, + "step": 2799500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4286576900138835e-05, + "loss": 1.2715, + "step": 2800000 + }, + { + "epoch": 1.68, + "eval_loss": 1.2226179838180542, + "eval_runtime": 1103.9672, + "eval_samples_per_second": 477.116, + "eval_steps_per_second": 79.52, + "step": 2800000 + }, + { + "epoch": 1.68, + "learning_rate": 4.428447693457827e-05, + "loss": 1.294, + "step": 2800500 + }, + { + "epoch": 1.68, + "learning_rate": 4.428237696901771e-05, + "loss": 1.2937, + "step": 2801000 + }, + { + "epoch": 1.68, + "learning_rate": 4.428028120338826e-05, + "loss": 1.3009, + "step": 2801500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4278181237827695e-05, + "loss": 1.2765, + "step": 2802000 + }, + { + "epoch": 1.68, + "learning_rate": 4.427608127226713e-05, + "loss": 1.2804, + "step": 2802500 + }, + { + "epoch": 1.68, + "learning_rate": 4.427398130670657e-05, + "loss": 1.2421, + "step": 2803000 + }, + { + "epoch": 1.68, + "learning_rate": 4.427188554107713e-05, + "loss": 1.2775, + "step": 2803500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4269785575516556e-05, + "loss": 1.2546, + "step": 2804000 + }, + { + "epoch": 1.68, + "learning_rate": 4.4267685609955996e-05, + "loss": 1.2518, + "step": 2804500 + }, + { + "epoch": 1.68, + "learning_rate": 4.426558564439543e-05, + "loss": 1.2727, + "step": 2805000 + }, + { + "epoch": 1.68, + "learning_rate": 4.426348567883486e-05, + "loss": 1.2878, + "step": 2805500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4261385713274304e-05, + "loss": 1.2705, + "step": 2806000 + }, + { + "epoch": 1.68, + "learning_rate": 4.425928994764486e-05, + "loss": 1.2553, + "step": 2806500 + }, + { + "epoch": 1.68, + "learning_rate": 4.425718998208429e-05, + "loss": 1.2811, + "step": 2807000 + }, + { + "epoch": 1.68, + "learning_rate": 4.4255090016523724e-05, + "loss": 1.2443, + "step": 2807500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4252990050963164e-05, + "loss": 1.296, + "step": 2808000 + }, + { + "epoch": 1.68, + "learning_rate": 4.4250894285333725e-05, + "loss": 1.2692, + "step": 2808500 + }, + { + "epoch": 1.68, + "learning_rate": 4.424879851970428e-05, + "loss": 1.2825, + "step": 2809000 + }, + { + "epoch": 1.68, + "learning_rate": 4.424669855414371e-05, + "loss": 1.3024, + "step": 2809500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4244598588583145e-05, + "loss": 1.2659, + "step": 2810000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4242498623022585e-05, + "loss": 1.274, + "step": 2810500 + }, + { + "epoch": 1.69, + "learning_rate": 4.424039865746202e-05, + "loss": 1.2771, + "step": 2811000 + }, + { + "epoch": 1.69, + "learning_rate": 4.423829869190145e-05, + "loss": 1.2545, + "step": 2811500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4236198726340886e-05, + "loss": 1.3149, + "step": 2812000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4234102960711446e-05, + "loss": 1.2534, + "step": 2812500 + }, + { + "epoch": 1.69, + "learning_rate": 4.423200299515088e-05, + "loss": 1.2642, + "step": 2813000 + }, + { + "epoch": 1.69, + "learning_rate": 4.422990722952143e-05, + "loss": 1.2629, + "step": 2813500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4227807263960873e-05, + "loss": 1.2385, + "step": 2814000 + }, + { + "epoch": 1.69, + "learning_rate": 4.422570729840031e-05, + "loss": 1.2798, + "step": 2814500 + }, + { + "epoch": 1.69, + "learning_rate": 4.422361153277086e-05, + "loss": 1.2952, + "step": 2815000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4221511567210294e-05, + "loss": 1.261, + "step": 2815500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4219411601649734e-05, + "loss": 1.2523, + "step": 2816000 + }, + { + "epoch": 1.69, + "learning_rate": 4.421731163608917e-05, + "loss": 1.2845, + "step": 2816500 + }, + { + "epoch": 1.69, + "learning_rate": 4.421521167052861e-05, + "loss": 1.2959, + "step": 2817000 + }, + { + "epoch": 1.69, + "learning_rate": 4.421311170496804e-05, + "loss": 1.2691, + "step": 2817500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4211011739407475e-05, + "loss": 1.2564, + "step": 2818000 + }, + { + "epoch": 1.69, + "learning_rate": 4.420891177384691e-05, + "loss": 1.2879, + "step": 2818500 + }, + { + "epoch": 1.69, + "learning_rate": 4.420681180828634e-05, + "loss": 1.2757, + "step": 2819000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4204711842725775e-05, + "loss": 1.2955, + "step": 2819500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4202611877165215e-05, + "loss": 1.2819, + "step": 2820000 + }, + { + "epoch": 1.69, + "learning_rate": 4.420051191160465e-05, + "loss": 1.264, + "step": 2820500 + }, + { + "epoch": 1.69, + "learning_rate": 4.41984161459752e-05, + "loss": 1.2737, + "step": 2821000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4196316180414636e-05, + "loss": 1.2553, + "step": 2821500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4194216214854076e-05, + "loss": 1.2577, + "step": 2822000 + }, + { + "epoch": 1.69, + "learning_rate": 4.419211624929351e-05, + "loss": 1.2754, + "step": 2822500 + }, + { + "epoch": 1.69, + "learning_rate": 4.419001628373294e-05, + "loss": 1.2544, + "step": 2823000 + }, + { + "epoch": 1.69, + "learning_rate": 4.41879205181035e-05, + "loss": 1.279, + "step": 2823500 + }, + { + "epoch": 1.69, + "learning_rate": 4.418582055254294e-05, + "loss": 1.2617, + "step": 2824000 + }, + { + "epoch": 1.69, + "learning_rate": 4.418372058698237e-05, + "loss": 1.2698, + "step": 2824500 + }, + { + "epoch": 1.69, + "learning_rate": 4.418162062142181e-05, + "loss": 1.2622, + "step": 2825000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4179520655861244e-05, + "loss": 1.2624, + "step": 2825500 + }, + { + "epoch": 1.69, + "learning_rate": 4.417742069030068e-05, + "loss": 1.2539, + "step": 2826000 + }, + { + "epoch": 1.69, + "learning_rate": 4.417532072474012e-05, + "loss": 1.2666, + "step": 2826500 + }, + { + "epoch": 1.69, + "learning_rate": 4.417322075917955e-05, + "loss": 1.3247, + "step": 2827000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4171120793618985e-05, + "loss": 1.3033, + "step": 2827500 + }, + { + "epoch": 1.7, + "learning_rate": 4.416902502798954e-05, + "loss": 1.2853, + "step": 2828000 + }, + { + "epoch": 1.7, + "learning_rate": 4.416692506242898e-05, + "loss": 1.2842, + "step": 2828500 + }, + { + "epoch": 1.7, + "learning_rate": 4.416482509686841e-05, + "loss": 1.2583, + "step": 2829000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4162725131307845e-05, + "loss": 1.2951, + "step": 2829500 + }, + { + "epoch": 1.7, + "learning_rate": 4.41606293656784e-05, + "loss": 1.2889, + "step": 2830000 + }, + { + "epoch": 1.7, + "learning_rate": 4.415852940011784e-05, + "loss": 1.296, + "step": 2830500 + }, + { + "epoch": 1.7, + "learning_rate": 4.415642943455727e-05, + "loss": 1.2616, + "step": 2831000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4154329468996706e-05, + "loss": 1.2624, + "step": 2831500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4152229503436146e-05, + "loss": 1.2785, + "step": 2832000 + }, + { + "epoch": 1.7, + "learning_rate": 4.415012953787558e-05, + "loss": 1.2784, + "step": 2832500 + }, + { + "epoch": 1.7, + "learning_rate": 4.414803377224613e-05, + "loss": 1.2761, + "step": 2833000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4145933806685574e-05, + "loss": 1.2756, + "step": 2833500 + }, + { + "epoch": 1.7, + "learning_rate": 4.414383384112501e-05, + "loss": 1.2913, + "step": 2834000 + }, + { + "epoch": 1.7, + "learning_rate": 4.414173387556444e-05, + "loss": 1.2844, + "step": 2834500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4139638109934994e-05, + "loss": 1.2809, + "step": 2835000 + }, + { + "epoch": 1.7, + "learning_rate": 4.413754234430555e-05, + "loss": 1.2538, + "step": 2835500 + }, + { + "epoch": 1.7, + "learning_rate": 4.413544237874499e-05, + "loss": 1.2484, + "step": 2836000 + }, + { + "epoch": 1.7, + "learning_rate": 4.413334241318442e-05, + "loss": 1.2803, + "step": 2836500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4131242447623855e-05, + "loss": 1.2876, + "step": 2837000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4129142482063295e-05, + "loss": 1.2538, + "step": 2837500 + }, + { + "epoch": 1.7, + "learning_rate": 4.412704251650273e-05, + "loss": 1.2659, + "step": 2838000 + }, + { + "epoch": 1.7, + "learning_rate": 4.412494255094216e-05, + "loss": 1.3031, + "step": 2838500 + }, + { + "epoch": 1.7, + "learning_rate": 4.41228425853816e-05, + "loss": 1.285, + "step": 2839000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4120742619821036e-05, + "loss": 1.2525, + "step": 2839500 + }, + { + "epoch": 1.7, + "learning_rate": 4.411864265426047e-05, + "loss": 1.2602, + "step": 2840000 + }, + { + "epoch": 1.7, + "learning_rate": 4.41165426886999e-05, + "loss": 1.297, + "step": 2840500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4114442723139336e-05, + "loss": 1.2712, + "step": 2841000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4112346957509896e-05, + "loss": 1.2449, + "step": 2841500 + }, + { + "epoch": 1.7, + "learning_rate": 4.411025119188045e-05, + "loss": 1.2277, + "step": 2842000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4108155426251003e-05, + "loss": 1.2735, + "step": 2842500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4106055460690444e-05, + "loss": 1.3002, + "step": 2843000 + }, + { + "epoch": 1.7, + "learning_rate": 4.410395549512988e-05, + "loss": 1.2743, + "step": 2843500 + }, + { + "epoch": 1.71, + "learning_rate": 4.410185552956931e-05, + "loss": 1.2622, + "step": 2844000 + }, + { + "epoch": 1.71, + "learning_rate": 4.409975556400875e-05, + "loss": 1.2638, + "step": 2844500 + }, + { + "epoch": 1.71, + "learning_rate": 4.4097655598448184e-05, + "loss": 1.2857, + "step": 2845000 + }, + { + "epoch": 1.71, + "learning_rate": 4.409555563288762e-05, + "loss": 1.2835, + "step": 2845500 + }, + { + "epoch": 1.71, + "learning_rate": 4.409345566732706e-05, + "loss": 1.2801, + "step": 2846000 + }, + { + "epoch": 1.71, + "learning_rate": 4.409135570176649e-05, + "loss": 1.2927, + "step": 2846500 + }, + { + "epoch": 1.71, + "learning_rate": 4.4089255736205925e-05, + "loss": 1.3046, + "step": 2847000 + }, + { + "epoch": 1.71, + "learning_rate": 4.4087159970576485e-05, + "loss": 1.2726, + "step": 2847500 + }, + { + "epoch": 1.71, + "learning_rate": 4.408506000501592e-05, + "loss": 1.2844, + "step": 2848000 + }, + { + "epoch": 1.71, + "learning_rate": 4.408296003945535e-05, + "loss": 1.2841, + "step": 2848500 + }, + { + "epoch": 1.71, + "learning_rate": 4.408086007389479e-05, + "loss": 1.283, + "step": 2849000 + }, + { + "epoch": 1.71, + "learning_rate": 4.4078760108334226e-05, + "loss": 1.2581, + "step": 2849500 + }, + { + "epoch": 1.71, + "learning_rate": 4.407666434270478e-05, + "loss": 1.287, + "step": 2850000 + }, + { + "epoch": 1.71, + "learning_rate": 4.407456437714421e-05, + "loss": 1.276, + "step": 2850500 + }, + { + "epoch": 1.71, + "learning_rate": 4.407246441158365e-05, + "loss": 1.2657, + "step": 2851000 + }, + { + "epoch": 1.71, + "learning_rate": 4.407036444602309e-05, + "loss": 1.274, + "step": 2851500 + }, + { + "epoch": 1.71, + "learning_rate": 4.406826868039364e-05, + "loss": 1.2848, + "step": 2852000 + }, + { + "epoch": 1.71, + "learning_rate": 4.4066168714833074e-05, + "loss": 1.2651, + "step": 2852500 + }, + { + "epoch": 1.71, + "learning_rate": 4.4064068749272514e-05, + "loss": 1.2794, + "step": 2853000 + }, + { + "epoch": 1.71, + "learning_rate": 4.406196878371195e-05, + "loss": 1.3028, + "step": 2853500 + }, + { + "epoch": 1.71, + "learning_rate": 4.405986881815138e-05, + "loss": 1.3084, + "step": 2854000 + }, + { + "epoch": 1.71, + "learning_rate": 4.405777305252194e-05, + "loss": 1.2676, + "step": 2854500 + }, + { + "epoch": 1.71, + "learning_rate": 4.4055673086961375e-05, + "loss": 1.2972, + "step": 2855000 + }, + { + "epoch": 1.71, + "learning_rate": 4.405357312140081e-05, + "loss": 1.2309, + "step": 2855500 + }, + { + "epoch": 1.71, + "learning_rate": 4.405147315584025e-05, + "loss": 1.2657, + "step": 2856000 + }, + { + "epoch": 1.71, + "learning_rate": 4.40493773902108e-05, + "loss": 1.2729, + "step": 2856500 + }, + { + "epoch": 1.71, + "learning_rate": 4.4047277424650235e-05, + "loss": 1.2866, + "step": 2857000 + }, + { + "epoch": 1.71, + "learning_rate": 4.404517745908967e-05, + "loss": 1.2765, + "step": 2857500 + }, + { + "epoch": 1.71, + "learning_rate": 4.404307749352911e-05, + "loss": 1.2708, + "step": 2858000 + }, + { + "epoch": 1.71, + "learning_rate": 4.404097752796854e-05, + "loss": 1.2642, + "step": 2858500 + }, + { + "epoch": 1.71, + "learning_rate": 4.4038877562407976e-05, + "loss": 1.2957, + "step": 2859000 + }, + { + "epoch": 1.71, + "learning_rate": 4.403677759684741e-05, + "loss": 1.2841, + "step": 2859500 + }, + { + "epoch": 1.71, + "learning_rate": 4.403468183121797e-05, + "loss": 1.2551, + "step": 2860000 + }, + { + "epoch": 1.71, + "learning_rate": 4.40325818656574e-05, + "loss": 1.2733, + "step": 2860500 + }, + { + "epoch": 1.72, + "learning_rate": 4.403048190009684e-05, + "loss": 1.2524, + "step": 2861000 + }, + { + "epoch": 1.72, + "learning_rate": 4.402838193453628e-05, + "loss": 1.2945, + "step": 2861500 + }, + { + "epoch": 1.72, + "learning_rate": 4.4026281968975704e-05, + "loss": 1.2743, + "step": 2862000 + }, + { + "epoch": 1.72, + "learning_rate": 4.4024182003415144e-05, + "loss": 1.2744, + "step": 2862500 + }, + { + "epoch": 1.72, + "learning_rate": 4.402208203785458e-05, + "loss": 1.2906, + "step": 2863000 + }, + { + "epoch": 1.72, + "learning_rate": 4.401998627222514e-05, + "loss": 1.2877, + "step": 2863500 + }, + { + "epoch": 1.72, + "learning_rate": 4.401788630666457e-05, + "loss": 1.2748, + "step": 2864000 + }, + { + "epoch": 1.72, + "learning_rate": 4.4015786341104005e-05, + "loss": 1.244, + "step": 2864500 + }, + { + "epoch": 1.72, + "learning_rate": 4.401368637554344e-05, + "loss": 1.2454, + "step": 2865000 + }, + { + "epoch": 1.72, + "learning_rate": 4.401158640998287e-05, + "loss": 1.2843, + "step": 2865500 + }, + { + "epoch": 1.72, + "learning_rate": 4.400948644442231e-05, + "loss": 1.2632, + "step": 2866000 + }, + { + "epoch": 1.72, + "learning_rate": 4.4007386478861745e-05, + "loss": 1.2558, + "step": 2866500 + }, + { + "epoch": 1.72, + "learning_rate": 4.400528651330118e-05, + "loss": 1.2715, + "step": 2867000 + }, + { + "epoch": 1.72, + "learning_rate": 4.400319074767173e-05, + "loss": 1.2719, + "step": 2867500 + }, + { + "epoch": 1.72, + "learning_rate": 4.400109078211117e-05, + "loss": 1.299, + "step": 2868000 + }, + { + "epoch": 1.72, + "learning_rate": 4.3998990816550606e-05, + "loss": 1.2808, + "step": 2868500 + }, + { + "epoch": 1.72, + "learning_rate": 4.399689505092116e-05, + "loss": 1.2802, + "step": 2869000 + }, + { + "epoch": 1.72, + "learning_rate": 4.39947950853606e-05, + "loss": 1.2669, + "step": 2869500 + }, + { + "epoch": 1.72, + "learning_rate": 4.399269511980003e-05, + "loss": 1.2659, + "step": 2870000 + }, + { + "epoch": 1.72, + "learning_rate": 4.399059515423947e-05, + "loss": 1.2638, + "step": 2870500 + }, + { + "epoch": 1.72, + "learning_rate": 4.398849518867891e-05, + "loss": 1.2692, + "step": 2871000 + }, + { + "epoch": 1.72, + "learning_rate": 4.398639522311834e-05, + "loss": 1.2922, + "step": 2871500 + }, + { + "epoch": 1.72, + "learning_rate": 4.3984295257557774e-05, + "loss": 1.2768, + "step": 2872000 + }, + { + "epoch": 1.72, + "learning_rate": 4.3982195291997214e-05, + "loss": 1.2906, + "step": 2872500 + }, + { + "epoch": 1.72, + "learning_rate": 4.398009952636777e-05, + "loss": 1.2858, + "step": 2873000 + }, + { + "epoch": 1.72, + "learning_rate": 4.39779995608072e-05, + "loss": 1.2792, + "step": 2873500 + }, + { + "epoch": 1.72, + "learning_rate": 4.3975903795177755e-05, + "loss": 1.292, + "step": 2874000 + }, + { + "epoch": 1.72, + "learning_rate": 4.397380382961719e-05, + "loss": 1.2844, + "step": 2874500 + }, + { + "epoch": 1.72, + "learning_rate": 4.397170386405663e-05, + "loss": 1.2774, + "step": 2875000 + }, + { + "epoch": 1.72, + "learning_rate": 4.396960389849606e-05, + "loss": 1.3109, + "step": 2875500 + }, + { + "epoch": 1.72, + "learning_rate": 4.396750813286662e-05, + "loss": 1.2479, + "step": 2876000 + }, + { + "epoch": 1.72, + "learning_rate": 4.3965408167306056e-05, + "loss": 1.2731, + "step": 2876500 + }, + { + "epoch": 1.72, + "learning_rate": 4.396330820174549e-05, + "loss": 1.2692, + "step": 2877000 + }, + { + "epoch": 1.73, + "learning_rate": 4.396121243611605e-05, + "loss": 1.2584, + "step": 2877500 + }, + { + "epoch": 1.73, + "learning_rate": 4.395911247055548e-05, + "loss": 1.2503, + "step": 2878000 + }, + { + "epoch": 1.73, + "learning_rate": 4.395701250499492e-05, + "loss": 1.2696, + "step": 2878500 + }, + { + "epoch": 1.73, + "learning_rate": 4.395491253943435e-05, + "loss": 1.2599, + "step": 2879000 + }, + { + "epoch": 1.73, + "learning_rate": 4.395281257387378e-05, + "loss": 1.279, + "step": 2879500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3950716808244344e-05, + "loss": 1.2657, + "step": 2880000 + }, + { + "epoch": 1.73, + "learning_rate": 4.3948616842683784e-05, + "loss": 1.2749, + "step": 2880500 + }, + { + "epoch": 1.73, + "learning_rate": 4.394651687712321e-05, + "loss": 1.2687, + "step": 2881000 + }, + { + "epoch": 1.73, + "learning_rate": 4.394442111149377e-05, + "loss": 1.241, + "step": 2881500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3942321145933204e-05, + "loss": 1.2776, + "step": 2882000 + }, + { + "epoch": 1.73, + "learning_rate": 4.3940221180372645e-05, + "loss": 1.2631, + "step": 2882500 + }, + { + "epoch": 1.73, + "learning_rate": 4.393812121481208e-05, + "loss": 1.2985, + "step": 2883000 + }, + { + "epoch": 1.73, + "learning_rate": 4.393602124925151e-05, + "loss": 1.3127, + "step": 2883500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3933921283690945e-05, + "loss": 1.2485, + "step": 2884000 + }, + { + "epoch": 1.73, + "learning_rate": 4.393182131813038e-05, + "loss": 1.2889, + "step": 2884500 + }, + { + "epoch": 1.73, + "learning_rate": 4.392972135256982e-05, + "loss": 1.2742, + "step": 2885000 + }, + { + "epoch": 1.73, + "learning_rate": 4.392762138700925e-05, + "loss": 1.2753, + "step": 2885500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3925525621379806e-05, + "loss": 1.252, + "step": 2886000 + }, + { + "epoch": 1.73, + "learning_rate": 4.392342565581924e-05, + "loss": 1.2911, + "step": 2886500 + }, + { + "epoch": 1.73, + "learning_rate": 4.392132569025868e-05, + "loss": 1.2844, + "step": 2887000 + }, + { + "epoch": 1.73, + "learning_rate": 4.391922572469811e-05, + "loss": 1.2594, + "step": 2887500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3917125759137546e-05, + "loss": 1.2723, + "step": 2888000 + }, + { + "epoch": 1.73, + "learning_rate": 4.3915025793576987e-05, + "loss": 1.2955, + "step": 2888500 + }, + { + "epoch": 1.73, + "learning_rate": 4.391293002794754e-05, + "loss": 1.2501, + "step": 2889000 + }, + { + "epoch": 1.73, + "learning_rate": 4.3910830062386974e-05, + "loss": 1.2553, + "step": 2889500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3908730096826414e-05, + "loss": 1.2625, + "step": 2890000 + }, + { + "epoch": 1.73, + "learning_rate": 4.390663013126585e-05, + "loss": 1.3029, + "step": 2890500 + }, + { + "epoch": 1.73, + "learning_rate": 4.390453016570528e-05, + "loss": 1.2679, + "step": 2891000 + }, + { + "epoch": 1.73, + "learning_rate": 4.390243020014472e-05, + "loss": 1.2776, + "step": 2891500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3900330234584154e-05, + "loss": 1.2888, + "step": 2892000 + }, + { + "epoch": 1.73, + "learning_rate": 4.389823026902359e-05, + "loss": 1.2898, + "step": 2892500 + }, + { + "epoch": 1.73, + "learning_rate": 4.389613030346303e-05, + "loss": 1.3113, + "step": 2893000 + }, + { + "epoch": 1.73, + "learning_rate": 4.389403033790246e-05, + "loss": 1.268, + "step": 2893500 + }, + { + "epoch": 1.74, + "learning_rate": 4.389193037234189e-05, + "loss": 1.296, + "step": 2894000 + }, + { + "epoch": 1.74, + "learning_rate": 4.388983040678133e-05, + "loss": 1.2824, + "step": 2894500 + }, + { + "epoch": 1.74, + "learning_rate": 4.388773464115189e-05, + "loss": 1.2835, + "step": 2895000 + }, + { + "epoch": 1.74, + "learning_rate": 4.388563467559132e-05, + "loss": 1.2843, + "step": 2895500 + }, + { + "epoch": 1.74, + "learning_rate": 4.3883538909961876e-05, + "loss": 1.2494, + "step": 2896000 + }, + { + "epoch": 1.74, + "learning_rate": 4.388143894440131e-05, + "loss": 1.2731, + "step": 2896500 + }, + { + "epoch": 1.74, + "learning_rate": 4.387933897884075e-05, + "loss": 1.2725, + "step": 2897000 + }, + { + "epoch": 1.74, + "learning_rate": 4.387723901328018e-05, + "loss": 1.2774, + "step": 2897500 + }, + { + "epoch": 1.74, + "learning_rate": 4.3875139047719616e-05, + "loss": 1.2902, + "step": 2898000 + }, + { + "epoch": 1.74, + "learning_rate": 4.387303908215905e-05, + "loss": 1.2858, + "step": 2898500 + }, + { + "epoch": 1.74, + "learning_rate": 4.387093911659848e-05, + "loss": 1.2664, + "step": 2899000 + }, + { + "epoch": 1.74, + "learning_rate": 4.3868839151037924e-05, + "loss": 1.3078, + "step": 2899500 + }, + { + "epoch": 1.74, + "learning_rate": 4.3866743385408484e-05, + "loss": 1.2707, + "step": 2900000 + }, + { + "epoch": 1.74, + "eval_loss": 1.2214449644088745, + "eval_runtime": 1124.1633, + "eval_samples_per_second": 468.544, + "eval_steps_per_second": 78.091, + "step": 2900000 + }, + { + "epoch": 1.74, + "learning_rate": 4.386464341984792e-05, + "loss": 1.2383, + "step": 2900500 + }, + { + "epoch": 1.74, + "learning_rate": 4.3862543454287344e-05, + "loss": 1.2507, + "step": 2901000 + }, + { + "epoch": 1.74, + "learning_rate": 4.3860443488726784e-05, + "loss": 1.2754, + "step": 2901500 + }, + { + "epoch": 1.74, + "learning_rate": 4.385834352316622e-05, + "loss": 1.2775, + "step": 2902000 + }, + { + "epoch": 1.74, + "learning_rate": 4.385624355760565e-05, + "loss": 1.286, + "step": 2902500 + }, + { + "epoch": 1.74, + "learning_rate": 4.385414359204509e-05, + "loss": 1.2687, + "step": 2903000 + }, + { + "epoch": 1.74, + "learning_rate": 4.3852043626484525e-05, + "loss": 1.2695, + "step": 2903500 + }, + { + "epoch": 1.74, + "learning_rate": 4.384994786085508e-05, + "loss": 1.2489, + "step": 2904000 + }, + { + "epoch": 1.74, + "learning_rate": 4.384785209522564e-05, + "loss": 1.285, + "step": 2904500 + }, + { + "epoch": 1.74, + "learning_rate": 4.384575212966507e-05, + "loss": 1.2849, + "step": 2905000 + }, + { + "epoch": 1.74, + "learning_rate": 4.3843652164104506e-05, + "loss": 1.2667, + "step": 2905500 + }, + { + "epoch": 1.74, + "learning_rate": 4.384155219854394e-05, + "loss": 1.253, + "step": 2906000 + }, + { + "epoch": 1.74, + "learning_rate": 4.383945223298338e-05, + "loss": 1.2526, + "step": 2906500 + }, + { + "epoch": 1.74, + "learning_rate": 4.383735226742281e-05, + "loss": 1.2656, + "step": 2907000 + }, + { + "epoch": 1.74, + "learning_rate": 4.3835252301862246e-05, + "loss": 1.2558, + "step": 2907500 + }, + { + "epoch": 1.74, + "learning_rate": 4.38331565362328e-05, + "loss": 1.2571, + "step": 2908000 + }, + { + "epoch": 1.74, + "learning_rate": 4.383105657067224e-05, + "loss": 1.2562, + "step": 2908500 + }, + { + "epoch": 1.74, + "learning_rate": 4.3828956605111674e-05, + "loss": 1.272, + "step": 2909000 + }, + { + "epoch": 1.74, + "learning_rate": 4.382685663955111e-05, + "loss": 1.241, + "step": 2909500 + }, + { + "epoch": 1.74, + "learning_rate": 4.382475667399055e-05, + "loss": 1.2726, + "step": 2910000 + }, + { + "epoch": 1.74, + "learning_rate": 4.38226609083611e-05, + "loss": 1.2858, + "step": 2910500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3820560942800534e-05, + "loss": 1.2924, + "step": 2911000 + }, + { + "epoch": 1.75, + "learning_rate": 4.381846097723997e-05, + "loss": 1.2769, + "step": 2911500 + }, + { + "epoch": 1.75, + "learning_rate": 4.381636101167941e-05, + "loss": 1.2921, + "step": 2912000 + }, + { + "epoch": 1.75, + "learning_rate": 4.381426104611884e-05, + "loss": 1.2759, + "step": 2912500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3812161080558275e-05, + "loss": 1.2482, + "step": 2913000 + }, + { + "epoch": 1.75, + "learning_rate": 4.3810061114997715e-05, + "loss": 1.2885, + "step": 2913500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3807969549299396e-05, + "loss": 1.2662, + "step": 2914000 + }, + { + "epoch": 1.75, + "learning_rate": 4.380586958373883e-05, + "loss": 1.2619, + "step": 2914500 + }, + { + "epoch": 1.75, + "learning_rate": 4.380376961817826e-05, + "loss": 1.2657, + "step": 2915000 + }, + { + "epoch": 1.75, + "learning_rate": 4.3801669652617696e-05, + "loss": 1.2536, + "step": 2915500 + }, + { + "epoch": 1.75, + "learning_rate": 4.379956968705713e-05, + "loss": 1.267, + "step": 2916000 + }, + { + "epoch": 1.75, + "learning_rate": 4.379746972149656e-05, + "loss": 1.2333, + "step": 2916500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3795369755936e-05, + "loss": 1.2517, + "step": 2917000 + }, + { + "epoch": 1.75, + "learning_rate": 4.379326979037544e-05, + "loss": 1.2552, + "step": 2917500 + }, + { + "epoch": 1.75, + "learning_rate": 4.379116982481487e-05, + "loss": 1.2464, + "step": 2918000 + }, + { + "epoch": 1.75, + "learning_rate": 4.378906985925431e-05, + "loss": 1.2496, + "step": 2918500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3786969893693744e-05, + "loss": 1.2622, + "step": 2919000 + }, + { + "epoch": 1.75, + "learning_rate": 4.37848741280643e-05, + "loss": 1.2754, + "step": 2919500 + }, + { + "epoch": 1.75, + "learning_rate": 4.378277416250373e-05, + "loss": 1.2571, + "step": 2920000 + }, + { + "epoch": 1.75, + "learning_rate": 4.378067419694317e-05, + "loss": 1.2673, + "step": 2920500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3778574231382605e-05, + "loss": 1.2946, + "step": 2921000 + }, + { + "epoch": 1.75, + "learning_rate": 4.377647426582204e-05, + "loss": 1.2752, + "step": 2921500 + }, + { + "epoch": 1.75, + "learning_rate": 4.37743785001926e-05, + "loss": 1.2904, + "step": 2922000 + }, + { + "epoch": 1.75, + "learning_rate": 4.377228273456315e-05, + "loss": 1.2781, + "step": 2922500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3770182769002585e-05, + "loss": 1.2698, + "step": 2923000 + }, + { + "epoch": 1.75, + "learning_rate": 4.376808280344202e-05, + "loss": 1.2699, + "step": 2923500 + }, + { + "epoch": 1.75, + "learning_rate": 4.376598283788146e-05, + "loss": 1.2648, + "step": 2924000 + }, + { + "epoch": 1.75, + "learning_rate": 4.376388287232089e-05, + "loss": 1.2872, + "step": 2924500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3761782906760326e-05, + "loss": 1.2993, + "step": 2925000 + }, + { + "epoch": 1.75, + "learning_rate": 4.3759682941199766e-05, + "loss": 1.2596, + "step": 2925500 + }, + { + "epoch": 1.75, + "learning_rate": 4.37575829756392e-05, + "loss": 1.244, + "step": 2926000 + }, + { + "epoch": 1.75, + "learning_rate": 4.375548301007863e-05, + "loss": 1.2727, + "step": 2926500 + }, + { + "epoch": 1.75, + "learning_rate": 4.375338724444919e-05, + "loss": 1.2797, + "step": 2927000 + }, + { + "epoch": 1.76, + "learning_rate": 4.375128727888863e-05, + "loss": 1.2758, + "step": 2927500 + }, + { + "epoch": 1.76, + "learning_rate": 4.374918731332806e-05, + "loss": 1.237, + "step": 2928000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3747087347767494e-05, + "loss": 1.2569, + "step": 2928500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3744987382206934e-05, + "loss": 1.2649, + "step": 2929000 + }, + { + "epoch": 1.76, + "learning_rate": 4.374288741664637e-05, + "loss": 1.2924, + "step": 2929500 + }, + { + "epoch": 1.76, + "learning_rate": 4.374079165101692e-05, + "loss": 1.2451, + "step": 2930000 + }, + { + "epoch": 1.76, + "learning_rate": 4.373869168545636e-05, + "loss": 1.3132, + "step": 2930500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3736591719895795e-05, + "loss": 1.2719, + "step": 2931000 + }, + { + "epoch": 1.76, + "learning_rate": 4.373449175433523e-05, + "loss": 1.2473, + "step": 2931500 + }, + { + "epoch": 1.76, + "learning_rate": 4.373239178877467e-05, + "loss": 1.2582, + "step": 2932000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3730291823214095e-05, + "loss": 1.2387, + "step": 2932500 + }, + { + "epoch": 1.76, + "learning_rate": 4.372819185765353e-05, + "loss": 1.3085, + "step": 2933000 + }, + { + "epoch": 1.76, + "learning_rate": 4.372609189209297e-05, + "loss": 1.2424, + "step": 2933500 + }, + { + "epoch": 1.76, + "learning_rate": 4.372399612646353e-05, + "loss": 1.2901, + "step": 2934000 + }, + { + "epoch": 1.76, + "learning_rate": 4.372189616090296e-05, + "loss": 1.2433, + "step": 2934500 + }, + { + "epoch": 1.76, + "learning_rate": 4.371979619534239e-05, + "loss": 1.2585, + "step": 2935000 + }, + { + "epoch": 1.76, + "learning_rate": 4.371769622978183e-05, + "loss": 1.273, + "step": 2935500 + }, + { + "epoch": 1.76, + "learning_rate": 4.371560046415239e-05, + "loss": 1.2416, + "step": 2936000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3713500498591824e-05, + "loss": 1.2494, + "step": 2936500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3711400533031264e-05, + "loss": 1.2569, + "step": 2937000 + }, + { + "epoch": 1.76, + "learning_rate": 4.370930056747069e-05, + "loss": 1.2505, + "step": 2937500 + }, + { + "epoch": 1.76, + "learning_rate": 4.370720480184125e-05, + "loss": 1.2539, + "step": 2938000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3705104836280684e-05, + "loss": 1.252, + "step": 2938500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3703004870720124e-05, + "loss": 1.2508, + "step": 2939000 + }, + { + "epoch": 1.76, + "learning_rate": 4.370090490515956e-05, + "loss": 1.2588, + "step": 2939500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3698804939598985e-05, + "loss": 1.2657, + "step": 2940000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3696709173969545e-05, + "loss": 1.2612, + "step": 2940500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3694609208408985e-05, + "loss": 1.2671, + "step": 2941000 + }, + { + "epoch": 1.76, + "learning_rate": 4.369250924284842e-05, + "loss": 1.2621, + "step": 2941500 + }, + { + "epoch": 1.76, + "learning_rate": 4.369040927728785e-05, + "loss": 1.3084, + "step": 2942000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3688309311727286e-05, + "loss": 1.2611, + "step": 2942500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3686213546097846e-05, + "loss": 1.2815, + "step": 2943000 + }, + { + "epoch": 1.76, + "learning_rate": 4.368411358053728e-05, + "loss": 1.2471, + "step": 2943500 + }, + { + "epoch": 1.77, + "learning_rate": 4.368201361497672e-05, + "loss": 1.2752, + "step": 2944000 + }, + { + "epoch": 1.77, + "learning_rate": 4.3679913649416146e-05, + "loss": 1.2855, + "step": 2944500 + }, + { + "epoch": 1.77, + "learning_rate": 4.367781368385558e-05, + "loss": 1.2629, + "step": 2945000 + }, + { + "epoch": 1.77, + "learning_rate": 4.367571371829502e-05, + "loss": 1.2581, + "step": 2945500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3673613752734453e-05, + "loss": 1.2516, + "step": 2946000 + }, + { + "epoch": 1.77, + "learning_rate": 4.367151378717389e-05, + "loss": 1.2788, + "step": 2946500 + }, + { + "epoch": 1.77, + "learning_rate": 4.366941802154444e-05, + "loss": 1.281, + "step": 2947000 + }, + { + "epoch": 1.77, + "learning_rate": 4.366731805598388e-05, + "loss": 1.2421, + "step": 2947500 + }, + { + "epoch": 1.77, + "learning_rate": 4.366522229035444e-05, + "loss": 1.2779, + "step": 2948000 + }, + { + "epoch": 1.77, + "learning_rate": 4.3663122324793875e-05, + "loss": 1.2748, + "step": 2948500 + }, + { + "epoch": 1.77, + "learning_rate": 4.366102235923331e-05, + "loss": 1.2938, + "step": 2949000 + }, + { + "epoch": 1.77, + "learning_rate": 4.365892239367274e-05, + "loss": 1.2804, + "step": 2949500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3656822428112175e-05, + "loss": 1.2656, + "step": 2950000 + }, + { + "epoch": 1.77, + "learning_rate": 4.3654722462551615e-05, + "loss": 1.2637, + "step": 2950500 + }, + { + "epoch": 1.77, + "learning_rate": 4.365262249699105e-05, + "loss": 1.2759, + "step": 2951000 + }, + { + "epoch": 1.77, + "learning_rate": 4.365052253143048e-05, + "loss": 1.3018, + "step": 2951500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3648426765801036e-05, + "loss": 1.2506, + "step": 2952000 + }, + { + "epoch": 1.77, + "learning_rate": 4.3646326800240476e-05, + "loss": 1.2807, + "step": 2952500 + }, + { + "epoch": 1.77, + "learning_rate": 4.364422683467991e-05, + "loss": 1.2556, + "step": 2953000 + }, + { + "epoch": 1.77, + "learning_rate": 4.364212686911934e-05, + "loss": 1.2495, + "step": 2953500 + }, + { + "epoch": 1.77, + "learning_rate": 4.364002690355878e-05, + "loss": 1.2583, + "step": 2954000 + }, + { + "epoch": 1.77, + "learning_rate": 4.3637926937998216e-05, + "loss": 1.2634, + "step": 2954500 + }, + { + "epoch": 1.77, + "learning_rate": 4.363582697243765e-05, + "loss": 1.2818, + "step": 2955000 + }, + { + "epoch": 1.77, + "learning_rate": 4.363372700687709e-05, + "loss": 1.2658, + "step": 2955500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3631627041316524e-05, + "loss": 1.2508, + "step": 2956000 + }, + { + "epoch": 1.77, + "learning_rate": 4.362952707575596e-05, + "loss": 1.2946, + "step": 2956500 + }, + { + "epoch": 1.77, + "learning_rate": 4.362743131012651e-05, + "loss": 1.2787, + "step": 2957000 + }, + { + "epoch": 1.77, + "learning_rate": 4.362533134456595e-05, + "loss": 1.2815, + "step": 2957500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3623231379005384e-05, + "loss": 1.2718, + "step": 2958000 + }, + { + "epoch": 1.77, + "learning_rate": 4.362113141344482e-05, + "loss": 1.2848, + "step": 2958500 + }, + { + "epoch": 1.77, + "learning_rate": 4.361903144788426e-05, + "loss": 1.2711, + "step": 2959000 + }, + { + "epoch": 1.77, + "learning_rate": 4.361693568225481e-05, + "loss": 1.2805, + "step": 2959500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3614835716694245e-05, + "loss": 1.2787, + "step": 2960000 + }, + { + "epoch": 1.77, + "learning_rate": 4.3612735751133685e-05, + "loss": 1.235, + "step": 2960500 + }, + { + "epoch": 1.78, + "learning_rate": 4.361063578557312e-05, + "loss": 1.2918, + "step": 2961000 + }, + { + "epoch": 1.78, + "learning_rate": 4.360853582001255e-05, + "loss": 1.3014, + "step": 2961500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3606435854451986e-05, + "loss": 1.2773, + "step": 2962000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3604340088822546e-05, + "loss": 1.2502, + "step": 2962500 + }, + { + "epoch": 1.78, + "learning_rate": 4.360224012326198e-05, + "loss": 1.2761, + "step": 2963000 + }, + { + "epoch": 1.78, + "learning_rate": 4.360014015770141e-05, + "loss": 1.2191, + "step": 2963500 + }, + { + "epoch": 1.78, + "learning_rate": 4.359804019214085e-05, + "loss": 1.2778, + "step": 2964000 + }, + { + "epoch": 1.78, + "learning_rate": 4.359594022658028e-05, + "loss": 1.2618, + "step": 2964500 + }, + { + "epoch": 1.78, + "learning_rate": 4.359384026101971e-05, + "loss": 1.2734, + "step": 2965000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3591740295459154e-05, + "loss": 1.2641, + "step": 2965500 + }, + { + "epoch": 1.78, + "learning_rate": 4.358964032989859e-05, + "loss": 1.274, + "step": 2966000 + }, + { + "epoch": 1.78, + "learning_rate": 4.358754036433802e-05, + "loss": 1.2451, + "step": 2966500 + }, + { + "epoch": 1.78, + "learning_rate": 4.35854487986397e-05, + "loss": 1.2825, + "step": 2967000 + }, + { + "epoch": 1.78, + "learning_rate": 4.358334883307914e-05, + "loss": 1.2664, + "step": 2967500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3581248867518575e-05, + "loss": 1.2477, + "step": 2968000 + }, + { + "epoch": 1.78, + "learning_rate": 4.357914890195801e-05, + "loss": 1.279, + "step": 2968500 + }, + { + "epoch": 1.78, + "learning_rate": 4.357704893639744e-05, + "loss": 1.2749, + "step": 2969000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3574948970836875e-05, + "loss": 1.2427, + "step": 2969500 + }, + { + "epoch": 1.78, + "learning_rate": 4.357284900527631e-05, + "loss": 1.2901, + "step": 2970000 + }, + { + "epoch": 1.78, + "learning_rate": 4.357074903971575e-05, + "loss": 1.2501, + "step": 2970500 + }, + { + "epoch": 1.78, + "learning_rate": 4.356865327408631e-05, + "loss": 1.269, + "step": 2971000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3566553308525736e-05, + "loss": 1.2711, + "step": 2971500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3564457542896296e-05, + "loss": 1.2674, + "step": 2972000 + }, + { + "epoch": 1.78, + "learning_rate": 4.356235757733573e-05, + "loss": 1.2632, + "step": 2972500 + }, + { + "epoch": 1.78, + "learning_rate": 4.356025761177517e-05, + "loss": 1.2817, + "step": 2973000 + }, + { + "epoch": 1.78, + "learning_rate": 4.35581576462146e-05, + "loss": 1.24, + "step": 2973500 + }, + { + "epoch": 1.78, + "learning_rate": 4.355605768065404e-05, + "loss": 1.2769, + "step": 2974000 + }, + { + "epoch": 1.78, + "learning_rate": 4.355395771509347e-05, + "loss": 1.2673, + "step": 2974500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3551857749532904e-05, + "loss": 1.2529, + "step": 2975000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3549757783972344e-05, + "loss": 1.2799, + "step": 2975500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3547662018342904e-05, + "loss": 1.2601, + "step": 2976000 + }, + { + "epoch": 1.78, + "learning_rate": 4.354556205278233e-05, + "loss": 1.2751, + "step": 2976500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3543462087221764e-05, + "loss": 1.2592, + "step": 2977000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3541362121661205e-05, + "loss": 1.2692, + "step": 2977500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3539266356031765e-05, + "loss": 1.2813, + "step": 2978000 + }, + { + "epoch": 1.79, + "learning_rate": 4.353717059040232e-05, + "loss": 1.2671, + "step": 2978500 + }, + { + "epoch": 1.79, + "learning_rate": 4.353507062484175e-05, + "loss": 1.2601, + "step": 2979000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3532970659281185e-05, + "loss": 1.2517, + "step": 2979500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3530870693720626e-05, + "loss": 1.2983, + "step": 2980000 + }, + { + "epoch": 1.79, + "learning_rate": 4.352877072816006e-05, + "loss": 1.2892, + "step": 2980500 + }, + { + "epoch": 1.79, + "learning_rate": 4.352667076259949e-05, + "loss": 1.2624, + "step": 2981000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3524570797038926e-05, + "loss": 1.2708, + "step": 2981500 + }, + { + "epoch": 1.79, + "learning_rate": 4.352247083147836e-05, + "loss": 1.2479, + "step": 2982000 + }, + { + "epoch": 1.79, + "learning_rate": 4.352037506584892e-05, + "loss": 1.2673, + "step": 2982500 + }, + { + "epoch": 1.79, + "learning_rate": 4.351827510028836e-05, + "loss": 1.2788, + "step": 2983000 + }, + { + "epoch": 1.79, + "learning_rate": 4.351617513472779e-05, + "loss": 1.2673, + "step": 2983500 + }, + { + "epoch": 1.79, + "learning_rate": 4.351407936909835e-05, + "loss": 1.262, + "step": 2984000 + }, + { + "epoch": 1.79, + "learning_rate": 4.351197940353778e-05, + "loss": 1.2664, + "step": 2984500 + }, + { + "epoch": 1.79, + "learning_rate": 4.350987943797722e-05, + "loss": 1.2552, + "step": 2985000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3507779472416654e-05, + "loss": 1.2421, + "step": 2985500 + }, + { + "epoch": 1.79, + "learning_rate": 4.350567950685608e-05, + "loss": 1.2544, + "step": 2986000 + }, + { + "epoch": 1.79, + "learning_rate": 4.350357954129552e-05, + "loss": 1.2573, + "step": 2986500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3501479575734955e-05, + "loss": 1.276, + "step": 2987000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3499383810105515e-05, + "loss": 1.2476, + "step": 2987500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3497283844544955e-05, + "loss": 1.2706, + "step": 2988000 + }, + { + "epoch": 1.79, + "learning_rate": 4.349518387898438e-05, + "loss": 1.2864, + "step": 2988500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3493083913423815e-05, + "loss": 1.2899, + "step": 2989000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3490983947863256e-05, + "loss": 1.2559, + "step": 2989500 + }, + { + "epoch": 1.79, + "learning_rate": 4.348888398230269e-05, + "loss": 1.2899, + "step": 2990000 + }, + { + "epoch": 1.79, + "learning_rate": 4.348678401674212e-05, + "loss": 1.2857, + "step": 2990500 + }, + { + "epoch": 1.79, + "learning_rate": 4.348468405118156e-05, + "loss": 1.2683, + "step": 2991000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3482584085620996e-05, + "loss": 1.2764, + "step": 2991500 + }, + { + "epoch": 1.79, + "learning_rate": 4.348048412006043e-05, + "loss": 1.2795, + "step": 2992000 + }, + { + "epoch": 1.79, + "learning_rate": 4.347838415449987e-05, + "loss": 1.2778, + "step": 2992500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3476284188939303e-05, + "loss": 1.2778, + "step": 2993000 + }, + { + "epoch": 1.79, + "learning_rate": 4.347418842330986e-05, + "loss": 1.2904, + "step": 2993500 + }, + { + "epoch": 1.8, + "learning_rate": 4.347208845774929e-05, + "loss": 1.2703, + "step": 2994000 + }, + { + "epoch": 1.8, + "learning_rate": 4.346998849218873e-05, + "loss": 1.2864, + "step": 2994500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3467892726559284e-05, + "loss": 1.2469, + "step": 2995000 + }, + { + "epoch": 1.8, + "learning_rate": 4.346579276099872e-05, + "loss": 1.2605, + "step": 2995500 + }, + { + "epoch": 1.8, + "learning_rate": 4.346369279543816e-05, + "loss": 1.2682, + "step": 2996000 + }, + { + "epoch": 1.8, + "learning_rate": 4.346159282987759e-05, + "loss": 1.2911, + "step": 2996500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3459497064248145e-05, + "loss": 1.2421, + "step": 2997000 + }, + { + "epoch": 1.8, + "learning_rate": 4.345739709868758e-05, + "loss": 1.2776, + "step": 2997500 + }, + { + "epoch": 1.8, + "learning_rate": 4.345529713312702e-05, + "loss": 1.2784, + "step": 2998000 + }, + { + "epoch": 1.8, + "learning_rate": 4.345319716756645e-05, + "loss": 1.2667, + "step": 2998500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3451097202005886e-05, + "loss": 1.2566, + "step": 2999000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3448997236445326e-05, + "loss": 1.2322, + "step": 2999500 + }, + { + "epoch": 1.8, + "learning_rate": 4.344690147081588e-05, + "loss": 1.2769, + "step": 3000000 + }, + { + "epoch": 1.8, + "eval_loss": 1.2166739702224731, + "eval_runtime": 1103.532, + "eval_samples_per_second": 477.304, + "eval_steps_per_second": 79.551, + "step": 3000000 + }, + { + "epoch": 1.8, + "learning_rate": 4.344480150525531e-05, + "loss": 1.285, + "step": 3000500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3442701539694746e-05, + "loss": 1.2474, + "step": 3001000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3440601574134187e-05, + "loss": 1.2763, + "step": 3001500 + }, + { + "epoch": 1.8, + "learning_rate": 4.343850580850474e-05, + "loss": 1.2567, + "step": 3002000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3436405842944174e-05, + "loss": 1.2724, + "step": 3002500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3434305877383614e-05, + "loss": 1.2459, + "step": 3003000 + }, + { + "epoch": 1.8, + "learning_rate": 4.343220591182305e-05, + "loss": 1.2671, + "step": 3003500 + }, + { + "epoch": 1.8, + "learning_rate": 4.34301101461936e-05, + "loss": 1.2762, + "step": 3004000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3428010180633034e-05, + "loss": 1.2305, + "step": 3004500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3425910215072475e-05, + "loss": 1.2917, + "step": 3005000 + }, + { + "epoch": 1.8, + "learning_rate": 4.342381024951191e-05, + "loss": 1.245, + "step": 3005500 + }, + { + "epoch": 1.8, + "learning_rate": 4.342171028395134e-05, + "loss": 1.246, + "step": 3006000 + }, + { + "epoch": 1.8, + "learning_rate": 4.341961031839078e-05, + "loss": 1.2648, + "step": 3006500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3417510352830215e-05, + "loss": 1.2847, + "step": 3007000 + }, + { + "epoch": 1.8, + "learning_rate": 4.341541038726965e-05, + "loss": 1.2734, + "step": 3007500 + }, + { + "epoch": 1.8, + "learning_rate": 4.341331042170908e-05, + "loss": 1.2603, + "step": 3008000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3411210456148516e-05, + "loss": 1.244, + "step": 3008500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3409114690519076e-05, + "loss": 1.262, + "step": 3009000 + }, + { + "epoch": 1.8, + "learning_rate": 4.340701472495851e-05, + "loss": 1.2634, + "step": 3009500 + }, + { + "epoch": 1.8, + "learning_rate": 4.340491475939795e-05, + "loss": 1.2499, + "step": 3010000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3402814793837376e-05, + "loss": 1.2837, + "step": 3010500 + }, + { + "epoch": 1.81, + "learning_rate": 4.3400714828276817e-05, + "loss": 1.2401, + "step": 3011000 + }, + { + "epoch": 1.81, + "learning_rate": 4.339861906264738e-05, + "loss": 1.2328, + "step": 3011500 + }, + { + "epoch": 1.81, + "learning_rate": 4.339651909708681e-05, + "loss": 1.2649, + "step": 3012000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3394419131526244e-05, + "loss": 1.2763, + "step": 3012500 + }, + { + "epoch": 1.81, + "learning_rate": 4.339231916596568e-05, + "loss": 1.2608, + "step": 3013000 + }, + { + "epoch": 1.81, + "learning_rate": 4.339022340033624e-05, + "loss": 1.2733, + "step": 3013500 + }, + { + "epoch": 1.81, + "learning_rate": 4.338812343477567e-05, + "loss": 1.2699, + "step": 3014000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3386023469215105e-05, + "loss": 1.2651, + "step": 3014500 + }, + { + "epoch": 1.81, + "learning_rate": 4.338392350365454e-05, + "loss": 1.2768, + "step": 3015000 + }, + { + "epoch": 1.81, + "learning_rate": 4.33818277380251e-05, + "loss": 1.26, + "step": 3015500 + }, + { + "epoch": 1.81, + "learning_rate": 4.337972777246453e-05, + "loss": 1.2685, + "step": 3016000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3377627806903965e-05, + "loss": 1.2775, + "step": 3016500 + }, + { + "epoch": 1.81, + "learning_rate": 4.3375532041274526e-05, + "loss": 1.2761, + "step": 3017000 + }, + { + "epoch": 1.81, + "learning_rate": 4.337343207571396e-05, + "loss": 1.2665, + "step": 3017500 + }, + { + "epoch": 1.81, + "learning_rate": 4.337133211015339e-05, + "loss": 1.2405, + "step": 3018000 + }, + { + "epoch": 1.81, + "learning_rate": 4.336923214459283e-05, + "loss": 1.2575, + "step": 3018500 + }, + { + "epoch": 1.81, + "learning_rate": 4.3367132179032266e-05, + "loss": 1.2699, + "step": 3019000 + }, + { + "epoch": 1.81, + "learning_rate": 4.33650322134717e-05, + "loss": 1.2717, + "step": 3019500 + }, + { + "epoch": 1.81, + "learning_rate": 4.336293224791113e-05, + "loss": 1.2667, + "step": 3020000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3360832282350567e-05, + "loss": 1.2688, + "step": 3020500 + }, + { + "epoch": 1.81, + "learning_rate": 4.335873651672113e-05, + "loss": 1.2488, + "step": 3021000 + }, + { + "epoch": 1.81, + "learning_rate": 4.335664075109168e-05, + "loss": 1.2516, + "step": 3021500 + }, + { + "epoch": 1.81, + "learning_rate": 4.3354540785531114e-05, + "loss": 1.2612, + "step": 3022000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3352440819970554e-05, + "loss": 1.2439, + "step": 3022500 + }, + { + "epoch": 1.81, + "learning_rate": 4.335034085440999e-05, + "loss": 1.2796, + "step": 3023000 + }, + { + "epoch": 1.81, + "learning_rate": 4.334824088884942e-05, + "loss": 1.2655, + "step": 3023500 + }, + { + "epoch": 1.81, + "learning_rate": 4.334614092328886e-05, + "loss": 1.2487, + "step": 3024000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3344040957728295e-05, + "loss": 1.2884, + "step": 3024500 + }, + { + "epoch": 1.81, + "learning_rate": 4.334194099216773e-05, + "loss": 1.2638, + "step": 3025000 + }, + { + "epoch": 1.81, + "learning_rate": 4.333984102660716e-05, + "loss": 1.2818, + "step": 3025500 + }, + { + "epoch": 1.81, + "learning_rate": 4.3337741061046595e-05, + "loss": 1.2924, + "step": 3026000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3335641095486035e-05, + "loss": 1.2554, + "step": 3026500 + }, + { + "epoch": 1.81, + "learning_rate": 4.333354112992547e-05, + "loss": 1.279, + "step": 3027000 + }, + { + "epoch": 1.82, + "learning_rate": 4.333144536429602e-05, + "loss": 1.2362, + "step": 3027500 + }, + { + "epoch": 1.82, + "learning_rate": 4.332934959866658e-05, + "loss": 1.2775, + "step": 3028000 + }, + { + "epoch": 1.82, + "learning_rate": 4.3327249633106016e-05, + "loss": 1.252, + "step": 3028500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3325149667545456e-05, + "loss": 1.2668, + "step": 3029000 + }, + { + "epoch": 1.82, + "learning_rate": 4.332304970198488e-05, + "loss": 1.2745, + "step": 3029500 + }, + { + "epoch": 1.82, + "learning_rate": 4.332094973642432e-05, + "loss": 1.2744, + "step": 3030000 + }, + { + "epoch": 1.82, + "learning_rate": 4.331885397079488e-05, + "loss": 1.2852, + "step": 3030500 + }, + { + "epoch": 1.82, + "learning_rate": 4.331675400523432e-05, + "loss": 1.2807, + "step": 3031000 + }, + { + "epoch": 1.82, + "learning_rate": 4.331465403967375e-05, + "loss": 1.2418, + "step": 3031500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3312554074113184e-05, + "loss": 1.2644, + "step": 3032000 + }, + { + "epoch": 1.82, + "learning_rate": 4.331045410855262e-05, + "loss": 1.225, + "step": 3032500 + }, + { + "epoch": 1.82, + "learning_rate": 4.330835414299205e-05, + "loss": 1.2714, + "step": 3033000 + }, + { + "epoch": 1.82, + "learning_rate": 4.330625417743149e-05, + "loss": 1.2526, + "step": 3033500 + }, + { + "epoch": 1.82, + "learning_rate": 4.330415841180205e-05, + "loss": 1.2309, + "step": 3034000 + }, + { + "epoch": 1.82, + "learning_rate": 4.330205844624148e-05, + "loss": 1.2763, + "step": 3034500 + }, + { + "epoch": 1.82, + "learning_rate": 4.329995848068091e-05, + "loss": 1.2683, + "step": 3035000 + }, + { + "epoch": 1.82, + "learning_rate": 4.329785851512035e-05, + "loss": 1.2836, + "step": 3035500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3295758549559785e-05, + "loss": 1.2767, + "step": 3036000 + }, + { + "epoch": 1.82, + "learning_rate": 4.329365858399922e-05, + "loss": 1.2516, + "step": 3036500 + }, + { + "epoch": 1.82, + "learning_rate": 4.329155861843866e-05, + "loss": 1.2932, + "step": 3037000 + }, + { + "epoch": 1.82, + "learning_rate": 4.328945865287809e-05, + "loss": 1.2552, + "step": 3037500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3287362887248646e-05, + "loss": 1.2731, + "step": 3038000 + }, + { + "epoch": 1.82, + "learning_rate": 4.328526292168808e-05, + "loss": 1.272, + "step": 3038500 + }, + { + "epoch": 1.82, + "learning_rate": 4.328316295612752e-05, + "loss": 1.2585, + "step": 3039000 + }, + { + "epoch": 1.82, + "learning_rate": 4.328106299056695e-05, + "loss": 1.269, + "step": 3039500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3278971424868634e-05, + "loss": 1.265, + "step": 3040000 + }, + { + "epoch": 1.82, + "learning_rate": 4.327687145930807e-05, + "loss": 1.2741, + "step": 3040500 + }, + { + "epoch": 1.82, + "learning_rate": 4.327477149374751e-05, + "loss": 1.2566, + "step": 3041000 + }, + { + "epoch": 1.82, + "learning_rate": 4.3272671528186934e-05, + "loss": 1.2786, + "step": 3041500 + }, + { + "epoch": 1.82, + "learning_rate": 4.327057156262637e-05, + "loss": 1.2618, + "step": 3042000 + }, + { + "epoch": 1.82, + "learning_rate": 4.326847159706581e-05, + "loss": 1.2394, + "step": 3042500 + }, + { + "epoch": 1.82, + "learning_rate": 4.326637163150524e-05, + "loss": 1.279, + "step": 3043000 + }, + { + "epoch": 1.82, + "learning_rate": 4.3264271665944675e-05, + "loss": 1.29, + "step": 3043500 + }, + { + "epoch": 1.83, + "learning_rate": 4.3262171700384115e-05, + "loss": 1.2744, + "step": 3044000 + }, + { + "epoch": 1.83, + "learning_rate": 4.326007593475467e-05, + "loss": 1.268, + "step": 3044500 + }, + { + "epoch": 1.83, + "learning_rate": 4.32579759691941e-05, + "loss": 1.2659, + "step": 3045000 + }, + { + "epoch": 1.83, + "learning_rate": 4.3255876003633536e-05, + "loss": 1.2498, + "step": 3045500 + }, + { + "epoch": 1.83, + "learning_rate": 4.3253776038072976e-05, + "loss": 1.2531, + "step": 3046000 + }, + { + "epoch": 1.83, + "learning_rate": 4.325167607251241e-05, + "loss": 1.2629, + "step": 3046500 + }, + { + "epoch": 1.83, + "learning_rate": 4.324957610695184e-05, + "loss": 1.2382, + "step": 3047000 + }, + { + "epoch": 1.83, + "learning_rate": 4.32474803413224e-05, + "loss": 1.2571, + "step": 3047500 + }, + { + "epoch": 1.83, + "learning_rate": 4.3245380375761837e-05, + "loss": 1.2552, + "step": 3048000 + }, + { + "epoch": 1.83, + "learning_rate": 4.324328041020127e-05, + "loss": 1.2866, + "step": 3048500 + }, + { + "epoch": 1.83, + "learning_rate": 4.324118044464071e-05, + "loss": 1.278, + "step": 3049000 + }, + { + "epoch": 1.83, + "learning_rate": 4.3239080479080144e-05, + "loss": 1.2773, + "step": 3049500 + }, + { + "epoch": 1.83, + "learning_rate": 4.323698051351958e-05, + "loss": 1.2513, + "step": 3050000 + }, + { + "epoch": 1.83, + "learning_rate": 4.323488054795902e-05, + "loss": 1.2832, + "step": 3050500 + }, + { + "epoch": 1.83, + "learning_rate": 4.323278058239845e-05, + "loss": 1.2647, + "step": 3051000 + }, + { + "epoch": 1.83, + "learning_rate": 4.3230684816769004e-05, + "loss": 1.2592, + "step": 3051500 + }, + { + "epoch": 1.83, + "learning_rate": 4.322858485120844e-05, + "loss": 1.2351, + "step": 3052000 + }, + { + "epoch": 1.83, + "learning_rate": 4.322648488564788e-05, + "loss": 1.2721, + "step": 3052500 + }, + { + "epoch": 1.83, + "learning_rate": 4.322438492008731e-05, + "loss": 1.2302, + "step": 3053000 + }, + { + "epoch": 1.83, + "learning_rate": 4.3222289154457865e-05, + "loss": 1.2492, + "step": 3053500 + }, + { + "epoch": 1.83, + "learning_rate": 4.3220189188897305e-05, + "loss": 1.2827, + "step": 3054000 + }, + { + "epoch": 1.83, + "learning_rate": 4.321808922333674e-05, + "loss": 1.264, + "step": 3054500 + }, + { + "epoch": 1.83, + "learning_rate": 4.321598925777617e-05, + "loss": 1.2814, + "step": 3055000 + }, + { + "epoch": 1.83, + "learning_rate": 4.321389769207785e-05, + "loss": 1.261, + "step": 3055500 + }, + { + "epoch": 1.83, + "learning_rate": 4.321179772651728e-05, + "loss": 1.2875, + "step": 3056000 + }, + { + "epoch": 1.83, + "learning_rate": 4.320969776095672e-05, + "loss": 1.2747, + "step": 3056500 + }, + { + "epoch": 1.83, + "learning_rate": 4.320759779539615e-05, + "loss": 1.2918, + "step": 3057000 + }, + { + "epoch": 1.83, + "learning_rate": 4.3205497829835587e-05, + "loss": 1.256, + "step": 3057500 + }, + { + "epoch": 1.83, + "learning_rate": 4.320339786427503e-05, + "loss": 1.2927, + "step": 3058000 + }, + { + "epoch": 1.83, + "learning_rate": 4.320129789871446e-05, + "loss": 1.2408, + "step": 3058500 + }, + { + "epoch": 1.83, + "learning_rate": 4.3199197933153894e-05, + "loss": 1.2707, + "step": 3059000 + }, + { + "epoch": 1.83, + "learning_rate": 4.319710216752445e-05, + "loss": 1.2634, + "step": 3059500 + }, + { + "epoch": 1.83, + "learning_rate": 4.319500220196389e-05, + "loss": 1.2753, + "step": 3060000 + }, + { + "epoch": 1.83, + "learning_rate": 4.319290643633444e-05, + "loss": 1.2585, + "step": 3060500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3190806470773875e-05, + "loss": 1.2788, + "step": 3061000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3188706505213315e-05, + "loss": 1.2403, + "step": 3061500 + }, + { + "epoch": 1.84, + "learning_rate": 4.318660653965275e-05, + "loss": 1.2662, + "step": 3062000 + }, + { + "epoch": 1.84, + "learning_rate": 4.318451077402331e-05, + "loss": 1.2324, + "step": 3062500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3182410808462735e-05, + "loss": 1.2866, + "step": 3063000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3180310842902176e-05, + "loss": 1.2782, + "step": 3063500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3178215077272736e-05, + "loss": 1.251, + "step": 3064000 + }, + { + "epoch": 1.84, + "learning_rate": 4.317611511171217e-05, + "loss": 1.2853, + "step": 3064500 + }, + { + "epoch": 1.84, + "learning_rate": 4.31740151461516e-05, + "loss": 1.2673, + "step": 3065000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3171919380522156e-05, + "loss": 1.254, + "step": 3065500 + }, + { + "epoch": 1.84, + "learning_rate": 4.31698194149616e-05, + "loss": 1.3198, + "step": 3066000 + }, + { + "epoch": 1.84, + "learning_rate": 4.316771944940103e-05, + "loss": 1.253, + "step": 3066500 + }, + { + "epoch": 1.84, + "learning_rate": 4.316561948384047e-05, + "loss": 1.2679, + "step": 3067000 + }, + { + "epoch": 1.84, + "learning_rate": 4.31635195182799e-05, + "loss": 1.2806, + "step": 3067500 + }, + { + "epoch": 1.84, + "learning_rate": 4.316141955271933e-05, + "loss": 1.2512, + "step": 3068000 + }, + { + "epoch": 1.84, + "learning_rate": 4.315931958715877e-05, + "loss": 1.2734, + "step": 3068500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3157219621598204e-05, + "loss": 1.2736, + "step": 3069000 + }, + { + "epoch": 1.84, + "learning_rate": 4.315511965603764e-05, + "loss": 1.2782, + "step": 3069500 + }, + { + "epoch": 1.84, + "learning_rate": 4.315301969047708e-05, + "loss": 1.2426, + "step": 3070000 + }, + { + "epoch": 1.84, + "learning_rate": 4.315091972491651e-05, + "loss": 1.2562, + "step": 3070500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3148819759355945e-05, + "loss": 1.2655, + "step": 3071000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3146719793795385e-05, + "loss": 1.2834, + "step": 3071500 + }, + { + "epoch": 1.84, + "learning_rate": 4.314461982823482e-05, + "loss": 1.2869, + "step": 3072000 + }, + { + "epoch": 1.84, + "learning_rate": 4.314251986267425e-05, + "loss": 1.2349, + "step": 3072500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3140419897113685e-05, + "loss": 1.2657, + "step": 3073000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3138324131484246e-05, + "loss": 1.2556, + "step": 3073500 + }, + { + "epoch": 1.84, + "learning_rate": 4.313622416592368e-05, + "loss": 1.2601, + "step": 3074000 + }, + { + "epoch": 1.84, + "learning_rate": 4.313412840029423e-05, + "loss": 1.2684, + "step": 3074500 + }, + { + "epoch": 1.84, + "learning_rate": 4.313202843473367e-05, + "loss": 1.2592, + "step": 3075000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3129928469173106e-05, + "loss": 1.2785, + "step": 3075500 + }, + { + "epoch": 1.84, + "learning_rate": 4.312782850361254e-05, + "loss": 1.2401, + "step": 3076000 + }, + { + "epoch": 1.84, + "learning_rate": 4.312572853805198e-05, + "loss": 1.2561, + "step": 3076500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3123628572491414e-05, + "loss": 1.2457, + "step": 3077000 + }, + { + "epoch": 1.85, + "learning_rate": 4.312152860693085e-05, + "loss": 1.2543, + "step": 3077500 + }, + { + "epoch": 1.85, + "learning_rate": 4.31194328413014e-05, + "loss": 1.2555, + "step": 3078000 + }, + { + "epoch": 1.85, + "learning_rate": 4.311733287574084e-05, + "loss": 1.2487, + "step": 3078500 + }, + { + "epoch": 1.85, + "learning_rate": 4.3115232910180274e-05, + "loss": 1.2905, + "step": 3079000 + }, + { + "epoch": 1.85, + "learning_rate": 4.311313294461971e-05, + "loss": 1.242, + "step": 3079500 + }, + { + "epoch": 1.85, + "learning_rate": 4.311103297905915e-05, + "loss": 1.2668, + "step": 3080000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3108933013498575e-05, + "loss": 1.2938, + "step": 3080500 + }, + { + "epoch": 1.85, + "learning_rate": 4.310683304793801e-05, + "loss": 1.2484, + "step": 3081000 + }, + { + "epoch": 1.85, + "learning_rate": 4.310473308237745e-05, + "loss": 1.2249, + "step": 3081500 + }, + { + "epoch": 1.85, + "learning_rate": 4.310263731674801e-05, + "loss": 1.2519, + "step": 3082000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3100537351187435e-05, + "loss": 1.2675, + "step": 3082500 + }, + { + "epoch": 1.85, + "learning_rate": 4.3098437385626876e-05, + "loss": 1.2619, + "step": 3083000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3096341619997436e-05, + "loss": 1.2876, + "step": 3083500 + }, + { + "epoch": 1.85, + "learning_rate": 4.309424165443687e-05, + "loss": 1.2662, + "step": 3084000 + }, + { + "epoch": 1.85, + "learning_rate": 4.30921416888763e-05, + "loss": 1.2465, + "step": 3084500 + }, + { + "epoch": 1.85, + "learning_rate": 4.3090041723315736e-05, + "loss": 1.2585, + "step": 3085000 + }, + { + "epoch": 1.85, + "learning_rate": 4.308794175775517e-05, + "loss": 1.2758, + "step": 3085500 + }, + { + "epoch": 1.85, + "learning_rate": 4.30858417921946e-05, + "loss": 1.2778, + "step": 3086000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3083741826634044e-05, + "loss": 1.2478, + "step": 3086500 + }, + { + "epoch": 1.85, + "learning_rate": 4.308164186107348e-05, + "loss": 1.2621, + "step": 3087000 + }, + { + "epoch": 1.85, + "learning_rate": 4.307955029537516e-05, + "loss": 1.2696, + "step": 3087500 + }, + { + "epoch": 1.85, + "learning_rate": 4.307745032981459e-05, + "loss": 1.2838, + "step": 3088000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3075350364254024e-05, + "loss": 1.2666, + "step": 3088500 + }, + { + "epoch": 1.85, + "learning_rate": 4.3073250398693465e-05, + "loss": 1.2525, + "step": 3089000 + }, + { + "epoch": 1.85, + "learning_rate": 4.30711504331329e-05, + "loss": 1.257, + "step": 3089500 + }, + { + "epoch": 1.85, + "learning_rate": 4.306905046757233e-05, + "loss": 1.2452, + "step": 3090000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3066950502011765e-05, + "loss": 1.2539, + "step": 3090500 + }, + { + "epoch": 1.85, + "learning_rate": 4.30648505364512e-05, + "loss": 1.2632, + "step": 3091000 + }, + { + "epoch": 1.85, + "learning_rate": 4.306275057089064e-05, + "loss": 1.2429, + "step": 3091500 + }, + { + "epoch": 1.85, + "learning_rate": 4.30606548052612e-05, + "loss": 1.2784, + "step": 3092000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3058554839700626e-05, + "loss": 1.2769, + "step": 3092500 + }, + { + "epoch": 1.85, + "learning_rate": 4.305645487414006e-05, + "loss": 1.272, + "step": 3093000 + }, + { + "epoch": 1.85, + "learning_rate": 4.30543549085795e-05, + "loss": 1.2478, + "step": 3093500 + }, + { + "epoch": 1.85, + "learning_rate": 4.305225494301893e-05, + "loss": 1.2507, + "step": 3094000 + }, + { + "epoch": 1.86, + "learning_rate": 4.3050159177389486e-05, + "loss": 1.2705, + "step": 3094500 + }, + { + "epoch": 1.86, + "learning_rate": 4.304805921182892e-05, + "loss": 1.2597, + "step": 3095000 + }, + { + "epoch": 1.86, + "learning_rate": 4.304595924626836e-05, + "loss": 1.2493, + "step": 3095500 + }, + { + "epoch": 1.86, + "learning_rate": 4.3043859280707794e-05, + "loss": 1.2844, + "step": 3096000 + }, + { + "epoch": 1.86, + "learning_rate": 4.304175931514723e-05, + "loss": 1.2512, + "step": 3096500 + }, + { + "epoch": 1.86, + "learning_rate": 4.303965934958667e-05, + "loss": 1.2771, + "step": 3097000 + }, + { + "epoch": 1.86, + "learning_rate": 4.30375593840261e-05, + "loss": 1.2674, + "step": 3097500 + }, + { + "epoch": 1.86, + "learning_rate": 4.3035459418465534e-05, + "loss": 1.2445, + "step": 3098000 + }, + { + "epoch": 1.86, + "learning_rate": 4.3033359452904974e-05, + "loss": 1.2614, + "step": 3098500 + }, + { + "epoch": 1.86, + "learning_rate": 4.303126368727553e-05, + "loss": 1.253, + "step": 3099000 + }, + { + "epoch": 1.86, + "learning_rate": 4.302916372171496e-05, + "loss": 1.2486, + "step": 3099500 + }, + { + "epoch": 1.86, + "learning_rate": 4.30270637561544e-05, + "loss": 1.2472, + "step": 3100000 + }, + { + "epoch": 1.86, + "eval_loss": 1.209123134613037, + "eval_runtime": 1124.5793, + "eval_samples_per_second": 468.371, + "eval_steps_per_second": 78.062, + "step": 3100000 + }, + { + "epoch": 1.86, + "learning_rate": 4.3024963790593835e-05, + "loss": 1.2343, + "step": 3100500 + }, + { + "epoch": 1.86, + "learning_rate": 4.302286802496439e-05, + "loss": 1.224, + "step": 3101000 + }, + { + "epoch": 1.86, + "learning_rate": 4.302076805940382e-05, + "loss": 1.26, + "step": 3101500 + }, + { + "epoch": 1.86, + "learning_rate": 4.301866809384326e-05, + "loss": 1.2775, + "step": 3102000 + }, + { + "epoch": 1.86, + "learning_rate": 4.3016568128282696e-05, + "loss": 1.2587, + "step": 3102500 + }, + { + "epoch": 1.86, + "learning_rate": 4.301447236265325e-05, + "loss": 1.2448, + "step": 3103000 + }, + { + "epoch": 1.86, + "learning_rate": 4.301237659702381e-05, + "loss": 1.2553, + "step": 3103500 + }, + { + "epoch": 1.86, + "learning_rate": 4.3010280831394363e-05, + "loss": 1.2446, + "step": 3104000 + }, + { + "epoch": 1.86, + "learning_rate": 4.3008180865833804e-05, + "loss": 1.29, + "step": 3104500 + }, + { + "epoch": 1.86, + "learning_rate": 4.300608090027324e-05, + "loss": 1.2545, + "step": 3105000 + }, + { + "epoch": 1.86, + "learning_rate": 4.300398093471267e-05, + "loss": 1.2646, + "step": 3105500 + }, + { + "epoch": 1.86, + "learning_rate": 4.300188096915211e-05, + "loss": 1.2599, + "step": 3106000 + }, + { + "epoch": 1.86, + "learning_rate": 4.299978100359154e-05, + "loss": 1.2752, + "step": 3106500 + }, + { + "epoch": 1.86, + "learning_rate": 4.299768103803097e-05, + "loss": 1.276, + "step": 3107000 + }, + { + "epoch": 1.86, + "learning_rate": 4.299558107247041e-05, + "loss": 1.2405, + "step": 3107500 + }, + { + "epoch": 1.86, + "learning_rate": 4.2993481106909845e-05, + "loss": 1.2672, + "step": 3108000 + }, + { + "epoch": 1.86, + "learning_rate": 4.299138114134928e-05, + "loss": 1.2679, + "step": 3108500 + }, + { + "epoch": 1.86, + "learning_rate": 4.298928537571983e-05, + "loss": 1.2633, + "step": 3109000 + }, + { + "epoch": 1.86, + "learning_rate": 4.298718541015927e-05, + "loss": 1.2684, + "step": 3109500 + }, + { + "epoch": 1.86, + "learning_rate": 4.2985085444598705e-05, + "loss": 1.2687, + "step": 3110000 + }, + { + "epoch": 1.86, + "learning_rate": 4.298298547903814e-05, + "loss": 1.2805, + "step": 3110500 + }, + { + "epoch": 1.87, + "learning_rate": 4.298088551347758e-05, + "loss": 1.2818, + "step": 3111000 + }, + { + "epoch": 1.87, + "learning_rate": 4.297878974784813e-05, + "loss": 1.2645, + "step": 3111500 + }, + { + "epoch": 1.87, + "learning_rate": 4.2976689782287566e-05, + "loss": 1.2534, + "step": 3112000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2974589816727006e-05, + "loss": 1.2574, + "step": 3112500 + }, + { + "epoch": 1.87, + "learning_rate": 4.297248985116644e-05, + "loss": 1.2234, + "step": 3113000 + }, + { + "epoch": 1.87, + "learning_rate": 4.297038988560587e-05, + "loss": 1.2387, + "step": 3113500 + }, + { + "epoch": 1.87, + "learning_rate": 4.2968289920045313e-05, + "loss": 1.2629, + "step": 3114000 + }, + { + "epoch": 1.87, + "learning_rate": 4.296619415441587e-05, + "loss": 1.2619, + "step": 3114500 + }, + { + "epoch": 1.87, + "learning_rate": 4.29640941888553e-05, + "loss": 1.282, + "step": 3115000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2961994223294734e-05, + "loss": 1.2529, + "step": 3115500 + }, + { + "epoch": 1.87, + "learning_rate": 4.2959894257734174e-05, + "loss": 1.2745, + "step": 3116000 + }, + { + "epoch": 1.87, + "learning_rate": 4.295779429217361e-05, + "loss": 1.2258, + "step": 3116500 + }, + { + "epoch": 1.87, + "learning_rate": 4.295569432661304e-05, + "loss": 1.2688, + "step": 3117000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2953598560983595e-05, + "loss": 1.2593, + "step": 3117500 + }, + { + "epoch": 1.87, + "learning_rate": 4.2951498595423035e-05, + "loss": 1.2499, + "step": 3118000 + }, + { + "epoch": 1.87, + "learning_rate": 4.294939862986247e-05, + "loss": 1.2717, + "step": 3118500 + }, + { + "epoch": 1.87, + "learning_rate": 4.294729866430191e-05, + "loss": 1.2887, + "step": 3119000 + }, + { + "epoch": 1.87, + "learning_rate": 4.294519869874134e-05, + "loss": 1.2704, + "step": 3119500 + }, + { + "epoch": 1.87, + "learning_rate": 4.2943098733180776e-05, + "loss": 1.2706, + "step": 3120000 + }, + { + "epoch": 1.87, + "learning_rate": 4.294100296755133e-05, + "loss": 1.2656, + "step": 3120500 + }, + { + "epoch": 1.87, + "learning_rate": 4.293890300199077e-05, + "loss": 1.2632, + "step": 3121000 + }, + { + "epoch": 1.87, + "learning_rate": 4.29368030364302e-05, + "loss": 1.3073, + "step": 3121500 + }, + { + "epoch": 1.87, + "learning_rate": 4.2934703070869636e-05, + "loss": 1.2898, + "step": 3122000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2932603105309077e-05, + "loss": 1.2434, + "step": 3122500 + }, + { + "epoch": 1.87, + "learning_rate": 4.293050733967963e-05, + "loss": 1.2646, + "step": 3123000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2928407374119064e-05, + "loss": 1.2725, + "step": 3123500 + }, + { + "epoch": 1.87, + "learning_rate": 4.29263074085585e-05, + "loss": 1.2688, + "step": 3124000 + }, + { + "epoch": 1.87, + "learning_rate": 4.292420744299794e-05, + "loss": 1.2712, + "step": 3124500 + }, + { + "epoch": 1.87, + "learning_rate": 4.292211167736849e-05, + "loss": 1.2777, + "step": 3125000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2920011711807924e-05, + "loss": 1.2765, + "step": 3125500 + }, + { + "epoch": 1.87, + "learning_rate": 4.2917911746247365e-05, + "loss": 1.2509, + "step": 3126000 + }, + { + "epoch": 1.87, + "learning_rate": 4.29158117806868e-05, + "loss": 1.2731, + "step": 3126500 + }, + { + "epoch": 1.87, + "learning_rate": 4.291371181512623e-05, + "loss": 1.2582, + "step": 3127000 + }, + { + "epoch": 1.88, + "learning_rate": 4.2911616049496785e-05, + "loss": 1.2426, + "step": 3127500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2909516083936225e-05, + "loss": 1.2886, + "step": 3128000 + }, + { + "epoch": 1.88, + "learning_rate": 4.290741611837566e-05, + "loss": 1.2447, + "step": 3128500 + }, + { + "epoch": 1.88, + "learning_rate": 4.290531615281509e-05, + "loss": 1.2977, + "step": 3129000 + }, + { + "epoch": 1.88, + "learning_rate": 4.290321618725453e-05, + "loss": 1.292, + "step": 3129500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2901116221693966e-05, + "loss": 1.2684, + "step": 3130000 + }, + { + "epoch": 1.88, + "learning_rate": 4.28990162561334e-05, + "loss": 1.2826, + "step": 3130500 + }, + { + "epoch": 1.88, + "learning_rate": 4.289691629057283e-05, + "loss": 1.2486, + "step": 3131000 + }, + { + "epoch": 1.88, + "learning_rate": 4.289482052494339e-05, + "loss": 1.2773, + "step": 3131500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2892720559382827e-05, + "loss": 1.2591, + "step": 3132000 + }, + { + "epoch": 1.88, + "learning_rate": 4.289062059382226e-05, + "loss": 1.2362, + "step": 3132500 + }, + { + "epoch": 1.88, + "learning_rate": 4.288852482819282e-05, + "loss": 1.2681, + "step": 3133000 + }, + { + "epoch": 1.88, + "learning_rate": 4.2886429062563374e-05, + "loss": 1.2386, + "step": 3133500 + }, + { + "epoch": 1.88, + "learning_rate": 4.288432909700281e-05, + "loss": 1.2811, + "step": 3134000 + }, + { + "epoch": 1.88, + "learning_rate": 4.288223333137337e-05, + "loss": 1.2685, + "step": 3134500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2880133365812794e-05, + "loss": 1.2437, + "step": 3135000 + }, + { + "epoch": 1.88, + "learning_rate": 4.2878033400252235e-05, + "loss": 1.2618, + "step": 3135500 + }, + { + "epoch": 1.88, + "learning_rate": 4.287593343469167e-05, + "loss": 1.2622, + "step": 3136000 + }, + { + "epoch": 1.88, + "learning_rate": 4.28738334691311e-05, + "loss": 1.2779, + "step": 3136500 + }, + { + "epoch": 1.88, + "learning_rate": 4.287173350357054e-05, + "loss": 1.2474, + "step": 3137000 + }, + { + "epoch": 1.88, + "learning_rate": 4.2869633538009975e-05, + "loss": 1.2797, + "step": 3137500 + }, + { + "epoch": 1.88, + "learning_rate": 4.286753357244941e-05, + "loss": 1.2861, + "step": 3138000 + }, + { + "epoch": 1.88, + "learning_rate": 4.286543360688885e-05, + "loss": 1.2461, + "step": 3138500 + }, + { + "epoch": 1.88, + "learning_rate": 4.286333364132828e-05, + "loss": 1.2612, + "step": 3139000 + }, + { + "epoch": 1.88, + "learning_rate": 4.2861233675767716e-05, + "loss": 1.2474, + "step": 3139500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2859133710207156e-05, + "loss": 1.2752, + "step": 3140000 + }, + { + "epoch": 1.88, + "learning_rate": 4.285703374464658e-05, + "loss": 1.2569, + "step": 3140500 + }, + { + "epoch": 1.88, + "learning_rate": 4.285493377908602e-05, + "loss": 1.2494, + "step": 3141000 + }, + { + "epoch": 1.88, + "learning_rate": 4.2852833813525457e-05, + "loss": 1.2851, + "step": 3141500 + }, + { + "epoch": 1.88, + "learning_rate": 4.285073384796489e-05, + "loss": 1.2685, + "step": 3142000 + }, + { + "epoch": 1.88, + "learning_rate": 4.284863388240433e-05, + "loss": 1.2703, + "step": 3142500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2846542316706004e-05, + "loss": 1.2539, + "step": 3143000 + }, + { + "epoch": 1.88, + "learning_rate": 4.2844442351145444e-05, + "loss": 1.2712, + "step": 3143500 + }, + { + "epoch": 1.88, + "learning_rate": 4.284234238558488e-05, + "loss": 1.2302, + "step": 3144000 + }, + { + "epoch": 1.89, + "learning_rate": 4.284024661995543e-05, + "loss": 1.2721, + "step": 3144500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2838146654394865e-05, + "loss": 1.2504, + "step": 3145000 + }, + { + "epoch": 1.89, + "learning_rate": 4.2836046688834305e-05, + "loss": 1.2896, + "step": 3145500 + }, + { + "epoch": 1.89, + "learning_rate": 4.283394672327374e-05, + "loss": 1.2657, + "step": 3146000 + }, + { + "epoch": 1.89, + "learning_rate": 4.283184675771317e-05, + "loss": 1.2266, + "step": 3146500 + }, + { + "epoch": 1.89, + "learning_rate": 4.282974679215261e-05, + "loss": 1.2686, + "step": 3147000 + }, + { + "epoch": 1.89, + "learning_rate": 4.2827646826592045e-05, + "loss": 1.2506, + "step": 3147500 + }, + { + "epoch": 1.89, + "learning_rate": 4.282554686103148e-05, + "loss": 1.2483, + "step": 3148000 + }, + { + "epoch": 1.89, + "learning_rate": 4.282344689547091e-05, + "loss": 1.2456, + "step": 3148500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2821346929910346e-05, + "loss": 1.2521, + "step": 3149000 + }, + { + "epoch": 1.89, + "learning_rate": 4.2819251164280906e-05, + "loss": 1.2561, + "step": 3149500 + }, + { + "epoch": 1.89, + "learning_rate": 4.281715539865146e-05, + "loss": 1.25, + "step": 3150000 + }, + { + "epoch": 1.89, + "learning_rate": 4.281505963302201e-05, + "loss": 1.2924, + "step": 3150500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2812959667461454e-05, + "loss": 1.243, + "step": 3151000 + }, + { + "epoch": 1.89, + "learning_rate": 4.281085970190089e-05, + "loss": 1.2674, + "step": 3151500 + }, + { + "epoch": 1.89, + "learning_rate": 4.280875973634032e-05, + "loss": 1.2568, + "step": 3152000 + }, + { + "epoch": 1.89, + "learning_rate": 4.280665977077976e-05, + "loss": 1.2808, + "step": 3152500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2804559805219194e-05, + "loss": 1.2469, + "step": 3153000 + }, + { + "epoch": 1.89, + "learning_rate": 4.280245983965863e-05, + "loss": 1.2478, + "step": 3153500 + }, + { + "epoch": 1.89, + "learning_rate": 4.280035987409807e-05, + "loss": 1.275, + "step": 3154000 + }, + { + "epoch": 1.89, + "learning_rate": 4.27982599085375e-05, + "loss": 1.2479, + "step": 3154500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2796159942976935e-05, + "loss": 1.2588, + "step": 3155000 + }, + { + "epoch": 1.89, + "learning_rate": 4.279405997741637e-05, + "loss": 1.2645, + "step": 3155500 + }, + { + "epoch": 1.89, + "learning_rate": 4.27919600118558e-05, + "loss": 1.2647, + "step": 3156000 + }, + { + "epoch": 1.89, + "learning_rate": 4.278986004629524e-05, + "loss": 1.266, + "step": 3156500 + }, + { + "epoch": 1.89, + "learning_rate": 4.27877642806658e-05, + "loss": 1.2583, + "step": 3157000 + }, + { + "epoch": 1.89, + "learning_rate": 4.278566431510523e-05, + "loss": 1.2339, + "step": 3157500 + }, + { + "epoch": 1.89, + "learning_rate": 4.278356434954466e-05, + "loss": 1.299, + "step": 3158000 + }, + { + "epoch": 1.89, + "learning_rate": 4.27814643839841e-05, + "loss": 1.2854, + "step": 3158500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2779364418423536e-05, + "loss": 1.2619, + "step": 3159000 + }, + { + "epoch": 1.89, + "learning_rate": 4.277726445286297e-05, + "loss": 1.2424, + "step": 3159500 + }, + { + "epoch": 1.89, + "learning_rate": 4.277516448730241e-05, + "loss": 1.2615, + "step": 3160000 + }, + { + "epoch": 1.89, + "learning_rate": 4.277306452174184e-05, + "loss": 1.2373, + "step": 3160500 + }, + { + "epoch": 1.9, + "learning_rate": 4.27709687561124e-05, + "loss": 1.2556, + "step": 3161000 + }, + { + "epoch": 1.9, + "learning_rate": 4.276886879055183e-05, + "loss": 1.229, + "step": 3161500 + }, + { + "epoch": 1.9, + "learning_rate": 4.276676882499127e-05, + "loss": 1.2517, + "step": 3162000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2764668859430704e-05, + "loss": 1.266, + "step": 3162500 + }, + { + "epoch": 1.9, + "learning_rate": 4.276256889387014e-05, + "loss": 1.2544, + "step": 3163000 + }, + { + "epoch": 1.9, + "learning_rate": 4.27604731282407e-05, + "loss": 1.2833, + "step": 3163500 + }, + { + "epoch": 1.9, + "learning_rate": 4.275837316268013e-05, + "loss": 1.2409, + "step": 3164000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2756273197119565e-05, + "loss": 1.3014, + "step": 3164500 + }, + { + "epoch": 1.9, + "learning_rate": 4.2754173231559005e-05, + "loss": 1.2382, + "step": 3165000 + }, + { + "epoch": 1.9, + "learning_rate": 4.275207326599844e-05, + "loss": 1.2918, + "step": 3165500 + }, + { + "epoch": 1.9, + "learning_rate": 4.274997330043787e-05, + "loss": 1.2665, + "step": 3166000 + }, + { + "epoch": 1.9, + "learning_rate": 4.274787333487731e-05, + "loss": 1.2504, + "step": 3166500 + }, + { + "epoch": 1.9, + "learning_rate": 4.2745773369316746e-05, + "loss": 1.2563, + "step": 3167000 + }, + { + "epoch": 1.9, + "learning_rate": 4.27436776036873e-05, + "loss": 1.2843, + "step": 3167500 + }, + { + "epoch": 1.9, + "learning_rate": 4.274157763812673e-05, + "loss": 1.256, + "step": 3168000 + }, + { + "epoch": 1.9, + "learning_rate": 4.273947767256617e-05, + "loss": 1.2548, + "step": 3168500 + }, + { + "epoch": 1.9, + "learning_rate": 4.2737377707005606e-05, + "loss": 1.2641, + "step": 3169000 + }, + { + "epoch": 1.9, + "learning_rate": 4.273528194137616e-05, + "loss": 1.2389, + "step": 3169500 + }, + { + "epoch": 1.9, + "learning_rate": 4.273318197581559e-05, + "loss": 1.2574, + "step": 3170000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2731082010255034e-05, + "loss": 1.2612, + "step": 3170500 + }, + { + "epoch": 1.9, + "learning_rate": 4.272898204469447e-05, + "loss": 1.2486, + "step": 3171000 + }, + { + "epoch": 1.9, + "learning_rate": 4.272688627906502e-05, + "loss": 1.218, + "step": 3171500 + }, + { + "epoch": 1.9, + "learning_rate": 4.2724790513435574e-05, + "loss": 1.3072, + "step": 3172000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2722690547875014e-05, + "loss": 1.2725, + "step": 3172500 + }, + { + "epoch": 1.9, + "learning_rate": 4.272059058231445e-05, + "loss": 1.2654, + "step": 3173000 + }, + { + "epoch": 1.9, + "learning_rate": 4.271849061675388e-05, + "loss": 1.2903, + "step": 3173500 + }, + { + "epoch": 1.9, + "learning_rate": 4.271639065119332e-05, + "loss": 1.2635, + "step": 3174000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2714290685632755e-05, + "loss": 1.2605, + "step": 3174500 + }, + { + "epoch": 1.9, + "learning_rate": 4.271219072007219e-05, + "loss": 1.2348, + "step": 3175000 + }, + { + "epoch": 1.9, + "learning_rate": 4.271009075451163e-05, + "loss": 1.2429, + "step": 3175500 + }, + { + "epoch": 1.9, + "learning_rate": 4.270799498888218e-05, + "loss": 1.2244, + "step": 3176000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2705895023321616e-05, + "loss": 1.2714, + "step": 3176500 + }, + { + "epoch": 1.9, + "learning_rate": 4.270379925769217e-05, + "loss": 1.2345, + "step": 3177000 + }, + { + "epoch": 1.91, + "learning_rate": 4.270169929213161e-05, + "loss": 1.2598, + "step": 3177500 + }, + { + "epoch": 1.91, + "learning_rate": 4.269959932657104e-05, + "loss": 1.2585, + "step": 3178000 + }, + { + "epoch": 1.91, + "learning_rate": 4.2697499361010477e-05, + "loss": 1.2597, + "step": 3178500 + }, + { + "epoch": 1.91, + "learning_rate": 4.269539939544992e-05, + "loss": 1.2622, + "step": 3179000 + }, + { + "epoch": 1.91, + "learning_rate": 4.269330362982047e-05, + "loss": 1.2243, + "step": 3179500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2691203664259904e-05, + "loss": 1.2413, + "step": 3180000 + }, + { + "epoch": 1.91, + "learning_rate": 4.268910369869934e-05, + "loss": 1.2682, + "step": 3180500 + }, + { + "epoch": 1.91, + "learning_rate": 4.268700373313878e-05, + "loss": 1.2487, + "step": 3181000 + }, + { + "epoch": 1.91, + "learning_rate": 4.268490376757821e-05, + "loss": 1.2745, + "step": 3181500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2682808001948765e-05, + "loss": 1.2335, + "step": 3182000 + }, + { + "epoch": 1.91, + "learning_rate": 4.26807080363882e-05, + "loss": 1.2597, + "step": 3182500 + }, + { + "epoch": 1.91, + "learning_rate": 4.267860807082764e-05, + "loss": 1.2745, + "step": 3183000 + }, + { + "epoch": 1.91, + "learning_rate": 4.267651230519819e-05, + "loss": 1.2564, + "step": 3183500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2674412339637625e-05, + "loss": 1.2421, + "step": 3184000 + }, + { + "epoch": 1.91, + "learning_rate": 4.2672312374077065e-05, + "loss": 1.253, + "step": 3184500 + }, + { + "epoch": 1.91, + "learning_rate": 4.26702124085165e-05, + "loss": 1.2387, + "step": 3185000 + }, + { + "epoch": 1.91, + "learning_rate": 4.266811244295593e-05, + "loss": 1.2667, + "step": 3185500 + }, + { + "epoch": 1.91, + "learning_rate": 4.266601247739537e-05, + "loss": 1.2406, + "step": 3186000 + }, + { + "epoch": 1.91, + "learning_rate": 4.2663912511834806e-05, + "loss": 1.2563, + "step": 3186500 + }, + { + "epoch": 1.91, + "learning_rate": 4.266181254627424e-05, + "loss": 1.2614, + "step": 3187000 + }, + { + "epoch": 1.91, + "learning_rate": 4.265971258071368e-05, + "loss": 1.2685, + "step": 3187500 + }, + { + "epoch": 1.91, + "learning_rate": 4.265761681508423e-05, + "loss": 1.2725, + "step": 3188000 + }, + { + "epoch": 1.91, + "learning_rate": 4.265551684952367e-05, + "loss": 1.2487, + "step": 3188500 + }, + { + "epoch": 1.91, + "learning_rate": 4.26534168839631e-05, + "loss": 1.2513, + "step": 3189000 + }, + { + "epoch": 1.91, + "learning_rate": 4.265131691840254e-05, + "loss": 1.2562, + "step": 3189500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2649216952841974e-05, + "loss": 1.2393, + "step": 3190000 + }, + { + "epoch": 1.91, + "learning_rate": 4.264711698728141e-05, + "loss": 1.2384, + "step": 3190500 + }, + { + "epoch": 1.91, + "learning_rate": 4.264502122165197e-05, + "loss": 1.2399, + "step": 3191000 + }, + { + "epoch": 1.91, + "learning_rate": 4.26429212560914e-05, + "loss": 1.2503, + "step": 3191500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2640821290530835e-05, + "loss": 1.2649, + "step": 3192000 + }, + { + "epoch": 1.91, + "learning_rate": 4.2638721324970275e-05, + "loss": 1.248, + "step": 3192500 + }, + { + "epoch": 1.91, + "learning_rate": 4.263662135940971e-05, + "loss": 1.2149, + "step": 3193000 + }, + { + "epoch": 1.91, + "learning_rate": 4.263452139384914e-05, + "loss": 1.2251, + "step": 3193500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2632421428288575e-05, + "loss": 1.2697, + "step": 3194000 + }, + { + "epoch": 1.92, + "learning_rate": 4.263032146272801e-05, + "loss": 1.2772, + "step": 3194500 + }, + { + "epoch": 1.92, + "learning_rate": 4.262822989702969e-05, + "loss": 1.2437, + "step": 3195000 + }, + { + "epoch": 1.92, + "learning_rate": 4.262612993146912e-05, + "loss": 1.2492, + "step": 3195500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2624029965908556e-05, + "loss": 1.2316, + "step": 3196000 + }, + { + "epoch": 1.92, + "learning_rate": 4.2621930000347996e-05, + "loss": 1.2439, + "step": 3196500 + }, + { + "epoch": 1.92, + "learning_rate": 4.261983003478743e-05, + "loss": 1.2871, + "step": 3197000 + }, + { + "epoch": 1.92, + "learning_rate": 4.261773006922686e-05, + "loss": 1.2511, + "step": 3197500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2615630103666304e-05, + "loss": 1.2308, + "step": 3198000 + }, + { + "epoch": 1.92, + "learning_rate": 4.261353013810573e-05, + "loss": 1.2586, + "step": 3198500 + }, + { + "epoch": 1.92, + "learning_rate": 4.261143437247629e-05, + "loss": 1.253, + "step": 3199000 + }, + { + "epoch": 1.92, + "learning_rate": 4.2609338606846844e-05, + "loss": 1.2774, + "step": 3199500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2607238641286284e-05, + "loss": 1.2039, + "step": 3200000 + }, + { + "epoch": 1.92, + "eval_loss": 1.2074214220046997, + "eval_runtime": 1108.4818, + "eval_samples_per_second": 475.172, + "eval_steps_per_second": 79.196, + "step": 3200000 + }, + { + "epoch": 1.92, + "learning_rate": 4.260513867572572e-05, + "loss": 1.2663, + "step": 3200500 + }, + { + "epoch": 1.92, + "learning_rate": 4.260303871016515e-05, + "loss": 1.253, + "step": 3201000 + }, + { + "epoch": 1.92, + "learning_rate": 4.260093874460459e-05, + "loss": 1.2607, + "step": 3201500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2598838779044025e-05, + "loss": 1.2493, + "step": 3202000 + }, + { + "epoch": 1.92, + "learning_rate": 4.259674301341458e-05, + "loss": 1.2476, + "step": 3202500 + }, + { + "epoch": 1.92, + "learning_rate": 4.259464304785401e-05, + "loss": 1.2731, + "step": 3203000 + }, + { + "epoch": 1.92, + "learning_rate": 4.259254308229345e-05, + "loss": 1.2501, + "step": 3203500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2590443116732886e-05, + "loss": 1.2505, + "step": 3204000 + }, + { + "epoch": 1.92, + "learning_rate": 4.258834315117232e-05, + "loss": 1.2692, + "step": 3204500 + }, + { + "epoch": 1.92, + "learning_rate": 4.258624318561176e-05, + "loss": 1.245, + "step": 3205000 + }, + { + "epoch": 1.92, + "learning_rate": 4.258414741998231e-05, + "loss": 1.2613, + "step": 3205500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2582047454421746e-05, + "loss": 1.2524, + "step": 3206000 + }, + { + "epoch": 1.92, + "learning_rate": 4.257994748886119e-05, + "loss": 1.2695, + "step": 3206500 + }, + { + "epoch": 1.92, + "learning_rate": 4.257784752330062e-05, + "loss": 1.2779, + "step": 3207000 + }, + { + "epoch": 1.92, + "learning_rate": 4.2575747557740054e-05, + "loss": 1.2607, + "step": 3207500 + }, + { + "epoch": 1.92, + "learning_rate": 4.257364759217949e-05, + "loss": 1.2864, + "step": 3208000 + }, + { + "epoch": 1.92, + "learning_rate": 4.257155602648116e-05, + "loss": 1.2378, + "step": 3208500 + }, + { + "epoch": 1.92, + "learning_rate": 4.25694560609206e-05, + "loss": 1.2748, + "step": 3209000 + }, + { + "epoch": 1.92, + "learning_rate": 4.2567356095360034e-05, + "loss": 1.3, + "step": 3209500 + }, + { + "epoch": 1.92, + "learning_rate": 4.256525612979947e-05, + "loss": 1.2768, + "step": 3210000 + }, + { + "epoch": 1.92, + "learning_rate": 4.256315616423891e-05, + "loss": 1.2322, + "step": 3210500 + }, + { + "epoch": 1.93, + "learning_rate": 4.256105619867834e-05, + "loss": 1.2621, + "step": 3211000 + }, + { + "epoch": 1.93, + "learning_rate": 4.2558956233117775e-05, + "loss": 1.2457, + "step": 3211500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2556860467488335e-05, + "loss": 1.2913, + "step": 3212000 + }, + { + "epoch": 1.93, + "learning_rate": 4.255476050192777e-05, + "loss": 1.2595, + "step": 3212500 + }, + { + "epoch": 1.93, + "learning_rate": 4.25526605363672e-05, + "loss": 1.3004, + "step": 3213000 + }, + { + "epoch": 1.93, + "learning_rate": 4.255056057080664e-05, + "loss": 1.2453, + "step": 3213500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2548460605246076e-05, + "loss": 1.2538, + "step": 3214000 + }, + { + "epoch": 1.93, + "learning_rate": 4.254636063968551e-05, + "loss": 1.2464, + "step": 3214500 + }, + { + "epoch": 1.93, + "learning_rate": 4.254426067412495e-05, + "loss": 1.28, + "step": 3215000 + }, + { + "epoch": 1.93, + "learning_rate": 4.2542160708564376e-05, + "loss": 1.2426, + "step": 3215500 + }, + { + "epoch": 1.93, + "learning_rate": 4.254006074300381e-05, + "loss": 1.2328, + "step": 3216000 + }, + { + "epoch": 1.93, + "learning_rate": 4.253796077744325e-05, + "loss": 1.2959, + "step": 3216500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2535860811882684e-05, + "loss": 1.2548, + "step": 3217000 + }, + { + "epoch": 1.93, + "learning_rate": 4.253376504625324e-05, + "loss": 1.2665, + "step": 3217500 + }, + { + "epoch": 1.93, + "learning_rate": 4.253166508069267e-05, + "loss": 1.2709, + "step": 3218000 + }, + { + "epoch": 1.93, + "learning_rate": 4.252956511513211e-05, + "loss": 1.2179, + "step": 3218500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2527465149571544e-05, + "loss": 1.2469, + "step": 3219000 + }, + { + "epoch": 1.93, + "learning_rate": 4.252536518401098e-05, + "loss": 1.2408, + "step": 3219500 + }, + { + "epoch": 1.93, + "learning_rate": 4.252326941838154e-05, + "loss": 1.2658, + "step": 3220000 + }, + { + "epoch": 1.93, + "learning_rate": 4.252116945282097e-05, + "loss": 1.2749, + "step": 3220500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2519069487260405e-05, + "loss": 1.259, + "step": 3221000 + }, + { + "epoch": 1.93, + "learning_rate": 4.2516969521699845e-05, + "loss": 1.2758, + "step": 3221500 + }, + { + "epoch": 1.93, + "learning_rate": 4.251486955613928e-05, + "loss": 1.2234, + "step": 3222000 + }, + { + "epoch": 1.93, + "learning_rate": 4.251276959057871e-05, + "loss": 1.258, + "step": 3222500 + }, + { + "epoch": 1.93, + "learning_rate": 4.251066962501815e-05, + "loss": 1.258, + "step": 3223000 + }, + { + "epoch": 1.93, + "learning_rate": 4.2508569659457586e-05, + "loss": 1.2398, + "step": 3223500 + }, + { + "epoch": 1.93, + "learning_rate": 4.250647389382814e-05, + "loss": 1.228, + "step": 3224000 + }, + { + "epoch": 1.93, + "learning_rate": 4.250437392826757e-05, + "loss": 1.2617, + "step": 3224500 + }, + { + "epoch": 1.93, + "learning_rate": 4.250227396270701e-05, + "loss": 1.2707, + "step": 3225000 + }, + { + "epoch": 1.93, + "learning_rate": 4.2500173997146447e-05, + "loss": 1.2718, + "step": 3225500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2498078231517e-05, + "loss": 1.2469, + "step": 3226000 + }, + { + "epoch": 1.93, + "learning_rate": 4.249598246588756e-05, + "loss": 1.2453, + "step": 3226500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2493882500326994e-05, + "loss": 1.2633, + "step": 3227000 + }, + { + "epoch": 1.94, + "learning_rate": 4.249178253476643e-05, + "loss": 1.2703, + "step": 3227500 + }, + { + "epoch": 1.94, + "learning_rate": 4.248968256920586e-05, + "loss": 1.2821, + "step": 3228000 + }, + { + "epoch": 1.94, + "learning_rate": 4.24875826036453e-05, + "loss": 1.2644, + "step": 3228500 + }, + { + "epoch": 1.94, + "learning_rate": 4.248548683801586e-05, + "loss": 1.245, + "step": 3229000 + }, + { + "epoch": 1.94, + "learning_rate": 4.248338687245529e-05, + "loss": 1.2813, + "step": 3229500 + }, + { + "epoch": 1.94, + "learning_rate": 4.248128690689472e-05, + "loss": 1.241, + "step": 3230000 + }, + { + "epoch": 1.94, + "learning_rate": 4.247918694133416e-05, + "loss": 1.2352, + "step": 3230500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2477086975773595e-05, + "loss": 1.2492, + "step": 3231000 + }, + { + "epoch": 1.94, + "learning_rate": 4.247498701021303e-05, + "loss": 1.2615, + "step": 3231500 + }, + { + "epoch": 1.94, + "learning_rate": 4.247288704465247e-05, + "loss": 1.2399, + "step": 3232000 + }, + { + "epoch": 1.94, + "learning_rate": 4.24707870790919e-05, + "loss": 1.2631, + "step": 3232500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2468691313462456e-05, + "loss": 1.2476, + "step": 3233000 + }, + { + "epoch": 1.94, + "learning_rate": 4.2466595547833016e-05, + "loss": 1.2551, + "step": 3233500 + }, + { + "epoch": 1.94, + "learning_rate": 4.246449558227245e-05, + "loss": 1.2667, + "step": 3234000 + }, + { + "epoch": 1.94, + "learning_rate": 4.246239561671188e-05, + "loss": 1.2636, + "step": 3234500 + }, + { + "epoch": 1.94, + "learning_rate": 4.246029565115132e-05, + "loss": 1.219, + "step": 3235000 + }, + { + "epoch": 1.94, + "learning_rate": 4.245819568559076e-05, + "loss": 1.2677, + "step": 3235500 + }, + { + "epoch": 1.94, + "learning_rate": 4.245609572003019e-05, + "loss": 1.2509, + "step": 3236000 + }, + { + "epoch": 1.94, + "learning_rate": 4.2453995754469624e-05, + "loss": 1.2561, + "step": 3236500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2451895788909064e-05, + "loss": 1.2597, + "step": 3237000 + }, + { + "epoch": 1.94, + "learning_rate": 4.24497958233485e-05, + "loss": 1.2341, + "step": 3237500 + }, + { + "epoch": 1.94, + "learning_rate": 4.244769585778793e-05, + "loss": 1.2512, + "step": 3238000 + }, + { + "epoch": 1.94, + "learning_rate": 4.244559589222737e-05, + "loss": 1.2507, + "step": 3238500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2443495926666805e-05, + "loss": 1.2449, + "step": 3239000 + }, + { + "epoch": 1.94, + "learning_rate": 4.244139596110624e-05, + "loss": 1.2726, + "step": 3239500 + }, + { + "epoch": 1.94, + "learning_rate": 4.243930019547679e-05, + "loss": 1.2887, + "step": 3240000 + }, + { + "epoch": 1.94, + "learning_rate": 4.243720022991623e-05, + "loss": 1.2233, + "step": 3240500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2435100264355666e-05, + "loss": 1.2724, + "step": 3241000 + }, + { + "epoch": 1.94, + "learning_rate": 4.24330002987951e-05, + "loss": 1.2573, + "step": 3241500 + }, + { + "epoch": 1.94, + "learning_rate": 4.243090453316565e-05, + "loss": 1.2553, + "step": 3242000 + }, + { + "epoch": 1.94, + "learning_rate": 4.242880876753621e-05, + "loss": 1.2407, + "step": 3242500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2426708801975646e-05, + "loss": 1.286, + "step": 3243000 + }, + { + "epoch": 1.94, + "learning_rate": 4.242460883641508e-05, + "loss": 1.2374, + "step": 3243500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2422513070785633e-05, + "loss": 1.2559, + "step": 3244000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2420413105225074e-05, + "loss": 1.2762, + "step": 3244500 + }, + { + "epoch": 1.95, + "learning_rate": 4.241831313966451e-05, + "loss": 1.2666, + "step": 3245000 + }, + { + "epoch": 1.95, + "learning_rate": 4.241621317410394e-05, + "loss": 1.28, + "step": 3245500 + }, + { + "epoch": 1.95, + "learning_rate": 4.24141174084745e-05, + "loss": 1.2758, + "step": 3246000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2412017442913934e-05, + "loss": 1.2608, + "step": 3246500 + }, + { + "epoch": 1.95, + "learning_rate": 4.240991747735337e-05, + "loss": 1.2418, + "step": 3247000 + }, + { + "epoch": 1.95, + "learning_rate": 4.24078175117928e-05, + "loss": 1.2353, + "step": 3247500 + }, + { + "epoch": 1.95, + "learning_rate": 4.240571754623224e-05, + "loss": 1.2717, + "step": 3248000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2403617580671675e-05, + "loss": 1.2264, + "step": 3248500 + }, + { + "epoch": 1.95, + "learning_rate": 4.240152181504223e-05, + "loss": 1.2419, + "step": 3249000 + }, + { + "epoch": 1.95, + "learning_rate": 4.239942184948167e-05, + "loss": 1.2698, + "step": 3249500 + }, + { + "epoch": 1.95, + "learning_rate": 4.23973218839211e-05, + "loss": 1.2194, + "step": 3250000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2395221918360536e-05, + "loss": 1.2654, + "step": 3250500 + }, + { + "epoch": 1.95, + "learning_rate": 4.239312615273109e-05, + "loss": 1.2396, + "step": 3251000 + }, + { + "epoch": 1.95, + "learning_rate": 4.239102618717053e-05, + "loss": 1.2614, + "step": 3251500 + }, + { + "epoch": 1.95, + "learning_rate": 4.238892622160996e-05, + "loss": 1.2914, + "step": 3252000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2386826256049396e-05, + "loss": 1.2677, + "step": 3252500 + }, + { + "epoch": 1.95, + "learning_rate": 4.238472629048884e-05, + "loss": 1.2343, + "step": 3253000 + }, + { + "epoch": 1.95, + "learning_rate": 4.238262632492827e-05, + "loss": 1.2466, + "step": 3253500 + }, + { + "epoch": 1.95, + "learning_rate": 4.2380526359367704e-05, + "loss": 1.2603, + "step": 3254000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2378426393807144e-05, + "loss": 1.249, + "step": 3254500 + }, + { + "epoch": 1.95, + "learning_rate": 4.237632642824658e-05, + "loss": 1.262, + "step": 3255000 + }, + { + "epoch": 1.95, + "learning_rate": 4.237422646268601e-05, + "loss": 1.2699, + "step": 3255500 + }, + { + "epoch": 1.95, + "learning_rate": 4.237212649712545e-05, + "loss": 1.2444, + "step": 3256000 + }, + { + "epoch": 1.95, + "learning_rate": 4.237002653156488e-05, + "loss": 1.2671, + "step": 3256500 + }, + { + "epoch": 1.95, + "learning_rate": 4.236793076593544e-05, + "loss": 1.2631, + "step": 3257000 + }, + { + "epoch": 1.95, + "learning_rate": 4.236583080037488e-05, + "loss": 1.2807, + "step": 3257500 + }, + { + "epoch": 1.95, + "learning_rate": 4.236373083481431e-05, + "loss": 1.2529, + "step": 3258000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2361630869253745e-05, + "loss": 1.2813, + "step": 3258500 + }, + { + "epoch": 1.95, + "learning_rate": 4.235953090369318e-05, + "loss": 1.2389, + "step": 3259000 + }, + { + "epoch": 1.95, + "learning_rate": 4.235743093813261e-05, + "loss": 1.2514, + "step": 3259500 + }, + { + "epoch": 1.95, + "learning_rate": 4.235533517250317e-05, + "loss": 1.2191, + "step": 3260000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2353235206942606e-05, + "loss": 1.2344, + "step": 3260500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2351135241382046e-05, + "loss": 1.2406, + "step": 3261000 + }, + { + "epoch": 1.96, + "learning_rate": 4.234903527582147e-05, + "loss": 1.2533, + "step": 3261500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2346935310260906e-05, + "loss": 1.2339, + "step": 3262000 + }, + { + "epoch": 1.96, + "learning_rate": 4.2344835344700346e-05, + "loss": 1.256, + "step": 3262500 + }, + { + "epoch": 1.96, + "learning_rate": 4.234273537913978e-05, + "loss": 1.231, + "step": 3263000 + }, + { + "epoch": 1.96, + "learning_rate": 4.2340635413579213e-05, + "loss": 1.2715, + "step": 3263500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2338539647949774e-05, + "loss": 1.2553, + "step": 3264000 + }, + { + "epoch": 1.96, + "learning_rate": 4.233643968238921e-05, + "loss": 1.2449, + "step": 3264500 + }, + { + "epoch": 1.96, + "learning_rate": 4.233433971682864e-05, + "loss": 1.2567, + "step": 3265000 + }, + { + "epoch": 1.96, + "learning_rate": 4.233223975126808e-05, + "loss": 1.2715, + "step": 3265500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2330143985638634e-05, + "loss": 1.2638, + "step": 3266000 + }, + { + "epoch": 1.96, + "learning_rate": 4.232804402007807e-05, + "loss": 1.2496, + "step": 3266500 + }, + { + "epoch": 1.96, + "learning_rate": 4.23259440545175e-05, + "loss": 1.2638, + "step": 3267000 + }, + { + "epoch": 1.96, + "learning_rate": 4.232384408895694e-05, + "loss": 1.2339, + "step": 3267500 + }, + { + "epoch": 1.96, + "learning_rate": 4.23217483233275e-05, + "loss": 1.2569, + "step": 3268000 + }, + { + "epoch": 1.96, + "learning_rate": 4.231964835776693e-05, + "loss": 1.2453, + "step": 3268500 + }, + { + "epoch": 1.96, + "learning_rate": 4.231755259213749e-05, + "loss": 1.2279, + "step": 3269000 + }, + { + "epoch": 1.96, + "learning_rate": 4.231545262657692e-05, + "loss": 1.2554, + "step": 3269500 + }, + { + "epoch": 1.96, + "learning_rate": 4.231335266101636e-05, + "loss": 1.2364, + "step": 3270000 + }, + { + "epoch": 1.96, + "learning_rate": 4.2311252695455796e-05, + "loss": 1.2518, + "step": 3270500 + }, + { + "epoch": 1.96, + "learning_rate": 4.230915272989523e-05, + "loss": 1.2416, + "step": 3271000 + }, + { + "epoch": 1.96, + "learning_rate": 4.230705276433466e-05, + "loss": 1.2475, + "step": 3271500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2304956998705223e-05, + "loss": 1.2499, + "step": 3272000 + }, + { + "epoch": 1.96, + "learning_rate": 4.230285703314466e-05, + "loss": 1.2287, + "step": 3272500 + }, + { + "epoch": 1.96, + "learning_rate": 4.230076126751521e-05, + "loss": 1.2467, + "step": 3273000 + }, + { + "epoch": 1.96, + "learning_rate": 4.229866130195465e-05, + "loss": 1.2494, + "step": 3273500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2296561336394084e-05, + "loss": 1.2477, + "step": 3274000 + }, + { + "epoch": 1.96, + "learning_rate": 4.229446137083352e-05, + "loss": 1.257, + "step": 3274500 + }, + { + "epoch": 1.96, + "learning_rate": 4.229236140527296e-05, + "loss": 1.2359, + "step": 3275000 + }, + { + "epoch": 1.96, + "learning_rate": 4.229026563964351e-05, + "loss": 1.259, + "step": 3275500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2288165674082945e-05, + "loss": 1.2504, + "step": 3276000 + }, + { + "epoch": 1.96, + "learning_rate": 4.228606570852238e-05, + "loss": 1.2543, + "step": 3276500 + }, + { + "epoch": 1.96, + "learning_rate": 4.228396994289294e-05, + "loss": 1.2482, + "step": 3277000 + }, + { + "epoch": 1.96, + "learning_rate": 4.228186997733237e-05, + "loss": 1.2365, + "step": 3277500 + }, + { + "epoch": 1.97, + "learning_rate": 4.2279770011771806e-05, + "loss": 1.273, + "step": 3278000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2277670046211246e-05, + "loss": 1.2801, + "step": 3278500 + }, + { + "epoch": 1.97, + "learning_rate": 4.227557008065068e-05, + "loss": 1.2342, + "step": 3279000 + }, + { + "epoch": 1.97, + "learning_rate": 4.227347011509011e-05, + "loss": 1.2276, + "step": 3279500 + }, + { + "epoch": 1.97, + "learning_rate": 4.227137014952955e-05, + "loss": 1.262, + "step": 3280000 + }, + { + "epoch": 1.97, + "learning_rate": 4.226927018396898e-05, + "loss": 1.2404, + "step": 3280500 + }, + { + "epoch": 1.97, + "learning_rate": 4.226717021840841e-05, + "loss": 1.2392, + "step": 3281000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2265070252847853e-05, + "loss": 1.2491, + "step": 3281500 + }, + { + "epoch": 1.97, + "learning_rate": 4.226297028728729e-05, + "loss": 1.2557, + "step": 3282000 + }, + { + "epoch": 1.97, + "learning_rate": 4.226087032172672e-05, + "loss": 1.2356, + "step": 3282500 + }, + { + "epoch": 1.97, + "learning_rate": 4.225877035616616e-05, + "loss": 1.252, + "step": 3283000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2256670390605594e-05, + "loss": 1.272, + "step": 3283500 + }, + { + "epoch": 1.97, + "learning_rate": 4.225457462497615e-05, + "loss": 1.2863, + "step": 3284000 + }, + { + "epoch": 1.97, + "learning_rate": 4.225247885934671e-05, + "loss": 1.2522, + "step": 3284500 + }, + { + "epoch": 1.97, + "learning_rate": 4.225037889378614e-05, + "loss": 1.2599, + "step": 3285000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2248278928225575e-05, + "loss": 1.2393, + "step": 3285500 + }, + { + "epoch": 1.97, + "learning_rate": 4.224617896266501e-05, + "loss": 1.2691, + "step": 3286000 + }, + { + "epoch": 1.97, + "learning_rate": 4.224407899710445e-05, + "loss": 1.2463, + "step": 3286500 + }, + { + "epoch": 1.97, + "learning_rate": 4.224197903154388e-05, + "loss": 1.2306, + "step": 3287000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2239879065983315e-05, + "loss": 1.2334, + "step": 3287500 + }, + { + "epoch": 1.97, + "learning_rate": 4.2237779100422756e-05, + "loss": 1.2817, + "step": 3288000 + }, + { + "epoch": 1.97, + "learning_rate": 4.223567913486219e-05, + "loss": 1.261, + "step": 3288500 + }, + { + "epoch": 1.97, + "learning_rate": 4.223357916930162e-05, + "loss": 1.2443, + "step": 3289000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2231483403672176e-05, + "loss": 1.2694, + "step": 3289500 + }, + { + "epoch": 1.97, + "learning_rate": 4.2229383438111616e-05, + "loss": 1.2468, + "step": 3290000 + }, + { + "epoch": 1.97, + "learning_rate": 4.222728347255105e-05, + "loss": 1.2669, + "step": 3290500 + }, + { + "epoch": 1.97, + "learning_rate": 4.222518350699048e-05, + "loss": 1.2399, + "step": 3291000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2223083541429924e-05, + "loss": 1.2915, + "step": 3291500 + }, + { + "epoch": 1.97, + "learning_rate": 4.222098357586936e-05, + "loss": 1.2601, + "step": 3292000 + }, + { + "epoch": 1.97, + "learning_rate": 4.221888361030879e-05, + "loss": 1.2774, + "step": 3292500 + }, + { + "epoch": 1.97, + "learning_rate": 4.2216783644748224e-05, + "loss": 1.2671, + "step": 3293000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2214687879118784e-05, + "loss": 1.2529, + "step": 3293500 + }, + { + "epoch": 1.97, + "learning_rate": 4.221259211348934e-05, + "loss": 1.2297, + "step": 3294000 + }, + { + "epoch": 1.98, + "learning_rate": 4.221049214792877e-05, + "loss": 1.249, + "step": 3294500 + }, + { + "epoch": 1.98, + "learning_rate": 4.220839218236821e-05, + "loss": 1.215, + "step": 3295000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2206292216807645e-05, + "loss": 1.2759, + "step": 3295500 + }, + { + "epoch": 1.98, + "learning_rate": 4.220419225124708e-05, + "loss": 1.2869, + "step": 3296000 + }, + { + "epoch": 1.98, + "learning_rate": 4.220209228568652e-05, + "loss": 1.2703, + "step": 3296500 + }, + { + "epoch": 1.98, + "learning_rate": 4.219999652005707e-05, + "loss": 1.2599, + "step": 3297000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2197896554496506e-05, + "loss": 1.251, + "step": 3297500 + }, + { + "epoch": 1.98, + "learning_rate": 4.219579658893594e-05, + "loss": 1.2561, + "step": 3298000 + }, + { + "epoch": 1.98, + "learning_rate": 4.219369662337538e-05, + "loss": 1.2464, + "step": 3298500 + }, + { + "epoch": 1.98, + "learning_rate": 4.219160085774593e-05, + "loss": 1.2714, + "step": 3299000 + }, + { + "epoch": 1.98, + "learning_rate": 4.218950509211649e-05, + "loss": 1.2492, + "step": 3299500 + }, + { + "epoch": 1.98, + "learning_rate": 4.218740512655592e-05, + "loss": 1.2767, + "step": 3300000 + }, + { + "epoch": 1.98, + "eval_loss": 1.2033525705337524, + "eval_runtime": 1102.9314, + "eval_samples_per_second": 477.564, + "eval_steps_per_second": 79.594, + "step": 3300000 + }, + { + "epoch": 1.98, + "learning_rate": 4.218530516099536e-05, + "loss": 1.2708, + "step": 3300500 + }, + { + "epoch": 1.98, + "learning_rate": 4.2183205195434794e-05, + "loss": 1.2561, + "step": 3301000 + }, + { + "epoch": 1.98, + "learning_rate": 4.218110522987423e-05, + "loss": 1.2472, + "step": 3301500 + }, + { + "epoch": 1.98, + "learning_rate": 4.217900526431367e-05, + "loss": 1.2677, + "step": 3302000 + }, + { + "epoch": 1.98, + "learning_rate": 4.21769052987531e-05, + "loss": 1.2653, + "step": 3302500 + }, + { + "epoch": 1.98, + "learning_rate": 4.2174805333192534e-05, + "loss": 1.2271, + "step": 3303000 + }, + { + "epoch": 1.98, + "learning_rate": 4.217270956756309e-05, + "loss": 1.2333, + "step": 3303500 + }, + { + "epoch": 1.98, + "learning_rate": 4.217060960200253e-05, + "loss": 1.2417, + "step": 3304000 + }, + { + "epoch": 1.98, + "learning_rate": 4.216850963644196e-05, + "loss": 1.2627, + "step": 3304500 + }, + { + "epoch": 1.98, + "learning_rate": 4.2166409670881395e-05, + "loss": 1.257, + "step": 3305000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2164309705320835e-05, + "loss": 1.2694, + "step": 3305500 + }, + { + "epoch": 1.98, + "learning_rate": 4.216220973976027e-05, + "loss": 1.2381, + "step": 3306000 + }, + { + "epoch": 1.98, + "learning_rate": 4.21601097741997e-05, + "loss": 1.2477, + "step": 3306500 + }, + { + "epoch": 1.98, + "learning_rate": 4.215800980863914e-05, + "loss": 1.2748, + "step": 3307000 + }, + { + "epoch": 1.98, + "learning_rate": 4.215590984307857e-05, + "loss": 1.2409, + "step": 3307500 + }, + { + "epoch": 1.98, + "learning_rate": 4.215381407744913e-05, + "loss": 1.2652, + "step": 3308000 + }, + { + "epoch": 1.98, + "learning_rate": 4.215171411188856e-05, + "loss": 1.2544, + "step": 3308500 + }, + { + "epoch": 1.98, + "learning_rate": 4.2149614146328e-05, + "loss": 1.2546, + "step": 3309000 + }, + { + "epoch": 1.98, + "learning_rate": 4.214751418076744e-05, + "loss": 1.2503, + "step": 3309500 + }, + { + "epoch": 1.98, + "learning_rate": 4.214541421520687e-05, + "loss": 1.2463, + "step": 3310000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2143314249646304e-05, + "loss": 1.2593, + "step": 3310500 + }, + { + "epoch": 1.99, + "learning_rate": 4.214121428408574e-05, + "loss": 1.2696, + "step": 3311000 + }, + { + "epoch": 1.99, + "learning_rate": 4.213911431852518e-05, + "loss": 1.2522, + "step": 3311500 + }, + { + "epoch": 1.99, + "learning_rate": 4.213701855289573e-05, + "loss": 1.2546, + "step": 3312000 + }, + { + "epoch": 1.99, + "learning_rate": 4.213492278726629e-05, + "loss": 1.2457, + "step": 3312500 + }, + { + "epoch": 1.99, + "learning_rate": 4.2132822821705725e-05, + "loss": 1.2387, + "step": 3313000 + }, + { + "epoch": 1.99, + "learning_rate": 4.213072285614516e-05, + "loss": 1.2456, + "step": 3313500 + }, + { + "epoch": 1.99, + "learning_rate": 4.21286228905846e-05, + "loss": 1.2579, + "step": 3314000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2126522925024025e-05, + "loss": 1.2592, + "step": 3314500 + }, + { + "epoch": 1.99, + "learning_rate": 4.212442295946346e-05, + "loss": 1.2537, + "step": 3315000 + }, + { + "epoch": 1.99, + "learning_rate": 4.212233139376514e-05, + "loss": 1.2857, + "step": 3315500 + }, + { + "epoch": 1.99, + "learning_rate": 4.212023142820458e-05, + "loss": 1.2259, + "step": 3316000 + }, + { + "epoch": 1.99, + "learning_rate": 4.211813146264401e-05, + "loss": 1.27, + "step": 3316500 + }, + { + "epoch": 1.99, + "learning_rate": 4.2116031497083446e-05, + "loss": 1.2551, + "step": 3317000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2113931531522886e-05, + "loss": 1.2545, + "step": 3317500 + }, + { + "epoch": 1.99, + "learning_rate": 4.211183156596232e-05, + "loss": 1.2653, + "step": 3318000 + }, + { + "epoch": 1.99, + "learning_rate": 4.210973160040175e-05, + "loss": 1.2679, + "step": 3318500 + }, + { + "epoch": 1.99, + "learning_rate": 4.2107631634841194e-05, + "loss": 1.2723, + "step": 3319000 + }, + { + "epoch": 1.99, + "learning_rate": 4.210553166928062e-05, + "loss": 1.2721, + "step": 3319500 + }, + { + "epoch": 1.99, + "learning_rate": 4.2103431703720054e-05, + "loss": 1.2557, + "step": 3320000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2101331738159494e-05, + "loss": 1.2296, + "step": 3320500 + }, + { + "epoch": 1.99, + "learning_rate": 4.209923177259893e-05, + "loss": 1.281, + "step": 3321000 + }, + { + "epoch": 1.99, + "learning_rate": 4.209713600696948e-05, + "loss": 1.2263, + "step": 3321500 + }, + { + "epoch": 1.99, + "learning_rate": 4.209503604140892e-05, + "loss": 1.2488, + "step": 3322000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2092936075848355e-05, + "loss": 1.2555, + "step": 3322500 + }, + { + "epoch": 1.99, + "learning_rate": 4.209083611028779e-05, + "loss": 1.238, + "step": 3323000 + }, + { + "epoch": 1.99, + "learning_rate": 4.208874034465835e-05, + "loss": 1.2498, + "step": 3323500 + }, + { + "epoch": 1.99, + "learning_rate": 4.208664037909778e-05, + "loss": 1.2839, + "step": 3324000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2084540413537215e-05, + "loss": 1.2234, + "step": 3324500 + }, + { + "epoch": 1.99, + "learning_rate": 4.2082453047770016e-05, + "loss": 1.2577, + "step": 3325000 + }, + { + "epoch": 1.99, + "learning_rate": 4.208035308220945e-05, + "loss": 1.2438, + "step": 3325500 + }, + { + "epoch": 1.99, + "learning_rate": 4.207825311664888e-05, + "loss": 1.2413, + "step": 3326000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2076153151088316e-05, + "loss": 1.2751, + "step": 3326500 + }, + { + "epoch": 1.99, + "learning_rate": 4.2074053185527757e-05, + "loss": 1.2194, + "step": 3327000 + }, + { + "epoch": 1.99, + "learning_rate": 4.207195321996719e-05, + "loss": 1.2353, + "step": 3327500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2069853254406623e-05, + "loss": 1.2602, + "step": 3328000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2067753288846064e-05, + "loss": 1.236, + "step": 3328500 + }, + { + "epoch": 2.0, + "learning_rate": 4.20656533232855e-05, + "loss": 1.2609, + "step": 3329000 + }, + { + "epoch": 2.0, + "learning_rate": 4.206355335772494e-05, + "loss": 1.2641, + "step": 3329500 + }, + { + "epoch": 2.0, + "learning_rate": 4.206145339216437e-05, + "loss": 1.2569, + "step": 3330000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2059353426603804e-05, + "loss": 1.2481, + "step": 3330500 + }, + { + "epoch": 2.0, + "learning_rate": 4.205725346104324e-05, + "loss": 1.2419, + "step": 3331000 + }, + { + "epoch": 2.0, + "learning_rate": 4.205515349548267e-05, + "loss": 1.2608, + "step": 3331500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2053053529922105e-05, + "loss": 1.2305, + "step": 3332000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2050953564361545e-05, + "loss": 1.2409, + "step": 3332500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2048857798732105e-05, + "loss": 1.2405, + "step": 3333000 + }, + { + "epoch": 2.0, + "learning_rate": 4.204676203310266e-05, + "loss": 1.2761, + "step": 3333500 + }, + { + "epoch": 2.0, + "learning_rate": 4.204466206754209e-05, + "loss": 1.2441, + "step": 3334000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2042562101981526e-05, + "loss": 1.2889, + "step": 3334500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2040462136420966e-05, + "loss": 1.2861, + "step": 3335000 + }, + { + "epoch": 2.0, + "learning_rate": 4.20383621708604e-05, + "loss": 1.2382, + "step": 3335500 + }, + { + "epoch": 2.0, + "learning_rate": 4.203627060516207e-05, + "loss": 1.2626, + "step": 3336000 + }, + { + "epoch": 2.0, + "learning_rate": 4.203417063960151e-05, + "loss": 1.2198, + "step": 3336500 + }, + { + "epoch": 2.0, + "learning_rate": 4.203207067404095e-05, + "loss": 1.2162, + "step": 3337000 + }, + { + "epoch": 2.0, + "learning_rate": 4.202997070848038e-05, + "loss": 1.2305, + "step": 3337500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2027870742919814e-05, + "loss": 1.2166, + "step": 3338000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2025770777359254e-05, + "loss": 1.2541, + "step": 3338500 + }, + { + "epoch": 2.0, + "learning_rate": 4.202367081179869e-05, + "loss": 1.2032, + "step": 3339000 + }, + { + "epoch": 2.0, + "learning_rate": 4.202157084623812e-05, + "loss": 1.209, + "step": 3339500 + }, + { + "epoch": 2.0, + "learning_rate": 4.201947088067756e-05, + "loss": 1.1905, + "step": 3340000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2017370915116995e-05, + "loss": 1.2406, + "step": 3340500 + }, + { + "epoch": 2.0, + "learning_rate": 4.201527094955642e-05, + "loss": 1.1992, + "step": 3341000 + }, + { + "epoch": 2.0, + "learning_rate": 4.201317518392698e-05, + "loss": 1.2321, + "step": 3341500 + }, + { + "epoch": 2.0, + "learning_rate": 4.201107521836642e-05, + "loss": 1.2198, + "step": 3342000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2008975252805855e-05, + "loss": 1.2011, + "step": 3342500 + }, + { + "epoch": 2.0, + "learning_rate": 4.200687528724529e-05, + "loss": 1.2217, + "step": 3343000 + }, + { + "epoch": 2.0, + "learning_rate": 4.200477952161585e-05, + "loss": 1.2124, + "step": 3343500 + }, + { + "epoch": 2.0, + "learning_rate": 4.200267955605528e-05, + "loss": 1.2352, + "step": 3344000 + }, + { + "epoch": 2.01, + "learning_rate": 4.2000579590494716e-05, + "loss": 1.2127, + "step": 3344500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1998479624934156e-05, + "loss": 1.22, + "step": 3345000 + }, + { + "epoch": 2.01, + "learning_rate": 4.199638385930471e-05, + "loss": 1.237, + "step": 3345500 + }, + { + "epoch": 2.01, + "learning_rate": 4.199428389374414e-05, + "loss": 1.2126, + "step": 3346000 + }, + { + "epoch": 2.01, + "learning_rate": 4.199218392818358e-05, + "loss": 1.2094, + "step": 3346500 + }, + { + "epoch": 2.01, + "learning_rate": 4.199008396262302e-05, + "loss": 1.231, + "step": 3347000 + }, + { + "epoch": 2.01, + "learning_rate": 4.198798399706245e-05, + "loss": 1.2546, + "step": 3347500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1985888231433004e-05, + "loss": 1.2435, + "step": 3348000 + }, + { + "epoch": 2.01, + "learning_rate": 4.198378826587244e-05, + "loss": 1.2206, + "step": 3348500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1981692500243e-05, + "loss": 1.1801, + "step": 3349000 + }, + { + "epoch": 2.01, + "learning_rate": 4.197959253468243e-05, + "loss": 1.2382, + "step": 3349500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1977492569121865e-05, + "loss": 1.2125, + "step": 3350000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1975392603561305e-05, + "loss": 1.2069, + "step": 3350500 + }, + { + "epoch": 2.01, + "learning_rate": 4.197329263800074e-05, + "loss": 1.1793, + "step": 3351000 + }, + { + "epoch": 2.01, + "learning_rate": 4.197119267244017e-05, + "loss": 1.2416, + "step": 3351500 + }, + { + "epoch": 2.01, + "learning_rate": 4.196909270687961e-05, + "loss": 1.2327, + "step": 3352000 + }, + { + "epoch": 2.01, + "learning_rate": 4.196699274131904e-05, + "loss": 1.2248, + "step": 3352500 + }, + { + "epoch": 2.01, + "learning_rate": 4.196489277575847e-05, + "loss": 1.2351, + "step": 3353000 + }, + { + "epoch": 2.01, + "learning_rate": 4.196279281019791e-05, + "loss": 1.2355, + "step": 3353500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1960692844637346e-05, + "loss": 1.1975, + "step": 3354000 + }, + { + "epoch": 2.01, + "learning_rate": 4.195859287907678e-05, + "loss": 1.2151, + "step": 3354500 + }, + { + "epoch": 2.01, + "learning_rate": 4.195650131337846e-05, + "loss": 1.2216, + "step": 3355000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1954401347817893e-05, + "loss": 1.2075, + "step": 3355500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1952301382257334e-05, + "loss": 1.2321, + "step": 3356000 + }, + { + "epoch": 2.01, + "learning_rate": 4.195020141669677e-05, + "loss": 1.2194, + "step": 3356500 + }, + { + "epoch": 2.01, + "learning_rate": 4.19481014511362e-05, + "loss": 1.2372, + "step": 3357000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1946001485575634e-05, + "loss": 1.2411, + "step": 3357500 + }, + { + "epoch": 2.01, + "learning_rate": 4.194390152001507e-05, + "loss": 1.2438, + "step": 3358000 + }, + { + "epoch": 2.01, + "learning_rate": 4.194180155445451e-05, + "loss": 1.2259, + "step": 3358500 + }, + { + "epoch": 2.01, + "learning_rate": 4.193970578882507e-05, + "loss": 1.2535, + "step": 3359000 + }, + { + "epoch": 2.01, + "learning_rate": 4.19376058232645e-05, + "loss": 1.2145, + "step": 3359500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1935510057635055e-05, + "loss": 1.2192, + "step": 3360000 + }, + { + "epoch": 2.01, + "learning_rate": 4.193341009207449e-05, + "loss": 1.2145, + "step": 3360500 + }, + { + "epoch": 2.02, + "learning_rate": 4.193131012651393e-05, + "loss": 1.2104, + "step": 3361000 + }, + { + "epoch": 2.02, + "learning_rate": 4.192921016095336e-05, + "loss": 1.2195, + "step": 3361500 + }, + { + "epoch": 2.02, + "learning_rate": 4.1927114395323916e-05, + "loss": 1.1939, + "step": 3362000 + }, + { + "epoch": 2.02, + "learning_rate": 4.192501442976335e-05, + "loss": 1.2449, + "step": 3362500 + }, + { + "epoch": 2.02, + "learning_rate": 4.192291446420279e-05, + "loss": 1.2233, + "step": 3363000 + }, + { + "epoch": 2.02, + "learning_rate": 4.192081449864222e-05, + "loss": 1.2038, + "step": 3363500 + }, + { + "epoch": 2.02, + "learning_rate": 4.1918714533081656e-05, + "loss": 1.2076, + "step": 3364000 + }, + { + "epoch": 2.02, + "learning_rate": 4.191661456752109e-05, + "loss": 1.2652, + "step": 3364500 + }, + { + "epoch": 2.02, + "learning_rate": 4.191451460196052e-05, + "loss": 1.219, + "step": 3365000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1912414636399964e-05, + "loss": 1.1913, + "step": 3365500 + }, + { + "epoch": 2.02, + "learning_rate": 4.19103146708394e-05, + "loss": 1.2265, + "step": 3366000 + }, + { + "epoch": 2.02, + "learning_rate": 4.190821470527883e-05, + "loss": 1.2151, + "step": 3366500 + }, + { + "epoch": 2.02, + "learning_rate": 4.190611473971827e-05, + "loss": 1.2252, + "step": 3367000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1904014774157704e-05, + "loss": 1.2086, + "step": 3367500 + }, + { + "epoch": 2.02, + "learning_rate": 4.1901923208459385e-05, + "loss": 1.215, + "step": 3368000 + }, + { + "epoch": 2.02, + "learning_rate": 4.189982324289882e-05, + "loss": 1.2167, + "step": 3368500 + }, + { + "epoch": 2.02, + "learning_rate": 4.189772327733825e-05, + "loss": 1.2496, + "step": 3369000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1895623311777685e-05, + "loss": 1.2426, + "step": 3369500 + }, + { + "epoch": 2.02, + "learning_rate": 4.189352334621712e-05, + "loss": 1.2192, + "step": 3370000 + }, + { + "epoch": 2.02, + "learning_rate": 4.189142338065655e-05, + "loss": 1.2157, + "step": 3370500 + }, + { + "epoch": 2.02, + "learning_rate": 4.188932341509599e-05, + "loss": 1.2326, + "step": 3371000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1887227649466546e-05, + "loss": 1.2165, + "step": 3371500 + }, + { + "epoch": 2.02, + "learning_rate": 4.188512768390598e-05, + "loss": 1.2458, + "step": 3372000 + }, + { + "epoch": 2.02, + "learning_rate": 4.188302771834542e-05, + "loss": 1.2398, + "step": 3372500 + }, + { + "epoch": 2.02, + "learning_rate": 4.188092775278485e-05, + "loss": 1.2033, + "step": 3373000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1878827787224286e-05, + "loss": 1.2292, + "step": 3373500 + }, + { + "epoch": 2.02, + "learning_rate": 4.187673202159484e-05, + "loss": 1.2279, + "step": 3374000 + }, + { + "epoch": 2.02, + "learning_rate": 4.187463205603428e-05, + "loss": 1.2338, + "step": 3374500 + }, + { + "epoch": 2.02, + "learning_rate": 4.1872532090473714e-05, + "loss": 1.1969, + "step": 3375000 + }, + { + "epoch": 2.02, + "learning_rate": 4.187043212491315e-05, + "loss": 1.1877, + "step": 3375500 + }, + { + "epoch": 2.02, + "learning_rate": 4.186833215935259e-05, + "loss": 1.2651, + "step": 3376000 + }, + { + "epoch": 2.02, + "learning_rate": 4.186623639372314e-05, + "loss": 1.2203, + "step": 3376500 + }, + { + "epoch": 2.02, + "learning_rate": 4.18641406280937e-05, + "loss": 1.2326, + "step": 3377000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1862040662533135e-05, + "loss": 1.226, + "step": 3377500 + }, + { + "epoch": 2.03, + "learning_rate": 4.185994069697257e-05, + "loss": 1.2216, + "step": 3378000 + }, + { + "epoch": 2.03, + "learning_rate": 4.185784073141201e-05, + "loss": 1.2174, + "step": 3378500 + }, + { + "epoch": 2.03, + "learning_rate": 4.1855740765851435e-05, + "loss": 1.2111, + "step": 3379000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1853640800290875e-05, + "loss": 1.2416, + "step": 3379500 + }, + { + "epoch": 2.03, + "learning_rate": 4.185154083473031e-05, + "loss": 1.2204, + "step": 3380000 + }, + { + "epoch": 2.03, + "learning_rate": 4.184944086916974e-05, + "loss": 1.2264, + "step": 3380500 + }, + { + "epoch": 2.03, + "learning_rate": 4.184734090360918e-05, + "loss": 1.224, + "step": 3381000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1845245137979736e-05, + "loss": 1.199, + "step": 3381500 + }, + { + "epoch": 2.03, + "learning_rate": 4.184314517241917e-05, + "loss": 1.2083, + "step": 3382000 + }, + { + "epoch": 2.03, + "learning_rate": 4.18410452068586e-05, + "loss": 1.2069, + "step": 3382500 + }, + { + "epoch": 2.03, + "learning_rate": 4.183894524129804e-05, + "loss": 1.2393, + "step": 3383000 + }, + { + "epoch": 2.03, + "learning_rate": 4.183684527573748e-05, + "loss": 1.2264, + "step": 3383500 + }, + { + "epoch": 2.03, + "learning_rate": 4.183474531017691e-05, + "loss": 1.2552, + "step": 3384000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1832649544547464e-05, + "loss": 1.2178, + "step": 3384500 + }, + { + "epoch": 2.03, + "learning_rate": 4.1830549578986904e-05, + "loss": 1.2385, + "step": 3385000 + }, + { + "epoch": 2.03, + "learning_rate": 4.182844961342634e-05, + "loss": 1.2373, + "step": 3385500 + }, + { + "epoch": 2.03, + "learning_rate": 4.182634964786577e-05, + "loss": 1.198, + "step": 3386000 + }, + { + "epoch": 2.03, + "learning_rate": 4.182424968230521e-05, + "loss": 1.2394, + "step": 3386500 + }, + { + "epoch": 2.03, + "learning_rate": 4.1822149716744645e-05, + "loss": 1.2254, + "step": 3387000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1820049751184085e-05, + "loss": 1.2216, + "step": 3387500 + }, + { + "epoch": 2.03, + "learning_rate": 4.181794978562352e-05, + "loss": 1.2435, + "step": 3388000 + }, + { + "epoch": 2.03, + "learning_rate": 4.181585401999407e-05, + "loss": 1.1905, + "step": 3388500 + }, + { + "epoch": 2.03, + "learning_rate": 4.1813758254364625e-05, + "loss": 1.2216, + "step": 3389000 + }, + { + "epoch": 2.03, + "learning_rate": 4.181165828880406e-05, + "loss": 1.2113, + "step": 3389500 + }, + { + "epoch": 2.03, + "learning_rate": 4.18095583232435e-05, + "loss": 1.2289, + "step": 3390000 + }, + { + "epoch": 2.03, + "learning_rate": 4.180745835768293e-05, + "loss": 1.2403, + "step": 3390500 + }, + { + "epoch": 2.03, + "learning_rate": 4.1805358392122366e-05, + "loss": 1.2307, + "step": 3391000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1803258426561806e-05, + "loss": 1.2269, + "step": 3391500 + }, + { + "epoch": 2.03, + "learning_rate": 4.180115846100124e-05, + "loss": 1.2084, + "step": 3392000 + }, + { + "epoch": 2.03, + "learning_rate": 4.179905849544067e-05, + "loss": 1.2356, + "step": 3392500 + }, + { + "epoch": 2.03, + "learning_rate": 4.179696272981123e-05, + "loss": 1.2204, + "step": 3393000 + }, + { + "epoch": 2.03, + "learning_rate": 4.179486276425067e-05, + "loss": 1.2372, + "step": 3393500 + }, + { + "epoch": 2.03, + "learning_rate": 4.179276699862122e-05, + "loss": 1.2304, + "step": 3394000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1790667033060654e-05, + "loss": 1.2305, + "step": 3394500 + }, + { + "epoch": 2.04, + "learning_rate": 4.1788567067500094e-05, + "loss": 1.2504, + "step": 3395000 + }, + { + "epoch": 2.04, + "learning_rate": 4.178646710193953e-05, + "loss": 1.2269, + "step": 3395500 + }, + { + "epoch": 2.04, + "learning_rate": 4.178437133631008e-05, + "loss": 1.2155, + "step": 3396000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1782271370749515e-05, + "loss": 1.2351, + "step": 3396500 + }, + { + "epoch": 2.04, + "learning_rate": 4.1780171405188955e-05, + "loss": 1.2375, + "step": 3397000 + }, + { + "epoch": 2.04, + "learning_rate": 4.177807143962839e-05, + "loss": 1.2498, + "step": 3397500 + }, + { + "epoch": 2.04, + "learning_rate": 4.177597567399894e-05, + "loss": 1.2327, + "step": 3398000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1773875708438375e-05, + "loss": 1.2327, + "step": 3398500 + }, + { + "epoch": 2.04, + "learning_rate": 4.1771775742877816e-05, + "loss": 1.2099, + "step": 3399000 + }, + { + "epoch": 2.04, + "learning_rate": 4.176967577731725e-05, + "loss": 1.232, + "step": 3399500 + }, + { + "epoch": 2.04, + "learning_rate": 4.176757581175669e-05, + "loss": 1.2407, + "step": 3400000 + }, + { + "epoch": 2.04, + "eval_loss": 1.1995346546173096, + "eval_runtime": 1104.2629, + "eval_samples_per_second": 476.988, + "eval_steps_per_second": 79.498, + "step": 3400000 + }, + { + "epoch": 2.04, + "learning_rate": 4.176547584619612e-05, + "loss": 1.2394, + "step": 3400500 + }, + { + "epoch": 2.04, + "learning_rate": 4.1763375880635556e-05, + "loss": 1.2466, + "step": 3401000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1761275915074997e-05, + "loss": 1.2169, + "step": 3401500 + }, + { + "epoch": 2.04, + "learning_rate": 4.175917594951443e-05, + "loss": 1.2255, + "step": 3402000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1757075983953863e-05, + "loss": 1.2384, + "step": 3402500 + }, + { + "epoch": 2.04, + "learning_rate": 4.175498441825554e-05, + "loss": 1.2456, + "step": 3403000 + }, + { + "epoch": 2.04, + "learning_rate": 4.17528886526261e-05, + "loss": 1.248, + "step": 3403500 + }, + { + "epoch": 2.04, + "learning_rate": 4.175078868706553e-05, + "loss": 1.2369, + "step": 3404000 + }, + { + "epoch": 2.04, + "learning_rate": 4.174868872150497e-05, + "loss": 1.2359, + "step": 3404500 + }, + { + "epoch": 2.04, + "learning_rate": 4.17465887559444e-05, + "loss": 1.2531, + "step": 3405000 + }, + { + "epoch": 2.04, + "learning_rate": 4.174448879038383e-05, + "loss": 1.2493, + "step": 3405500 + }, + { + "epoch": 2.04, + "learning_rate": 4.174238882482327e-05, + "loss": 1.2164, + "step": 3406000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1740288859262705e-05, + "loss": 1.2284, + "step": 3406500 + }, + { + "epoch": 2.04, + "learning_rate": 4.1738188893702145e-05, + "loss": 1.2662, + "step": 3407000 + }, + { + "epoch": 2.04, + "learning_rate": 4.173608892814158e-05, + "loss": 1.2377, + "step": 3407500 + }, + { + "epoch": 2.04, + "learning_rate": 4.173398896258101e-05, + "loss": 1.223, + "step": 3408000 + }, + { + "epoch": 2.04, + "learning_rate": 4.173188899702045e-05, + "loss": 1.2476, + "step": 3408500 + }, + { + "epoch": 2.04, + "learning_rate": 4.1729789031459886e-05, + "loss": 1.2279, + "step": 3409000 + }, + { + "epoch": 2.04, + "learning_rate": 4.172768906589932e-05, + "loss": 1.2808, + "step": 3409500 + }, + { + "epoch": 2.04, + "learning_rate": 4.172558910033876e-05, + "loss": 1.2192, + "step": 3410000 + }, + { + "epoch": 2.04, + "learning_rate": 4.172349333470931e-05, + "loss": 1.2598, + "step": 3410500 + }, + { + "epoch": 2.05, + "learning_rate": 4.172139336914875e-05, + "loss": 1.2182, + "step": 3411000 + }, + { + "epoch": 2.05, + "learning_rate": 4.17192976035193e-05, + "loss": 1.2103, + "step": 3411500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1717197637958734e-05, + "loss": 1.2197, + "step": 3412000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1715097672398174e-05, + "loss": 1.2106, + "step": 3412500 + }, + { + "epoch": 2.05, + "learning_rate": 4.171299770683761e-05, + "loss": 1.2403, + "step": 3413000 + }, + { + "epoch": 2.05, + "learning_rate": 4.171090194120816e-05, + "loss": 1.2443, + "step": 3413500 + }, + { + "epoch": 2.05, + "learning_rate": 4.17088019756476e-05, + "loss": 1.1998, + "step": 3414000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1706706210018155e-05, + "loss": 1.231, + "step": 3414500 + }, + { + "epoch": 2.05, + "learning_rate": 4.170460624445759e-05, + "loss": 1.2469, + "step": 3415000 + }, + { + "epoch": 2.05, + "learning_rate": 4.170250627889702e-05, + "loss": 1.2463, + "step": 3415500 + }, + { + "epoch": 2.05, + "learning_rate": 4.170040631333646e-05, + "loss": 1.2177, + "step": 3416000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1698306347775895e-05, + "loss": 1.2238, + "step": 3416500 + }, + { + "epoch": 2.05, + "learning_rate": 4.169620638221533e-05, + "loss": 1.2112, + "step": 3417000 + }, + { + "epoch": 2.05, + "learning_rate": 4.169410641665477e-05, + "loss": 1.2427, + "step": 3417500 + }, + { + "epoch": 2.05, + "learning_rate": 4.16920064510942e-05, + "loss": 1.2545, + "step": 3418000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1689906485533636e-05, + "loss": 1.2304, + "step": 3418500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1687806519973076e-05, + "loss": 1.2084, + "step": 3419000 + }, + { + "epoch": 2.05, + "learning_rate": 4.168570655441251e-05, + "loss": 1.2293, + "step": 3419500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1683606588851936e-05, + "loss": 1.222, + "step": 3420000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1681506623291377e-05, + "loss": 1.2257, + "step": 3420500 + }, + { + "epoch": 2.05, + "learning_rate": 4.167940665773081e-05, + "loss": 1.1888, + "step": 3421000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1677306692170243e-05, + "loss": 1.2141, + "step": 3421500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1675210926540804e-05, + "loss": 1.2359, + "step": 3422000 + }, + { + "epoch": 2.05, + "learning_rate": 4.167311096098024e-05, + "loss": 1.2536, + "step": 3422500 + }, + { + "epoch": 2.05, + "learning_rate": 4.167101099541967e-05, + "loss": 1.2324, + "step": 3423000 + }, + { + "epoch": 2.05, + "learning_rate": 4.166891102985911e-05, + "loss": 1.222, + "step": 3423500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1666811064298544e-05, + "loss": 1.2551, + "step": 3424000 + }, + { + "epoch": 2.05, + "learning_rate": 4.166471109873798e-05, + "loss": 1.2209, + "step": 3424500 + }, + { + "epoch": 2.05, + "learning_rate": 4.166261113317742e-05, + "loss": 1.2397, + "step": 3425000 + }, + { + "epoch": 2.05, + "learning_rate": 4.166051956747909e-05, + "loss": 1.2731, + "step": 3425500 + }, + { + "epoch": 2.05, + "learning_rate": 4.165841960191853e-05, + "loss": 1.2385, + "step": 3426000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1656319636357966e-05, + "loss": 1.242, + "step": 3426500 + }, + { + "epoch": 2.05, + "learning_rate": 4.16542196707974e-05, + "loss": 1.2372, + "step": 3427000 + }, + { + "epoch": 2.05, + "learning_rate": 4.165211970523683e-05, + "loss": 1.1994, + "step": 3427500 + }, + { + "epoch": 2.06, + "learning_rate": 4.1650019739676266e-05, + "loss": 1.2346, + "step": 3428000 + }, + { + "epoch": 2.06, + "learning_rate": 4.16479197741157e-05, + "loss": 1.2372, + "step": 3428500 + }, + { + "epoch": 2.06, + "learning_rate": 4.164581980855514e-05, + "loss": 1.2293, + "step": 3429000 + }, + { + "epoch": 2.06, + "learning_rate": 4.164371984299457e-05, + "loss": 1.2457, + "step": 3429500 + }, + { + "epoch": 2.06, + "learning_rate": 4.1641619877434007e-05, + "loss": 1.2465, + "step": 3430000 + }, + { + "epoch": 2.06, + "learning_rate": 4.163951991187345e-05, + "loss": 1.2104, + "step": 3430500 + }, + { + "epoch": 2.06, + "learning_rate": 4.163741994631288e-05, + "loss": 1.2047, + "step": 3431000 + }, + { + "epoch": 2.06, + "learning_rate": 4.1635324180683434e-05, + "loss": 1.2, + "step": 3431500 + }, + { + "epoch": 2.06, + "learning_rate": 4.1633224215122874e-05, + "loss": 1.2296, + "step": 3432000 + }, + { + "epoch": 2.06, + "learning_rate": 4.163112424956231e-05, + "loss": 1.2321, + "step": 3432500 + }, + { + "epoch": 2.06, + "learning_rate": 4.162902428400174e-05, + "loss": 1.2377, + "step": 3433000 + }, + { + "epoch": 2.06, + "learning_rate": 4.162692431844118e-05, + "loss": 1.228, + "step": 3433500 + }, + { + "epoch": 2.06, + "learning_rate": 4.1624828552811735e-05, + "loss": 1.2278, + "step": 3434000 + }, + { + "epoch": 2.06, + "learning_rate": 4.162272858725117e-05, + "loss": 1.2536, + "step": 3434500 + }, + { + "epoch": 2.06, + "learning_rate": 4.16206286216906e-05, + "loss": 1.2067, + "step": 3435000 + }, + { + "epoch": 2.06, + "learning_rate": 4.161852865613004e-05, + "loss": 1.2149, + "step": 3435500 + }, + { + "epoch": 2.06, + "learning_rate": 4.1616428690569475e-05, + "loss": 1.2283, + "step": 3436000 + }, + { + "epoch": 2.06, + "learning_rate": 4.161433292494003e-05, + "loss": 1.2661, + "step": 3436500 + }, + { + "epoch": 2.06, + "learning_rate": 4.161223295937946e-05, + "loss": 1.2302, + "step": 3437000 + }, + { + "epoch": 2.06, + "learning_rate": 4.16101329938189e-05, + "loss": 1.2363, + "step": 3437500 + }, + { + "epoch": 2.06, + "learning_rate": 4.1608033028258336e-05, + "loss": 1.2443, + "step": 3438000 + }, + { + "epoch": 2.06, + "learning_rate": 4.160593306269777e-05, + "loss": 1.2265, + "step": 3438500 + }, + { + "epoch": 2.06, + "learning_rate": 4.160383729706833e-05, + "loss": 1.2134, + "step": 3439000 + }, + { + "epoch": 2.06, + "learning_rate": 4.160173733150776e-05, + "loss": 1.2167, + "step": 3439500 + }, + { + "epoch": 2.06, + "learning_rate": 4.15996373659472e-05, + "loss": 1.2021, + "step": 3440000 + }, + { + "epoch": 2.06, + "learning_rate": 4.159753740038664e-05, + "loss": 1.2388, + "step": 3440500 + }, + { + "epoch": 2.06, + "learning_rate": 4.159544163475719e-05, + "loss": 1.2328, + "step": 3441000 + }, + { + "epoch": 2.06, + "learning_rate": 4.1593341669196624e-05, + "loss": 1.2251, + "step": 3441500 + }, + { + "epoch": 2.06, + "learning_rate": 4.159124170363606e-05, + "loss": 1.2456, + "step": 3442000 + }, + { + "epoch": 2.06, + "learning_rate": 4.15891417380755e-05, + "loss": 1.2272, + "step": 3442500 + }, + { + "epoch": 2.06, + "learning_rate": 4.158704597244605e-05, + "loss": 1.1844, + "step": 3443000 + }, + { + "epoch": 2.06, + "learning_rate": 4.1584946006885485e-05, + "loss": 1.2297, + "step": 3443500 + }, + { + "epoch": 2.06, + "learning_rate": 4.158284604132492e-05, + "loss": 1.2249, + "step": 3444000 + }, + { + "epoch": 2.07, + "learning_rate": 4.158075027569548e-05, + "loss": 1.254, + "step": 3444500 + }, + { + "epoch": 2.07, + "learning_rate": 4.157865031013491e-05, + "loss": 1.2145, + "step": 3445000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1576550344574346e-05, + "loss": 1.2321, + "step": 3445500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1574450379013786e-05, + "loss": 1.2322, + "step": 3446000 + }, + { + "epoch": 2.07, + "learning_rate": 4.157235041345322e-05, + "loss": 1.2327, + "step": 3446500 + }, + { + "epoch": 2.07, + "learning_rate": 4.157025044789265e-05, + "loss": 1.2095, + "step": 3447000 + }, + { + "epoch": 2.07, + "learning_rate": 4.156815048233209e-05, + "loss": 1.2214, + "step": 3447500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1566050516771526e-05, + "loss": 1.262, + "step": 3448000 + }, + { + "epoch": 2.07, + "learning_rate": 4.156395475114208e-05, + "loss": 1.2392, + "step": 3448500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1561854785581513e-05, + "loss": 1.1848, + "step": 3449000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1559754820020954e-05, + "loss": 1.2267, + "step": 3449500 + }, + { + "epoch": 2.07, + "learning_rate": 4.155765485446039e-05, + "loss": 1.2404, + "step": 3450000 + }, + { + "epoch": 2.07, + "learning_rate": 4.155555488889982e-05, + "loss": 1.221, + "step": 3450500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1553459123270374e-05, + "loss": 1.2428, + "step": 3451000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1551359157709814e-05, + "loss": 1.2228, + "step": 3451500 + }, + { + "epoch": 2.07, + "learning_rate": 4.154925919214925e-05, + "loss": 1.2029, + "step": 3452000 + }, + { + "epoch": 2.07, + "learning_rate": 4.154715922658868e-05, + "loss": 1.2183, + "step": 3452500 + }, + { + "epoch": 2.07, + "learning_rate": 4.154505926102812e-05, + "loss": 1.2054, + "step": 3453000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1542959295467555e-05, + "loss": 1.2406, + "step": 3453500 + }, + { + "epoch": 2.07, + "learning_rate": 4.154086352983811e-05, + "loss": 1.2163, + "step": 3454000 + }, + { + "epoch": 2.07, + "learning_rate": 4.153876356427755e-05, + "loss": 1.2391, + "step": 3454500 + }, + { + "epoch": 2.07, + "learning_rate": 4.153666359871698e-05, + "loss": 1.2256, + "step": 3455000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1534563633156416e-05, + "loss": 1.2156, + "step": 3455500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1532463667595856e-05, + "loss": 1.2426, + "step": 3456000 + }, + { + "epoch": 2.07, + "learning_rate": 4.153036370203528e-05, + "loss": 1.2507, + "step": 3456500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1528263736474716e-05, + "loss": 1.2142, + "step": 3457000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1526163770914156e-05, + "loss": 1.2403, + "step": 3457500 + }, + { + "epoch": 2.07, + "learning_rate": 4.152406800528472e-05, + "loss": 1.2124, + "step": 3458000 + }, + { + "epoch": 2.07, + "learning_rate": 4.152196803972415e-05, + "loss": 1.2475, + "step": 3458500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1519872274094704e-05, + "loss": 1.2159, + "step": 3459000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1517772308534144e-05, + "loss": 1.2058, + "step": 3459500 + }, + { + "epoch": 2.07, + "learning_rate": 4.151567234297358e-05, + "loss": 1.2364, + "step": 3460000 + }, + { + "epoch": 2.07, + "learning_rate": 4.151357237741301e-05, + "loss": 1.1895, + "step": 3460500 + }, + { + "epoch": 2.08, + "learning_rate": 4.151147241185245e-05, + "loss": 1.2301, + "step": 3461000 + }, + { + "epoch": 2.08, + "learning_rate": 4.150937244629188e-05, + "loss": 1.2055, + "step": 3461500 + }, + { + "epoch": 2.08, + "learning_rate": 4.150727248073131e-05, + "loss": 1.2452, + "step": 3462000 + }, + { + "epoch": 2.08, + "learning_rate": 4.150517251517075e-05, + "loss": 1.2129, + "step": 3462500 + }, + { + "epoch": 2.08, + "learning_rate": 4.1503080949472425e-05, + "loss": 1.2424, + "step": 3463000 + }, + { + "epoch": 2.08, + "learning_rate": 4.1500980983911865e-05, + "loss": 1.2232, + "step": 3463500 + }, + { + "epoch": 2.08, + "learning_rate": 4.14988810183513e-05, + "loss": 1.2265, + "step": 3464000 + }, + { + "epoch": 2.08, + "learning_rate": 4.149678105279073e-05, + "loss": 1.2374, + "step": 3464500 + }, + { + "epoch": 2.08, + "learning_rate": 4.149468948709241e-05, + "loss": 1.2224, + "step": 3465000 + }, + { + "epoch": 2.08, + "learning_rate": 4.1492589521531846e-05, + "loss": 1.2308, + "step": 3465500 + }, + { + "epoch": 2.08, + "learning_rate": 4.149048955597128e-05, + "loss": 1.2467, + "step": 3466000 + }, + { + "epoch": 2.08, + "learning_rate": 4.148838959041071e-05, + "loss": 1.2269, + "step": 3466500 + }, + { + "epoch": 2.08, + "learning_rate": 4.1486289624850153e-05, + "loss": 1.2319, + "step": 3467000 + }, + { + "epoch": 2.08, + "learning_rate": 4.148418965928959e-05, + "loss": 1.222, + "step": 3467500 + }, + { + "epoch": 2.08, + "learning_rate": 4.148208969372902e-05, + "loss": 1.231, + "step": 3468000 + }, + { + "epoch": 2.08, + "learning_rate": 4.147998972816846e-05, + "loss": 1.2495, + "step": 3468500 + }, + { + "epoch": 2.08, + "learning_rate": 4.1477889762607894e-05, + "loss": 1.2213, + "step": 3469000 + }, + { + "epoch": 2.08, + "learning_rate": 4.147578979704733e-05, + "loss": 1.235, + "step": 3469500 + }, + { + "epoch": 2.08, + "learning_rate": 4.147368983148677e-05, + "loss": 1.2433, + "step": 3470000 + }, + { + "epoch": 2.08, + "learning_rate": 4.14715898659262e-05, + "loss": 1.2384, + "step": 3470500 + }, + { + "epoch": 2.08, + "learning_rate": 4.146948990036563e-05, + "loss": 1.2304, + "step": 3471000 + }, + { + "epoch": 2.08, + "learning_rate": 4.146738993480507e-05, + "loss": 1.2309, + "step": 3471500 + }, + { + "epoch": 2.08, + "learning_rate": 4.146529416917563e-05, + "loss": 1.2203, + "step": 3472000 + }, + { + "epoch": 2.08, + "learning_rate": 4.146319420361506e-05, + "loss": 1.2341, + "step": 3472500 + }, + { + "epoch": 2.08, + "learning_rate": 4.1461094238054495e-05, + "loss": 1.2289, + "step": 3473000 + }, + { + "epoch": 2.08, + "learning_rate": 4.145899427249393e-05, + "loss": 1.2271, + "step": 3473500 + }, + { + "epoch": 2.08, + "learning_rate": 4.145689430693336e-05, + "loss": 1.2185, + "step": 3474000 + }, + { + "epoch": 2.08, + "learning_rate": 4.14547943413728e-05, + "loss": 1.2475, + "step": 3474500 + }, + { + "epoch": 2.08, + "learning_rate": 4.1452694375812236e-05, + "loss": 1.2306, + "step": 3475000 + }, + { + "epoch": 2.08, + "learning_rate": 4.145059441025167e-05, + "loss": 1.2225, + "step": 3475500 + }, + { + "epoch": 2.08, + "learning_rate": 4.144849864462222e-05, + "loss": 1.2461, + "step": 3476000 + }, + { + "epoch": 2.08, + "learning_rate": 4.144639867906166e-05, + "loss": 1.244, + "step": 3476500 + }, + { + "epoch": 2.08, + "learning_rate": 4.1444302913432224e-05, + "loss": 1.2447, + "step": 3477000 + }, + { + "epoch": 2.08, + "learning_rate": 4.144220294787166e-05, + "loss": 1.2357, + "step": 3477500 + }, + { + "epoch": 2.09, + "learning_rate": 4.1440102982311084e-05, + "loss": 1.2141, + "step": 3478000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1438003016750524e-05, + "loss": 1.2331, + "step": 3478500 + }, + { + "epoch": 2.09, + "learning_rate": 4.143590305118996e-05, + "loss": 1.2179, + "step": 3479000 + }, + { + "epoch": 2.09, + "learning_rate": 4.143380728556052e-05, + "loss": 1.234, + "step": 3479500 + }, + { + "epoch": 2.09, + "learning_rate": 4.143170731999995e-05, + "loss": 1.2484, + "step": 3480000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1429607354439385e-05, + "loss": 1.2231, + "step": 3480500 + }, + { + "epoch": 2.09, + "learning_rate": 4.142750738887882e-05, + "loss": 1.228, + "step": 3481000 + }, + { + "epoch": 2.09, + "learning_rate": 4.142540742331826e-05, + "loss": 1.2168, + "step": 3481500 + }, + { + "epoch": 2.09, + "learning_rate": 4.142330745775769e-05, + "loss": 1.2402, + "step": 3482000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1421207492197125e-05, + "loss": 1.2272, + "step": 3482500 + }, + { + "epoch": 2.09, + "learning_rate": 4.1419107526636566e-05, + "loss": 1.2321, + "step": 3483000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1417007561076e-05, + "loss": 1.2245, + "step": 3483500 + }, + { + "epoch": 2.09, + "learning_rate": 4.141490759551543e-05, + "loss": 1.227, + "step": 3484000 + }, + { + "epoch": 2.09, + "learning_rate": 4.141280762995487e-05, + "loss": 1.244, + "step": 3484500 + }, + { + "epoch": 2.09, + "learning_rate": 4.1410707664394306e-05, + "loss": 1.2173, + "step": 3485000 + }, + { + "epoch": 2.09, + "learning_rate": 4.140861189876486e-05, + "loss": 1.2061, + "step": 3485500 + }, + { + "epoch": 2.09, + "learning_rate": 4.140651613313541e-05, + "loss": 1.235, + "step": 3486000 + }, + { + "epoch": 2.09, + "learning_rate": 4.140441616757485e-05, + "loss": 1.2211, + "step": 3486500 + }, + { + "epoch": 2.09, + "learning_rate": 4.140231620201429e-05, + "loss": 1.2069, + "step": 3487000 + }, + { + "epoch": 2.09, + "learning_rate": 4.140021623645372e-05, + "loss": 1.2444, + "step": 3487500 + }, + { + "epoch": 2.09, + "learning_rate": 4.1398120470824274e-05, + "loss": 1.2429, + "step": 3488000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1396020505263714e-05, + "loss": 1.2227, + "step": 3488500 + }, + { + "epoch": 2.09, + "learning_rate": 4.139392053970315e-05, + "loss": 1.2269, + "step": 3489000 + }, + { + "epoch": 2.09, + "learning_rate": 4.139182057414258e-05, + "loss": 1.2472, + "step": 3489500 + }, + { + "epoch": 2.09, + "learning_rate": 4.138972060858202e-05, + "loss": 1.2061, + "step": 3490000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1387620643021455e-05, + "loss": 1.24, + "step": 3490500 + }, + { + "epoch": 2.09, + "learning_rate": 4.138552487739201e-05, + "loss": 1.2378, + "step": 3491000 + }, + { + "epoch": 2.09, + "learning_rate": 4.138342491183144e-05, + "loss": 1.2219, + "step": 3491500 + }, + { + "epoch": 2.09, + "learning_rate": 4.138132494627088e-05, + "loss": 1.2121, + "step": 3492000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1379224980710316e-05, + "loss": 1.2498, + "step": 3492500 + }, + { + "epoch": 2.09, + "learning_rate": 4.137712501514975e-05, + "loss": 1.2114, + "step": 3493000 + }, + { + "epoch": 2.09, + "learning_rate": 4.137502504958919e-05, + "loss": 1.1985, + "step": 3493500 + }, + { + "epoch": 2.09, + "learning_rate": 4.137293348389086e-05, + "loss": 1.2494, + "step": 3494000 + }, + { + "epoch": 2.1, + "learning_rate": 4.13708335183303e-05, + "loss": 1.2352, + "step": 3494500 + }, + { + "epoch": 2.1, + "learning_rate": 4.136873355276973e-05, + "loss": 1.2027, + "step": 3495000 + }, + { + "epoch": 2.1, + "learning_rate": 4.136663358720917e-05, + "loss": 1.2658, + "step": 3495500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1364533621648604e-05, + "loss": 1.2395, + "step": 3496000 + }, + { + "epoch": 2.1, + "learning_rate": 4.136243365608804e-05, + "loss": 1.2204, + "step": 3496500 + }, + { + "epoch": 2.1, + "learning_rate": 4.136033369052748e-05, + "loss": 1.2042, + "step": 3497000 + }, + { + "epoch": 2.1, + "learning_rate": 4.135823372496691e-05, + "loss": 1.2156, + "step": 3497500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1356133759406344e-05, + "loss": 1.2516, + "step": 3498000 + }, + { + "epoch": 2.1, + "learning_rate": 4.13540379937769e-05, + "loss": 1.2226, + "step": 3498500 + }, + { + "epoch": 2.1, + "learning_rate": 4.135193802821634e-05, + "loss": 1.2301, + "step": 3499000 + }, + { + "epoch": 2.1, + "learning_rate": 4.134983806265577e-05, + "loss": 1.2171, + "step": 3499500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1347738097095205e-05, + "loss": 1.2252, + "step": 3500000 + }, + { + "epoch": 2.1, + "eval_loss": 1.1996427774429321, + "eval_runtime": 1100.4525, + "eval_samples_per_second": 478.639, + "eval_steps_per_second": 79.774, + "step": 3500000 + }, + { + "epoch": 2.1, + "learning_rate": 4.1345638131534645e-05, + "loss": 1.248, + "step": 3500500 + }, + { + "epoch": 2.1, + "learning_rate": 4.13435423659052e-05, + "loss": 1.235, + "step": 3501000 + }, + { + "epoch": 2.1, + "learning_rate": 4.134144240034463e-05, + "loss": 1.2172, + "step": 3501500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1339342434784066e-05, + "loss": 1.2192, + "step": 3502000 + }, + { + "epoch": 2.1, + "learning_rate": 4.1337242469223506e-05, + "loss": 1.2215, + "step": 3502500 + }, + { + "epoch": 2.1, + "learning_rate": 4.133514250366294e-05, + "loss": 1.2154, + "step": 3503000 + }, + { + "epoch": 2.1, + "learning_rate": 4.133304253810237e-05, + "loss": 1.255, + "step": 3503500 + }, + { + "epoch": 2.1, + "learning_rate": 4.133094677247293e-05, + "loss": 1.2364, + "step": 3504000 + }, + { + "epoch": 2.1, + "learning_rate": 4.132884680691237e-05, + "loss": 1.2171, + "step": 3504500 + }, + { + "epoch": 2.1, + "learning_rate": 4.13267468413518e-05, + "loss": 1.2397, + "step": 3505000 + }, + { + "epoch": 2.1, + "learning_rate": 4.132464687579124e-05, + "loss": 1.226, + "step": 3505500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1322546910230674e-05, + "loss": 1.2268, + "step": 3506000 + }, + { + "epoch": 2.1, + "learning_rate": 4.132045114460123e-05, + "loss": 1.2206, + "step": 3506500 + }, + { + "epoch": 2.1, + "learning_rate": 4.131835537897178e-05, + "loss": 1.2344, + "step": 3507000 + }, + { + "epoch": 2.1, + "learning_rate": 4.1316255413411214e-05, + "loss": 1.2253, + "step": 3507500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1314155447850655e-05, + "loss": 1.2526, + "step": 3508000 + }, + { + "epoch": 2.1, + "learning_rate": 4.131205548229009e-05, + "loss": 1.2111, + "step": 3508500 + }, + { + "epoch": 2.1, + "learning_rate": 4.130995551672952e-05, + "loss": 1.2455, + "step": 3509000 + }, + { + "epoch": 2.1, + "learning_rate": 4.130785555116896e-05, + "loss": 1.2818, + "step": 3509500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1305755585608395e-05, + "loss": 1.2294, + "step": 3510000 + }, + { + "epoch": 2.1, + "learning_rate": 4.130365562004783e-05, + "loss": 1.1845, + "step": 3510500 + }, + { + "epoch": 2.1, + "learning_rate": 4.130155565448727e-05, + "loss": 1.2215, + "step": 3511000 + }, + { + "epoch": 2.11, + "learning_rate": 4.12994556889267e-05, + "loss": 1.2234, + "step": 3511500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1297355723366136e-05, + "loss": 1.2079, + "step": 3512000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1295259957736696e-05, + "loss": 1.2202, + "step": 3512500 + }, + { + "epoch": 2.11, + "learning_rate": 4.129315999217613e-05, + "loss": 1.2175, + "step": 3513000 + }, + { + "epoch": 2.11, + "learning_rate": 4.129106002661556e-05, + "loss": 1.2392, + "step": 3513500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1288960061055e-05, + "loss": 1.2231, + "step": 3514000 + }, + { + "epoch": 2.11, + "learning_rate": 4.128686009549443e-05, + "loss": 1.2203, + "step": 3514500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1284760129933864e-05, + "loss": 1.2551, + "step": 3515000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1282660164373304e-05, + "loss": 1.2142, + "step": 3515500 + }, + { + "epoch": 2.11, + "learning_rate": 4.128056019881274e-05, + "loss": 1.2432, + "step": 3516000 + }, + { + "epoch": 2.11, + "learning_rate": 4.127846863311442e-05, + "loss": 1.2502, + "step": 3516500 + }, + { + "epoch": 2.11, + "learning_rate": 4.127636866755385e-05, + "loss": 1.2369, + "step": 3517000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1274268701993285e-05, + "loss": 1.2499, + "step": 3517500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1272168736432725e-05, + "loss": 1.2356, + "step": 3518000 + }, + { + "epoch": 2.11, + "learning_rate": 4.127006877087216e-05, + "loss": 1.2287, + "step": 3518500 + }, + { + "epoch": 2.11, + "learning_rate": 4.12679688053116e-05, + "loss": 1.2071, + "step": 3519000 + }, + { + "epoch": 2.11, + "learning_rate": 4.126587303968215e-05, + "loss": 1.2358, + "step": 3519500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1263773074121586e-05, + "loss": 1.2269, + "step": 3520000 + }, + { + "epoch": 2.11, + "learning_rate": 4.126167310856102e-05, + "loss": 1.2406, + "step": 3520500 + }, + { + "epoch": 2.11, + "learning_rate": 4.125957314300046e-05, + "loss": 1.2383, + "step": 3521000 + }, + { + "epoch": 2.11, + "learning_rate": 4.125747317743989e-05, + "loss": 1.2097, + "step": 3521500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1255377411810446e-05, + "loss": 1.2258, + "step": 3522000 + }, + { + "epoch": 2.11, + "learning_rate": 4.125327744624988e-05, + "loss": 1.2323, + "step": 3522500 + }, + { + "epoch": 2.11, + "learning_rate": 4.125117748068932e-05, + "loss": 1.2419, + "step": 3523000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1249077515128753e-05, + "loss": 1.2142, + "step": 3523500 + }, + { + "epoch": 2.11, + "learning_rate": 4.124698174949931e-05, + "loss": 1.2518, + "step": 3524000 + }, + { + "epoch": 2.11, + "learning_rate": 4.124488178393875e-05, + "loss": 1.2314, + "step": 3524500 + }, + { + "epoch": 2.11, + "learning_rate": 4.124278181837818e-05, + "loss": 1.2163, + "step": 3525000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1240681852817614e-05, + "loss": 1.2036, + "step": 3525500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1238581887257054e-05, + "loss": 1.2061, + "step": 3526000 + }, + { + "epoch": 2.11, + "learning_rate": 4.123648192169648e-05, + "loss": 1.24, + "step": 3526500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1234381956135915e-05, + "loss": 1.2288, + "step": 3527000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1232281990575355e-05, + "loss": 1.2306, + "step": 3527500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1230186224945915e-05, + "loss": 1.2451, + "step": 3528000 + }, + { + "epoch": 2.12, + "learning_rate": 4.122809045931647e-05, + "loss": 1.2354, + "step": 3528500 + }, + { + "epoch": 2.12, + "learning_rate": 4.12259904937559e-05, + "loss": 1.2141, + "step": 3529000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1223890528195336e-05, + "loss": 1.233, + "step": 3529500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1221790562634776e-05, + "loss": 1.2152, + "step": 3530000 + }, + { + "epoch": 2.12, + "learning_rate": 4.121969059707421e-05, + "loss": 1.2528, + "step": 3530500 + }, + { + "epoch": 2.12, + "learning_rate": 4.121759063151364e-05, + "loss": 1.2356, + "step": 3531000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1215490665953076e-05, + "loss": 1.2097, + "step": 3531500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1213394900323637e-05, + "loss": 1.2307, + "step": 3532000 + }, + { + "epoch": 2.12, + "learning_rate": 4.121129493476307e-05, + "loss": 1.2151, + "step": 3532500 + }, + { + "epoch": 2.12, + "learning_rate": 4.120919496920251e-05, + "loss": 1.2007, + "step": 3533000 + }, + { + "epoch": 2.12, + "learning_rate": 4.120709500364194e-05, + "loss": 1.2277, + "step": 3533500 + }, + { + "epoch": 2.12, + "learning_rate": 4.12049992380125e-05, + "loss": 1.225, + "step": 3534000 + }, + { + "epoch": 2.12, + "learning_rate": 4.120289927245193e-05, + "loss": 1.2351, + "step": 3534500 + }, + { + "epoch": 2.12, + "learning_rate": 4.120079930689137e-05, + "loss": 1.2407, + "step": 3535000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1198699341330804e-05, + "loss": 1.2415, + "step": 3535500 + }, + { + "epoch": 2.12, + "learning_rate": 4.119659937577023e-05, + "loss": 1.2279, + "step": 3536000 + }, + { + "epoch": 2.12, + "learning_rate": 4.119449941020967e-05, + "loss": 1.2467, + "step": 3536500 + }, + { + "epoch": 2.12, + "learning_rate": 4.119240364458023e-05, + "loss": 1.2529, + "step": 3537000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1190303679019665e-05, + "loss": 1.2579, + "step": 3537500 + }, + { + "epoch": 2.12, + "learning_rate": 4.11882037134591e-05, + "loss": 1.243, + "step": 3538000 + }, + { + "epoch": 2.12, + "learning_rate": 4.118610374789853e-05, + "loss": 1.2212, + "step": 3538500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1184003782337966e-05, + "loss": 1.2285, + "step": 3539000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1181903816777406e-05, + "loss": 1.2538, + "step": 3539500 + }, + { + "epoch": 2.12, + "learning_rate": 4.117980385121684e-05, + "loss": 1.2445, + "step": 3540000 + }, + { + "epoch": 2.12, + "learning_rate": 4.11777080855874e-05, + "loss": 1.2141, + "step": 3540500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1175608120026826e-05, + "loss": 1.2616, + "step": 3541000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1173508154466267e-05, + "loss": 1.2623, + "step": 3541500 + }, + { + "epoch": 2.12, + "learning_rate": 4.11714081889057e-05, + "loss": 1.221, + "step": 3542000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1169308223345133e-05, + "loss": 1.241, + "step": 3542500 + }, + { + "epoch": 2.12, + "learning_rate": 4.116721245771569e-05, + "loss": 1.2284, + "step": 3543000 + }, + { + "epoch": 2.12, + "learning_rate": 4.116511249215513e-05, + "loss": 1.2147, + "step": 3543500 + }, + { + "epoch": 2.12, + "learning_rate": 4.116301252659456e-05, + "loss": 1.2416, + "step": 3544000 + }, + { + "epoch": 2.13, + "learning_rate": 4.1160912561033994e-05, + "loss": 1.2338, + "step": 3544500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1158812595473434e-05, + "loss": 1.2398, + "step": 3545000 + }, + { + "epoch": 2.13, + "learning_rate": 4.115671682984399e-05, + "loss": 1.2359, + "step": 3545500 + }, + { + "epoch": 2.13, + "learning_rate": 4.115461686428342e-05, + "loss": 1.2084, + "step": 3546000 + }, + { + "epoch": 2.13, + "learning_rate": 4.115251689872286e-05, + "loss": 1.2191, + "step": 3546500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1150416933162295e-05, + "loss": 1.2479, + "step": 3547000 + }, + { + "epoch": 2.13, + "learning_rate": 4.114831696760173e-05, + "loss": 1.2123, + "step": 3547500 + }, + { + "epoch": 2.13, + "learning_rate": 4.114621700204117e-05, + "loss": 1.2314, + "step": 3548000 + }, + { + "epoch": 2.13, + "learning_rate": 4.114412123641172e-05, + "loss": 1.2204, + "step": 3548500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1142021270851156e-05, + "loss": 1.2198, + "step": 3549000 + }, + { + "epoch": 2.13, + "learning_rate": 4.113992130529059e-05, + "loss": 1.21, + "step": 3549500 + }, + { + "epoch": 2.13, + "learning_rate": 4.113782133973003e-05, + "loss": 1.2273, + "step": 3550000 + }, + { + "epoch": 2.13, + "learning_rate": 4.113572137416946e-05, + "loss": 1.2323, + "step": 3550500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1133625608540017e-05, + "loss": 1.2359, + "step": 3551000 + }, + { + "epoch": 2.13, + "learning_rate": 4.113152564297945e-05, + "loss": 1.2505, + "step": 3551500 + }, + { + "epoch": 2.13, + "learning_rate": 4.112942987735001e-05, + "loss": 1.2459, + "step": 3552000 + }, + { + "epoch": 2.13, + "learning_rate": 4.112732991178945e-05, + "loss": 1.2046, + "step": 3552500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1125234146160004e-05, + "loss": 1.2202, + "step": 3553000 + }, + { + "epoch": 2.13, + "learning_rate": 4.112313418059944e-05, + "loss": 1.2222, + "step": 3553500 + }, + { + "epoch": 2.13, + "learning_rate": 4.112103421503888e-05, + "loss": 1.251, + "step": 3554000 + }, + { + "epoch": 2.13, + "learning_rate": 4.111893424947831e-05, + "loss": 1.2437, + "step": 3554500 + }, + { + "epoch": 2.13, + "learning_rate": 4.111683428391774e-05, + "loss": 1.2565, + "step": 3555000 + }, + { + "epoch": 2.13, + "learning_rate": 4.111473431835718e-05, + "loss": 1.2552, + "step": 3555500 + }, + { + "epoch": 2.13, + "learning_rate": 4.111263435279661e-05, + "loss": 1.2169, + "step": 3556000 + }, + { + "epoch": 2.13, + "learning_rate": 4.1110534387236045e-05, + "loss": 1.2241, + "step": 3556500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1108434421675485e-05, + "loss": 1.239, + "step": 3557000 + }, + { + "epoch": 2.13, + "learning_rate": 4.110633445611492e-05, + "loss": 1.2272, + "step": 3557500 + }, + { + "epoch": 2.13, + "learning_rate": 4.110423449055435e-05, + "loss": 1.2569, + "step": 3558000 + }, + { + "epoch": 2.13, + "learning_rate": 4.110213452499379e-05, + "loss": 1.2195, + "step": 3558500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1100034559433226e-05, + "loss": 1.2572, + "step": 3559000 + }, + { + "epoch": 2.13, + "learning_rate": 4.109793879380378e-05, + "loss": 1.2303, + "step": 3559500 + }, + { + "epoch": 2.13, + "learning_rate": 4.109583882824321e-05, + "loss": 1.2465, + "step": 3560000 + }, + { + "epoch": 2.13, + "learning_rate": 4.109373886268265e-05, + "loss": 1.2202, + "step": 3560500 + }, + { + "epoch": 2.13, + "learning_rate": 4.109163889712209e-05, + "loss": 1.239, + "step": 3561000 + }, + { + "epoch": 2.14, + "learning_rate": 4.108954313149264e-05, + "loss": 1.2675, + "step": 3561500 + }, + { + "epoch": 2.14, + "learning_rate": 4.108744316593208e-05, + "loss": 1.2567, + "step": 3562000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1085343200371514e-05, + "loss": 1.2044, + "step": 3562500 + }, + { + "epoch": 2.14, + "learning_rate": 4.108324323481095e-05, + "loss": 1.2437, + "step": 3563000 + }, + { + "epoch": 2.14, + "learning_rate": 4.108114326925039e-05, + "loss": 1.2077, + "step": 3563500 + }, + { + "epoch": 2.14, + "learning_rate": 4.107904330368982e-05, + "loss": 1.2339, + "step": 3564000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1076943338129255e-05, + "loss": 1.2318, + "step": 3564500 + }, + { + "epoch": 2.14, + "learning_rate": 4.1074843372568695e-05, + "loss": 1.2257, + "step": 3565000 + }, + { + "epoch": 2.14, + "learning_rate": 4.107275180687036e-05, + "loss": 1.2363, + "step": 3565500 + }, + { + "epoch": 2.14, + "learning_rate": 4.10706518413098e-05, + "loss": 1.234, + "step": 3566000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1068551875749236e-05, + "loss": 1.2525, + "step": 3566500 + }, + { + "epoch": 2.14, + "learning_rate": 4.106645191018867e-05, + "loss": 1.2372, + "step": 3567000 + }, + { + "epoch": 2.14, + "learning_rate": 4.106435194462811e-05, + "loss": 1.2331, + "step": 3567500 + }, + { + "epoch": 2.14, + "learning_rate": 4.106225197906754e-05, + "loss": 1.2273, + "step": 3568000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1060156213438096e-05, + "loss": 1.2108, + "step": 3568500 + }, + { + "epoch": 2.14, + "learning_rate": 4.1058056247877536e-05, + "loss": 1.2362, + "step": 3569000 + }, + { + "epoch": 2.14, + "learning_rate": 4.105596048224809e-05, + "loss": 1.2525, + "step": 3569500 + }, + { + "epoch": 2.14, + "learning_rate": 4.1053860516687524e-05, + "loss": 1.2313, + "step": 3570000 + }, + { + "epoch": 2.14, + "learning_rate": 4.105176055112696e-05, + "loss": 1.2501, + "step": 3570500 + }, + { + "epoch": 2.14, + "learning_rate": 4.10496605855664e-05, + "loss": 1.2583, + "step": 3571000 + }, + { + "epoch": 2.14, + "learning_rate": 4.104756062000583e-05, + "loss": 1.2126, + "step": 3571500 + }, + { + "epoch": 2.14, + "learning_rate": 4.1045460654445264e-05, + "loss": 1.2128, + "step": 3572000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1043360688884704e-05, + "loss": 1.2059, + "step": 3572500 + }, + { + "epoch": 2.14, + "learning_rate": 4.104126072332414e-05, + "loss": 1.1771, + "step": 3573000 + }, + { + "epoch": 2.14, + "learning_rate": 4.103916075776357e-05, + "loss": 1.2223, + "step": 3573500 + }, + { + "epoch": 2.14, + "learning_rate": 4.103706079220301e-05, + "loss": 1.2126, + "step": 3574000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1034960826642445e-05, + "loss": 1.2197, + "step": 3574500 + }, + { + "epoch": 2.14, + "learning_rate": 4.103286086108187e-05, + "loss": 1.229, + "step": 3575000 + }, + { + "epoch": 2.14, + "learning_rate": 4.103076509545243e-05, + "loss": 1.1865, + "step": 3575500 + }, + { + "epoch": 2.14, + "learning_rate": 4.102866512989187e-05, + "loss": 1.2268, + "step": 3576000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1026569364262426e-05, + "loss": 1.2483, + "step": 3576500 + }, + { + "epoch": 2.14, + "learning_rate": 4.102446939870186e-05, + "loss": 1.2043, + "step": 3577000 + }, + { + "epoch": 2.14, + "learning_rate": 4.10223694331413e-05, + "loss": 1.2436, + "step": 3577500 + }, + { + "epoch": 2.15, + "learning_rate": 4.102026946758073e-05, + "loss": 1.2041, + "step": 3578000 + }, + { + "epoch": 2.15, + "learning_rate": 4.1018173701951287e-05, + "loss": 1.1972, + "step": 3578500 + }, + { + "epoch": 2.15, + "learning_rate": 4.101607373639072e-05, + "loss": 1.2306, + "step": 3579000 + }, + { + "epoch": 2.15, + "learning_rate": 4.101397377083016e-05, + "loss": 1.2229, + "step": 3579500 + }, + { + "epoch": 2.15, + "learning_rate": 4.1011873805269594e-05, + "loss": 1.2086, + "step": 3580000 + }, + { + "epoch": 2.15, + "learning_rate": 4.100977803964015e-05, + "loss": 1.2452, + "step": 3580500 + }, + { + "epoch": 2.15, + "learning_rate": 4.100767807407958e-05, + "loss": 1.2577, + "step": 3581000 + }, + { + "epoch": 2.15, + "learning_rate": 4.100557810851902e-05, + "loss": 1.2387, + "step": 3581500 + }, + { + "epoch": 2.15, + "learning_rate": 4.1003478142958454e-05, + "loss": 1.2188, + "step": 3582000 + }, + { + "epoch": 2.15, + "learning_rate": 4.100137817739789e-05, + "loss": 1.2117, + "step": 3582500 + }, + { + "epoch": 2.15, + "learning_rate": 4.099927821183733e-05, + "loss": 1.2442, + "step": 3583000 + }, + { + "epoch": 2.15, + "learning_rate": 4.099717824627676e-05, + "loss": 1.2268, + "step": 3583500 + }, + { + "epoch": 2.15, + "learning_rate": 4.09950782807162e-05, + "loss": 1.1932, + "step": 3584000 + }, + { + "epoch": 2.15, + "learning_rate": 4.099297831515563e-05, + "loss": 1.2111, + "step": 3584500 + }, + { + "epoch": 2.15, + "learning_rate": 4.099089094938843e-05, + "loss": 1.2372, + "step": 3585000 + }, + { + "epoch": 2.15, + "learning_rate": 4.098879098382787e-05, + "loss": 1.2462, + "step": 3585500 + }, + { + "epoch": 2.15, + "learning_rate": 4.0986691018267296e-05, + "loss": 1.2332, + "step": 3586000 + }, + { + "epoch": 2.15, + "learning_rate": 4.098459105270673e-05, + "loss": 1.2067, + "step": 3586500 + }, + { + "epoch": 2.15, + "learning_rate": 4.098249108714617e-05, + "loss": 1.2383, + "step": 3587000 + }, + { + "epoch": 2.15, + "learning_rate": 4.09803911215856e-05, + "loss": 1.2644, + "step": 3587500 + }, + { + "epoch": 2.15, + "learning_rate": 4.0978291156025037e-05, + "loss": 1.2061, + "step": 3588000 + }, + { + "epoch": 2.15, + "learning_rate": 4.097619119046448e-05, + "loss": 1.2285, + "step": 3588500 + }, + { + "epoch": 2.15, + "learning_rate": 4.097409122490391e-05, + "loss": 1.2196, + "step": 3589000 + }, + { + "epoch": 2.15, + "learning_rate": 4.0971991259343344e-05, + "loss": 1.2348, + "step": 3589500 + }, + { + "epoch": 2.15, + "learning_rate": 4.0969891293782784e-05, + "loss": 1.202, + "step": 3590000 + }, + { + "epoch": 2.15, + "learning_rate": 4.096779132822222e-05, + "loss": 1.2456, + "step": 3590500 + }, + { + "epoch": 2.15, + "learning_rate": 4.096569136266166e-05, + "loss": 1.1937, + "step": 3591000 + }, + { + "epoch": 2.15, + "learning_rate": 4.096359559703221e-05, + "loss": 1.1942, + "step": 3591500 + }, + { + "epoch": 2.15, + "learning_rate": 4.0961495631471645e-05, + "loss": 1.2394, + "step": 3592000 + }, + { + "epoch": 2.15, + "learning_rate": 4.095939566591108e-05, + "loss": 1.2142, + "step": 3592500 + }, + { + "epoch": 2.15, + "learning_rate": 4.095729570035052e-05, + "loss": 1.244, + "step": 3593000 + }, + { + "epoch": 2.15, + "learning_rate": 4.095519993472107e-05, + "loss": 1.2574, + "step": 3593500 + }, + { + "epoch": 2.15, + "learning_rate": 4.0953104169091626e-05, + "loss": 1.2328, + "step": 3594000 + }, + { + "epoch": 2.16, + "learning_rate": 4.095100420353106e-05, + "loss": 1.2613, + "step": 3594500 + }, + { + "epoch": 2.16, + "learning_rate": 4.094890423797049e-05, + "loss": 1.2338, + "step": 3595000 + }, + { + "epoch": 2.16, + "learning_rate": 4.094680427240993e-05, + "loss": 1.2463, + "step": 3595500 + }, + { + "epoch": 2.16, + "learning_rate": 4.0944704306849366e-05, + "loss": 1.2282, + "step": 3596000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0942604341288806e-05, + "loss": 1.1977, + "step": 3596500 + }, + { + "epoch": 2.16, + "learning_rate": 4.094050437572824e-05, + "loss": 1.2276, + "step": 3597000 + }, + { + "epoch": 2.16, + "learning_rate": 4.093840441016767e-05, + "loss": 1.2254, + "step": 3597500 + }, + { + "epoch": 2.16, + "learning_rate": 4.0936304444607114e-05, + "loss": 1.2333, + "step": 3598000 + }, + { + "epoch": 2.16, + "learning_rate": 4.093420867897767e-05, + "loss": 1.2006, + "step": 3598500 + }, + { + "epoch": 2.16, + "learning_rate": 4.09321087134171e-05, + "loss": 1.2201, + "step": 3599000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0930012947787654e-05, + "loss": 1.2306, + "step": 3599500 + }, + { + "epoch": 2.16, + "learning_rate": 4.092791298222709e-05, + "loss": 1.2166, + "step": 3600000 + }, + { + "epoch": 2.16, + "eval_loss": 1.1922718286514282, + "eval_runtime": 1098.7908, + "eval_samples_per_second": 479.363, + "eval_steps_per_second": 79.894, + "step": 3600000 + }, + { + "epoch": 2.16, + "learning_rate": 4.092581301666653e-05, + "loss": 1.2243, + "step": 3600500 + }, + { + "epoch": 2.16, + "learning_rate": 4.092371305110596e-05, + "loss": 1.2198, + "step": 3601000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0921613085545395e-05, + "loss": 1.2253, + "step": 3601500 + }, + { + "epoch": 2.16, + "learning_rate": 4.0919513119984835e-05, + "loss": 1.2125, + "step": 3602000 + }, + { + "epoch": 2.16, + "learning_rate": 4.091741315442427e-05, + "loss": 1.2157, + "step": 3602500 + }, + { + "epoch": 2.16, + "learning_rate": 4.09153131888637e-05, + "loss": 1.2382, + "step": 3603000 + }, + { + "epoch": 2.16, + "learning_rate": 4.091321742323426e-05, + "loss": 1.2563, + "step": 3603500 + }, + { + "epoch": 2.16, + "learning_rate": 4.0911121657604816e-05, + "loss": 1.1968, + "step": 3604000 + }, + { + "epoch": 2.16, + "learning_rate": 4.090902169204425e-05, + "loss": 1.2192, + "step": 3604500 + }, + { + "epoch": 2.16, + "learning_rate": 4.090692172648368e-05, + "loss": 1.2573, + "step": 3605000 + }, + { + "epoch": 2.16, + "learning_rate": 4.090482176092312e-05, + "loss": 1.2138, + "step": 3605500 + }, + { + "epoch": 2.16, + "learning_rate": 4.0902721795362556e-05, + "loss": 1.2522, + "step": 3606000 + }, + { + "epoch": 2.16, + "learning_rate": 4.090062182980199e-05, + "loss": 1.2643, + "step": 3606500 + }, + { + "epoch": 2.16, + "learning_rate": 4.089852186424143e-05, + "loss": 1.2317, + "step": 3607000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0896421898680864e-05, + "loss": 1.2595, + "step": 3607500 + }, + { + "epoch": 2.16, + "learning_rate": 4.089432613305142e-05, + "loss": 1.2503, + "step": 3608000 + }, + { + "epoch": 2.16, + "learning_rate": 4.089222616749085e-05, + "loss": 1.2369, + "step": 3608500 + }, + { + "epoch": 2.16, + "learning_rate": 4.0890130401861404e-05, + "loss": 1.2174, + "step": 3609000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0888030436300844e-05, + "loss": 1.1942, + "step": 3609500 + }, + { + "epoch": 2.16, + "learning_rate": 4.088593047074028e-05, + "loss": 1.1876, + "step": 3610000 + }, + { + "epoch": 2.16, + "learning_rate": 4.088383050517972e-05, + "loss": 1.2287, + "step": 3610500 + }, + { + "epoch": 2.16, + "learning_rate": 4.088173053961915e-05, + "loss": 1.234, + "step": 3611000 + }, + { + "epoch": 2.17, + "learning_rate": 4.0879630574058585e-05, + "loss": 1.2279, + "step": 3611500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0877530608498025e-05, + "loss": 1.2328, + "step": 3612000 + }, + { + "epoch": 2.17, + "learning_rate": 4.087543484286858e-05, + "loss": 1.2313, + "step": 3612500 + }, + { + "epoch": 2.17, + "learning_rate": 4.087333487730801e-05, + "loss": 1.2286, + "step": 3613000 + }, + { + "epoch": 2.17, + "learning_rate": 4.0871234911747446e-05, + "loss": 1.2097, + "step": 3613500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0869134946186886e-05, + "loss": 1.2304, + "step": 3614000 + }, + { + "epoch": 2.17, + "learning_rate": 4.086703498062632e-05, + "loss": 1.2479, + "step": 3614500 + }, + { + "epoch": 2.17, + "learning_rate": 4.086493501506575e-05, + "loss": 1.24, + "step": 3615000 + }, + { + "epoch": 2.17, + "learning_rate": 4.0862835049505186e-05, + "loss": 1.1988, + "step": 3615500 + }, + { + "epoch": 2.17, + "learning_rate": 4.086073508394462e-05, + "loss": 1.2198, + "step": 3616000 + }, + { + "epoch": 2.17, + "learning_rate": 4.085863511838405e-05, + "loss": 1.2425, + "step": 3616500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0856535152823494e-05, + "loss": 1.2301, + "step": 3617000 + }, + { + "epoch": 2.17, + "learning_rate": 4.085443518726293e-05, + "loss": 1.2253, + "step": 3617500 + }, + { + "epoch": 2.17, + "learning_rate": 4.085233522170236e-05, + "loss": 1.228, + "step": 3618000 + }, + { + "epoch": 2.17, + "learning_rate": 4.085023945607292e-05, + "loss": 1.215, + "step": 3618500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0848139490512354e-05, + "loss": 1.205, + "step": 3619000 + }, + { + "epoch": 2.17, + "learning_rate": 4.084603952495179e-05, + "loss": 1.229, + "step": 3619500 + }, + { + "epoch": 2.17, + "learning_rate": 4.084394375932235e-05, + "loss": 1.2065, + "step": 3620000 + }, + { + "epoch": 2.17, + "learning_rate": 4.08418479936929e-05, + "loss": 1.2249, + "step": 3620500 + }, + { + "epoch": 2.17, + "learning_rate": 4.083974802813234e-05, + "loss": 1.2448, + "step": 3621000 + }, + { + "epoch": 2.17, + "learning_rate": 4.0837648062571775e-05, + "loss": 1.2269, + "step": 3621500 + }, + { + "epoch": 2.17, + "learning_rate": 4.083554809701121e-05, + "loss": 1.2452, + "step": 3622000 + }, + { + "epoch": 2.17, + "learning_rate": 4.083344813145064e-05, + "loss": 1.2182, + "step": 3622500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0831348165890076e-05, + "loss": 1.2113, + "step": 3623000 + }, + { + "epoch": 2.17, + "learning_rate": 4.082924820032951e-05, + "loss": 1.2451, + "step": 3623500 + }, + { + "epoch": 2.17, + "learning_rate": 4.082714823476895e-05, + "loss": 1.2117, + "step": 3624000 + }, + { + "epoch": 2.17, + "learning_rate": 4.082505246913951e-05, + "loss": 1.2248, + "step": 3624500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0822952503578936e-05, + "loss": 1.2089, + "step": 3625000 + }, + { + "epoch": 2.17, + "learning_rate": 4.082085253801838e-05, + "loss": 1.2073, + "step": 3625500 + }, + { + "epoch": 2.17, + "learning_rate": 4.081875257245781e-05, + "loss": 1.2196, + "step": 3626000 + }, + { + "epoch": 2.17, + "learning_rate": 4.0816652606897244e-05, + "loss": 1.2505, + "step": 3626500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0814552641336684e-05, + "loss": 1.2404, + "step": 3627000 + }, + { + "epoch": 2.17, + "learning_rate": 4.081245267577612e-05, + "loss": 1.2212, + "step": 3627500 + }, + { + "epoch": 2.18, + "learning_rate": 4.081035271021555e-05, + "loss": 1.2246, + "step": 3628000 + }, + { + "epoch": 2.18, + "learning_rate": 4.080825274465499e-05, + "loss": 1.2115, + "step": 3628500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0806156979025545e-05, + "loss": 1.232, + "step": 3629000 + }, + { + "epoch": 2.18, + "learning_rate": 4.080405701346498e-05, + "loss": 1.2346, + "step": 3629500 + }, + { + "epoch": 2.18, + "learning_rate": 4.080195704790441e-05, + "loss": 1.2611, + "step": 3630000 + }, + { + "epoch": 2.18, + "learning_rate": 4.079985708234385e-05, + "loss": 1.2433, + "step": 3630500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0797757116783285e-05, + "loss": 1.2363, + "step": 3631000 + }, + { + "epoch": 2.18, + "learning_rate": 4.079565715122272e-05, + "loss": 1.2324, + "step": 3631500 + }, + { + "epoch": 2.18, + "learning_rate": 4.079355718566216e-05, + "loss": 1.2324, + "step": 3632000 + }, + { + "epoch": 2.18, + "learning_rate": 4.079145722010159e-05, + "loss": 1.2142, + "step": 3632500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0789361454472146e-05, + "loss": 1.2238, + "step": 3633000 + }, + { + "epoch": 2.18, + "learning_rate": 4.078726148891158e-05, + "loss": 1.2173, + "step": 3633500 + }, + { + "epoch": 2.18, + "learning_rate": 4.078516152335102e-05, + "loss": 1.2172, + "step": 3634000 + }, + { + "epoch": 2.18, + "learning_rate": 4.078306155779045e-05, + "loss": 1.2255, + "step": 3634500 + }, + { + "epoch": 2.18, + "learning_rate": 4.078096579216101e-05, + "loss": 1.2241, + "step": 3635000 + }, + { + "epoch": 2.18, + "learning_rate": 4.077887002653156e-05, + "loss": 1.2359, + "step": 3635500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0776770060971e-05, + "loss": 1.1963, + "step": 3636000 + }, + { + "epoch": 2.18, + "learning_rate": 4.0774670095410434e-05, + "loss": 1.2616, + "step": 3636500 + }, + { + "epoch": 2.18, + "learning_rate": 4.077257012984987e-05, + "loss": 1.2506, + "step": 3637000 + }, + { + "epoch": 2.18, + "learning_rate": 4.077047016428931e-05, + "loss": 1.2387, + "step": 3637500 + }, + { + "epoch": 2.18, + "learning_rate": 4.076837019872874e-05, + "loss": 1.2228, + "step": 3638000 + }, + { + "epoch": 2.18, + "learning_rate": 4.0766270233168175e-05, + "loss": 1.2354, + "step": 3638500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0764170267607615e-05, + "loss": 1.2344, + "step": 3639000 + }, + { + "epoch": 2.18, + "learning_rate": 4.076207030204705e-05, + "loss": 1.2388, + "step": 3639500 + }, + { + "epoch": 2.18, + "learning_rate": 4.07599745364176e-05, + "loss": 1.2291, + "step": 3640000 + }, + { + "epoch": 2.18, + "learning_rate": 4.0757874570857035e-05, + "loss": 1.21, + "step": 3640500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0755774605296476e-05, + "loss": 1.2431, + "step": 3641000 + }, + { + "epoch": 2.18, + "learning_rate": 4.075367463973591e-05, + "loss": 1.2163, + "step": 3641500 + }, + { + "epoch": 2.18, + "learning_rate": 4.075158307403758e-05, + "loss": 1.2121, + "step": 3642000 + }, + { + "epoch": 2.18, + "learning_rate": 4.0749483108477016e-05, + "loss": 1.2476, + "step": 3642500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0747383142916456e-05, + "loss": 1.268, + "step": 3643000 + }, + { + "epoch": 2.18, + "learning_rate": 4.074528737728702e-05, + "loss": 1.2429, + "step": 3643500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0743187411726443e-05, + "loss": 1.2271, + "step": 3644000 + }, + { + "epoch": 2.19, + "learning_rate": 4.074108744616588e-05, + "loss": 1.2201, + "step": 3644500 + }, + { + "epoch": 2.19, + "learning_rate": 4.073898748060532e-05, + "loss": 1.2605, + "step": 3645000 + }, + { + "epoch": 2.19, + "learning_rate": 4.073688751504475e-05, + "loss": 1.2273, + "step": 3645500 + }, + { + "epoch": 2.19, + "learning_rate": 4.0734787549484184e-05, + "loss": 1.2194, + "step": 3646000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0732687583923624e-05, + "loss": 1.2482, + "step": 3646500 + }, + { + "epoch": 2.19, + "learning_rate": 4.073058761836306e-05, + "loss": 1.2196, + "step": 3647000 + }, + { + "epoch": 2.19, + "learning_rate": 4.072848765280249e-05, + "loss": 1.2508, + "step": 3647500 + }, + { + "epoch": 2.19, + "learning_rate": 4.072638768724193e-05, + "loss": 1.2245, + "step": 3648000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0724287721681365e-05, + "loss": 1.2326, + "step": 3648500 + }, + { + "epoch": 2.19, + "learning_rate": 4.072219195605192e-05, + "loss": 1.2206, + "step": 3649000 + }, + { + "epoch": 2.19, + "learning_rate": 4.072009199049136e-05, + "loss": 1.234, + "step": 3649500 + }, + { + "epoch": 2.19, + "learning_rate": 4.071799202493079e-05, + "loss": 1.2378, + "step": 3650000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0715892059370226e-05, + "loss": 1.2241, + "step": 3650500 + }, + { + "epoch": 2.19, + "learning_rate": 4.0713792093809666e-05, + "loss": 1.2061, + "step": 3651000 + }, + { + "epoch": 2.19, + "learning_rate": 4.07116921282491e-05, + "loss": 1.2387, + "step": 3651500 + }, + { + "epoch": 2.19, + "learning_rate": 4.070959636261965e-05, + "loss": 1.2423, + "step": 3652000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0707496397059086e-05, + "loss": 1.2047, + "step": 3652500 + }, + { + "epoch": 2.19, + "learning_rate": 4.0705396431498527e-05, + "loss": 1.2417, + "step": 3653000 + }, + { + "epoch": 2.19, + "learning_rate": 4.070329646593796e-05, + "loss": 1.2263, + "step": 3653500 + }, + { + "epoch": 2.19, + "learning_rate": 4.0701196500377393e-05, + "loss": 1.2074, + "step": 3654000 + }, + { + "epoch": 2.19, + "learning_rate": 4.069910073474795e-05, + "loss": 1.2336, + "step": 3654500 + }, + { + "epoch": 2.19, + "learning_rate": 4.069700076918739e-05, + "loss": 1.2786, + "step": 3655000 + }, + { + "epoch": 2.19, + "learning_rate": 4.069490080362682e-05, + "loss": 1.2572, + "step": 3655500 + }, + { + "epoch": 2.19, + "learning_rate": 4.069280083806626e-05, + "loss": 1.2065, + "step": 3656000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0690700872505694e-05, + "loss": 1.2142, + "step": 3656500 + }, + { + "epoch": 2.19, + "learning_rate": 4.068860090694512e-05, + "loss": 1.2379, + "step": 3657000 + }, + { + "epoch": 2.19, + "learning_rate": 4.068650094138456e-05, + "loss": 1.2094, + "step": 3657500 + }, + { + "epoch": 2.19, + "learning_rate": 4.0684400975823995e-05, + "loss": 1.2192, + "step": 3658000 + }, + { + "epoch": 2.19, + "learning_rate": 4.068230101026343e-05, + "loss": 1.2333, + "step": 3658500 + }, + { + "epoch": 2.19, + "learning_rate": 4.068020944456511e-05, + "loss": 1.1983, + "step": 3659000 + }, + { + "epoch": 2.19, + "learning_rate": 4.067810947900454e-05, + "loss": 1.2629, + "step": 3659500 + }, + { + "epoch": 2.19, + "learning_rate": 4.067600951344398e-05, + "loss": 1.2069, + "step": 3660000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0673909547883416e-05, + "loss": 1.2524, + "step": 3660500 + }, + { + "epoch": 2.19, + "learning_rate": 4.067180958232285e-05, + "loss": 1.246, + "step": 3661000 + }, + { + "epoch": 2.2, + "learning_rate": 4.06697138166934e-05, + "loss": 1.2532, + "step": 3661500 + }, + { + "epoch": 2.2, + "learning_rate": 4.066761385113284e-05, + "loss": 1.212, + "step": 3662000 + }, + { + "epoch": 2.2, + "learning_rate": 4.066551388557228e-05, + "loss": 1.2123, + "step": 3662500 + }, + { + "epoch": 2.2, + "learning_rate": 4.066341392001172e-05, + "loss": 1.2382, + "step": 3663000 + }, + { + "epoch": 2.2, + "learning_rate": 4.066131395445115e-05, + "loss": 1.2281, + "step": 3663500 + }, + { + "epoch": 2.2, + "learning_rate": 4.0659222388752824e-05, + "loss": 1.2386, + "step": 3664000 + }, + { + "epoch": 2.2, + "learning_rate": 4.065712242319226e-05, + "loss": 1.2523, + "step": 3664500 + }, + { + "epoch": 2.2, + "learning_rate": 4.065502665756282e-05, + "loss": 1.2435, + "step": 3665000 + }, + { + "epoch": 2.2, + "learning_rate": 4.0652926692002244e-05, + "loss": 1.2438, + "step": 3665500 + }, + { + "epoch": 2.2, + "learning_rate": 4.0650826726441685e-05, + "loss": 1.2506, + "step": 3666000 + }, + { + "epoch": 2.2, + "learning_rate": 4.064872676088112e-05, + "loss": 1.2182, + "step": 3666500 + }, + { + "epoch": 2.2, + "learning_rate": 4.064662679532055e-05, + "loss": 1.2312, + "step": 3667000 + }, + { + "epoch": 2.2, + "learning_rate": 4.064452682975999e-05, + "loss": 1.2317, + "step": 3667500 + }, + { + "epoch": 2.2, + "learning_rate": 4.0642426864199425e-05, + "loss": 1.2195, + "step": 3668000 + }, + { + "epoch": 2.2, + "learning_rate": 4.0640326898638866e-05, + "loss": 1.1908, + "step": 3668500 + }, + { + "epoch": 2.2, + "learning_rate": 4.06382269330783e-05, + "loss": 1.2278, + "step": 3669000 + }, + { + "epoch": 2.2, + "learning_rate": 4.063612696751773e-05, + "loss": 1.2565, + "step": 3669500 + }, + { + "epoch": 2.2, + "learning_rate": 4.063402700195717e-05, + "loss": 1.242, + "step": 3670000 + }, + { + "epoch": 2.2, + "learning_rate": 4.0631927036396606e-05, + "loss": 1.2207, + "step": 3670500 + }, + { + "epoch": 2.2, + "learning_rate": 4.062983127076716e-05, + "loss": 1.2385, + "step": 3671000 + }, + { + "epoch": 2.2, + "learning_rate": 4.062773130520659e-05, + "loss": 1.2234, + "step": 3671500 + }, + { + "epoch": 2.2, + "learning_rate": 4.0625631339646033e-05, + "loss": 1.2211, + "step": 3672000 + }, + { + "epoch": 2.2, + "learning_rate": 4.062353137408547e-05, + "loss": 1.2357, + "step": 3672500 + }, + { + "epoch": 2.2, + "learning_rate": 4.062143560845602e-05, + "loss": 1.2332, + "step": 3673000 + }, + { + "epoch": 2.2, + "learning_rate": 4.0619339842826574e-05, + "loss": 1.2096, + "step": 3673500 + }, + { + "epoch": 2.2, + "learning_rate": 4.061723987726601e-05, + "loss": 1.2311, + "step": 3674000 + }, + { + "epoch": 2.2, + "learning_rate": 4.061513991170545e-05, + "loss": 1.2103, + "step": 3674500 + }, + { + "epoch": 2.2, + "learning_rate": 4.061303994614488e-05, + "loss": 1.2281, + "step": 3675000 + }, + { + "epoch": 2.2, + "learning_rate": 4.061093998058432e-05, + "loss": 1.2381, + "step": 3675500 + }, + { + "epoch": 2.2, + "learning_rate": 4.0608840015023755e-05, + "loss": 1.2298, + "step": 3676000 + }, + { + "epoch": 2.2, + "learning_rate": 4.060674004946319e-05, + "loss": 1.2287, + "step": 3676500 + }, + { + "epoch": 2.2, + "learning_rate": 4.060464008390263e-05, + "loss": 1.2298, + "step": 3677000 + }, + { + "epoch": 2.2, + "learning_rate": 4.060254011834206e-05, + "loss": 1.2294, + "step": 3677500 + }, + { + "epoch": 2.21, + "learning_rate": 4.060044015278149e-05, + "loss": 1.1901, + "step": 3678000 + }, + { + "epoch": 2.21, + "learning_rate": 4.059834018722093e-05, + "loss": 1.2617, + "step": 3678500 + }, + { + "epoch": 2.21, + "learning_rate": 4.059624442159149e-05, + "loss": 1.2458, + "step": 3679000 + }, + { + "epoch": 2.21, + "learning_rate": 4.059414445603092e-05, + "loss": 1.2122, + "step": 3679500 + }, + { + "epoch": 2.21, + "learning_rate": 4.0592044490470356e-05, + "loss": 1.2364, + "step": 3680000 + }, + { + "epoch": 2.21, + "learning_rate": 4.058994872484091e-05, + "loss": 1.2302, + "step": 3680500 + }, + { + "epoch": 2.21, + "learning_rate": 4.058784875928035e-05, + "loss": 1.2124, + "step": 3681000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0585748793719784e-05, + "loss": 1.2392, + "step": 3681500 + }, + { + "epoch": 2.21, + "learning_rate": 4.058364882815922e-05, + "loss": 1.2562, + "step": 3682000 + }, + { + "epoch": 2.21, + "learning_rate": 4.058154886259866e-05, + "loss": 1.2149, + "step": 3682500 + }, + { + "epoch": 2.21, + "learning_rate": 4.0579448897038084e-05, + "loss": 1.2255, + "step": 3683000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0577348931477524e-05, + "loss": 1.209, + "step": 3683500 + }, + { + "epoch": 2.21, + "learning_rate": 4.057524896591696e-05, + "loss": 1.225, + "step": 3684000 + }, + { + "epoch": 2.21, + "learning_rate": 4.057314900035639e-05, + "loss": 1.2127, + "step": 3684500 + }, + { + "epoch": 2.21, + "learning_rate": 4.057104903479583e-05, + "loss": 1.2378, + "step": 3685000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0568953269166385e-05, + "loss": 1.2231, + "step": 3685500 + }, + { + "epoch": 2.21, + "learning_rate": 4.056685330360582e-05, + "loss": 1.1944, + "step": 3686000 + }, + { + "epoch": 2.21, + "learning_rate": 4.056475333804525e-05, + "loss": 1.2109, + "step": 3686500 + }, + { + "epoch": 2.21, + "learning_rate": 4.056265757241581e-05, + "loss": 1.2287, + "step": 3687000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0560557606855246e-05, + "loss": 1.2089, + "step": 3687500 + }, + { + "epoch": 2.21, + "learning_rate": 4.055845764129468e-05, + "loss": 1.2439, + "step": 3688000 + }, + { + "epoch": 2.21, + "learning_rate": 4.055635767573411e-05, + "loss": 1.2123, + "step": 3688500 + }, + { + "epoch": 2.21, + "learning_rate": 4.055425771017355e-05, + "loss": 1.2368, + "step": 3689000 + }, + { + "epoch": 2.21, + "learning_rate": 4.055216194454411e-05, + "loss": 1.2622, + "step": 3689500 + }, + { + "epoch": 2.21, + "learning_rate": 4.055006197898354e-05, + "loss": 1.2529, + "step": 3690000 + }, + { + "epoch": 2.21, + "learning_rate": 4.054796201342298e-05, + "loss": 1.2587, + "step": 3690500 + }, + { + "epoch": 2.21, + "learning_rate": 4.0545862047862413e-05, + "loss": 1.2493, + "step": 3691000 + }, + { + "epoch": 2.21, + "learning_rate": 4.054376208230185e-05, + "loss": 1.1992, + "step": 3691500 + }, + { + "epoch": 2.21, + "learning_rate": 4.054166211674129e-05, + "loss": 1.2125, + "step": 3692000 + }, + { + "epoch": 2.21, + "learning_rate": 4.053956215118072e-05, + "loss": 1.2267, + "step": 3692500 + }, + { + "epoch": 2.21, + "learning_rate": 4.0537462185620154e-05, + "loss": 1.2221, + "step": 3693000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0535362220059594e-05, + "loss": 1.2125, + "step": 3693500 + }, + { + "epoch": 2.21, + "learning_rate": 4.053326645443015e-05, + "loss": 1.2395, + "step": 3694000 + }, + { + "epoch": 2.22, + "learning_rate": 4.053116648886958e-05, + "loss": 1.2272, + "step": 3694500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0529066523309015e-05, + "loss": 1.2237, + "step": 3695000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0526966557748455e-05, + "loss": 1.2141, + "step": 3695500 + }, + { + "epoch": 2.22, + "learning_rate": 4.052486659218789e-05, + "loss": 1.2262, + "step": 3696000 + }, + { + "epoch": 2.22, + "learning_rate": 4.052276662662732e-05, + "loss": 1.2308, + "step": 3696500 + }, + { + "epoch": 2.22, + "learning_rate": 4.052066666106676e-05, + "loss": 1.2152, + "step": 3697000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0518566695506196e-05, + "loss": 1.2367, + "step": 3697500 + }, + { + "epoch": 2.22, + "learning_rate": 4.051646672994562e-05, + "loss": 1.2155, + "step": 3698000 + }, + { + "epoch": 2.22, + "learning_rate": 4.051437096431618e-05, + "loss": 1.2354, + "step": 3698500 + }, + { + "epoch": 2.22, + "learning_rate": 4.051227519868674e-05, + "loss": 1.2028, + "step": 3699000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0510175233126177e-05, + "loss": 1.2355, + "step": 3699500 + }, + { + "epoch": 2.22, + "learning_rate": 4.050807526756561e-05, + "loss": 1.2172, + "step": 3700000 + }, + { + "epoch": 2.22, + "eval_loss": 1.192676305770874, + "eval_runtime": 1108.3091, + "eval_samples_per_second": 475.246, + "eval_steps_per_second": 79.208, + "step": 3700000 + }, + { + "epoch": 2.22, + "learning_rate": 4.050597530200505e-05, + "loss": 1.2434, + "step": 3700500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0503875336444484e-05, + "loss": 1.2336, + "step": 3701000 + }, + { + "epoch": 2.22, + "learning_rate": 4.050177537088392e-05, + "loss": 1.2125, + "step": 3701500 + }, + { + "epoch": 2.22, + "learning_rate": 4.049967540532336e-05, + "loss": 1.2513, + "step": 3702000 + }, + { + "epoch": 2.22, + "learning_rate": 4.049757543976279e-05, + "loss": 1.2471, + "step": 3702500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0495479674133344e-05, + "loss": 1.2444, + "step": 3703000 + }, + { + "epoch": 2.22, + "learning_rate": 4.049337970857278e-05, + "loss": 1.2351, + "step": 3703500 + }, + { + "epoch": 2.22, + "learning_rate": 4.049128394294333e-05, + "loss": 1.2162, + "step": 3704000 + }, + { + "epoch": 2.22, + "learning_rate": 4.048918397738277e-05, + "loss": 1.2194, + "step": 3704500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0487084011822205e-05, + "loss": 1.2149, + "step": 3705000 + }, + { + "epoch": 2.22, + "learning_rate": 4.048498404626164e-05, + "loss": 1.2341, + "step": 3705500 + }, + { + "epoch": 2.22, + "learning_rate": 4.04828882806322e-05, + "loss": 1.2217, + "step": 3706000 + }, + { + "epoch": 2.22, + "learning_rate": 4.048078831507163e-05, + "loss": 1.2377, + "step": 3706500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0478688349511066e-05, + "loss": 1.1999, + "step": 3707000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0476588383950506e-05, + "loss": 1.2312, + "step": 3707500 + }, + { + "epoch": 2.22, + "learning_rate": 4.047448841838994e-05, + "loss": 1.2233, + "step": 3708000 + }, + { + "epoch": 2.22, + "learning_rate": 4.047238845282937e-05, + "loss": 1.2217, + "step": 3708500 + }, + { + "epoch": 2.22, + "learning_rate": 4.047028848726881e-05, + "loss": 1.2382, + "step": 3709000 + }, + { + "epoch": 2.22, + "learning_rate": 4.046818852170825e-05, + "loss": 1.2415, + "step": 3709500 + }, + { + "epoch": 2.22, + "learning_rate": 4.04660927560788e-05, + "loss": 1.209, + "step": 3710000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0463992790518234e-05, + "loss": 1.2346, + "step": 3710500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0461892824957674e-05, + "loss": 1.2342, + "step": 3711000 + }, + { + "epoch": 2.23, + "learning_rate": 4.045979285939711e-05, + "loss": 1.236, + "step": 3711500 + }, + { + "epoch": 2.23, + "learning_rate": 4.045769289383654e-05, + "loss": 1.2323, + "step": 3712000 + }, + { + "epoch": 2.23, + "learning_rate": 4.0455597128207094e-05, + "loss": 1.2306, + "step": 3712500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0453497162646535e-05, + "loss": 1.2228, + "step": 3713000 + }, + { + "epoch": 2.23, + "learning_rate": 4.045139719708597e-05, + "loss": 1.2014, + "step": 3713500 + }, + { + "epoch": 2.23, + "learning_rate": 4.04492972315254e-05, + "loss": 1.2309, + "step": 3714000 + }, + { + "epoch": 2.23, + "learning_rate": 4.044720146589596e-05, + "loss": 1.2202, + "step": 3714500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0445101500335395e-05, + "loss": 1.2246, + "step": 3715000 + }, + { + "epoch": 2.23, + "learning_rate": 4.044300573470595e-05, + "loss": 1.234, + "step": 3715500 + }, + { + "epoch": 2.23, + "learning_rate": 4.044090576914538e-05, + "loss": 1.2274, + "step": 3716000 + }, + { + "epoch": 2.23, + "learning_rate": 4.043880580358482e-05, + "loss": 1.2261, + "step": 3716500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0436705838024256e-05, + "loss": 1.2294, + "step": 3717000 + }, + { + "epoch": 2.23, + "learning_rate": 4.043460587246369e-05, + "loss": 1.2285, + "step": 3717500 + }, + { + "epoch": 2.23, + "learning_rate": 4.043250590690313e-05, + "loss": 1.2242, + "step": 3718000 + }, + { + "epoch": 2.23, + "learning_rate": 4.043040594134256e-05, + "loss": 1.2183, + "step": 3718500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0428305975782e-05, + "loss": 1.2699, + "step": 3719000 + }, + { + "epoch": 2.23, + "learning_rate": 4.042620601022143e-05, + "loss": 1.2293, + "step": 3719500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0424106044660864e-05, + "loss": 1.2611, + "step": 3720000 + }, + { + "epoch": 2.23, + "learning_rate": 4.04220060791003e-05, + "loss": 1.2013, + "step": 3720500 + }, + { + "epoch": 2.23, + "learning_rate": 4.041990611353974e-05, + "loss": 1.2404, + "step": 3721000 + }, + { + "epoch": 2.23, + "learning_rate": 4.04178103479103e-05, + "loss": 1.234, + "step": 3721500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0415710382349724e-05, + "loss": 1.2356, + "step": 3722000 + }, + { + "epoch": 2.23, + "learning_rate": 4.0413610416789165e-05, + "loss": 1.239, + "step": 3722500 + }, + { + "epoch": 2.23, + "learning_rate": 4.04115104512286e-05, + "loss": 1.2559, + "step": 3723000 + }, + { + "epoch": 2.23, + "learning_rate": 4.040941048566803e-05, + "loss": 1.2246, + "step": 3723500 + }, + { + "epoch": 2.23, + "learning_rate": 4.040731052010747e-05, + "loss": 1.2373, + "step": 3724000 + }, + { + "epoch": 2.23, + "learning_rate": 4.0405210554546905e-05, + "loss": 1.2451, + "step": 3724500 + }, + { + "epoch": 2.23, + "learning_rate": 4.040311058898634e-05, + "loss": 1.2407, + "step": 3725000 + }, + { + "epoch": 2.23, + "learning_rate": 4.040101482335689e-05, + "loss": 1.2441, + "step": 3725500 + }, + { + "epoch": 2.23, + "learning_rate": 4.039891485779633e-05, + "loss": 1.209, + "step": 3726000 + }, + { + "epoch": 2.23, + "learning_rate": 4.0396814892235766e-05, + "loss": 1.2323, + "step": 3726500 + }, + { + "epoch": 2.23, + "learning_rate": 4.03947149266752e-05, + "loss": 1.2416, + "step": 3727000 + }, + { + "epoch": 2.23, + "learning_rate": 4.039261496111464e-05, + "loss": 1.2182, + "step": 3727500 + }, + { + "epoch": 2.24, + "learning_rate": 4.039051919548519e-05, + "loss": 1.2359, + "step": 3728000 + }, + { + "epoch": 2.24, + "learning_rate": 4.038841922992463e-05, + "loss": 1.2215, + "step": 3728500 + }, + { + "epoch": 2.24, + "learning_rate": 4.038631926436407e-05, + "loss": 1.2037, + "step": 3729000 + }, + { + "epoch": 2.24, + "learning_rate": 4.03842192988035e-05, + "loss": 1.2159, + "step": 3729500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0382119333242934e-05, + "loss": 1.2106, + "step": 3730000 + }, + { + "epoch": 2.24, + "learning_rate": 4.038002356761349e-05, + "loss": 1.2295, + "step": 3730500 + }, + { + "epoch": 2.24, + "learning_rate": 4.037792360205293e-05, + "loss": 1.2282, + "step": 3731000 + }, + { + "epoch": 2.24, + "learning_rate": 4.037582363649236e-05, + "loss": 1.2091, + "step": 3731500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0373723670931795e-05, + "loss": 1.2213, + "step": 3732000 + }, + { + "epoch": 2.24, + "learning_rate": 4.037162790530235e-05, + "loss": 1.2201, + "step": 3732500 + }, + { + "epoch": 2.24, + "learning_rate": 4.036952793974179e-05, + "loss": 1.2254, + "step": 3733000 + }, + { + "epoch": 2.24, + "learning_rate": 4.036742797418122e-05, + "loss": 1.2457, + "step": 3733500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0365332208551775e-05, + "loss": 1.1988, + "step": 3734000 + }, + { + "epoch": 2.24, + "learning_rate": 4.0363232242991216e-05, + "loss": 1.2321, + "step": 3734500 + }, + { + "epoch": 2.24, + "learning_rate": 4.036113227743065e-05, + "loss": 1.2142, + "step": 3735000 + }, + { + "epoch": 2.24, + "learning_rate": 4.035903231187008e-05, + "loss": 1.2166, + "step": 3735500 + }, + { + "epoch": 2.24, + "learning_rate": 4.035693234630952e-05, + "loss": 1.2453, + "step": 3736000 + }, + { + "epoch": 2.24, + "learning_rate": 4.0354832380748956e-05, + "loss": 1.252, + "step": 3736500 + }, + { + "epoch": 2.24, + "learning_rate": 4.035273661511951e-05, + "loss": 1.2193, + "step": 3737000 + }, + { + "epoch": 2.24, + "learning_rate": 4.035063664955894e-05, + "loss": 1.2318, + "step": 3737500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0348536683998384e-05, + "loss": 1.1904, + "step": 3738000 + }, + { + "epoch": 2.24, + "learning_rate": 4.034643671843782e-05, + "loss": 1.2344, + "step": 3738500 + }, + { + "epoch": 2.24, + "learning_rate": 4.034433675287725e-05, + "loss": 1.2372, + "step": 3739000 + }, + { + "epoch": 2.24, + "learning_rate": 4.034223678731669e-05, + "loss": 1.251, + "step": 3739500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0340136821756124e-05, + "loss": 1.1963, + "step": 3740000 + }, + { + "epoch": 2.24, + "learning_rate": 4.033804105612668e-05, + "loss": 1.1995, + "step": 3740500 + }, + { + "epoch": 2.24, + "learning_rate": 4.033594109056611e-05, + "loss": 1.2382, + "step": 3741000 + }, + { + "epoch": 2.24, + "learning_rate": 4.033384112500555e-05, + "loss": 1.2309, + "step": 3741500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0331741159444985e-05, + "loss": 1.2407, + "step": 3742000 + }, + { + "epoch": 2.24, + "learning_rate": 4.032964119388442e-05, + "loss": 1.2217, + "step": 3742500 + }, + { + "epoch": 2.24, + "learning_rate": 4.032754122832386e-05, + "loss": 1.2138, + "step": 3743000 + }, + { + "epoch": 2.24, + "learning_rate": 4.032544126276329e-05, + "loss": 1.2027, + "step": 3743500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0323341297202725e-05, + "loss": 1.2205, + "step": 3744000 + }, + { + "epoch": 2.24, + "learning_rate": 4.032124133164216e-05, + "loss": 1.2238, + "step": 3744500 + }, + { + "epoch": 2.25, + "learning_rate": 4.031914136608159e-05, + "loss": 1.2236, + "step": 3745000 + }, + { + "epoch": 2.25, + "learning_rate": 4.031704140052103e-05, + "loss": 1.2203, + "step": 3745500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0314941434960466e-05, + "loss": 1.2014, + "step": 3746000 + }, + { + "epoch": 2.25, + "learning_rate": 4.031284566933102e-05, + "loss": 1.2485, + "step": 3746500 + }, + { + "epoch": 2.25, + "learning_rate": 4.031074990370158e-05, + "loss": 1.2402, + "step": 3747000 + }, + { + "epoch": 2.25, + "learning_rate": 4.0308649938141013e-05, + "loss": 1.2122, + "step": 3747500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0306549972580454e-05, + "loss": 1.2152, + "step": 3748000 + }, + { + "epoch": 2.25, + "learning_rate": 4.030445000701989e-05, + "loss": 1.2247, + "step": 3748500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0302350041459314e-05, + "loss": 1.2051, + "step": 3749000 + }, + { + "epoch": 2.25, + "learning_rate": 4.0300254275829874e-05, + "loss": 1.2276, + "step": 3749500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0298154310269314e-05, + "loss": 1.2253, + "step": 3750000 + }, + { + "epoch": 2.25, + "learning_rate": 4.029605434470875e-05, + "loss": 1.2133, + "step": 3750500 + }, + { + "epoch": 2.25, + "learning_rate": 4.029395437914818e-05, + "loss": 1.2134, + "step": 3751000 + }, + { + "epoch": 2.25, + "learning_rate": 4.0291854413587615e-05, + "loss": 1.221, + "step": 3751500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0289758647958175e-05, + "loss": 1.2105, + "step": 3752000 + }, + { + "epoch": 2.25, + "learning_rate": 4.028765868239761e-05, + "loss": 1.2212, + "step": 3752500 + }, + { + "epoch": 2.25, + "learning_rate": 4.028555871683705e-05, + "loss": 1.2855, + "step": 3753000 + }, + { + "epoch": 2.25, + "learning_rate": 4.0283458751276476e-05, + "loss": 1.2226, + "step": 3753500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0281362985647036e-05, + "loss": 1.1972, + "step": 3754000 + }, + { + "epoch": 2.25, + "learning_rate": 4.027926302008647e-05, + "loss": 1.2402, + "step": 3754500 + }, + { + "epoch": 2.25, + "learning_rate": 4.027716305452591e-05, + "loss": 1.254, + "step": 3755000 + }, + { + "epoch": 2.25, + "learning_rate": 4.027506308896534e-05, + "loss": 1.2116, + "step": 3755500 + }, + { + "epoch": 2.25, + "learning_rate": 4.02729673233359e-05, + "loss": 1.2319, + "step": 3756000 + }, + { + "epoch": 2.25, + "learning_rate": 4.027086735777533e-05, + "loss": 1.2095, + "step": 3756500 + }, + { + "epoch": 2.25, + "learning_rate": 4.026876739221477e-05, + "loss": 1.2341, + "step": 3757000 + }, + { + "epoch": 2.25, + "learning_rate": 4.0266671626585324e-05, + "loss": 1.2214, + "step": 3757500 + }, + { + "epoch": 2.25, + "learning_rate": 4.026457166102476e-05, + "loss": 1.2548, + "step": 3758000 + }, + { + "epoch": 2.25, + "learning_rate": 4.02624716954642e-05, + "loss": 1.2287, + "step": 3758500 + }, + { + "epoch": 2.25, + "learning_rate": 4.026037172990363e-05, + "loss": 1.2418, + "step": 3759000 + }, + { + "epoch": 2.25, + "learning_rate": 4.0258271764343065e-05, + "loss": 1.2027, + "step": 3759500 + }, + { + "epoch": 2.25, + "learning_rate": 4.025617599871362e-05, + "loss": 1.2405, + "step": 3760000 + }, + { + "epoch": 2.25, + "learning_rate": 4.025407603315306e-05, + "loss": 1.1929, + "step": 3760500 + }, + { + "epoch": 2.25, + "learning_rate": 4.025197606759249e-05, + "loss": 1.2297, + "step": 3761000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0249876102031925e-05, + "loss": 1.229, + "step": 3761500 + }, + { + "epoch": 2.26, + "learning_rate": 4.024778033640248e-05, + "loss": 1.2265, + "step": 3762000 + }, + { + "epoch": 2.26, + "learning_rate": 4.024568037084192e-05, + "loss": 1.2253, + "step": 3762500 + }, + { + "epoch": 2.26, + "learning_rate": 4.024358040528135e-05, + "loss": 1.1993, + "step": 3763000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0241480439720786e-05, + "loss": 1.2261, + "step": 3763500 + }, + { + "epoch": 2.26, + "learning_rate": 4.0239380474160226e-05, + "loss": 1.2344, + "step": 3764000 + }, + { + "epoch": 2.26, + "learning_rate": 4.023728050859966e-05, + "loss": 1.2172, + "step": 3764500 + }, + { + "epoch": 2.26, + "learning_rate": 4.023518054303909e-05, + "loss": 1.2214, + "step": 3765000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0233080577478527e-05, + "loss": 1.208, + "step": 3765500 + }, + { + "epoch": 2.26, + "learning_rate": 4.023098061191796e-05, + "loss": 1.206, + "step": 3766000 + }, + { + "epoch": 2.26, + "learning_rate": 4.022888484628852e-05, + "loss": 1.2395, + "step": 3766500 + }, + { + "epoch": 2.26, + "learning_rate": 4.0226789080659074e-05, + "loss": 1.2404, + "step": 3767000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0224689115098514e-05, + "loss": 1.2381, + "step": 3767500 + }, + { + "epoch": 2.26, + "learning_rate": 4.022258914953795e-05, + "loss": 1.224, + "step": 3768000 + }, + { + "epoch": 2.26, + "learning_rate": 4.022048918397738e-05, + "loss": 1.2424, + "step": 3768500 + }, + { + "epoch": 2.26, + "learning_rate": 4.021838921841682e-05, + "loss": 1.2153, + "step": 3769000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0216289252856255e-05, + "loss": 1.225, + "step": 3769500 + }, + { + "epoch": 2.26, + "learning_rate": 4.021419348722681e-05, + "loss": 1.2436, + "step": 3770000 + }, + { + "epoch": 2.26, + "learning_rate": 4.021209352166624e-05, + "loss": 1.2217, + "step": 3770500 + }, + { + "epoch": 2.26, + "learning_rate": 4.020999355610568e-05, + "loss": 1.2131, + "step": 3771000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0207893590545116e-05, + "loss": 1.2384, + "step": 3771500 + }, + { + "epoch": 2.26, + "learning_rate": 4.020579362498455e-05, + "loss": 1.2014, + "step": 3772000 + }, + { + "epoch": 2.26, + "learning_rate": 4.020369365942398e-05, + "loss": 1.232, + "step": 3772500 + }, + { + "epoch": 2.26, + "learning_rate": 4.0201593693863416e-05, + "loss": 1.2192, + "step": 3773000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0199493728302856e-05, + "loss": 1.2221, + "step": 3773500 + }, + { + "epoch": 2.26, + "learning_rate": 4.0197397962673417e-05, + "loss": 1.2128, + "step": 3774000 + }, + { + "epoch": 2.26, + "learning_rate": 4.019529799711285e-05, + "loss": 1.2326, + "step": 3774500 + }, + { + "epoch": 2.26, + "learning_rate": 4.019319803155228e-05, + "loss": 1.2197, + "step": 3775000 + }, + { + "epoch": 2.26, + "learning_rate": 4.019110226592284e-05, + "loss": 1.2171, + "step": 3775500 + }, + { + "epoch": 2.26, + "learning_rate": 4.018900230036228e-05, + "loss": 1.1986, + "step": 3776000 + }, + { + "epoch": 2.26, + "learning_rate": 4.018690233480171e-05, + "loss": 1.2416, + "step": 3776500 + }, + { + "epoch": 2.26, + "learning_rate": 4.0184802369241144e-05, + "loss": 1.2002, + "step": 3777000 + }, + { + "epoch": 2.26, + "learning_rate": 4.018270240368058e-05, + "loss": 1.2206, + "step": 3777500 + }, + { + "epoch": 2.27, + "learning_rate": 4.018060243812001e-05, + "loss": 1.2283, + "step": 3778000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0178502472559445e-05, + "loss": 1.2574, + "step": 3778500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0176406706930005e-05, + "loss": 1.2058, + "step": 3779000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0174306741369445e-05, + "loss": 1.2127, + "step": 3779500 + }, + { + "epoch": 2.27, + "learning_rate": 4.017220677580887e-05, + "loss": 1.2163, + "step": 3780000 + }, + { + "epoch": 2.27, + "learning_rate": 4.017010681024831e-05, + "loss": 1.2169, + "step": 3780500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0168006844687745e-05, + "loss": 1.1947, + "step": 3781000 + }, + { + "epoch": 2.27, + "learning_rate": 4.016590687912718e-05, + "loss": 1.2692, + "step": 3781500 + }, + { + "epoch": 2.27, + "learning_rate": 4.016380691356662e-05, + "loss": 1.2323, + "step": 3782000 + }, + { + "epoch": 2.27, + "learning_rate": 4.016170694800605e-05, + "loss": 1.1993, + "step": 3782500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0159606982445486e-05, + "loss": 1.2416, + "step": 3783000 + }, + { + "epoch": 2.27, + "learning_rate": 4.015751121681604e-05, + "loss": 1.2126, + "step": 3783500 + }, + { + "epoch": 2.27, + "learning_rate": 4.015541125125548e-05, + "loss": 1.2343, + "step": 3784000 + }, + { + "epoch": 2.27, + "learning_rate": 4.015331128569491e-05, + "loss": 1.2284, + "step": 3784500 + }, + { + "epoch": 2.27, + "learning_rate": 4.015121132013435e-05, + "loss": 1.2446, + "step": 3785000 + }, + { + "epoch": 2.27, + "learning_rate": 4.014911135457379e-05, + "loss": 1.2092, + "step": 3785500 + }, + { + "epoch": 2.27, + "learning_rate": 4.014701558894434e-05, + "loss": 1.2086, + "step": 3786000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0144915623383774e-05, + "loss": 1.2135, + "step": 3786500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0142815657823214e-05, + "loss": 1.2134, + "step": 3787000 + }, + { + "epoch": 2.27, + "learning_rate": 4.014071569226265e-05, + "loss": 1.2327, + "step": 3787500 + }, + { + "epoch": 2.27, + "learning_rate": 4.013861572670208e-05, + "loss": 1.2734, + "step": 3788000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0136519961072635e-05, + "loss": 1.2331, + "step": 3788500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0134419995512075e-05, + "loss": 1.2391, + "step": 3789000 + }, + { + "epoch": 2.27, + "learning_rate": 4.013232002995151e-05, + "loss": 1.2187, + "step": 3789500 + }, + { + "epoch": 2.27, + "learning_rate": 4.013022006439094e-05, + "loss": 1.2273, + "step": 3790000 + }, + { + "epoch": 2.27, + "learning_rate": 4.012812009883038e-05, + "loss": 1.2226, + "step": 3790500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0126020133269816e-05, + "loss": 1.2266, + "step": 3791000 + }, + { + "epoch": 2.27, + "learning_rate": 4.012392436764037e-05, + "loss": 1.2393, + "step": 3791500 + }, + { + "epoch": 2.27, + "learning_rate": 4.012182860201092e-05, + "loss": 1.2179, + "step": 3792000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0119728636450356e-05, + "loss": 1.2371, + "step": 3792500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0117628670889797e-05, + "loss": 1.2009, + "step": 3793000 + }, + { + "epoch": 2.27, + "learning_rate": 4.011552870532923e-05, + "loss": 1.2375, + "step": 3793500 + }, + { + "epoch": 2.27, + "learning_rate": 4.011342873976867e-05, + "loss": 1.2117, + "step": 3794000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0111328774208104e-05, + "loss": 1.2382, + "step": 3794500 + }, + { + "epoch": 2.28, + "learning_rate": 4.010922880864754e-05, + "loss": 1.225, + "step": 3795000 + }, + { + "epoch": 2.28, + "learning_rate": 4.010712884308698e-05, + "loss": 1.2032, + "step": 3795500 + }, + { + "epoch": 2.28, + "learning_rate": 4.010502887752641e-05, + "loss": 1.2264, + "step": 3796000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0102933111896964e-05, + "loss": 1.2476, + "step": 3796500 + }, + { + "epoch": 2.28, + "learning_rate": 4.010083734626752e-05, + "loss": 1.2524, + "step": 3797000 + }, + { + "epoch": 2.28, + "learning_rate": 4.009873738070695e-05, + "loss": 1.2193, + "step": 3797500 + }, + { + "epoch": 2.28, + "learning_rate": 4.009663741514639e-05, + "loss": 1.2413, + "step": 3798000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0094537449585825e-05, + "loss": 1.2038, + "step": 3798500 + }, + { + "epoch": 2.28, + "learning_rate": 4.009243748402526e-05, + "loss": 1.2231, + "step": 3799000 + }, + { + "epoch": 2.28, + "learning_rate": 4.00903375184647e-05, + "loss": 1.228, + "step": 3799500 + }, + { + "epoch": 2.28, + "learning_rate": 4.008824175283525e-05, + "loss": 1.2295, + "step": 3800000 + }, + { + "epoch": 2.28, + "eval_loss": 1.1859376430511475, + "eval_runtime": 1106.7769, + "eval_samples_per_second": 475.904, + "eval_steps_per_second": 79.318, + "step": 3800000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0086141787274686e-05, + "loss": 1.2295, + "step": 3800500 + }, + { + "epoch": 2.28, + "learning_rate": 4.0084041821714126e-05, + "loss": 1.2621, + "step": 3801000 + }, + { + "epoch": 2.28, + "learning_rate": 4.008194185615356e-05, + "loss": 1.235, + "step": 3801500 + }, + { + "epoch": 2.28, + "learning_rate": 4.007984189059299e-05, + "loss": 1.2179, + "step": 3802000 + }, + { + "epoch": 2.28, + "learning_rate": 4.007774192503243e-05, + "loss": 1.2533, + "step": 3802500 + }, + { + "epoch": 2.28, + "learning_rate": 4.007564195947187e-05, + "loss": 1.2081, + "step": 3803000 + }, + { + "epoch": 2.28, + "learning_rate": 4.00735419939113e-05, + "loss": 1.2337, + "step": 3803500 + }, + { + "epoch": 2.28, + "learning_rate": 4.007144202835074e-05, + "loss": 1.2429, + "step": 3804000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0069346262721294e-05, + "loss": 1.2493, + "step": 3804500 + }, + { + "epoch": 2.28, + "learning_rate": 4.006724629716073e-05, + "loss": 1.2188, + "step": 3805000 + }, + { + "epoch": 2.28, + "learning_rate": 4.006514633160016e-05, + "loss": 1.2603, + "step": 3805500 + }, + { + "epoch": 2.28, + "learning_rate": 4.00630463660396e-05, + "loss": 1.2455, + "step": 3806000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0060946400479035e-05, + "loss": 1.2194, + "step": 3806500 + }, + { + "epoch": 2.28, + "learning_rate": 4.005885063484959e-05, + "loss": 1.2155, + "step": 3807000 + }, + { + "epoch": 2.28, + "learning_rate": 4.005675066928902e-05, + "loss": 1.2382, + "step": 3807500 + }, + { + "epoch": 2.28, + "learning_rate": 4.005465490365958e-05, + "loss": 1.2212, + "step": 3808000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0052554938099015e-05, + "loss": 1.2376, + "step": 3808500 + }, + { + "epoch": 2.28, + "learning_rate": 4.005045497253845e-05, + "loss": 1.239, + "step": 3809000 + }, + { + "epoch": 2.28, + "learning_rate": 4.004835500697789e-05, + "loss": 1.2201, + "step": 3809500 + }, + { + "epoch": 2.28, + "learning_rate": 4.004625504141732e-05, + "loss": 1.2301, + "step": 3810000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0044155075856756e-05, + "loss": 1.2337, + "step": 3810500 + }, + { + "epoch": 2.28, + "learning_rate": 4.004205931022731e-05, + "loss": 1.2127, + "step": 3811000 + }, + { + "epoch": 2.29, + "learning_rate": 4.003995934466675e-05, + "loss": 1.2537, + "step": 3811500 + }, + { + "epoch": 2.29, + "learning_rate": 4.003785937910618e-05, + "loss": 1.218, + "step": 3812000 + }, + { + "epoch": 2.29, + "learning_rate": 4.003575941354562e-05, + "loss": 1.2413, + "step": 3812500 + }, + { + "epoch": 2.29, + "learning_rate": 4.003365944798506e-05, + "loss": 1.2389, + "step": 3813000 + }, + { + "epoch": 2.29, + "learning_rate": 4.003155948242449e-05, + "loss": 1.2313, + "step": 3813500 + }, + { + "epoch": 2.29, + "learning_rate": 4.002945951686392e-05, + "loss": 1.2005, + "step": 3814000 + }, + { + "epoch": 2.29, + "learning_rate": 4.002735955130336e-05, + "loss": 1.2185, + "step": 3814500 + }, + { + "epoch": 2.29, + "learning_rate": 4.002525958574279e-05, + "loss": 1.1975, + "step": 3815000 + }, + { + "epoch": 2.29, + "learning_rate": 4.002316382011335e-05, + "loss": 1.2022, + "step": 3815500 + }, + { + "epoch": 2.29, + "learning_rate": 4.0021063854552785e-05, + "loss": 1.2372, + "step": 3816000 + }, + { + "epoch": 2.29, + "learning_rate": 4.0018968088923345e-05, + "loss": 1.2136, + "step": 3816500 + }, + { + "epoch": 2.29, + "learning_rate": 4.001686812336278e-05, + "loss": 1.2496, + "step": 3817000 + }, + { + "epoch": 2.29, + "learning_rate": 4.001476815780221e-05, + "loss": 1.2184, + "step": 3817500 + }, + { + "epoch": 2.29, + "learning_rate": 4.001266819224165e-05, + "loss": 1.2038, + "step": 3818000 + }, + { + "epoch": 2.29, + "learning_rate": 4.001056822668108e-05, + "loss": 1.2349, + "step": 3818500 + }, + { + "epoch": 2.29, + "learning_rate": 4.000847246105164e-05, + "loss": 1.2321, + "step": 3819000 + }, + { + "epoch": 2.29, + "learning_rate": 4.000637249549107e-05, + "loss": 1.2216, + "step": 3819500 + }, + { + "epoch": 2.29, + "learning_rate": 4.000427252993051e-05, + "loss": 1.2294, + "step": 3820000 + }, + { + "epoch": 2.29, + "learning_rate": 4.0002176764301066e-05, + "loss": 1.218, + "step": 3820500 + }, + { + "epoch": 2.29, + "learning_rate": 4.00000767987405e-05, + "loss": 1.2304, + "step": 3821000 + }, + { + "epoch": 2.29, + "learning_rate": 3.999797683317993e-05, + "loss": 1.229, + "step": 3821500 + }, + { + "epoch": 2.29, + "learning_rate": 3.9995876867619374e-05, + "loss": 1.2447, + "step": 3822000 + }, + { + "epoch": 2.29, + "learning_rate": 3.999377690205881e-05, + "loss": 1.2044, + "step": 3822500 + }, + { + "epoch": 2.29, + "learning_rate": 3.999167693649824e-05, + "loss": 1.2295, + "step": 3823000 + }, + { + "epoch": 2.29, + "learning_rate": 3.9989576970937674e-05, + "loss": 1.2095, + "step": 3823500 + }, + { + "epoch": 2.29, + "learning_rate": 3.998747700537711e-05, + "loss": 1.2296, + "step": 3824000 + }, + { + "epoch": 2.29, + "learning_rate": 3.998537703981655e-05, + "loss": 1.2479, + "step": 3824500 + }, + { + "epoch": 2.29, + "learning_rate": 3.998327707425598e-05, + "loss": 1.202, + "step": 3825000 + }, + { + "epoch": 2.29, + "learning_rate": 3.9981177108695415e-05, + "loss": 1.2597, + "step": 3825500 + }, + { + "epoch": 2.29, + "learning_rate": 3.9979077143134855e-05, + "loss": 1.2141, + "step": 3826000 + }, + { + "epoch": 2.29, + "learning_rate": 3.997697717757429e-05, + "loss": 1.2214, + "step": 3826500 + }, + { + "epoch": 2.29, + "learning_rate": 3.997487721201372e-05, + "loss": 1.2346, + "step": 3827000 + }, + { + "epoch": 2.29, + "learning_rate": 3.9972781446384275e-05, + "loss": 1.2272, + "step": 3827500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9970681480823716e-05, + "loss": 1.2402, + "step": 3828000 + }, + { + "epoch": 2.3, + "learning_rate": 3.996858151526315e-05, + "loss": 1.2146, + "step": 3828500 + }, + { + "epoch": 2.3, + "learning_rate": 3.996648154970258e-05, + "loss": 1.2089, + "step": 3829000 + }, + { + "epoch": 2.3, + "learning_rate": 3.996438158414202e-05, + "loss": 1.2236, + "step": 3829500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9962281618581456e-05, + "loss": 1.2246, + "step": 3830000 + }, + { + "epoch": 2.3, + "learning_rate": 3.996018585295201e-05, + "loss": 1.2155, + "step": 3830500 + }, + { + "epoch": 2.3, + "learning_rate": 3.995808588739144e-05, + "loss": 1.2294, + "step": 3831000 + }, + { + "epoch": 2.3, + "learning_rate": 3.9955985921830883e-05, + "loss": 1.244, + "step": 3831500 + }, + { + "epoch": 2.3, + "learning_rate": 3.995388595627032e-05, + "loss": 1.203, + "step": 3832000 + }, + { + "epoch": 2.3, + "learning_rate": 3.995178599070975e-05, + "loss": 1.2403, + "step": 3832500 + }, + { + "epoch": 2.3, + "learning_rate": 3.994968602514919e-05, + "loss": 1.2284, + "step": 3833000 + }, + { + "epoch": 2.3, + "learning_rate": 3.9947590259519744e-05, + "loss": 1.1975, + "step": 3833500 + }, + { + "epoch": 2.3, + "learning_rate": 3.994549029395918e-05, + "loss": 1.2493, + "step": 3834000 + }, + { + "epoch": 2.3, + "learning_rate": 3.994339032839862e-05, + "loss": 1.2343, + "step": 3834500 + }, + { + "epoch": 2.3, + "learning_rate": 3.994129036283805e-05, + "loss": 1.2659, + "step": 3835000 + }, + { + "epoch": 2.3, + "learning_rate": 3.9939190397277485e-05, + "loss": 1.2187, + "step": 3835500 + }, + { + "epoch": 2.3, + "learning_rate": 3.993709463164804e-05, + "loss": 1.1905, + "step": 3836000 + }, + { + "epoch": 2.3, + "learning_rate": 3.993499466608748e-05, + "loss": 1.2247, + "step": 3836500 + }, + { + "epoch": 2.3, + "learning_rate": 3.993289470052691e-05, + "loss": 1.217, + "step": 3837000 + }, + { + "epoch": 2.3, + "learning_rate": 3.9930794734966346e-05, + "loss": 1.2256, + "step": 3837500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9928694769405786e-05, + "loss": 1.2343, + "step": 3838000 + }, + { + "epoch": 2.3, + "learning_rate": 3.992659480384521e-05, + "loss": 1.2218, + "step": 3838500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9924494838284646e-05, + "loss": 1.2373, + "step": 3839000 + }, + { + "epoch": 2.3, + "learning_rate": 3.992239907265521e-05, + "loss": 1.2347, + "step": 3839500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9920299107094646e-05, + "loss": 1.2353, + "step": 3840000 + }, + { + "epoch": 2.3, + "learning_rate": 3.991819914153408e-05, + "loss": 1.2096, + "step": 3840500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9916099175973513e-05, + "loss": 1.2193, + "step": 3841000 + }, + { + "epoch": 2.3, + "learning_rate": 3.991399921041295e-05, + "loss": 1.2244, + "step": 3841500 + }, + { + "epoch": 2.3, + "learning_rate": 3.991189924485238e-05, + "loss": 1.2308, + "step": 3842000 + }, + { + "epoch": 2.3, + "learning_rate": 3.990980347922294e-05, + "loss": 1.2142, + "step": 3842500 + }, + { + "epoch": 2.3, + "learning_rate": 3.990770351366238e-05, + "loss": 1.2318, + "step": 3843000 + }, + { + "epoch": 2.3, + "learning_rate": 3.990560354810181e-05, + "loss": 1.2424, + "step": 3843500 + }, + { + "epoch": 2.3, + "learning_rate": 3.990350358254124e-05, + "loss": 1.2326, + "step": 3844000 + }, + { + "epoch": 2.3, + "learning_rate": 3.990140361698068e-05, + "loss": 1.2217, + "step": 3844500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9899312051282355e-05, + "loss": 1.2055, + "step": 3845000 + }, + { + "epoch": 2.31, + "learning_rate": 3.9897212085721795e-05, + "loss": 1.1714, + "step": 3845500 + }, + { + "epoch": 2.31, + "learning_rate": 3.989511212016123e-05, + "loss": 1.2076, + "step": 3846000 + }, + { + "epoch": 2.31, + "learning_rate": 3.989301215460067e-05, + "loss": 1.2242, + "step": 3846500 + }, + { + "epoch": 2.31, + "learning_rate": 3.98909121890401e-05, + "loss": 1.2223, + "step": 3847000 + }, + { + "epoch": 2.31, + "learning_rate": 3.9888812223479536e-05, + "loss": 1.2323, + "step": 3847500 + }, + { + "epoch": 2.31, + "learning_rate": 3.988671225791897e-05, + "loss": 1.2468, + "step": 3848000 + }, + { + "epoch": 2.31, + "learning_rate": 3.98846122923584e-05, + "loss": 1.227, + "step": 3848500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9882512326797836e-05, + "loss": 1.2182, + "step": 3849000 + }, + { + "epoch": 2.31, + "learning_rate": 3.9880416561168397e-05, + "loss": 1.244, + "step": 3849500 + }, + { + "epoch": 2.31, + "learning_rate": 3.987831659560784e-05, + "loss": 1.2073, + "step": 3850000 + }, + { + "epoch": 2.31, + "learning_rate": 3.9876216630047263e-05, + "loss": 1.2531, + "step": 3850500 + }, + { + "epoch": 2.31, + "learning_rate": 3.98741166644867e-05, + "loss": 1.2744, + "step": 3851000 + }, + { + "epoch": 2.31, + "learning_rate": 3.987201669892614e-05, + "loss": 1.2052, + "step": 3851500 + }, + { + "epoch": 2.31, + "learning_rate": 3.98699209332967e-05, + "loss": 1.2213, + "step": 3852000 + }, + { + "epoch": 2.31, + "learning_rate": 3.986782096773613e-05, + "loss": 1.2197, + "step": 3852500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9865725202106685e-05, + "loss": 1.2312, + "step": 3853000 + }, + { + "epoch": 2.31, + "learning_rate": 3.9863625236546125e-05, + "loss": 1.2026, + "step": 3853500 + }, + { + "epoch": 2.31, + "learning_rate": 3.986152527098556e-05, + "loss": 1.2241, + "step": 3854000 + }, + { + "epoch": 2.31, + "learning_rate": 3.985942530542499e-05, + "loss": 1.221, + "step": 3854500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9857325339864425e-05, + "loss": 1.2316, + "step": 3855000 + }, + { + "epoch": 2.31, + "learning_rate": 3.985522537430386e-05, + "loss": 1.2236, + "step": 3855500 + }, + { + "epoch": 2.31, + "learning_rate": 3.985312540874329e-05, + "loss": 1.2518, + "step": 3856000 + }, + { + "epoch": 2.31, + "learning_rate": 3.985102544318273e-05, + "loss": 1.2274, + "step": 3856500 + }, + { + "epoch": 2.31, + "learning_rate": 3.984892967755329e-05, + "loss": 1.2127, + "step": 3857000 + }, + { + "epoch": 2.31, + "learning_rate": 3.9846833911923846e-05, + "loss": 1.2219, + "step": 3857500 + }, + { + "epoch": 2.31, + "learning_rate": 3.984473394636328e-05, + "loss": 1.2206, + "step": 3858000 + }, + { + "epoch": 2.31, + "learning_rate": 3.984263398080271e-05, + "loss": 1.22, + "step": 3858500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9840534015242153e-05, + "loss": 1.2221, + "step": 3859000 + }, + { + "epoch": 2.31, + "learning_rate": 3.983843824961271e-05, + "loss": 1.207, + "step": 3859500 + }, + { + "epoch": 2.31, + "learning_rate": 3.983634248398326e-05, + "loss": 1.2375, + "step": 3860000 + }, + { + "epoch": 2.31, + "learning_rate": 3.9834242518422694e-05, + "loss": 1.2418, + "step": 3860500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9832142552862134e-05, + "loss": 1.2286, + "step": 3861000 + }, + { + "epoch": 2.32, + "learning_rate": 3.983004258730157e-05, + "loss": 1.2387, + "step": 3861500 + }, + { + "epoch": 2.32, + "learning_rate": 3.9827942621741e-05, + "loss": 1.2128, + "step": 3862000 + }, + { + "epoch": 2.32, + "learning_rate": 3.982584265618044e-05, + "loss": 1.2518, + "step": 3862500 + }, + { + "epoch": 2.32, + "learning_rate": 3.9823742690619875e-05, + "loss": 1.2373, + "step": 3863000 + }, + { + "epoch": 2.32, + "learning_rate": 3.982164272505931e-05, + "loss": 1.2124, + "step": 3863500 + }, + { + "epoch": 2.32, + "learning_rate": 3.981954275949875e-05, + "loss": 1.2217, + "step": 3864000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9817442793938175e-05, + "loss": 1.2218, + "step": 3864500 + }, + { + "epoch": 2.32, + "learning_rate": 3.981534282837761e-05, + "loss": 1.1981, + "step": 3865000 + }, + { + "epoch": 2.32, + "learning_rate": 3.981324706274817e-05, + "loss": 1.2045, + "step": 3865500 + }, + { + "epoch": 2.32, + "learning_rate": 3.981114709718761e-05, + "loss": 1.2512, + "step": 3866000 + }, + { + "epoch": 2.32, + "learning_rate": 3.980904713162704e-05, + "loss": 1.2418, + "step": 3866500 + }, + { + "epoch": 2.32, + "learning_rate": 3.9806947166066476e-05, + "loss": 1.2223, + "step": 3867000 + }, + { + "epoch": 2.32, + "learning_rate": 3.980484720050591e-05, + "loss": 1.2658, + "step": 3867500 + }, + { + "epoch": 2.32, + "learning_rate": 3.980274723494534e-05, + "loss": 1.2289, + "step": 3868000 + }, + { + "epoch": 2.32, + "learning_rate": 3.980064726938478e-05, + "loss": 1.2233, + "step": 3868500 + }, + { + "epoch": 2.32, + "learning_rate": 3.979854730382422e-05, + "loss": 1.205, + "step": 3869000 + }, + { + "epoch": 2.32, + "learning_rate": 3.979645153819477e-05, + "loss": 1.2314, + "step": 3869500 + }, + { + "epoch": 2.32, + "learning_rate": 3.9794351572634204e-05, + "loss": 1.2227, + "step": 3870000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9792251607073644e-05, + "loss": 1.232, + "step": 3870500 + }, + { + "epoch": 2.32, + "learning_rate": 3.979015164151308e-05, + "loss": 1.2337, + "step": 3871000 + }, + { + "epoch": 2.32, + "learning_rate": 3.978805167595251e-05, + "loss": 1.2419, + "step": 3871500 + }, + { + "epoch": 2.32, + "learning_rate": 3.978595171039195e-05, + "loss": 1.207, + "step": 3872000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9783851744831385e-05, + "loss": 1.2061, + "step": 3872500 + }, + { + "epoch": 2.32, + "learning_rate": 3.978175177927082e-05, + "loss": 1.2766, + "step": 3873000 + }, + { + "epoch": 2.32, + "learning_rate": 3.977965181371026e-05, + "loss": 1.2395, + "step": 3873500 + }, + { + "epoch": 2.32, + "learning_rate": 3.977755184814969e-05, + "loss": 1.2211, + "step": 3874000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9775456082520245e-05, + "loss": 1.2168, + "step": 3874500 + }, + { + "epoch": 2.32, + "learning_rate": 3.977335611695968e-05, + "loss": 1.2173, + "step": 3875000 + }, + { + "epoch": 2.32, + "learning_rate": 3.977125615139912e-05, + "loss": 1.2133, + "step": 3875500 + }, + { + "epoch": 2.32, + "learning_rate": 3.976915618583855e-05, + "loss": 1.1967, + "step": 3876000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9767056220277986e-05, + "loss": 1.2088, + "step": 3876500 + }, + { + "epoch": 2.32, + "learning_rate": 3.9764956254717426e-05, + "loss": 1.2168, + "step": 3877000 + }, + { + "epoch": 2.32, + "learning_rate": 3.976285628915685e-05, + "loss": 1.2124, + "step": 3877500 + }, + { + "epoch": 2.33, + "learning_rate": 3.976075632359629e-05, + "loss": 1.2249, + "step": 3878000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9758660557966854e-05, + "loss": 1.2312, + "step": 3878500 + }, + { + "epoch": 2.33, + "learning_rate": 3.975656059240629e-05, + "loss": 1.2233, + "step": 3879000 + }, + { + "epoch": 2.33, + "learning_rate": 3.975446062684572e-05, + "loss": 1.2363, + "step": 3879500 + }, + { + "epoch": 2.33, + "learning_rate": 3.9752364861216274e-05, + "loss": 1.2148, + "step": 3880000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9750264895655714e-05, + "loss": 1.2229, + "step": 3880500 + }, + { + "epoch": 2.33, + "learning_rate": 3.974816493009515e-05, + "loss": 1.216, + "step": 3881000 + }, + { + "epoch": 2.33, + "learning_rate": 3.974606496453458e-05, + "loss": 1.2214, + "step": 3881500 + }, + { + "epoch": 2.33, + "learning_rate": 3.9743964998974015e-05, + "loss": 1.2161, + "step": 3882000 + }, + { + "epoch": 2.33, + "learning_rate": 3.974186503341345e-05, + "loss": 1.2188, + "step": 3882500 + }, + { + "epoch": 2.33, + "learning_rate": 3.973976926778401e-05, + "loss": 1.2454, + "step": 3883000 + }, + { + "epoch": 2.33, + "learning_rate": 3.973766930222344e-05, + "loss": 1.2071, + "step": 3883500 + }, + { + "epoch": 2.33, + "learning_rate": 3.973556933666288e-05, + "loss": 1.2426, + "step": 3884000 + }, + { + "epoch": 2.33, + "learning_rate": 3.973346937110231e-05, + "loss": 1.2363, + "step": 3884500 + }, + { + "epoch": 2.33, + "learning_rate": 3.973136940554175e-05, + "loss": 1.2436, + "step": 3885000 + }, + { + "epoch": 2.33, + "learning_rate": 3.972926943998118e-05, + "loss": 1.2437, + "step": 3885500 + }, + { + "epoch": 2.33, + "learning_rate": 3.9727169474420616e-05, + "loss": 1.2279, + "step": 3886000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9725069508860056e-05, + "loss": 1.2067, + "step": 3886500 + }, + { + "epoch": 2.33, + "learning_rate": 3.972297374323061e-05, + "loss": 1.2206, + "step": 3887000 + }, + { + "epoch": 2.33, + "learning_rate": 3.972087797760117e-05, + "loss": 1.2149, + "step": 3887500 + }, + { + "epoch": 2.33, + "learning_rate": 3.9718778012040604e-05, + "loss": 1.2389, + "step": 3888000 + }, + { + "epoch": 2.33, + "learning_rate": 3.971667804648004e-05, + "loss": 1.2087, + "step": 3888500 + }, + { + "epoch": 2.33, + "learning_rate": 3.971457808091948e-05, + "loss": 1.2387, + "step": 3889000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9712478115358904e-05, + "loss": 1.2262, + "step": 3889500 + }, + { + "epoch": 2.33, + "learning_rate": 3.971037814979834e-05, + "loss": 1.2241, + "step": 3890000 + }, + { + "epoch": 2.33, + "learning_rate": 3.970827818423778e-05, + "loss": 1.2458, + "step": 3890500 + }, + { + "epoch": 2.33, + "learning_rate": 3.970617821867721e-05, + "loss": 1.2198, + "step": 3891000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9704078253116645e-05, + "loss": 1.2494, + "step": 3891500 + }, + { + "epoch": 2.33, + "learning_rate": 3.9701982487487205e-05, + "loss": 1.2222, + "step": 3892000 + }, + { + "epoch": 2.33, + "learning_rate": 3.969988252192664e-05, + "loss": 1.2218, + "step": 3892500 + }, + { + "epoch": 2.33, + "learning_rate": 3.969778255636607e-05, + "loss": 1.226, + "step": 3893000 + }, + { + "epoch": 2.33, + "learning_rate": 3.969568259080551e-05, + "loss": 1.2203, + "step": 3893500 + }, + { + "epoch": 2.33, + "learning_rate": 3.9693582625244946e-05, + "loss": 1.2118, + "step": 3894000 + }, + { + "epoch": 2.33, + "learning_rate": 3.969148265968438e-05, + "loss": 1.1966, + "step": 3894500 + }, + { + "epoch": 2.34, + "learning_rate": 3.968938269412382e-05, + "loss": 1.2097, + "step": 3895000 + }, + { + "epoch": 2.34, + "learning_rate": 3.968728272856325e-05, + "loss": 1.2611, + "step": 3895500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9685186962933806e-05, + "loss": 1.2342, + "step": 3896000 + }, + { + "epoch": 2.34, + "learning_rate": 3.968308699737324e-05, + "loss": 1.2161, + "step": 3896500 + }, + { + "epoch": 2.34, + "learning_rate": 3.968098703181268e-05, + "loss": 1.2041, + "step": 3897000 + }, + { + "epoch": 2.34, + "learning_rate": 3.9678887066252113e-05, + "loss": 1.211, + "step": 3897500 + }, + { + "epoch": 2.34, + "learning_rate": 3.967678710069155e-05, + "loss": 1.2134, + "step": 3898000 + }, + { + "epoch": 2.34, + "learning_rate": 3.967468713513099e-05, + "loss": 1.2278, + "step": 3898500 + }, + { + "epoch": 2.34, + "learning_rate": 3.967258716957042e-05, + "loss": 1.1962, + "step": 3899000 + }, + { + "epoch": 2.34, + "learning_rate": 3.9670487204009854e-05, + "loss": 1.223, + "step": 3899500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9668391438380414e-05, + "loss": 1.2325, + "step": 3900000 + }, + { + "epoch": 2.34, + "eval_loss": 1.182997465133667, + "eval_runtime": 1103.215, + "eval_samples_per_second": 477.441, + "eval_steps_per_second": 79.574, + "step": 3900000 + }, + { + "epoch": 2.34, + "learning_rate": 3.966629147281985e-05, + "loss": 1.2135, + "step": 3900500 + }, + { + "epoch": 2.34, + "learning_rate": 3.96641957071904e-05, + "loss": 1.2202, + "step": 3901000 + }, + { + "epoch": 2.34, + "learning_rate": 3.9662095741629835e-05, + "loss": 1.2039, + "step": 3901500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9659995776069275e-05, + "loss": 1.2438, + "step": 3902000 + }, + { + "epoch": 2.34, + "learning_rate": 3.965789581050871e-05, + "loss": 1.2209, + "step": 3902500 + }, + { + "epoch": 2.34, + "learning_rate": 3.965580004487926e-05, + "loss": 1.2199, + "step": 3903000 + }, + { + "epoch": 2.34, + "learning_rate": 3.9653700079318696e-05, + "loss": 1.2234, + "step": 3903500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9651600113758136e-05, + "loss": 1.2066, + "step": 3904000 + }, + { + "epoch": 2.34, + "learning_rate": 3.964950014819757e-05, + "loss": 1.2392, + "step": 3904500 + }, + { + "epoch": 2.34, + "learning_rate": 3.964740438256812e-05, + "loss": 1.1814, + "step": 3905000 + }, + { + "epoch": 2.34, + "learning_rate": 3.964530441700756e-05, + "loss": 1.2365, + "step": 3905500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9643204451446997e-05, + "loss": 1.227, + "step": 3906000 + }, + { + "epoch": 2.34, + "learning_rate": 3.964110448588643e-05, + "loss": 1.2452, + "step": 3906500 + }, + { + "epoch": 2.34, + "learning_rate": 3.963900452032587e-05, + "loss": 1.2149, + "step": 3907000 + }, + { + "epoch": 2.34, + "learning_rate": 3.9636904554765304e-05, + "loss": 1.2287, + "step": 3907500 + }, + { + "epoch": 2.34, + "learning_rate": 3.963480458920474e-05, + "loss": 1.2221, + "step": 3908000 + }, + { + "epoch": 2.34, + "learning_rate": 3.963270462364418e-05, + "loss": 1.2155, + "step": 3908500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9630604658083604e-05, + "loss": 1.2551, + "step": 3909000 + }, + { + "epoch": 2.34, + "learning_rate": 3.962850469252304e-05, + "loss": 1.2434, + "step": 3909500 + }, + { + "epoch": 2.34, + "learning_rate": 3.96264089268936e-05, + "loss": 1.2046, + "step": 3910000 + }, + { + "epoch": 2.34, + "learning_rate": 3.962430896133304e-05, + "loss": 1.2169, + "step": 3910500 + }, + { + "epoch": 2.34, + "learning_rate": 3.962220899577247e-05, + "loss": 1.2146, + "step": 3911000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9620113230143025e-05, + "loss": 1.2568, + "step": 3911500 + }, + { + "epoch": 2.35, + "learning_rate": 3.961801326458246e-05, + "loss": 1.2267, + "step": 3912000 + }, + { + "epoch": 2.35, + "learning_rate": 3.96159132990219e-05, + "loss": 1.218, + "step": 3912500 + }, + { + "epoch": 2.35, + "learning_rate": 3.961381333346133e-05, + "loss": 1.2001, + "step": 3913000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9611713367900766e-05, + "loss": 1.2187, + "step": 3913500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9609617602271326e-05, + "loss": 1.2243, + "step": 3914000 + }, + { + "epoch": 2.35, + "learning_rate": 3.960751763671076e-05, + "loss": 1.2377, + "step": 3914500 + }, + { + "epoch": 2.35, + "learning_rate": 3.960541767115019e-05, + "loss": 1.214, + "step": 3915000 + }, + { + "epoch": 2.35, + "learning_rate": 3.960331770558963e-05, + "loss": 1.2322, + "step": 3915500 + }, + { + "epoch": 2.35, + "learning_rate": 3.960121774002907e-05, + "loss": 1.2044, + "step": 3916000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9599117774468493e-05, + "loss": 1.228, + "step": 3916500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9597017808907934e-05, + "loss": 1.2246, + "step": 3917000 + }, + { + "epoch": 2.35, + "learning_rate": 3.959491784334737e-05, + "loss": 1.2071, + "step": 3917500 + }, + { + "epoch": 2.35, + "learning_rate": 3.95928178777868e-05, + "loss": 1.2354, + "step": 3918000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9590722112157354e-05, + "loss": 1.2243, + "step": 3918500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9588626346527915e-05, + "loss": 1.2141, + "step": 3919000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9586526380967355e-05, + "loss": 1.2364, + "step": 3919500 + }, + { + "epoch": 2.35, + "learning_rate": 3.958442641540679e-05, + "loss": 1.2201, + "step": 3920000 + }, + { + "epoch": 2.35, + "learning_rate": 3.958232644984622e-05, + "loss": 1.221, + "step": 3920500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9580226484285655e-05, + "loss": 1.2547, + "step": 3921000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9578130718656215e-05, + "loss": 1.2474, + "step": 3921500 + }, + { + "epoch": 2.35, + "learning_rate": 3.957603075309565e-05, + "loss": 1.2294, + "step": 3922000 + }, + { + "epoch": 2.35, + "learning_rate": 3.957393078753509e-05, + "loss": 1.263, + "step": 3922500 + }, + { + "epoch": 2.35, + "learning_rate": 3.957183082197452e-05, + "loss": 1.2571, + "step": 3923000 + }, + { + "epoch": 2.35, + "learning_rate": 3.956973085641395e-05, + "loss": 1.2371, + "step": 3923500 + }, + { + "epoch": 2.35, + "learning_rate": 3.956763089085339e-05, + "loss": 1.2215, + "step": 3924000 + }, + { + "epoch": 2.35, + "learning_rate": 3.956553092529282e-05, + "loss": 1.2212, + "step": 3924500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9563430959732256e-05, + "loss": 1.2221, + "step": 3925000 + }, + { + "epoch": 2.35, + "learning_rate": 3.956133519410282e-05, + "loss": 1.2038, + "step": 3925500 + }, + { + "epoch": 2.35, + "learning_rate": 3.955923522854225e-05, + "loss": 1.2145, + "step": 3926000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9557135262981684e-05, + "loss": 1.2251, + "step": 3926500 + }, + { + "epoch": 2.35, + "learning_rate": 3.955503529742112e-05, + "loss": 1.2476, + "step": 3927000 + }, + { + "epoch": 2.35, + "learning_rate": 3.955293533186056e-05, + "loss": 1.2225, + "step": 3927500 + }, + { + "epoch": 2.35, + "learning_rate": 3.955083956623111e-05, + "loss": 1.213, + "step": 3928000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9548739600670544e-05, + "loss": 1.1864, + "step": 3928500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9546639635109985e-05, + "loss": 1.2299, + "step": 3929000 + }, + { + "epoch": 2.36, + "learning_rate": 3.954453966954942e-05, + "loss": 1.2347, + "step": 3929500 + }, + { + "epoch": 2.36, + "learning_rate": 3.954244390391998e-05, + "loss": 1.2048, + "step": 3930000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9540343938359405e-05, + "loss": 1.2036, + "step": 3930500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9538243972798845e-05, + "loss": 1.2505, + "step": 3931000 + }, + { + "epoch": 2.36, + "learning_rate": 3.953614400723828e-05, + "loss": 1.2062, + "step": 3931500 + }, + { + "epoch": 2.36, + "learning_rate": 3.953404404167771e-05, + "loss": 1.2233, + "step": 3932000 + }, + { + "epoch": 2.36, + "learning_rate": 3.953194407611715e-05, + "loss": 1.2271, + "step": 3932500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9529844110556586e-05, + "loss": 1.208, + "step": 3933000 + }, + { + "epoch": 2.36, + "learning_rate": 3.952774414499602e-05, + "loss": 1.1932, + "step": 3933500 + }, + { + "epoch": 2.36, + "learning_rate": 3.952564417943546e-05, + "loss": 1.2209, + "step": 3934000 + }, + { + "epoch": 2.36, + "learning_rate": 3.952354841380601e-05, + "loss": 1.2182, + "step": 3934500 + }, + { + "epoch": 2.36, + "learning_rate": 3.952144844824545e-05, + "loss": 1.2195, + "step": 3935000 + }, + { + "epoch": 2.36, + "learning_rate": 3.951934848268488e-05, + "loss": 1.2237, + "step": 3935500 + }, + { + "epoch": 2.36, + "learning_rate": 3.951724851712432e-05, + "loss": 1.2259, + "step": 3936000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9515148551563754e-05, + "loss": 1.2238, + "step": 3936500 + }, + { + "epoch": 2.36, + "learning_rate": 3.951305278593431e-05, + "loss": 1.2428, + "step": 3937000 + }, + { + "epoch": 2.36, + "learning_rate": 3.951095282037375e-05, + "loss": 1.21, + "step": 3937500 + }, + { + "epoch": 2.36, + "learning_rate": 3.950885285481318e-05, + "loss": 1.2413, + "step": 3938000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9506752889252615e-05, + "loss": 1.2355, + "step": 3938500 + }, + { + "epoch": 2.36, + "learning_rate": 3.950465712362317e-05, + "loss": 1.2127, + "step": 3939000 + }, + { + "epoch": 2.36, + "learning_rate": 3.950255715806261e-05, + "loss": 1.2229, + "step": 3939500 + }, + { + "epoch": 2.36, + "learning_rate": 3.950045719250204e-05, + "loss": 1.2357, + "step": 3940000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9498357226941475e-05, + "loss": 1.2254, + "step": 3940500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9496257261380916e-05, + "loss": 1.2408, + "step": 3941000 + }, + { + "epoch": 2.36, + "learning_rate": 3.949416149575147e-05, + "loss": 1.2336, + "step": 3941500 + }, + { + "epoch": 2.36, + "learning_rate": 3.94920615301909e-05, + "loss": 1.2255, + "step": 3942000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9489961564630336e-05, + "loss": 1.2097, + "step": 3942500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9487861599069776e-05, + "loss": 1.2258, + "step": 3943000 + }, + { + "epoch": 2.36, + "learning_rate": 3.948576163350921e-05, + "loss": 1.2371, + "step": 3943500 + }, + { + "epoch": 2.36, + "learning_rate": 3.948366586787976e-05, + "loss": 1.2487, + "step": 3944000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9481565902319204e-05, + "loss": 1.2133, + "step": 3944500 + }, + { + "epoch": 2.37, + "learning_rate": 3.947946593675864e-05, + "loss": 1.2195, + "step": 3945000 + }, + { + "epoch": 2.37, + "learning_rate": 3.947736597119807e-05, + "loss": 1.2327, + "step": 3945500 + }, + { + "epoch": 2.37, + "learning_rate": 3.947526600563751e-05, + "loss": 1.2133, + "step": 3946000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9473166040076944e-05, + "loss": 1.2001, + "step": 3946500 + }, + { + "epoch": 2.37, + "learning_rate": 3.947106607451638e-05, + "loss": 1.2304, + "step": 3947000 + }, + { + "epoch": 2.37, + "learning_rate": 3.946896610895582e-05, + "loss": 1.2162, + "step": 3947500 + }, + { + "epoch": 2.37, + "learning_rate": 3.946687034332637e-05, + "loss": 1.2176, + "step": 3948000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9464770377765805e-05, + "loss": 1.1931, + "step": 3948500 + }, + { + "epoch": 2.37, + "learning_rate": 3.946267041220524e-05, + "loss": 1.2594, + "step": 3949000 + }, + { + "epoch": 2.37, + "learning_rate": 3.946057044664468e-05, + "loss": 1.2181, + "step": 3949500 + }, + { + "epoch": 2.37, + "learning_rate": 3.945847048108411e-05, + "loss": 1.2022, + "step": 3950000 + }, + { + "epoch": 2.37, + "learning_rate": 3.945637051552354e-05, + "loss": 1.2217, + "step": 3950500 + }, + { + "epoch": 2.37, + "learning_rate": 3.94542747498941e-05, + "loss": 1.2317, + "step": 3951000 + }, + { + "epoch": 2.37, + "learning_rate": 3.945217478433354e-05, + "loss": 1.22, + "step": 3951500 + }, + { + "epoch": 2.37, + "learning_rate": 3.945007481877297e-05, + "loss": 1.2306, + "step": 3952000 + }, + { + "epoch": 2.37, + "learning_rate": 3.944797485321241e-05, + "loss": 1.2177, + "step": 3952500 + }, + { + "epoch": 2.37, + "learning_rate": 3.944587488765184e-05, + "loss": 1.1972, + "step": 3953000 + }, + { + "epoch": 2.37, + "learning_rate": 3.94437791220224e-05, + "loss": 1.2126, + "step": 3953500 + }, + { + "epoch": 2.37, + "learning_rate": 3.9441679156461834e-05, + "loss": 1.227, + "step": 3954000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9439579190901274e-05, + "loss": 1.2464, + "step": 3954500 + }, + { + "epoch": 2.37, + "learning_rate": 3.943748342527183e-05, + "loss": 1.2256, + "step": 3955000 + }, + { + "epoch": 2.37, + "learning_rate": 3.943538345971126e-05, + "loss": 1.2448, + "step": 3955500 + }, + { + "epoch": 2.37, + "learning_rate": 3.9433287694081814e-05, + "loss": 1.2113, + "step": 3956000 + }, + { + "epoch": 2.37, + "learning_rate": 3.943118772852125e-05, + "loss": 1.2208, + "step": 3956500 + }, + { + "epoch": 2.37, + "learning_rate": 3.942908776296069e-05, + "loss": 1.2053, + "step": 3957000 + }, + { + "epoch": 2.37, + "learning_rate": 3.942698779740012e-05, + "loss": 1.2084, + "step": 3957500 + }, + { + "epoch": 2.37, + "learning_rate": 3.942488783183956e-05, + "loss": 1.2354, + "step": 3958000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9422787866278995e-05, + "loss": 1.235, + "step": 3958500 + }, + { + "epoch": 2.37, + "learning_rate": 3.942068790071843e-05, + "loss": 1.242, + "step": 3959000 + }, + { + "epoch": 2.37, + "learning_rate": 3.941858793515787e-05, + "loss": 1.2198, + "step": 3959500 + }, + { + "epoch": 2.37, + "learning_rate": 3.9416487969597296e-05, + "loss": 1.204, + "step": 3960000 + }, + { + "epoch": 2.37, + "learning_rate": 3.941438800403673e-05, + "loss": 1.1948, + "step": 3960500 + }, + { + "epoch": 2.37, + "learning_rate": 3.941229223840729e-05, + "loss": 1.2152, + "step": 3961000 + }, + { + "epoch": 2.38, + "learning_rate": 3.941019227284673e-05, + "loss": 1.2248, + "step": 3961500 + }, + { + "epoch": 2.38, + "learning_rate": 3.940809230728616e-05, + "loss": 1.2, + "step": 3962000 + }, + { + "epoch": 2.38, + "learning_rate": 3.940599234172559e-05, + "loss": 1.2139, + "step": 3962500 + }, + { + "epoch": 2.38, + "learning_rate": 3.940389237616503e-05, + "loss": 1.2378, + "step": 3963000 + }, + { + "epoch": 2.38, + "learning_rate": 3.9401792410604464e-05, + "loss": 1.2209, + "step": 3963500 + }, + { + "epoch": 2.38, + "learning_rate": 3.93996924450439e-05, + "loss": 1.2103, + "step": 3964000 + }, + { + "epoch": 2.38, + "learning_rate": 3.939759667941446e-05, + "loss": 1.2015, + "step": 3964500 + }, + { + "epoch": 2.38, + "learning_rate": 3.939549671385389e-05, + "loss": 1.2247, + "step": 3965000 + }, + { + "epoch": 2.38, + "learning_rate": 3.9393396748293324e-05, + "loss": 1.2298, + "step": 3965500 + }, + { + "epoch": 2.38, + "learning_rate": 3.9391296782732764e-05, + "loss": 1.2251, + "step": 3966000 + }, + { + "epoch": 2.38, + "learning_rate": 3.93891968171722e-05, + "loss": 1.2227, + "step": 3966500 + }, + { + "epoch": 2.38, + "learning_rate": 3.938709685161163e-05, + "loss": 1.2028, + "step": 3967000 + }, + { + "epoch": 2.38, + "learning_rate": 3.938499688605107e-05, + "loss": 1.2052, + "step": 3967500 + }, + { + "epoch": 2.38, + "learning_rate": 3.9382896920490505e-05, + "loss": 1.2249, + "step": 3968000 + }, + { + "epoch": 2.38, + "learning_rate": 3.938080115486106e-05, + "loss": 1.222, + "step": 3968500 + }, + { + "epoch": 2.38, + "learning_rate": 3.937870118930049e-05, + "loss": 1.2299, + "step": 3969000 + }, + { + "epoch": 2.38, + "learning_rate": 3.937660122373993e-05, + "loss": 1.2218, + "step": 3969500 + }, + { + "epoch": 2.38, + "learning_rate": 3.9374501258179366e-05, + "loss": 1.225, + "step": 3970000 + }, + { + "epoch": 2.38, + "learning_rate": 3.937240549254992e-05, + "loss": 1.2338, + "step": 3970500 + }, + { + "epoch": 2.38, + "learning_rate": 3.937030972692048e-05, + "loss": 1.1964, + "step": 3971000 + }, + { + "epoch": 2.38, + "learning_rate": 3.936820976135991e-05, + "loss": 1.1903, + "step": 3971500 + }, + { + "epoch": 2.38, + "learning_rate": 3.936610979579935e-05, + "loss": 1.236, + "step": 3972000 + }, + { + "epoch": 2.38, + "learning_rate": 3.936400983023878e-05, + "loss": 1.194, + "step": 3972500 + }, + { + "epoch": 2.38, + "learning_rate": 3.936190986467822e-05, + "loss": 1.2338, + "step": 3973000 + }, + { + "epoch": 2.38, + "learning_rate": 3.9359809899117654e-05, + "loss": 1.2481, + "step": 3973500 + }, + { + "epoch": 2.38, + "learning_rate": 3.935770993355709e-05, + "loss": 1.2367, + "step": 3974000 + }, + { + "epoch": 2.38, + "learning_rate": 3.935560996799653e-05, + "loss": 1.2048, + "step": 3974500 + }, + { + "epoch": 2.38, + "learning_rate": 3.935351000243596e-05, + "loss": 1.2289, + "step": 3975000 + }, + { + "epoch": 2.38, + "learning_rate": 3.9351410036875394e-05, + "loss": 1.2092, + "step": 3975500 + }, + { + "epoch": 2.38, + "learning_rate": 3.934931427124595e-05, + "loss": 1.2271, + "step": 3976000 + }, + { + "epoch": 2.38, + "learning_rate": 3.934721430568539e-05, + "loss": 1.23, + "step": 3976500 + }, + { + "epoch": 2.38, + "learning_rate": 3.934511434012482e-05, + "loss": 1.2616, + "step": 3977000 + }, + { + "epoch": 2.38, + "learning_rate": 3.9343014374564255e-05, + "loss": 1.2253, + "step": 3977500 + }, + { + "epoch": 2.38, + "learning_rate": 3.9340914409003695e-05, + "loss": 1.2187, + "step": 3978000 + }, + { + "epoch": 2.39, + "learning_rate": 3.933881444344313e-05, + "loss": 1.2422, + "step": 3978500 + }, + { + "epoch": 2.39, + "learning_rate": 3.933671447788256e-05, + "loss": 1.2065, + "step": 3979000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9334614512322e-05, + "loss": 1.1699, + "step": 3979500 + }, + { + "epoch": 2.39, + "learning_rate": 3.9332518746692556e-05, + "loss": 1.1928, + "step": 3980000 + }, + { + "epoch": 2.39, + "learning_rate": 3.933042298106311e-05, + "loss": 1.2199, + "step": 3980500 + }, + { + "epoch": 2.39, + "learning_rate": 3.932832301550254e-05, + "loss": 1.2142, + "step": 3981000 + }, + { + "epoch": 2.39, + "learning_rate": 3.932622304994198e-05, + "loss": 1.2195, + "step": 3981500 + }, + { + "epoch": 2.39, + "learning_rate": 3.932412308438142e-05, + "loss": 1.2127, + "step": 3982000 + }, + { + "epoch": 2.39, + "learning_rate": 3.932202731875197e-05, + "loss": 1.2224, + "step": 3982500 + }, + { + "epoch": 2.39, + "learning_rate": 3.9319927353191404e-05, + "loss": 1.2491, + "step": 3983000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9317827387630844e-05, + "loss": 1.2436, + "step": 3983500 + }, + { + "epoch": 2.39, + "learning_rate": 3.931572742207028e-05, + "loss": 1.2479, + "step": 3984000 + }, + { + "epoch": 2.39, + "learning_rate": 3.931362745650971e-05, + "loss": 1.1887, + "step": 3984500 + }, + { + "epoch": 2.39, + "learning_rate": 3.931152749094915e-05, + "loss": 1.2188, + "step": 3985000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9309427525388585e-05, + "loss": 1.1963, + "step": 3985500 + }, + { + "epoch": 2.39, + "learning_rate": 3.930732755982802e-05, + "loss": 1.1988, + "step": 3986000 + }, + { + "epoch": 2.39, + "learning_rate": 3.930523179419857e-05, + "loss": 1.2259, + "step": 3986500 + }, + { + "epoch": 2.39, + "learning_rate": 3.930313182863801e-05, + "loss": 1.2126, + "step": 3987000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9301036063008566e-05, + "loss": 1.1968, + "step": 3987500 + }, + { + "epoch": 2.39, + "learning_rate": 3.9298936097448e-05, + "loss": 1.2178, + "step": 3988000 + }, + { + "epoch": 2.39, + "learning_rate": 3.929683613188744e-05, + "loss": 1.204, + "step": 3988500 + }, + { + "epoch": 2.39, + "learning_rate": 3.929473616632687e-05, + "loss": 1.2282, + "step": 3989000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9292640400697426e-05, + "loss": 1.1988, + "step": 3989500 + }, + { + "epoch": 2.39, + "learning_rate": 3.929054043513686e-05, + "loss": 1.2136, + "step": 3990000 + }, + { + "epoch": 2.39, + "learning_rate": 3.92884404695763e-05, + "loss": 1.2268, + "step": 3990500 + }, + { + "epoch": 2.39, + "learning_rate": 3.9286340504015733e-05, + "loss": 1.2081, + "step": 3991000 + }, + { + "epoch": 2.39, + "learning_rate": 3.928424053845517e-05, + "loss": 1.2573, + "step": 3991500 + }, + { + "epoch": 2.39, + "learning_rate": 3.928214057289461e-05, + "loss": 1.2153, + "step": 3992000 + }, + { + "epoch": 2.39, + "learning_rate": 3.928004060733404e-05, + "loss": 1.2559, + "step": 3992500 + }, + { + "epoch": 2.39, + "learning_rate": 3.9277940641773474e-05, + "loss": 1.2327, + "step": 3993000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9275840676212914e-05, + "loss": 1.2099, + "step": 3993500 + }, + { + "epoch": 2.39, + "learning_rate": 3.927374071065234e-05, + "loss": 1.2278, + "step": 3994000 + }, + { + "epoch": 2.39, + "learning_rate": 3.92716449450229e-05, + "loss": 1.25, + "step": 3994500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9269544979462335e-05, + "loss": 1.2103, + "step": 3995000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9267449213832895e-05, + "loss": 1.2158, + "step": 3995500 + }, + { + "epoch": 2.4, + "learning_rate": 3.926534924827233e-05, + "loss": 1.214, + "step": 3996000 + }, + { + "epoch": 2.4, + "learning_rate": 3.926324928271176e-05, + "loss": 1.2043, + "step": 3996500 + }, + { + "epoch": 2.4, + "learning_rate": 3.92611493171512e-05, + "loss": 1.1996, + "step": 3997000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9259049351590636e-05, + "loss": 1.1874, + "step": 3997500 + }, + { + "epoch": 2.4, + "learning_rate": 3.925694938603007e-05, + "loss": 1.2523, + "step": 3998000 + }, + { + "epoch": 2.4, + "learning_rate": 3.925484942046951e-05, + "loss": 1.2168, + "step": 3998500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9252749454908936e-05, + "loss": 1.212, + "step": 3999000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9250653689279496e-05, + "loss": 1.2271, + "step": 3999500 + }, + { + "epoch": 2.4, + "learning_rate": 3.924855372371893e-05, + "loss": 1.2031, + "step": 4000000 + }, + { + "epoch": 2.4, + "eval_loss": 1.1803804636001587, + "eval_runtime": 1110.222, + "eval_samples_per_second": 474.428, + "eval_steps_per_second": 79.072, + "step": 4000000 + }, + { + "epoch": 2.4, + "learning_rate": 3.924645375815837e-05, + "loss": 1.2029, + "step": 4000500 + }, + { + "epoch": 2.4, + "learning_rate": 3.92443537925978e-05, + "loss": 1.2338, + "step": 4001000 + }, + { + "epoch": 2.4, + "learning_rate": 3.924225382703723e-05, + "loss": 1.2061, + "step": 4001500 + }, + { + "epoch": 2.4, + "learning_rate": 3.924015806140779e-05, + "loss": 1.2055, + "step": 4002000 + }, + { + "epoch": 2.4, + "learning_rate": 3.923805809584723e-05, + "loss": 1.2464, + "step": 4002500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9235958130286664e-05, + "loss": 1.2344, + "step": 4003000 + }, + { + "epoch": 2.4, + "learning_rate": 3.92338581647261e-05, + "loss": 1.2164, + "step": 4003500 + }, + { + "epoch": 2.4, + "learning_rate": 3.923175819916553e-05, + "loss": 1.2256, + "step": 4004000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9229658233604965e-05, + "loss": 1.2295, + "step": 4004500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9227558268044405e-05, + "loss": 1.2117, + "step": 4005000 + }, + { + "epoch": 2.4, + "learning_rate": 3.922545830248384e-05, + "loss": 1.2196, + "step": 4005500 + }, + { + "epoch": 2.4, + "learning_rate": 3.922336253685439e-05, + "loss": 1.2075, + "step": 4006000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9221262571293825e-05, + "loss": 1.2173, + "step": 4006500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9219162605733266e-05, + "loss": 1.2265, + "step": 4007000 + }, + { + "epoch": 2.4, + "learning_rate": 3.92170626401727e-05, + "loss": 1.2209, + "step": 4007500 + }, + { + "epoch": 2.4, + "learning_rate": 3.921496687454326e-05, + "loss": 1.2143, + "step": 4008000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9212866908982686e-05, + "loss": 1.1951, + "step": 4008500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9210766943422126e-05, + "loss": 1.203, + "step": 4009000 + }, + { + "epoch": 2.4, + "learning_rate": 3.920866697786156e-05, + "loss": 1.1978, + "step": 4009500 + }, + { + "epoch": 2.4, + "learning_rate": 3.920656701230099e-05, + "loss": 1.2072, + "step": 4010000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9204475446602674e-05, + "loss": 1.2454, + "step": 4010500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9202375481042114e-05, + "loss": 1.1936, + "step": 4011000 + }, + { + "epoch": 2.41, + "learning_rate": 3.920027551548155e-05, + "loss": 1.2084, + "step": 4011500 + }, + { + "epoch": 2.41, + "learning_rate": 3.919817554992098e-05, + "loss": 1.1898, + "step": 4012000 + }, + { + "epoch": 2.41, + "learning_rate": 3.919607558436042e-05, + "loss": 1.2102, + "step": 4012500 + }, + { + "epoch": 2.41, + "learning_rate": 3.919397561879985e-05, + "loss": 1.2079, + "step": 4013000 + }, + { + "epoch": 2.41, + "learning_rate": 3.919187985317041e-05, + "loss": 1.2116, + "step": 4013500 + }, + { + "epoch": 2.41, + "learning_rate": 3.918977988760984e-05, + "loss": 1.1974, + "step": 4014000 + }, + { + "epoch": 2.41, + "learning_rate": 3.918767992204928e-05, + "loss": 1.2262, + "step": 4014500 + }, + { + "epoch": 2.41, + "learning_rate": 3.9185579956488715e-05, + "loss": 1.2148, + "step": 4015000 + }, + { + "epoch": 2.41, + "learning_rate": 3.918348419085927e-05, + "loss": 1.2092, + "step": 4015500 + }, + { + "epoch": 2.41, + "learning_rate": 3.91813842252987e-05, + "loss": 1.2172, + "step": 4016000 + }, + { + "epoch": 2.41, + "learning_rate": 3.917928425973814e-05, + "loss": 1.1798, + "step": 4016500 + }, + { + "epoch": 2.41, + "learning_rate": 3.9177184294177576e-05, + "loss": 1.227, + "step": 4017000 + }, + { + "epoch": 2.41, + "learning_rate": 3.9175084328617016e-05, + "loss": 1.2369, + "step": 4017500 + }, + { + "epoch": 2.41, + "learning_rate": 3.917298856298757e-05, + "loss": 1.2196, + "step": 4018000 + }, + { + "epoch": 2.41, + "learning_rate": 3.9170888597427e-05, + "loss": 1.2308, + "step": 4018500 + }, + { + "epoch": 2.41, + "learning_rate": 3.916878863186644e-05, + "loss": 1.2334, + "step": 4019000 + }, + { + "epoch": 2.41, + "learning_rate": 3.916668866630588e-05, + "loss": 1.2158, + "step": 4019500 + }, + { + "epoch": 2.41, + "learning_rate": 3.916458870074531e-05, + "loss": 1.2299, + "step": 4020000 + }, + { + "epoch": 2.41, + "learning_rate": 3.916248873518474e-05, + "loss": 1.2024, + "step": 4020500 + }, + { + "epoch": 2.41, + "learning_rate": 3.916038876962418e-05, + "loss": 1.247, + "step": 4021000 + }, + { + "epoch": 2.41, + "learning_rate": 3.915828880406361e-05, + "loss": 1.1977, + "step": 4021500 + }, + { + "epoch": 2.41, + "learning_rate": 3.915619303843417e-05, + "loss": 1.2243, + "step": 4022000 + }, + { + "epoch": 2.41, + "learning_rate": 3.91540930728736e-05, + "loss": 1.214, + "step": 4022500 + }, + { + "epoch": 2.41, + "learning_rate": 3.915199310731304e-05, + "loss": 1.2222, + "step": 4023000 + }, + { + "epoch": 2.41, + "learning_rate": 3.91498973416836e-05, + "loss": 1.2033, + "step": 4023500 + }, + { + "epoch": 2.41, + "learning_rate": 3.914779737612303e-05, + "loss": 1.2374, + "step": 4024000 + }, + { + "epoch": 2.41, + "learning_rate": 3.914569741056247e-05, + "loss": 1.2157, + "step": 4024500 + }, + { + "epoch": 2.41, + "learning_rate": 3.91435974450019e-05, + "loss": 1.2258, + "step": 4025000 + }, + { + "epoch": 2.41, + "learning_rate": 3.914149747944133e-05, + "loss": 1.2522, + "step": 4025500 + }, + { + "epoch": 2.41, + "learning_rate": 3.913939751388077e-05, + "loss": 1.2239, + "step": 4026000 + }, + { + "epoch": 2.41, + "learning_rate": 3.9137297548320206e-05, + "loss": 1.2323, + "step": 4026500 + }, + { + "epoch": 2.41, + "learning_rate": 3.9135201782690766e-05, + "loss": 1.2101, + "step": 4027000 + }, + { + "epoch": 2.41, + "learning_rate": 3.913310181713019e-05, + "loss": 1.2292, + "step": 4027500 + }, + { + "epoch": 2.41, + "learning_rate": 3.913100185156963e-05, + "loss": 1.2309, + "step": 4028000 + }, + { + "epoch": 2.42, + "learning_rate": 3.912890188600907e-05, + "loss": 1.2354, + "step": 4028500 + }, + { + "epoch": 2.42, + "learning_rate": 3.91268019204485e-05, + "loss": 1.21, + "step": 4029000 + }, + { + "epoch": 2.42, + "learning_rate": 3.912470615481906e-05, + "loss": 1.2372, + "step": 4029500 + }, + { + "epoch": 2.42, + "learning_rate": 3.9122606189258494e-05, + "loss": 1.2124, + "step": 4030000 + }, + { + "epoch": 2.42, + "learning_rate": 3.912050622369793e-05, + "loss": 1.2474, + "step": 4030500 + }, + { + "epoch": 2.42, + "learning_rate": 3.911840625813737e-05, + "loss": 1.2053, + "step": 4031000 + }, + { + "epoch": 2.42, + "learning_rate": 3.91163062925768e-05, + "loss": 1.217, + "step": 4031500 + }, + { + "epoch": 2.42, + "learning_rate": 3.911421472687848e-05, + "loss": 1.2627, + "step": 4032000 + }, + { + "epoch": 2.42, + "learning_rate": 3.9112114761317915e-05, + "loss": 1.2186, + "step": 4032500 + }, + { + "epoch": 2.42, + "learning_rate": 3.911001479575735e-05, + "loss": 1.2202, + "step": 4033000 + }, + { + "epoch": 2.42, + "learning_rate": 3.910791483019679e-05, + "loss": 1.2264, + "step": 4033500 + }, + { + "epoch": 2.42, + "learning_rate": 3.910581486463622e-05, + "loss": 1.2483, + "step": 4034000 + }, + { + "epoch": 2.42, + "learning_rate": 3.910371489907565e-05, + "loss": 1.2219, + "step": 4034500 + }, + { + "epoch": 2.42, + "learning_rate": 3.910161913344621e-05, + "loss": 1.2246, + "step": 4035000 + }, + { + "epoch": 2.42, + "learning_rate": 3.909951916788565e-05, + "loss": 1.2301, + "step": 4035500 + }, + { + "epoch": 2.42, + "learning_rate": 3.90974234022562e-05, + "loss": 1.2103, + "step": 4036000 + }, + { + "epoch": 2.42, + "learning_rate": 3.9095323436695637e-05, + "loss": 1.2154, + "step": 4036500 + }, + { + "epoch": 2.42, + "learning_rate": 3.909322347113508e-05, + "loss": 1.2242, + "step": 4037000 + }, + { + "epoch": 2.42, + "learning_rate": 3.909112350557451e-05, + "loss": 1.2288, + "step": 4037500 + }, + { + "epoch": 2.42, + "learning_rate": 3.9089023540013944e-05, + "loss": 1.2116, + "step": 4038000 + }, + { + "epoch": 2.42, + "learning_rate": 3.9086923574453384e-05, + "loss": 1.2543, + "step": 4038500 + }, + { + "epoch": 2.42, + "learning_rate": 3.908483200875505e-05, + "loss": 1.2354, + "step": 4039000 + }, + { + "epoch": 2.42, + "learning_rate": 3.908273204319449e-05, + "loss": 1.2052, + "step": 4039500 + }, + { + "epoch": 2.42, + "learning_rate": 3.9080632077633925e-05, + "loss": 1.2095, + "step": 4040000 + }, + { + "epoch": 2.42, + "learning_rate": 3.907853211207336e-05, + "loss": 1.248, + "step": 4040500 + }, + { + "epoch": 2.42, + "learning_rate": 3.90764321465128e-05, + "loss": 1.2232, + "step": 4041000 + }, + { + "epoch": 2.42, + "learning_rate": 3.907433218095223e-05, + "loss": 1.2078, + "step": 4041500 + }, + { + "epoch": 2.42, + "learning_rate": 3.9072232215391665e-05, + "loss": 1.1842, + "step": 4042000 + }, + { + "epoch": 2.42, + "learning_rate": 3.9070132249831105e-05, + "loss": 1.2017, + "step": 4042500 + }, + { + "epoch": 2.42, + "learning_rate": 3.906803228427054e-05, + "loss": 1.2267, + "step": 4043000 + }, + { + "epoch": 2.42, + "learning_rate": 3.906593231870997e-05, + "loss": 1.2406, + "step": 4043500 + }, + { + "epoch": 2.42, + "learning_rate": 3.9063832353149406e-05, + "loss": 1.2154, + "step": 4044000 + }, + { + "epoch": 2.42, + "learning_rate": 3.906173238758884e-05, + "loss": 1.2191, + "step": 4044500 + }, + { + "epoch": 2.43, + "learning_rate": 3.905963242202828e-05, + "loss": 1.2114, + "step": 4045000 + }, + { + "epoch": 2.43, + "learning_rate": 3.905753245646771e-05, + "loss": 1.2223, + "step": 4045500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9055432490907146e-05, + "loss": 1.25, + "step": 4046000 + }, + { + "epoch": 2.43, + "learning_rate": 3.90533367252777e-05, + "loss": 1.2221, + "step": 4046500 + }, + { + "epoch": 2.43, + "learning_rate": 3.905123675971714e-05, + "loss": 1.2487, + "step": 4047000 + }, + { + "epoch": 2.43, + "learning_rate": 3.9049136794156574e-05, + "loss": 1.2401, + "step": 4047500 + }, + { + "epoch": 2.43, + "learning_rate": 3.904703682859601e-05, + "loss": 1.2349, + "step": 4048000 + }, + { + "epoch": 2.43, + "learning_rate": 3.904493686303545e-05, + "loss": 1.2031, + "step": 4048500 + }, + { + "epoch": 2.43, + "learning_rate": 3.904283689747488e-05, + "loss": 1.2308, + "step": 4049000 + }, + { + "epoch": 2.43, + "learning_rate": 3.9040736931914314e-05, + "loss": 1.2282, + "step": 4049500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9038636966353755e-05, + "loss": 1.2161, + "step": 4050000 + }, + { + "epoch": 2.43, + "learning_rate": 3.903654120072431e-05, + "loss": 1.1919, + "step": 4050500 + }, + { + "epoch": 2.43, + "learning_rate": 3.903444123516374e-05, + "loss": 1.2266, + "step": 4051000 + }, + { + "epoch": 2.43, + "learning_rate": 3.9032341269603175e-05, + "loss": 1.2017, + "step": 4051500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9030241304042615e-05, + "loss": 1.2076, + "step": 4052000 + }, + { + "epoch": 2.43, + "learning_rate": 3.902814133848205e-05, + "loss": 1.1977, + "step": 4052500 + }, + { + "epoch": 2.43, + "learning_rate": 3.902604137292148e-05, + "loss": 1.2084, + "step": 4053000 + }, + { + "epoch": 2.43, + "learning_rate": 3.902394140736092e-05, + "loss": 1.2279, + "step": 4053500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9021841441800356e-05, + "loss": 1.2395, + "step": 4054000 + }, + { + "epoch": 2.43, + "learning_rate": 3.901974147623979e-05, + "loss": 1.2089, + "step": 4054500 + }, + { + "epoch": 2.43, + "learning_rate": 3.901764151067922e-05, + "loss": 1.2309, + "step": 4055000 + }, + { + "epoch": 2.43, + "learning_rate": 3.901554574504978e-05, + "loss": 1.222, + "step": 4055500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9013445779489217e-05, + "loss": 1.204, + "step": 4056000 + }, + { + "epoch": 2.43, + "learning_rate": 3.901134581392866e-05, + "loss": 1.2047, + "step": 4056500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9009245848368084e-05, + "loss": 1.2174, + "step": 4057000 + }, + { + "epoch": 2.43, + "learning_rate": 3.900714588280752e-05, + "loss": 1.2433, + "step": 4057500 + }, + { + "epoch": 2.43, + "learning_rate": 3.900505011717808e-05, + "loss": 1.2193, + "step": 4058000 + }, + { + "epoch": 2.43, + "learning_rate": 3.900295015161752e-05, + "loss": 1.2104, + "step": 4058500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9000850186056944e-05, + "loss": 1.2314, + "step": 4059000 + }, + { + "epoch": 2.43, + "learning_rate": 3.899875022049638e-05, + "loss": 1.2266, + "step": 4059500 + }, + { + "epoch": 2.43, + "learning_rate": 3.899665445486694e-05, + "loss": 1.2373, + "step": 4060000 + }, + { + "epoch": 2.43, + "learning_rate": 3.899455448930638e-05, + "loss": 1.2424, + "step": 4060500 + }, + { + "epoch": 2.43, + "learning_rate": 3.899245872367693e-05, + "loss": 1.2053, + "step": 4061000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8990358758116365e-05, + "loss": 1.1964, + "step": 4061500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8988258792555806e-05, + "loss": 1.2406, + "step": 4062000 + }, + { + "epoch": 2.44, + "learning_rate": 3.898615882699524e-05, + "loss": 1.208, + "step": 4062500 + }, + { + "epoch": 2.44, + "learning_rate": 3.898405886143467e-05, + "loss": 1.2105, + "step": 4063000 + }, + { + "epoch": 2.44, + "learning_rate": 3.898195889587411e-05, + "loss": 1.2196, + "step": 4063500 + }, + { + "epoch": 2.44, + "learning_rate": 3.897985893031354e-05, + "loss": 1.2089, + "step": 4064000 + }, + { + "epoch": 2.44, + "learning_rate": 3.897775896475297e-05, + "loss": 1.232, + "step": 4064500 + }, + { + "epoch": 2.44, + "learning_rate": 3.897565899919241e-05, + "loss": 1.2286, + "step": 4065000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8973559033631847e-05, + "loss": 1.2042, + "step": 4065500 + }, + { + "epoch": 2.44, + "learning_rate": 3.897146326800241e-05, + "loss": 1.2156, + "step": 4066000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8969363302441834e-05, + "loss": 1.2196, + "step": 4066500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8967263336881274e-05, + "loss": 1.1966, + "step": 4067000 + }, + { + "epoch": 2.44, + "learning_rate": 3.896516337132071e-05, + "loss": 1.2194, + "step": 4067500 + }, + { + "epoch": 2.44, + "learning_rate": 3.896306340576014e-05, + "loss": 1.2137, + "step": 4068000 + }, + { + "epoch": 2.44, + "learning_rate": 3.896096344019958e-05, + "loss": 1.209, + "step": 4068500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8958863474639014e-05, + "loss": 1.2157, + "step": 4069000 + }, + { + "epoch": 2.44, + "learning_rate": 3.895676350907845e-05, + "loss": 1.2316, + "step": 4069500 + }, + { + "epoch": 2.44, + "learning_rate": 3.895466774344901e-05, + "loss": 1.23, + "step": 4070000 + }, + { + "epoch": 2.44, + "learning_rate": 3.895256777788844e-05, + "loss": 1.2541, + "step": 4070500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8950467812327875e-05, + "loss": 1.2285, + "step": 4071000 + }, + { + "epoch": 2.44, + "learning_rate": 3.894837204669843e-05, + "loss": 1.2466, + "step": 4071500 + }, + { + "epoch": 2.44, + "learning_rate": 3.894627208113787e-05, + "loss": 1.2337, + "step": 4072000 + }, + { + "epoch": 2.44, + "learning_rate": 3.89441721155773e-05, + "loss": 1.2221, + "step": 4072500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8942072150016736e-05, + "loss": 1.2102, + "step": 4073000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8939972184456176e-05, + "loss": 1.2122, + "step": 4073500 + }, + { + "epoch": 2.44, + "learning_rate": 3.893787221889561e-05, + "loss": 1.2219, + "step": 4074000 + }, + { + "epoch": 2.44, + "learning_rate": 3.893577225333504e-05, + "loss": 1.2086, + "step": 4074500 + }, + { + "epoch": 2.44, + "learning_rate": 3.893367228777448e-05, + "loss": 1.2127, + "step": 4075000 + }, + { + "epoch": 2.44, + "learning_rate": 3.893157652214504e-05, + "loss": 1.1915, + "step": 4075500 + }, + { + "epoch": 2.44, + "learning_rate": 3.892947655658447e-05, + "loss": 1.2305, + "step": 4076000 + }, + { + "epoch": 2.44, + "learning_rate": 3.892737659102391e-05, + "loss": 1.2378, + "step": 4076500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8925276625463344e-05, + "loss": 1.2216, + "step": 4077000 + }, + { + "epoch": 2.44, + "learning_rate": 3.89231808598339e-05, + "loss": 1.1979, + "step": 4077500 + }, + { + "epoch": 2.44, + "learning_rate": 3.892108089427333e-05, + "loss": 1.2064, + "step": 4078000 + }, + { + "epoch": 2.45, + "learning_rate": 3.891898092871277e-05, + "loss": 1.2332, + "step": 4078500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8916885163083325e-05, + "loss": 1.2236, + "step": 4079000 + }, + { + "epoch": 2.45, + "learning_rate": 3.891478519752276e-05, + "loss": 1.2301, + "step": 4079500 + }, + { + "epoch": 2.45, + "learning_rate": 3.891268523196219e-05, + "loss": 1.2126, + "step": 4080000 + }, + { + "epoch": 2.45, + "learning_rate": 3.891058526640163e-05, + "loss": 1.2311, + "step": 4080500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8908485300841065e-05, + "loss": 1.2463, + "step": 4081000 + }, + { + "epoch": 2.45, + "learning_rate": 3.89063853352805e-05, + "loss": 1.2165, + "step": 4081500 + }, + { + "epoch": 2.45, + "learning_rate": 3.890428536971994e-05, + "loss": 1.2168, + "step": 4082000 + }, + { + "epoch": 2.45, + "learning_rate": 3.890218540415937e-05, + "loss": 1.2189, + "step": 4082500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8900089638529926e-05, + "loss": 1.2384, + "step": 4083000 + }, + { + "epoch": 2.45, + "learning_rate": 3.8897989672969366e-05, + "loss": 1.2113, + "step": 4083500 + }, + { + "epoch": 2.45, + "learning_rate": 3.88958897074088e-05, + "loss": 1.2245, + "step": 4084000 + }, + { + "epoch": 2.45, + "learning_rate": 3.889378974184823e-05, + "loss": 1.1906, + "step": 4084500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8891689776287674e-05, + "loss": 1.2169, + "step": 4085000 + }, + { + "epoch": 2.45, + "learning_rate": 3.888958981072711e-05, + "loss": 1.2146, + "step": 4085500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8887489845166534e-05, + "loss": 1.2385, + "step": 4086000 + }, + { + "epoch": 2.45, + "learning_rate": 3.8885389879605974e-05, + "loss": 1.2286, + "step": 4086500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8883294113976534e-05, + "loss": 1.2097, + "step": 4087000 + }, + { + "epoch": 2.45, + "learning_rate": 3.888119834834709e-05, + "loss": 1.2188, + "step": 4087500 + }, + { + "epoch": 2.45, + "learning_rate": 3.887909838278652e-05, + "loss": 1.2236, + "step": 4088000 + }, + { + "epoch": 2.45, + "learning_rate": 3.8876998417225955e-05, + "loss": 1.2404, + "step": 4088500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8874898451665395e-05, + "loss": 1.1954, + "step": 4089000 + }, + { + "epoch": 2.45, + "learning_rate": 3.887279848610483e-05, + "loss": 1.2449, + "step": 4089500 + }, + { + "epoch": 2.45, + "learning_rate": 3.887069852054426e-05, + "loss": 1.2372, + "step": 4090000 + }, + { + "epoch": 2.45, + "learning_rate": 3.88685985549837e-05, + "loss": 1.2379, + "step": 4090500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8866502789354256e-05, + "loss": 1.2335, + "step": 4091000 + }, + { + "epoch": 2.45, + "learning_rate": 3.886440282379369e-05, + "loss": 1.244, + "step": 4091500 + }, + { + "epoch": 2.45, + "learning_rate": 3.886230285823313e-05, + "loss": 1.2322, + "step": 4092000 + }, + { + "epoch": 2.45, + "learning_rate": 3.886020289267256e-05, + "loss": 1.2258, + "step": 4092500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8858102927111996e-05, + "loss": 1.2326, + "step": 4093000 + }, + { + "epoch": 2.45, + "learning_rate": 3.885600296155143e-05, + "loss": 1.2122, + "step": 4093500 + }, + { + "epoch": 2.45, + "learning_rate": 3.885390719592199e-05, + "loss": 1.22, + "step": 4094000 + }, + { + "epoch": 2.45, + "learning_rate": 3.8851807230361424e-05, + "loss": 1.2125, + "step": 4094500 + }, + { + "epoch": 2.46, + "learning_rate": 3.884970726480086e-05, + "loss": 1.2113, + "step": 4095000 + }, + { + "epoch": 2.46, + "learning_rate": 3.884760729924029e-05, + "loss": 1.1873, + "step": 4095500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8845507333679724e-05, + "loss": 1.2142, + "step": 4096000 + }, + { + "epoch": 2.46, + "learning_rate": 3.8843411568050284e-05, + "loss": 1.211, + "step": 4096500 + }, + { + "epoch": 2.46, + "learning_rate": 3.884131160248972e-05, + "loss": 1.2355, + "step": 4097000 + }, + { + "epoch": 2.46, + "learning_rate": 3.883921163692916e-05, + "loss": 1.2273, + "step": 4097500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8837111671368585e-05, + "loss": 1.218, + "step": 4098000 + }, + { + "epoch": 2.46, + "learning_rate": 3.8835015905739145e-05, + "loss": 1.2227, + "step": 4098500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8832915940178585e-05, + "loss": 1.1832, + "step": 4099000 + }, + { + "epoch": 2.46, + "learning_rate": 3.883081597461802e-05, + "loss": 1.2112, + "step": 4099500 + }, + { + "epoch": 2.46, + "learning_rate": 3.882871600905745e-05, + "loss": 1.2406, + "step": 4100000 + }, + { + "epoch": 2.46, + "eval_loss": 1.1778119802474976, + "eval_runtime": 1105.3868, + "eval_samples_per_second": 476.503, + "eval_steps_per_second": 79.417, + "step": 4100000 + }, + { + "epoch": 2.46, + "learning_rate": 3.8826616043496886e-05, + "loss": 1.2149, + "step": 4100500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8824520277867446e-05, + "loss": 1.2216, + "step": 4101000 + }, + { + "epoch": 2.46, + "learning_rate": 3.882242031230688e-05, + "loss": 1.2, + "step": 4101500 + }, + { + "epoch": 2.46, + "learning_rate": 3.882032034674631e-05, + "loss": 1.2241, + "step": 4102000 + }, + { + "epoch": 2.46, + "learning_rate": 3.881822038118575e-05, + "loss": 1.2425, + "step": 4102500 + }, + { + "epoch": 2.46, + "learning_rate": 3.881612041562518e-05, + "loss": 1.2225, + "step": 4103000 + }, + { + "epoch": 2.46, + "learning_rate": 3.881402464999574e-05, + "loss": 1.1942, + "step": 4103500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8811924684435174e-05, + "loss": 1.2254, + "step": 4104000 + }, + { + "epoch": 2.46, + "learning_rate": 3.8809824718874614e-05, + "loss": 1.2022, + "step": 4104500 + }, + { + "epoch": 2.46, + "learning_rate": 3.880772475331404e-05, + "loss": 1.2268, + "step": 4105000 + }, + { + "epoch": 2.46, + "learning_rate": 3.880562478775348e-05, + "loss": 1.2162, + "step": 4105500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8803524822192914e-05, + "loss": 1.207, + "step": 4106000 + }, + { + "epoch": 2.46, + "learning_rate": 3.8801429056563475e-05, + "loss": 1.216, + "step": 4106500 + }, + { + "epoch": 2.46, + "learning_rate": 3.879932909100291e-05, + "loss": 1.2357, + "step": 4107000 + }, + { + "epoch": 2.46, + "learning_rate": 3.879722912544234e-05, + "loss": 1.1981, + "step": 4107500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8795129159881775e-05, + "loss": 1.2308, + "step": 4108000 + }, + { + "epoch": 2.46, + "learning_rate": 3.879302919432121e-05, + "loss": 1.2108, + "step": 4108500 + }, + { + "epoch": 2.46, + "learning_rate": 3.879092922876065e-05, + "loss": 1.205, + "step": 4109000 + }, + { + "epoch": 2.46, + "learning_rate": 3.878882926320008e-05, + "loss": 1.2105, + "step": 4109500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8786729297639516e-05, + "loss": 1.2176, + "step": 4110000 + }, + { + "epoch": 2.46, + "learning_rate": 3.878463353201007e-05, + "loss": 1.2359, + "step": 4110500 + }, + { + "epoch": 2.46, + "learning_rate": 3.878253356644951e-05, + "loss": 1.2142, + "step": 4111000 + }, + { + "epoch": 2.47, + "learning_rate": 3.878043360088894e-05, + "loss": 1.2104, + "step": 4111500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8778333635328376e-05, + "loss": 1.2116, + "step": 4112000 + }, + { + "epoch": 2.47, + "learning_rate": 3.877623786969894e-05, + "loss": 1.2446, + "step": 4112500 + }, + { + "epoch": 2.47, + "learning_rate": 3.877413790413837e-05, + "loss": 1.2482, + "step": 4113000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8772037938577804e-05, + "loss": 1.2035, + "step": 4113500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8769937973017244e-05, + "loss": 1.195, + "step": 4114000 + }, + { + "epoch": 2.47, + "learning_rate": 3.876783800745668e-05, + "loss": 1.2472, + "step": 4114500 + }, + { + "epoch": 2.47, + "learning_rate": 3.876574224182723e-05, + "loss": 1.2413, + "step": 4115000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8763642276266664e-05, + "loss": 1.2114, + "step": 4115500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8761542310706105e-05, + "loss": 1.2233, + "step": 4116000 + }, + { + "epoch": 2.47, + "learning_rate": 3.875944234514554e-05, + "loss": 1.1913, + "step": 4116500 + }, + { + "epoch": 2.47, + "learning_rate": 3.875734657951609e-05, + "loss": 1.2259, + "step": 4117000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8755246613955525e-05, + "loss": 1.2384, + "step": 4117500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8753146648394965e-05, + "loss": 1.2053, + "step": 4118000 + }, + { + "epoch": 2.47, + "learning_rate": 3.87510466828344e-05, + "loss": 1.2397, + "step": 4118500 + }, + { + "epoch": 2.47, + "learning_rate": 3.874895091720496e-05, + "loss": 1.2195, + "step": 4119000 + }, + { + "epoch": 2.47, + "learning_rate": 3.874685095164439e-05, + "loss": 1.2145, + "step": 4119500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8744750986083826e-05, + "loss": 1.2297, + "step": 4120000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8742655220454386e-05, + "loss": 1.1953, + "step": 4120500 + }, + { + "epoch": 2.47, + "learning_rate": 3.874055525489382e-05, + "loss": 1.1861, + "step": 4121000 + }, + { + "epoch": 2.47, + "learning_rate": 3.873845528933326e-05, + "loss": 1.225, + "step": 4121500 + }, + { + "epoch": 2.47, + "learning_rate": 3.873635532377269e-05, + "loss": 1.2175, + "step": 4122000 + }, + { + "epoch": 2.47, + "learning_rate": 3.873425955814325e-05, + "loss": 1.2259, + "step": 4122500 + }, + { + "epoch": 2.47, + "learning_rate": 3.873215959258268e-05, + "loss": 1.2244, + "step": 4123000 + }, + { + "epoch": 2.47, + "learning_rate": 3.873005962702212e-05, + "loss": 1.1998, + "step": 4123500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8727959661461554e-05, + "loss": 1.232, + "step": 4124000 + }, + { + "epoch": 2.47, + "learning_rate": 3.872585969590098e-05, + "loss": 1.2588, + "step": 4124500 + }, + { + "epoch": 2.47, + "learning_rate": 3.872375973034042e-05, + "loss": 1.2015, + "step": 4125000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8721659764779855e-05, + "loss": 1.2123, + "step": 4125500 + }, + { + "epoch": 2.47, + "learning_rate": 3.871955979921929e-05, + "loss": 1.1864, + "step": 4126000 + }, + { + "epoch": 2.47, + "learning_rate": 3.871745983365873e-05, + "loss": 1.2309, + "step": 4126500 + }, + { + "epoch": 2.47, + "learning_rate": 3.871535986809816e-05, + "loss": 1.2245, + "step": 4127000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8713264102468715e-05, + "loss": 1.2114, + "step": 4127500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8711164136908156e-05, + "loss": 1.2328, + "step": 4128000 + }, + { + "epoch": 2.48, + "learning_rate": 3.870906417134759e-05, + "loss": 1.2302, + "step": 4128500 + }, + { + "epoch": 2.48, + "learning_rate": 3.870696420578702e-05, + "loss": 1.2362, + "step": 4129000 + }, + { + "epoch": 2.48, + "learning_rate": 3.870486424022646e-05, + "loss": 1.2266, + "step": 4129500 + }, + { + "epoch": 2.48, + "learning_rate": 3.8702768474597016e-05, + "loss": 1.2398, + "step": 4130000 + }, + { + "epoch": 2.48, + "learning_rate": 3.870066850903645e-05, + "loss": 1.2278, + "step": 4130500 + }, + { + "epoch": 2.48, + "learning_rate": 3.869856854347588e-05, + "loss": 1.202, + "step": 4131000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8696468577915324e-05, + "loss": 1.1953, + "step": 4131500 + }, + { + "epoch": 2.48, + "learning_rate": 3.869436861235476e-05, + "loss": 1.2555, + "step": 4132000 + }, + { + "epoch": 2.48, + "learning_rate": 3.869226864679419e-05, + "loss": 1.2192, + "step": 4132500 + }, + { + "epoch": 2.48, + "learning_rate": 3.869016868123363e-05, + "loss": 1.1829, + "step": 4133000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8688072915604184e-05, + "loss": 1.2021, + "step": 4133500 + }, + { + "epoch": 2.48, + "learning_rate": 3.868597295004362e-05, + "loss": 1.1873, + "step": 4134000 + }, + { + "epoch": 2.48, + "learning_rate": 3.868387298448305e-05, + "loss": 1.2125, + "step": 4134500 + }, + { + "epoch": 2.48, + "learning_rate": 3.868177301892249e-05, + "loss": 1.2164, + "step": 4135000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8679673053361925e-05, + "loss": 1.2382, + "step": 4135500 + }, + { + "epoch": 2.48, + "learning_rate": 3.867757728773248e-05, + "loss": 1.2176, + "step": 4136000 + }, + { + "epoch": 2.48, + "learning_rate": 3.867547732217192e-05, + "loss": 1.24, + "step": 4136500 + }, + { + "epoch": 2.48, + "learning_rate": 3.867337735661135e-05, + "loss": 1.2383, + "step": 4137000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8671277391050786e-05, + "loss": 1.2185, + "step": 4137500 + }, + { + "epoch": 2.48, + "learning_rate": 3.8669177425490226e-05, + "loss": 1.2171, + "step": 4138000 + }, + { + "epoch": 2.48, + "learning_rate": 3.866708165986078e-05, + "loss": 1.2397, + "step": 4138500 + }, + { + "epoch": 2.48, + "learning_rate": 3.866498169430021e-05, + "loss": 1.1875, + "step": 4139000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8662881728739646e-05, + "loss": 1.2151, + "step": 4139500 + }, + { + "epoch": 2.48, + "learning_rate": 3.8660781763179087e-05, + "loss": 1.2395, + "step": 4140000 + }, + { + "epoch": 2.48, + "learning_rate": 3.865868179761852e-05, + "loss": 1.2268, + "step": 4140500 + }, + { + "epoch": 2.48, + "learning_rate": 3.8656586031989074e-05, + "loss": 1.2022, + "step": 4141000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8654486066428514e-05, + "loss": 1.2351, + "step": 4141500 + }, + { + "epoch": 2.48, + "learning_rate": 3.865238610086795e-05, + "loss": 1.266, + "step": 4142000 + }, + { + "epoch": 2.48, + "learning_rate": 3.865028613530738e-05, + "loss": 1.2249, + "step": 4142500 + }, + { + "epoch": 2.48, + "learning_rate": 3.864818616974682e-05, + "loss": 1.252, + "step": 4143000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8646090404117375e-05, + "loss": 1.2412, + "step": 4143500 + }, + { + "epoch": 2.48, + "learning_rate": 3.864399043855681e-05, + "loss": 1.2271, + "step": 4144000 + }, + { + "epoch": 2.48, + "learning_rate": 3.864189047299624e-05, + "loss": 1.2278, + "step": 4144500 + }, + { + "epoch": 2.49, + "learning_rate": 3.863979050743568e-05, + "loss": 1.2204, + "step": 4145000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8637694741806235e-05, + "loss": 1.2233, + "step": 4145500 + }, + { + "epoch": 2.49, + "learning_rate": 3.863559477624567e-05, + "loss": 1.2159, + "step": 4146000 + }, + { + "epoch": 2.49, + "learning_rate": 3.86334948106851e-05, + "loss": 1.2006, + "step": 4146500 + }, + { + "epoch": 2.49, + "learning_rate": 3.863139484512454e-05, + "loss": 1.2104, + "step": 4147000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8629294879563976e-05, + "loss": 1.2443, + "step": 4147500 + }, + { + "epoch": 2.49, + "learning_rate": 3.862719491400341e-05, + "loss": 1.1978, + "step": 4148000 + }, + { + "epoch": 2.49, + "learning_rate": 3.862509914837397e-05, + "loss": 1.217, + "step": 4148500 + }, + { + "epoch": 2.49, + "learning_rate": 3.86229991828134e-05, + "loss": 1.2021, + "step": 4149000 + }, + { + "epoch": 2.49, + "learning_rate": 3.862089921725284e-05, + "loss": 1.1947, + "step": 4149500 + }, + { + "epoch": 2.49, + "learning_rate": 3.861879925169228e-05, + "loss": 1.2162, + "step": 4150000 + }, + { + "epoch": 2.49, + "learning_rate": 3.861669928613171e-05, + "loss": 1.2673, + "step": 4150500 + }, + { + "epoch": 2.49, + "learning_rate": 3.8614603520502264e-05, + "loss": 1.2133, + "step": 4151000 + }, + { + "epoch": 2.49, + "learning_rate": 3.86125035549417e-05, + "loss": 1.2416, + "step": 4151500 + }, + { + "epoch": 2.49, + "learning_rate": 3.861040358938114e-05, + "loss": 1.2186, + "step": 4152000 + }, + { + "epoch": 2.49, + "learning_rate": 3.860830362382057e-05, + "loss": 1.2434, + "step": 4152500 + }, + { + "epoch": 2.49, + "learning_rate": 3.8606207858191125e-05, + "loss": 1.2165, + "step": 4153000 + }, + { + "epoch": 2.49, + "learning_rate": 3.860410789263056e-05, + "loss": 1.2181, + "step": 4153500 + }, + { + "epoch": 2.49, + "learning_rate": 3.860201212700111e-05, + "loss": 1.2138, + "step": 4154000 + }, + { + "epoch": 2.49, + "learning_rate": 3.859991216144055e-05, + "loss": 1.2212, + "step": 4154500 + }, + { + "epoch": 2.49, + "learning_rate": 3.8597812195879985e-05, + "loss": 1.1922, + "step": 4155000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8595712230319426e-05, + "loss": 1.2299, + "step": 4155500 + }, + { + "epoch": 2.49, + "learning_rate": 3.859361226475886e-05, + "loss": 1.2106, + "step": 4156000 + }, + { + "epoch": 2.49, + "learning_rate": 3.859151229919829e-05, + "loss": 1.2559, + "step": 4156500 + }, + { + "epoch": 2.49, + "learning_rate": 3.858941233363773e-05, + "loss": 1.2088, + "step": 4157000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8587312368077166e-05, + "loss": 1.221, + "step": 4157500 + }, + { + "epoch": 2.49, + "learning_rate": 3.85852124025166e-05, + "loss": 1.2206, + "step": 4158000 + }, + { + "epoch": 2.49, + "learning_rate": 3.858311243695603e-05, + "loss": 1.2181, + "step": 4158500 + }, + { + "epoch": 2.49, + "learning_rate": 3.8581012471395467e-05, + "loss": 1.2389, + "step": 4159000 + }, + { + "epoch": 2.49, + "learning_rate": 3.85789125058349e-05, + "loss": 1.2372, + "step": 4159500 + }, + { + "epoch": 2.49, + "learning_rate": 3.857681674020546e-05, + "loss": 1.2197, + "step": 4160000 + }, + { + "epoch": 2.49, + "learning_rate": 3.85747167746449e-05, + "loss": 1.2104, + "step": 4160500 + }, + { + "epoch": 2.49, + "learning_rate": 3.857261680908433e-05, + "loss": 1.2269, + "step": 4161000 + }, + { + "epoch": 2.49, + "learning_rate": 3.857052104345489e-05, + "loss": 1.2266, + "step": 4161500 + }, + { + "epoch": 2.5, + "learning_rate": 3.856842107789432e-05, + "loss": 1.2125, + "step": 4162000 + }, + { + "epoch": 2.5, + "learning_rate": 3.856632111233376e-05, + "loss": 1.2356, + "step": 4162500 + }, + { + "epoch": 2.5, + "learning_rate": 3.856422114677319e-05, + "loss": 1.2055, + "step": 4163000 + }, + { + "epoch": 2.5, + "learning_rate": 3.856212118121263e-05, + "loss": 1.2046, + "step": 4163500 + }, + { + "epoch": 2.5, + "learning_rate": 3.856002121565206e-05, + "loss": 1.2099, + "step": 4164000 + }, + { + "epoch": 2.5, + "learning_rate": 3.855792545002262e-05, + "loss": 1.2275, + "step": 4164500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8555825484462056e-05, + "loss": 1.212, + "step": 4165000 + }, + { + "epoch": 2.5, + "learning_rate": 3.855372551890149e-05, + "loss": 1.2234, + "step": 4165500 + }, + { + "epoch": 2.5, + "learning_rate": 3.855162555334092e-05, + "loss": 1.2288, + "step": 4166000 + }, + { + "epoch": 2.5, + "learning_rate": 3.8549525587780356e-05, + "loss": 1.2324, + "step": 4166500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8547429822150916e-05, + "loss": 1.2201, + "step": 4167000 + }, + { + "epoch": 2.5, + "learning_rate": 3.8545329856590357e-05, + "loss": 1.2302, + "step": 4167500 + }, + { + "epoch": 2.5, + "learning_rate": 3.854322989102978e-05, + "loss": 1.2271, + "step": 4168000 + }, + { + "epoch": 2.5, + "learning_rate": 3.854112992546922e-05, + "loss": 1.2321, + "step": 4168500 + }, + { + "epoch": 2.5, + "learning_rate": 3.853902995990866e-05, + "loss": 1.2301, + "step": 4169000 + }, + { + "epoch": 2.5, + "learning_rate": 3.853693419427922e-05, + "loss": 1.2289, + "step": 4169500 + }, + { + "epoch": 2.5, + "learning_rate": 3.853483842864977e-05, + "loss": 1.211, + "step": 4170000 + }, + { + "epoch": 2.5, + "learning_rate": 3.8532738463089204e-05, + "loss": 1.2278, + "step": 4170500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8530638497528645e-05, + "loss": 1.2107, + "step": 4171000 + }, + { + "epoch": 2.5, + "learning_rate": 3.852853853196808e-05, + "loss": 1.229, + "step": 4171500 + }, + { + "epoch": 2.5, + "learning_rate": 3.852643856640751e-05, + "loss": 1.2183, + "step": 4172000 + }, + { + "epoch": 2.5, + "learning_rate": 3.8524338600846945e-05, + "loss": 1.1871, + "step": 4172500 + }, + { + "epoch": 2.5, + "learning_rate": 3.852223863528638e-05, + "loss": 1.2202, + "step": 4173000 + }, + { + "epoch": 2.5, + "learning_rate": 3.852013866972581e-05, + "loss": 1.2457, + "step": 4173500 + }, + { + "epoch": 2.5, + "learning_rate": 3.851803870416525e-05, + "loss": 1.2156, + "step": 4174000 + }, + { + "epoch": 2.5, + "learning_rate": 3.851594293853581e-05, + "loss": 1.1957, + "step": 4174500 + }, + { + "epoch": 2.5, + "learning_rate": 3.851384297297524e-05, + "loss": 1.2108, + "step": 4175000 + }, + { + "epoch": 2.5, + "learning_rate": 3.851174300741467e-05, + "loss": 1.206, + "step": 4175500 + }, + { + "epoch": 2.5, + "learning_rate": 3.850964304185411e-05, + "loss": 1.1935, + "step": 4176000 + }, + { + "epoch": 2.5, + "learning_rate": 3.8507543076293546e-05, + "loss": 1.2108, + "step": 4176500 + }, + { + "epoch": 2.5, + "learning_rate": 3.850544311073298e-05, + "loss": 1.2164, + "step": 4177000 + }, + { + "epoch": 2.5, + "learning_rate": 3.850334734510354e-05, + "loss": 1.2322, + "step": 4177500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8501247379542973e-05, + "loss": 1.2391, + "step": 4178000 + }, + { + "epoch": 2.51, + "learning_rate": 3.849914741398241e-05, + "loss": 1.2102, + "step": 4178500 + }, + { + "epoch": 2.51, + "learning_rate": 3.849704744842185e-05, + "loss": 1.2259, + "step": 4179000 + }, + { + "epoch": 2.51, + "learning_rate": 3.849494748286128e-05, + "loss": 1.2195, + "step": 4179500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8492847517300714e-05, + "loss": 1.2088, + "step": 4180000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8490747551740154e-05, + "loss": 1.2214, + "step": 4180500 + }, + { + "epoch": 2.51, + "learning_rate": 3.848864758617959e-05, + "loss": 1.2155, + "step": 4181000 + }, + { + "epoch": 2.51, + "learning_rate": 3.848655182055014e-05, + "loss": 1.2015, + "step": 4181500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8484451854989575e-05, + "loss": 1.1976, + "step": 4182000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8482351889429015e-05, + "loss": 1.2003, + "step": 4182500 + }, + { + "epoch": 2.51, + "learning_rate": 3.848025192386845e-05, + "loss": 1.2114, + "step": 4183000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8478156158239e-05, + "loss": 1.2435, + "step": 4183500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8476056192678436e-05, + "loss": 1.1696, + "step": 4184000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8473956227117876e-05, + "loss": 1.2459, + "step": 4184500 + }, + { + "epoch": 2.51, + "learning_rate": 3.847185626155731e-05, + "loss": 1.2216, + "step": 4185000 + }, + { + "epoch": 2.51, + "learning_rate": 3.846976049592786e-05, + "loss": 1.2045, + "step": 4185500 + }, + { + "epoch": 2.51, + "learning_rate": 3.84676605303673e-05, + "loss": 1.2306, + "step": 4186000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8465560564806737e-05, + "loss": 1.2552, + "step": 4186500 + }, + { + "epoch": 2.51, + "learning_rate": 3.846346479917729e-05, + "loss": 1.2404, + "step": 4187000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8461364833616724e-05, + "loss": 1.2195, + "step": 4187500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8459264868056164e-05, + "loss": 1.2228, + "step": 4188000 + }, + { + "epoch": 2.51, + "learning_rate": 3.84571649024956e-05, + "loss": 1.2271, + "step": 4188500 + }, + { + "epoch": 2.51, + "learning_rate": 3.845506493693503e-05, + "loss": 1.2224, + "step": 4189000 + }, + { + "epoch": 2.51, + "learning_rate": 3.845296497137447e-05, + "loss": 1.2429, + "step": 4189500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8450865005813904e-05, + "loss": 1.2272, + "step": 4190000 + }, + { + "epoch": 2.51, + "learning_rate": 3.844876504025334e-05, + "loss": 1.2059, + "step": 4190500 + }, + { + "epoch": 2.51, + "learning_rate": 3.844666927462389e-05, + "loss": 1.2168, + "step": 4191000 + }, + { + "epoch": 2.51, + "learning_rate": 3.844456930906333e-05, + "loss": 1.2083, + "step": 4191500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8442469343502765e-05, + "loss": 1.1967, + "step": 4192000 + }, + { + "epoch": 2.51, + "learning_rate": 3.84403693779422e-05, + "loss": 1.2133, + "step": 4192500 + }, + { + "epoch": 2.51, + "learning_rate": 3.843827781224388e-05, + "loss": 1.2022, + "step": 4193000 + }, + { + "epoch": 2.51, + "learning_rate": 3.843617784668332e-05, + "loss": 1.2183, + "step": 4193500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8434077881122746e-05, + "loss": 1.2195, + "step": 4194000 + }, + { + "epoch": 2.51, + "learning_rate": 3.843197791556218e-05, + "loss": 1.2081, + "step": 4194500 + }, + { + "epoch": 2.52, + "learning_rate": 3.842987795000162e-05, + "loss": 1.2311, + "step": 4195000 + }, + { + "epoch": 2.52, + "learning_rate": 3.842777798444105e-05, + "loss": 1.1919, + "step": 4195500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8425678018880487e-05, + "loss": 1.2323, + "step": 4196000 + }, + { + "epoch": 2.52, + "learning_rate": 3.842357805331993e-05, + "loss": 1.184, + "step": 4196500 + }, + { + "epoch": 2.52, + "learning_rate": 3.842147808775936e-05, + "loss": 1.196, + "step": 4197000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8419378122198794e-05, + "loss": 1.2, + "step": 4197500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8417278156638234e-05, + "loss": 1.2245, + "step": 4198000 + }, + { + "epoch": 2.52, + "learning_rate": 3.841517819107767e-05, + "loss": 1.2268, + "step": 4198500 + }, + { + "epoch": 2.52, + "learning_rate": 3.84130782255171e-05, + "loss": 1.1826, + "step": 4199000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8410982459887654e-05, + "loss": 1.218, + "step": 4199500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8408882494327095e-05, + "loss": 1.1937, + "step": 4200000 + }, + { + "epoch": 2.52, + "eval_loss": 1.1766777038574219, + "eval_runtime": 1098.718, + "eval_samples_per_second": 479.395, + "eval_steps_per_second": 79.899, + "step": 4200000 + }, + { + "epoch": 2.52, + "learning_rate": 3.840678252876653e-05, + "loss": 1.2039, + "step": 4200500 + }, + { + "epoch": 2.52, + "learning_rate": 3.840468256320597e-05, + "loss": 1.2128, + "step": 4201000 + }, + { + "epoch": 2.52, + "learning_rate": 3.84025825976454e-05, + "loss": 1.1999, + "step": 4201500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8400486832015955e-05, + "loss": 1.1987, + "step": 4202000 + }, + { + "epoch": 2.52, + "learning_rate": 3.839838686645539e-05, + "loss": 1.2, + "step": 4202500 + }, + { + "epoch": 2.52, + "learning_rate": 3.839628690089483e-05, + "loss": 1.22, + "step": 4203000 + }, + { + "epoch": 2.52, + "learning_rate": 3.839418693533426e-05, + "loss": 1.2402, + "step": 4203500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8392086969773696e-05, + "loss": 1.2128, + "step": 4204000 + }, + { + "epoch": 2.52, + "learning_rate": 3.838999120414425e-05, + "loss": 1.1967, + "step": 4204500 + }, + { + "epoch": 2.52, + "learning_rate": 3.838789123858369e-05, + "loss": 1.2259, + "step": 4205000 + }, + { + "epoch": 2.52, + "learning_rate": 3.838579127302312e-05, + "loss": 1.205, + "step": 4205500 + }, + { + "epoch": 2.52, + "learning_rate": 3.838369130746256e-05, + "loss": 1.2285, + "step": 4206000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8381591341902e-05, + "loss": 1.1938, + "step": 4206500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8379491376341424e-05, + "loss": 1.2086, + "step": 4207000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8377395610711984e-05, + "loss": 1.2319, + "step": 4207500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8375295645151424e-05, + "loss": 1.2402, + "step": 4208000 + }, + { + "epoch": 2.52, + "learning_rate": 3.837319567959086e-05, + "loss": 1.2184, + "step": 4208500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8371095714030284e-05, + "loss": 1.2045, + "step": 4209000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8368995748469725e-05, + "loss": 1.2128, + "step": 4209500 + }, + { + "epoch": 2.52, + "learning_rate": 3.836689578290916e-05, + "loss": 1.2232, + "step": 4210000 + }, + { + "epoch": 2.52, + "learning_rate": 3.836480001727972e-05, + "loss": 1.2138, + "step": 4210500 + }, + { + "epoch": 2.52, + "learning_rate": 3.836270005171915e-05, + "loss": 1.2317, + "step": 4211000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8360604286089705e-05, + "loss": 1.1962, + "step": 4211500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8358504320529146e-05, + "loss": 1.2018, + "step": 4212000 + }, + { + "epoch": 2.53, + "learning_rate": 3.835640435496858e-05, + "loss": 1.23, + "step": 4212500 + }, + { + "epoch": 2.53, + "learning_rate": 3.835430438940801e-05, + "loss": 1.2023, + "step": 4213000 + }, + { + "epoch": 2.53, + "learning_rate": 3.835220862377857e-05, + "loss": 1.196, + "step": 4213500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8350108658218006e-05, + "loss": 1.2055, + "step": 4214000 + }, + { + "epoch": 2.53, + "learning_rate": 3.834800869265744e-05, + "loss": 1.2194, + "step": 4214500 + }, + { + "epoch": 2.53, + "learning_rate": 3.834590872709688e-05, + "loss": 1.2162, + "step": 4215000 + }, + { + "epoch": 2.53, + "learning_rate": 3.8343808761536314e-05, + "loss": 1.2344, + "step": 4215500 + }, + { + "epoch": 2.53, + "learning_rate": 3.834170879597575e-05, + "loss": 1.229, + "step": 4216000 + }, + { + "epoch": 2.53, + "learning_rate": 3.833960883041518e-05, + "loss": 1.2174, + "step": 4216500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8337508864854614e-05, + "loss": 1.225, + "step": 4217000 + }, + { + "epoch": 2.53, + "learning_rate": 3.833540889929405e-05, + "loss": 1.1931, + "step": 4217500 + }, + { + "epoch": 2.53, + "learning_rate": 3.833331313366461e-05, + "loss": 1.2053, + "step": 4218000 + }, + { + "epoch": 2.53, + "learning_rate": 3.833121316810404e-05, + "loss": 1.2225, + "step": 4218500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8329113202543475e-05, + "loss": 1.2201, + "step": 4219000 + }, + { + "epoch": 2.53, + "learning_rate": 3.832701323698291e-05, + "loss": 1.2359, + "step": 4219500 + }, + { + "epoch": 2.53, + "learning_rate": 3.832491327142235e-05, + "loss": 1.1967, + "step": 4220000 + }, + { + "epoch": 2.53, + "learning_rate": 3.832281330586178e-05, + "loss": 1.1962, + "step": 4220500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8320717540232335e-05, + "loss": 1.1961, + "step": 4221000 + }, + { + "epoch": 2.53, + "learning_rate": 3.8318617574671776e-05, + "loss": 1.2292, + "step": 4221500 + }, + { + "epoch": 2.53, + "learning_rate": 3.831651760911121e-05, + "loss": 1.2014, + "step": 4222000 + }, + { + "epoch": 2.53, + "learning_rate": 3.831441764355064e-05, + "loss": 1.1928, + "step": 4222500 + }, + { + "epoch": 2.53, + "learning_rate": 3.831231767799008e-05, + "loss": 1.2392, + "step": 4223000 + }, + { + "epoch": 2.53, + "learning_rate": 3.8310217712429516e-05, + "loss": 1.201, + "step": 4223500 + }, + { + "epoch": 2.53, + "learning_rate": 3.830811774686895e-05, + "loss": 1.2009, + "step": 4224000 + }, + { + "epoch": 2.53, + "learning_rate": 3.83060219812395e-05, + "loss": 1.2182, + "step": 4224500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8303922015678944e-05, + "loss": 1.231, + "step": 4225000 + }, + { + "epoch": 2.53, + "learning_rate": 3.830182205011838e-05, + "loss": 1.2026, + "step": 4225500 + }, + { + "epoch": 2.53, + "learning_rate": 3.829972208455781e-05, + "loss": 1.2129, + "step": 4226000 + }, + { + "epoch": 2.53, + "learning_rate": 3.829762211899725e-05, + "loss": 1.2392, + "step": 4226500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8295526353367804e-05, + "loss": 1.2036, + "step": 4227000 + }, + { + "epoch": 2.53, + "learning_rate": 3.829342638780724e-05, + "loss": 1.1836, + "step": 4227500 + }, + { + "epoch": 2.53, + "learning_rate": 3.829132642224667e-05, + "loss": 1.2339, + "step": 4228000 + }, + { + "epoch": 2.54, + "learning_rate": 3.828922645668611e-05, + "loss": 1.1989, + "step": 4228500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8287126491125545e-05, + "loss": 1.2226, + "step": 4229000 + }, + { + "epoch": 2.54, + "learning_rate": 3.828502652556498e-05, + "loss": 1.2049, + "step": 4229500 + }, + { + "epoch": 2.54, + "learning_rate": 3.828292656000442e-05, + "loss": 1.2163, + "step": 4230000 + }, + { + "epoch": 2.54, + "learning_rate": 3.828083079437497e-05, + "loss": 1.2093, + "step": 4230500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8278730828814406e-05, + "loss": 1.2233, + "step": 4231000 + }, + { + "epoch": 2.54, + "learning_rate": 3.8276630863253846e-05, + "loss": 1.1856, + "step": 4231500 + }, + { + "epoch": 2.54, + "learning_rate": 3.827453089769328e-05, + "loss": 1.222, + "step": 4232000 + }, + { + "epoch": 2.54, + "learning_rate": 3.827243093213271e-05, + "loss": 1.2037, + "step": 4232500 + }, + { + "epoch": 2.54, + "learning_rate": 3.827033096657215e-05, + "loss": 1.2209, + "step": 4233000 + }, + { + "epoch": 2.54, + "learning_rate": 3.8268235200942707e-05, + "loss": 1.2315, + "step": 4233500 + }, + { + "epoch": 2.54, + "learning_rate": 3.826613523538214e-05, + "loss": 1.2248, + "step": 4234000 + }, + { + "epoch": 2.54, + "learning_rate": 3.8264035269821574e-05, + "loss": 1.2221, + "step": 4234500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8261935304261014e-05, + "loss": 1.1987, + "step": 4235000 + }, + { + "epoch": 2.54, + "learning_rate": 3.825983533870045e-05, + "loss": 1.1886, + "step": 4235500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8257735373139874e-05, + "loss": 1.2181, + "step": 4236000 + }, + { + "epoch": 2.54, + "learning_rate": 3.8255635407579314e-05, + "loss": 1.2278, + "step": 4236500 + }, + { + "epoch": 2.54, + "learning_rate": 3.825353544201875e-05, + "loss": 1.2162, + "step": 4237000 + }, + { + "epoch": 2.54, + "learning_rate": 3.825143967638931e-05, + "loss": 1.1972, + "step": 4237500 + }, + { + "epoch": 2.54, + "learning_rate": 3.824933971082874e-05, + "loss": 1.2334, + "step": 4238000 + }, + { + "epoch": 2.54, + "learning_rate": 3.8247239745268175e-05, + "loss": 1.1983, + "step": 4238500 + }, + { + "epoch": 2.54, + "learning_rate": 3.824513977970761e-05, + "loss": 1.1974, + "step": 4239000 + }, + { + "epoch": 2.54, + "learning_rate": 3.824304401407817e-05, + "loss": 1.2372, + "step": 4239500 + }, + { + "epoch": 2.54, + "learning_rate": 3.824094824844872e-05, + "loss": 1.1997, + "step": 4240000 + }, + { + "epoch": 2.54, + "learning_rate": 3.823884828288816e-05, + "loss": 1.2289, + "step": 4240500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8236748317327596e-05, + "loss": 1.2322, + "step": 4241000 + }, + { + "epoch": 2.54, + "learning_rate": 3.823464835176703e-05, + "loss": 1.2088, + "step": 4241500 + }, + { + "epoch": 2.54, + "learning_rate": 3.823254838620647e-05, + "loss": 1.1991, + "step": 4242000 + }, + { + "epoch": 2.54, + "learning_rate": 3.82304484206459e-05, + "loss": 1.2232, + "step": 4242500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8228348455085337e-05, + "loss": 1.23, + "step": 4243000 + }, + { + "epoch": 2.54, + "learning_rate": 3.822625268945589e-05, + "loss": 1.2124, + "step": 4243500 + }, + { + "epoch": 2.54, + "learning_rate": 3.822415272389533e-05, + "loss": 1.2083, + "step": 4244000 + }, + { + "epoch": 2.54, + "learning_rate": 3.8222052758334764e-05, + "loss": 1.2382, + "step": 4244500 + }, + { + "epoch": 2.55, + "learning_rate": 3.82199527927742e-05, + "loss": 1.2191, + "step": 4245000 + }, + { + "epoch": 2.55, + "learning_rate": 3.821785282721363e-05, + "loss": 1.2124, + "step": 4245500 + }, + { + "epoch": 2.55, + "learning_rate": 3.8215752861653064e-05, + "loss": 1.2305, + "step": 4246000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8213657096023625e-05, + "loss": 1.209, + "step": 4246500 + }, + { + "epoch": 2.55, + "learning_rate": 3.8211557130463065e-05, + "loss": 1.1887, + "step": 4247000 + }, + { + "epoch": 2.55, + "learning_rate": 3.82094571649025e-05, + "loss": 1.22, + "step": 4247500 + }, + { + "epoch": 2.55, + "learning_rate": 3.820736139927305e-05, + "loss": 1.2098, + "step": 4248000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8205261433712485e-05, + "loss": 1.2315, + "step": 4248500 + }, + { + "epoch": 2.55, + "learning_rate": 3.8203161468151926e-05, + "loss": 1.199, + "step": 4249000 + }, + { + "epoch": 2.55, + "learning_rate": 3.820106150259136e-05, + "loss": 1.2178, + "step": 4249500 + }, + { + "epoch": 2.55, + "learning_rate": 3.819896153703079e-05, + "loss": 1.1936, + "step": 4250000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8196861571470226e-05, + "loss": 1.1868, + "step": 4250500 + }, + { + "epoch": 2.55, + "learning_rate": 3.819476160590966e-05, + "loss": 1.2008, + "step": 4251000 + }, + { + "epoch": 2.55, + "learning_rate": 3.819266164034909e-05, + "loss": 1.2402, + "step": 4251500 + }, + { + "epoch": 2.55, + "learning_rate": 3.819056167478853e-05, + "loss": 1.2207, + "step": 4252000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8188461709227966e-05, + "loss": 1.1873, + "step": 4252500 + }, + { + "epoch": 2.55, + "learning_rate": 3.81863617436674e-05, + "loss": 1.2003, + "step": 4253000 + }, + { + "epoch": 2.55, + "learning_rate": 3.818426177810684e-05, + "loss": 1.2167, + "step": 4253500 + }, + { + "epoch": 2.55, + "learning_rate": 3.8182166012477394e-05, + "loss": 1.1948, + "step": 4254000 + }, + { + "epoch": 2.55, + "learning_rate": 3.818006604691683e-05, + "loss": 1.2249, + "step": 4254500 + }, + { + "epoch": 2.55, + "learning_rate": 3.817796608135627e-05, + "loss": 1.2331, + "step": 4255000 + }, + { + "epoch": 2.55, + "learning_rate": 3.81758661157957e-05, + "loss": 1.2065, + "step": 4255500 + }, + { + "epoch": 2.55, + "learning_rate": 3.8173766150235134e-05, + "loss": 1.2026, + "step": 4256000 + }, + { + "epoch": 2.55, + "learning_rate": 3.817167038460569e-05, + "loss": 1.1534, + "step": 4256500 + }, + { + "epoch": 2.55, + "learning_rate": 3.816957041904513e-05, + "loss": 1.2397, + "step": 4257000 + }, + { + "epoch": 2.55, + "learning_rate": 3.816747045348456e-05, + "loss": 1.1747, + "step": 4257500 + }, + { + "epoch": 2.55, + "learning_rate": 3.8165370487923995e-05, + "loss": 1.2296, + "step": 4258000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8163270522363435e-05, + "loss": 1.2031, + "step": 4258500 + }, + { + "epoch": 2.55, + "learning_rate": 3.816117475673399e-05, + "loss": 1.2304, + "step": 4259000 + }, + { + "epoch": 2.55, + "learning_rate": 3.815907479117342e-05, + "loss": 1.2106, + "step": 4259500 + }, + { + "epoch": 2.55, + "learning_rate": 3.815697482561286e-05, + "loss": 1.25, + "step": 4260000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8154874860052296e-05, + "loss": 1.1865, + "step": 4260500 + }, + { + "epoch": 2.55, + "learning_rate": 3.815277909442285e-05, + "loss": 1.2227, + "step": 4261000 + }, + { + "epoch": 2.55, + "learning_rate": 3.815067912886228e-05, + "loss": 1.2178, + "step": 4261500 + }, + { + "epoch": 2.56, + "learning_rate": 3.814857916330172e-05, + "loss": 1.1893, + "step": 4262000 + }, + { + "epoch": 2.56, + "learning_rate": 3.814647919774116e-05, + "loss": 1.211, + "step": 4262500 + }, + { + "epoch": 2.56, + "learning_rate": 3.814437923218059e-05, + "loss": 1.2369, + "step": 4263000 + }, + { + "epoch": 2.56, + "learning_rate": 3.814227926662003e-05, + "loss": 1.2116, + "step": 4263500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8140183500990584e-05, + "loss": 1.1834, + "step": 4264000 + }, + { + "epoch": 2.56, + "learning_rate": 3.813808353543002e-05, + "loss": 1.2044, + "step": 4264500 + }, + { + "epoch": 2.56, + "learning_rate": 3.813598356986945e-05, + "loss": 1.2249, + "step": 4265000 + }, + { + "epoch": 2.56, + "learning_rate": 3.8133887804240005e-05, + "loss": 1.2106, + "step": 4265500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8131787838679445e-05, + "loss": 1.2204, + "step": 4266000 + }, + { + "epoch": 2.56, + "learning_rate": 3.812968787311888e-05, + "loss": 1.2171, + "step": 4266500 + }, + { + "epoch": 2.56, + "learning_rate": 3.812758790755832e-05, + "loss": 1.2397, + "step": 4267000 + }, + { + "epoch": 2.56, + "learning_rate": 3.812548794199775e-05, + "loss": 1.2259, + "step": 4267500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8123387976437185e-05, + "loss": 1.1981, + "step": 4268000 + }, + { + "epoch": 2.56, + "learning_rate": 3.8121288010876626e-05, + "loss": 1.2315, + "step": 4268500 + }, + { + "epoch": 2.56, + "learning_rate": 3.811919224524718e-05, + "loss": 1.2061, + "step": 4269000 + }, + { + "epoch": 2.56, + "learning_rate": 3.811709227968661e-05, + "loss": 1.1928, + "step": 4269500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8114992314126046e-05, + "loss": 1.2096, + "step": 4270000 + }, + { + "epoch": 2.56, + "learning_rate": 3.8112892348565486e-05, + "loss": 1.219, + "step": 4270500 + }, + { + "epoch": 2.56, + "learning_rate": 3.811079238300492e-05, + "loss": 1.2362, + "step": 4271000 + }, + { + "epoch": 2.56, + "learning_rate": 3.810869241744435e-05, + "loss": 1.2177, + "step": 4271500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8106592451883794e-05, + "loss": 1.2053, + "step": 4272000 + }, + { + "epoch": 2.56, + "learning_rate": 3.810449248632322e-05, + "loss": 1.2097, + "step": 4272500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8102392520762654e-05, + "loss": 1.2168, + "step": 4273000 + }, + { + "epoch": 2.56, + "learning_rate": 3.8100296755133214e-05, + "loss": 1.2175, + "step": 4273500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8098196789572654e-05, + "loss": 1.2465, + "step": 4274000 + }, + { + "epoch": 2.56, + "learning_rate": 3.809609682401209e-05, + "loss": 1.1992, + "step": 4274500 + }, + { + "epoch": 2.56, + "learning_rate": 3.809399685845152e-05, + "loss": 1.2254, + "step": 4275000 + }, + { + "epoch": 2.56, + "learning_rate": 3.809190109282208e-05, + "loss": 1.2336, + "step": 4275500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8089801127261515e-05, + "loss": 1.2255, + "step": 4276000 + }, + { + "epoch": 2.56, + "learning_rate": 3.808770116170095e-05, + "loss": 1.2097, + "step": 4276500 + }, + { + "epoch": 2.56, + "learning_rate": 3.808560119614039e-05, + "loss": 1.1769, + "step": 4277000 + }, + { + "epoch": 2.56, + "learning_rate": 3.8083501230579815e-05, + "loss": 1.1974, + "step": 4277500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8081405464950376e-05, + "loss": 1.2469, + "step": 4278000 + }, + { + "epoch": 2.57, + "learning_rate": 3.807930969932093e-05, + "loss": 1.2071, + "step": 4278500 + }, + { + "epoch": 2.57, + "learning_rate": 3.807720973376036e-05, + "loss": 1.2128, + "step": 4279000 + }, + { + "epoch": 2.57, + "learning_rate": 3.80751097681998e-05, + "loss": 1.2164, + "step": 4279500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8073009802639236e-05, + "loss": 1.1953, + "step": 4280000 + }, + { + "epoch": 2.57, + "learning_rate": 3.807090983707867e-05, + "loss": 1.2057, + "step": 4280500 + }, + { + "epoch": 2.57, + "learning_rate": 3.806880987151811e-05, + "loss": 1.2418, + "step": 4281000 + }, + { + "epoch": 2.57, + "learning_rate": 3.8066709905957544e-05, + "loss": 1.1931, + "step": 4281500 + }, + { + "epoch": 2.57, + "learning_rate": 3.806460994039698e-05, + "loss": 1.2296, + "step": 4282000 + }, + { + "epoch": 2.57, + "learning_rate": 3.806250997483641e-05, + "loss": 1.2235, + "step": 4282500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8060410009275844e-05, + "loss": 1.2435, + "step": 4283000 + }, + { + "epoch": 2.57, + "learning_rate": 3.8058314243646404e-05, + "loss": 1.1818, + "step": 4283500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8056214278085845e-05, + "loss": 1.2283, + "step": 4284000 + }, + { + "epoch": 2.57, + "learning_rate": 3.805411431252527e-05, + "loss": 1.1878, + "step": 4284500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8052014346964705e-05, + "loss": 1.2149, + "step": 4285000 + }, + { + "epoch": 2.57, + "learning_rate": 3.8049918581335265e-05, + "loss": 1.2161, + "step": 4285500 + }, + { + "epoch": 2.57, + "learning_rate": 3.804782701563694e-05, + "loss": 1.2047, + "step": 4286000 + }, + { + "epoch": 2.57, + "learning_rate": 3.804572705007638e-05, + "loss": 1.248, + "step": 4286500 + }, + { + "epoch": 2.57, + "learning_rate": 3.804362708451581e-05, + "loss": 1.2102, + "step": 4287000 + }, + { + "epoch": 2.57, + "learning_rate": 3.8041527118955246e-05, + "loss": 1.2157, + "step": 4287500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8039427153394686e-05, + "loss": 1.1973, + "step": 4288000 + }, + { + "epoch": 2.57, + "learning_rate": 3.803732718783412e-05, + "loss": 1.2231, + "step": 4288500 + }, + { + "epoch": 2.57, + "learning_rate": 3.803522722227355e-05, + "loss": 1.2103, + "step": 4289000 + }, + { + "epoch": 2.57, + "learning_rate": 3.803312725671299e-05, + "loss": 1.2212, + "step": 4289500 + }, + { + "epoch": 2.57, + "learning_rate": 3.803102729115243e-05, + "loss": 1.2224, + "step": 4290000 + }, + { + "epoch": 2.57, + "learning_rate": 3.802892732559186e-05, + "loss": 1.2023, + "step": 4290500 + }, + { + "epoch": 2.57, + "learning_rate": 3.80268273600313e-05, + "loss": 1.2175, + "step": 4291000 + }, + { + "epoch": 2.57, + "learning_rate": 3.802472739447073e-05, + "loss": 1.1829, + "step": 4291500 + }, + { + "epoch": 2.57, + "learning_rate": 3.802262742891016e-05, + "loss": 1.2137, + "step": 4292000 + }, + { + "epoch": 2.57, + "learning_rate": 3.802053166328072e-05, + "loss": 1.2476, + "step": 4292500 + }, + { + "epoch": 2.57, + "learning_rate": 3.801843169772016e-05, + "loss": 1.2172, + "step": 4293000 + }, + { + "epoch": 2.57, + "learning_rate": 3.8016331732159595e-05, + "loss": 1.2304, + "step": 4293500 + }, + { + "epoch": 2.57, + "learning_rate": 3.801423596653015e-05, + "loss": 1.209, + "step": 4294000 + }, + { + "epoch": 2.57, + "learning_rate": 3.801213600096958e-05, + "loss": 1.2139, + "step": 4294500 + }, + { + "epoch": 2.58, + "learning_rate": 3.801003603540902e-05, + "loss": 1.2214, + "step": 4295000 + }, + { + "epoch": 2.58, + "learning_rate": 3.8007936069848455e-05, + "loss": 1.191, + "step": 4295500 + }, + { + "epoch": 2.58, + "learning_rate": 3.800583610428789e-05, + "loss": 1.197, + "step": 4296000 + }, + { + "epoch": 2.58, + "learning_rate": 3.800373613872732e-05, + "loss": 1.2281, + "step": 4296500 + }, + { + "epoch": 2.58, + "learning_rate": 3.8001636173166756e-05, + "loss": 1.212, + "step": 4297000 + }, + { + "epoch": 2.58, + "learning_rate": 3.7999536207606196e-05, + "loss": 1.2066, + "step": 4297500 + }, + { + "epoch": 2.58, + "learning_rate": 3.799743624204563e-05, + "loss": 1.232, + "step": 4298000 + }, + { + "epoch": 2.58, + "learning_rate": 3.799534047641619e-05, + "loss": 1.2079, + "step": 4298500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7993240510855616e-05, + "loss": 1.1922, + "step": 4299000 + }, + { + "epoch": 2.58, + "learning_rate": 3.799114054529506e-05, + "loss": 1.2116, + "step": 4299500 + }, + { + "epoch": 2.58, + "learning_rate": 3.798904057973449e-05, + "loss": 1.1956, + "step": 4300000 + }, + { + "epoch": 2.58, + "eval_loss": 1.1711117029190063, + "eval_runtime": 1097.9922, + "eval_samples_per_second": 479.712, + "eval_steps_per_second": 79.952, + "step": 4300000 + }, + { + "epoch": 2.58, + "learning_rate": 3.7986940614173924e-05, + "loss": 1.2074, + "step": 4300500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7984844848544484e-05, + "loss": 1.239, + "step": 4301000 + }, + { + "epoch": 2.58, + "learning_rate": 3.798274488298392e-05, + "loss": 1.1959, + "step": 4301500 + }, + { + "epoch": 2.58, + "learning_rate": 3.798064491742335e-05, + "loss": 1.2282, + "step": 4302000 + }, + { + "epoch": 2.58, + "learning_rate": 3.7978544951862784e-05, + "loss": 1.215, + "step": 4302500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7976444986302225e-05, + "loss": 1.2057, + "step": 4303000 + }, + { + "epoch": 2.58, + "learning_rate": 3.797434502074166e-05, + "loss": 1.217, + "step": 4303500 + }, + { + "epoch": 2.58, + "learning_rate": 3.797224505518109e-05, + "loss": 1.2254, + "step": 4304000 + }, + { + "epoch": 2.58, + "learning_rate": 3.797014508962053e-05, + "loss": 1.2297, + "step": 4304500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7968049323991085e-05, + "loss": 1.1851, + "step": 4305000 + }, + { + "epoch": 2.58, + "learning_rate": 3.796594935843052e-05, + "loss": 1.2299, + "step": 4305500 + }, + { + "epoch": 2.58, + "learning_rate": 3.796384939286996e-05, + "loss": 1.2062, + "step": 4306000 + }, + { + "epoch": 2.58, + "learning_rate": 3.796174942730939e-05, + "loss": 1.2471, + "step": 4306500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7959653661679946e-05, + "loss": 1.2162, + "step": 4307000 + }, + { + "epoch": 2.58, + "learning_rate": 3.7957557896050506e-05, + "loss": 1.2207, + "step": 4307500 + }, + { + "epoch": 2.58, + "learning_rate": 3.795545793048994e-05, + "loss": 1.2126, + "step": 4308000 + }, + { + "epoch": 2.58, + "learning_rate": 3.795335796492937e-05, + "loss": 1.1924, + "step": 4308500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7951262199299934e-05, + "loss": 1.2228, + "step": 4309000 + }, + { + "epoch": 2.58, + "learning_rate": 3.794916223373937e-05, + "loss": 1.201, + "step": 4309500 + }, + { + "epoch": 2.58, + "learning_rate": 3.79470622681788e-05, + "loss": 1.209, + "step": 4310000 + }, + { + "epoch": 2.58, + "learning_rate": 3.794496230261824e-05, + "loss": 1.2004, + "step": 4310500 + }, + { + "epoch": 2.58, + "learning_rate": 3.794286233705767e-05, + "loss": 1.2303, + "step": 4311000 + }, + { + "epoch": 2.58, + "learning_rate": 3.794076237149711e-05, + "loss": 1.2122, + "step": 4311500 + }, + { + "epoch": 2.59, + "learning_rate": 3.793866240593654e-05, + "loss": 1.1962, + "step": 4312000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7936562440375975e-05, + "loss": 1.2311, + "step": 4312500 + }, + { + "epoch": 2.59, + "learning_rate": 3.7934462474815415e-05, + "loss": 1.2239, + "step": 4313000 + }, + { + "epoch": 2.59, + "learning_rate": 3.793236670918597e-05, + "loss": 1.223, + "step": 4313500 + }, + { + "epoch": 2.59, + "learning_rate": 3.79302667436254e-05, + "loss": 1.1937, + "step": 4314000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7928166778064835e-05, + "loss": 1.2077, + "step": 4314500 + }, + { + "epoch": 2.59, + "learning_rate": 3.7926066812504276e-05, + "loss": 1.2371, + "step": 4315000 + }, + { + "epoch": 2.59, + "learning_rate": 3.792397104687483e-05, + "loss": 1.248, + "step": 4315500 + }, + { + "epoch": 2.59, + "learning_rate": 3.792187108131426e-05, + "loss": 1.2262, + "step": 4316000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7919771115753696e-05, + "loss": 1.2082, + "step": 4316500 + }, + { + "epoch": 2.59, + "learning_rate": 3.7917671150193136e-05, + "loss": 1.2298, + "step": 4317000 + }, + { + "epoch": 2.59, + "learning_rate": 3.791557118463257e-05, + "loss": 1.1988, + "step": 4317500 + }, + { + "epoch": 2.59, + "learning_rate": 3.791347541900312e-05, + "loss": 1.2064, + "step": 4318000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7911375453442564e-05, + "loss": 1.2163, + "step": 4318500 + }, + { + "epoch": 2.59, + "learning_rate": 3.7909275487882e-05, + "loss": 1.1809, + "step": 4319000 + }, + { + "epoch": 2.59, + "learning_rate": 3.790717552232143e-05, + "loss": 1.2064, + "step": 4319500 + }, + { + "epoch": 2.59, + "learning_rate": 3.790507555676087e-05, + "loss": 1.2026, + "step": 4320000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7902975591200304e-05, + "loss": 1.2201, + "step": 4320500 + }, + { + "epoch": 2.59, + "learning_rate": 3.790087982557086e-05, + "loss": 1.2326, + "step": 4321000 + }, + { + "epoch": 2.59, + "learning_rate": 3.789877986001029e-05, + "loss": 1.2159, + "step": 4321500 + }, + { + "epoch": 2.59, + "learning_rate": 3.789667989444973e-05, + "loss": 1.2023, + "step": 4322000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7894579928889165e-05, + "loss": 1.2163, + "step": 4322500 + }, + { + "epoch": 2.59, + "learning_rate": 3.789248416325972e-05, + "loss": 1.2117, + "step": 4323000 + }, + { + "epoch": 2.59, + "learning_rate": 3.789038419769915e-05, + "loss": 1.2017, + "step": 4323500 + }, + { + "epoch": 2.59, + "learning_rate": 3.788828423213859e-05, + "loss": 1.2275, + "step": 4324000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7886184266578026e-05, + "loss": 1.217, + "step": 4324500 + }, + { + "epoch": 2.59, + "learning_rate": 3.788408430101746e-05, + "loss": 1.1756, + "step": 4325000 + }, + { + "epoch": 2.59, + "learning_rate": 3.788198853538802e-05, + "loss": 1.1987, + "step": 4325500 + }, + { + "epoch": 2.59, + "learning_rate": 3.787988856982745e-05, + "loss": 1.2193, + "step": 4326000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7877788604266886e-05, + "loss": 1.2398, + "step": 4326500 + }, + { + "epoch": 2.59, + "learning_rate": 3.787568863870633e-05, + "loss": 1.2238, + "step": 4327000 + }, + { + "epoch": 2.59, + "learning_rate": 3.787359287307688e-05, + "loss": 1.2035, + "step": 4327500 + }, + { + "epoch": 2.59, + "learning_rate": 3.7871492907516314e-05, + "loss": 1.2041, + "step": 4328000 + }, + { + "epoch": 2.6, + "learning_rate": 3.786939294195575e-05, + "loss": 1.2587, + "step": 4328500 + }, + { + "epoch": 2.6, + "learning_rate": 3.786729297639519e-05, + "loss": 1.217, + "step": 4329000 + }, + { + "epoch": 2.6, + "learning_rate": 3.786519301083462e-05, + "loss": 1.2283, + "step": 4329500 + }, + { + "epoch": 2.6, + "learning_rate": 3.7863093045274054e-05, + "loss": 1.2012, + "step": 4330000 + }, + { + "epoch": 2.6, + "learning_rate": 3.786099727964461e-05, + "loss": 1.1985, + "step": 4330500 + }, + { + "epoch": 2.6, + "learning_rate": 3.785889731408405e-05, + "loss": 1.2035, + "step": 4331000 + }, + { + "epoch": 2.6, + "learning_rate": 3.785679734852348e-05, + "loss": 1.2381, + "step": 4331500 + }, + { + "epoch": 2.6, + "learning_rate": 3.785469738296292e-05, + "loss": 1.2051, + "step": 4332000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7852597417402355e-05, + "loss": 1.2232, + "step": 4332500 + }, + { + "epoch": 2.6, + "learning_rate": 3.785049745184179e-05, + "loss": 1.238, + "step": 4333000 + }, + { + "epoch": 2.6, + "learning_rate": 3.784839748628123e-05, + "loss": 1.223, + "step": 4333500 + }, + { + "epoch": 2.6, + "learning_rate": 3.784629752072066e-05, + "loss": 1.1851, + "step": 4334000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7844201755091216e-05, + "loss": 1.2077, + "step": 4334500 + }, + { + "epoch": 2.6, + "learning_rate": 3.784210598946177e-05, + "loss": 1.2062, + "step": 4335000 + }, + { + "epoch": 2.6, + "learning_rate": 3.784001022383233e-05, + "loss": 1.2089, + "step": 4335500 + }, + { + "epoch": 2.6, + "learning_rate": 3.783791025827176e-05, + "loss": 1.2074, + "step": 4336000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7835810292711204e-05, + "loss": 1.2054, + "step": 4336500 + }, + { + "epoch": 2.6, + "learning_rate": 3.783371032715063e-05, + "loss": 1.2181, + "step": 4337000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7831610361590064e-05, + "loss": 1.2379, + "step": 4337500 + }, + { + "epoch": 2.6, + "learning_rate": 3.7829510396029504e-05, + "loss": 1.2081, + "step": 4338000 + }, + { + "epoch": 2.6, + "learning_rate": 3.782741043046894e-05, + "loss": 1.1883, + "step": 4338500 + }, + { + "epoch": 2.6, + "learning_rate": 3.782531046490838e-05, + "loss": 1.1868, + "step": 4339000 + }, + { + "epoch": 2.6, + "learning_rate": 3.782321049934781e-05, + "loss": 1.2058, + "step": 4339500 + }, + { + "epoch": 2.6, + "learning_rate": 3.7821110533787245e-05, + "loss": 1.1934, + "step": 4340000 + }, + { + "epoch": 2.6, + "learning_rate": 3.78190147681578e-05, + "loss": 1.2248, + "step": 4340500 + }, + { + "epoch": 2.6, + "learning_rate": 3.781691480259724e-05, + "loss": 1.2268, + "step": 4341000 + }, + { + "epoch": 2.6, + "learning_rate": 3.781481483703667e-05, + "loss": 1.2115, + "step": 4341500 + }, + { + "epoch": 2.6, + "learning_rate": 3.7812714871476105e-05, + "loss": 1.2071, + "step": 4342000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7810614905915546e-05, + "loss": 1.1998, + "step": 4342500 + }, + { + "epoch": 2.6, + "learning_rate": 3.78085191402861e-05, + "loss": 1.232, + "step": 4343000 + }, + { + "epoch": 2.6, + "learning_rate": 3.780641917472553e-05, + "loss": 1.2164, + "step": 4343500 + }, + { + "epoch": 2.6, + "learning_rate": 3.7804319209164966e-05, + "loss": 1.2002, + "step": 4344000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7802219243604406e-05, + "loss": 1.2062, + "step": 4344500 + }, + { + "epoch": 2.61, + "learning_rate": 3.780012347797496e-05, + "loss": 1.2022, + "step": 4345000 + }, + { + "epoch": 2.61, + "learning_rate": 3.779802351241439e-05, + "loss": 1.2143, + "step": 4345500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7795923546853834e-05, + "loss": 1.2271, + "step": 4346000 + }, + { + "epoch": 2.61, + "learning_rate": 3.779382358129327e-05, + "loss": 1.2418, + "step": 4346500 + }, + { + "epoch": 2.61, + "learning_rate": 3.77917236157327e-05, + "loss": 1.2214, + "step": 4347000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7789627850103254e-05, + "loss": 1.2291, + "step": 4347500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7787532084473814e-05, + "loss": 1.2229, + "step": 4348000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7785432118913255e-05, + "loss": 1.2125, + "step": 4348500 + }, + { + "epoch": 2.61, + "learning_rate": 3.778333215335268e-05, + "loss": 1.202, + "step": 4349000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7781232187792115e-05, + "loss": 1.2179, + "step": 4349500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7779132222231555e-05, + "loss": 1.1836, + "step": 4350000 + }, + { + "epoch": 2.61, + "learning_rate": 3.777703225667099e-05, + "loss": 1.1948, + "step": 4350500 + }, + { + "epoch": 2.61, + "learning_rate": 3.777493229111042e-05, + "loss": 1.2334, + "step": 4351000 + }, + { + "epoch": 2.61, + "learning_rate": 3.777283232554986e-05, + "loss": 1.2038, + "step": 4351500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7770732359989296e-05, + "loss": 1.1914, + "step": 4352000 + }, + { + "epoch": 2.61, + "learning_rate": 3.776863659435985e-05, + "loss": 1.2288, + "step": 4352500 + }, + { + "epoch": 2.61, + "learning_rate": 3.776653662879929e-05, + "loss": 1.2249, + "step": 4353000 + }, + { + "epoch": 2.61, + "learning_rate": 3.776443666323872e-05, + "loss": 1.2435, + "step": 4353500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7762340897609276e-05, + "loss": 1.1967, + "step": 4354000 + }, + { + "epoch": 2.61, + "learning_rate": 3.776024093204871e-05, + "loss": 1.2522, + "step": 4354500 + }, + { + "epoch": 2.61, + "learning_rate": 3.775814096648815e-05, + "loss": 1.2142, + "step": 4355000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7756041000927584e-05, + "loss": 1.2276, + "step": 4355500 + }, + { + "epoch": 2.61, + "learning_rate": 3.775394103536702e-05, + "loss": 1.2209, + "step": 4356000 + }, + { + "epoch": 2.61, + "learning_rate": 3.775184106980646e-05, + "loss": 1.2056, + "step": 4356500 + }, + { + "epoch": 2.61, + "learning_rate": 3.774974110424589e-05, + "loss": 1.231, + "step": 4357000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7747641138685324e-05, + "loss": 1.2285, + "step": 4357500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7745541173124764e-05, + "loss": 1.2244, + "step": 4358000 + }, + { + "epoch": 2.61, + "learning_rate": 3.77434412075642e-05, + "loss": 1.2046, + "step": 4358500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7741341242003625e-05, + "loss": 1.203, + "step": 4359000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7739241276443065e-05, + "loss": 1.1867, + "step": 4359500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7737145510813625e-05, + "loss": 1.2079, + "step": 4360000 + }, + { + "epoch": 2.61, + "learning_rate": 3.773504974518418e-05, + "loss": 1.2033, + "step": 4360500 + }, + { + "epoch": 2.61, + "learning_rate": 3.773294977962361e-05, + "loss": 1.1957, + "step": 4361000 + }, + { + "epoch": 2.61, + "learning_rate": 3.773084981406305e-05, + "loss": 1.2122, + "step": 4361500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7728749848502486e-05, + "loss": 1.1986, + "step": 4362000 + }, + { + "epoch": 2.62, + "learning_rate": 3.772664988294192e-05, + "loss": 1.1876, + "step": 4362500 + }, + { + "epoch": 2.62, + "learning_rate": 3.772455411731247e-05, + "loss": 1.1838, + "step": 4363000 + }, + { + "epoch": 2.62, + "learning_rate": 3.772245415175191e-05, + "loss": 1.2147, + "step": 4363500 + }, + { + "epoch": 2.62, + "learning_rate": 3.772035418619135e-05, + "loss": 1.2008, + "step": 4364000 + }, + { + "epoch": 2.62, + "learning_rate": 3.771825422063078e-05, + "loss": 1.2216, + "step": 4364500 + }, + { + "epoch": 2.62, + "learning_rate": 3.771615425507022e-05, + "loss": 1.1932, + "step": 4365000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7714054289509654e-05, + "loss": 1.2096, + "step": 4365500 + }, + { + "epoch": 2.62, + "learning_rate": 3.771195432394909e-05, + "loss": 1.2179, + "step": 4366000 + }, + { + "epoch": 2.62, + "learning_rate": 3.770985435838852e-05, + "loss": 1.2099, + "step": 4366500 + }, + { + "epoch": 2.62, + "learning_rate": 3.770775859275908e-05, + "loss": 1.2148, + "step": 4367000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7705662827129635e-05, + "loss": 1.2221, + "step": 4367500 + }, + { + "epoch": 2.62, + "learning_rate": 3.770356286156907e-05, + "loss": 1.2291, + "step": 4368000 + }, + { + "epoch": 2.62, + "learning_rate": 3.770146289600851e-05, + "loss": 1.239, + "step": 4368500 + }, + { + "epoch": 2.62, + "learning_rate": 3.769936293044794e-05, + "loss": 1.1933, + "step": 4369000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7697262964887375e-05, + "loss": 1.2128, + "step": 4369500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7695162999326815e-05, + "loss": 1.1933, + "step": 4370000 + }, + { + "epoch": 2.62, + "learning_rate": 3.769306303376625e-05, + "loss": 1.2263, + "step": 4370500 + }, + { + "epoch": 2.62, + "learning_rate": 3.76909672681368e-05, + "loss": 1.2228, + "step": 4371000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7688867302576236e-05, + "loss": 1.2077, + "step": 4371500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7686767337015676e-05, + "loss": 1.2177, + "step": 4372000 + }, + { + "epoch": 2.62, + "learning_rate": 3.768466737145511e-05, + "loss": 1.2111, + "step": 4372500 + }, + { + "epoch": 2.62, + "learning_rate": 3.768256740589454e-05, + "loss": 1.2148, + "step": 4373000 + }, + { + "epoch": 2.62, + "learning_rate": 3.76804716402651e-05, + "loss": 1.2335, + "step": 4373500 + }, + { + "epoch": 2.62, + "learning_rate": 3.767837167470454e-05, + "loss": 1.2015, + "step": 4374000 + }, + { + "epoch": 2.62, + "learning_rate": 3.767627170914397e-05, + "loss": 1.2122, + "step": 4374500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7674171743583404e-05, + "loss": 1.2042, + "step": 4375000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7672071778022844e-05, + "loss": 1.2212, + "step": 4375500 + }, + { + "epoch": 2.62, + "learning_rate": 3.76699760123934e-05, + "loss": 1.1926, + "step": 4376000 + }, + { + "epoch": 2.62, + "learning_rate": 3.766787604683283e-05, + "loss": 1.2014, + "step": 4376500 + }, + { + "epoch": 2.62, + "learning_rate": 3.766577608127227e-05, + "loss": 1.2006, + "step": 4377000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7663676115711705e-05, + "loss": 1.2074, + "step": 4377500 + }, + { + "epoch": 2.62, + "learning_rate": 3.766158035008226e-05, + "loss": 1.1944, + "step": 4378000 + }, + { + "epoch": 2.63, + "learning_rate": 3.765948038452169e-05, + "loss": 1.205, + "step": 4378500 + }, + { + "epoch": 2.63, + "learning_rate": 3.765738041896113e-05, + "loss": 1.2292, + "step": 4379000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7655280453400566e-05, + "loss": 1.245, + "step": 4379500 + }, + { + "epoch": 2.63, + "learning_rate": 3.765318048784e-05, + "loss": 1.2431, + "step": 4380000 + }, + { + "epoch": 2.63, + "learning_rate": 3.765108472221055e-05, + "loss": 1.2105, + "step": 4380500 + }, + { + "epoch": 2.63, + "learning_rate": 3.764898475664999e-05, + "loss": 1.215, + "step": 4381000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7646884791089426e-05, + "loss": 1.2113, + "step": 4381500 + }, + { + "epoch": 2.63, + "learning_rate": 3.764478482552886e-05, + "loss": 1.2354, + "step": 4382000 + }, + { + "epoch": 2.63, + "learning_rate": 3.76426848599683e-05, + "loss": 1.1899, + "step": 4382500 + }, + { + "epoch": 2.63, + "learning_rate": 3.764058489440773e-05, + "loss": 1.2096, + "step": 4383000 + }, + { + "epoch": 2.63, + "learning_rate": 3.763848492884717e-05, + "loss": 1.2048, + "step": 4383500 + }, + { + "epoch": 2.63, + "learning_rate": 3.76363849632866e-05, + "loss": 1.2087, + "step": 4384000 + }, + { + "epoch": 2.63, + "learning_rate": 3.763428919765716e-05, + "loss": 1.1912, + "step": 4384500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7632189232096594e-05, + "loss": 1.2125, + "step": 4385000 + }, + { + "epoch": 2.63, + "learning_rate": 3.763008926653603e-05, + "loss": 1.2182, + "step": 4385500 + }, + { + "epoch": 2.63, + "learning_rate": 3.762798930097546e-05, + "loss": 1.2146, + "step": 4386000 + }, + { + "epoch": 2.63, + "learning_rate": 3.762589353534602e-05, + "loss": 1.2512, + "step": 4386500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7623793569785455e-05, + "loss": 1.1869, + "step": 4387000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7621693604224895e-05, + "loss": 1.1998, + "step": 4387500 + }, + { + "epoch": 2.63, + "learning_rate": 3.761959363866432e-05, + "loss": 1.2329, + "step": 4388000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7617502072966e-05, + "loss": 1.2008, + "step": 4388500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7615402107405436e-05, + "loss": 1.2257, + "step": 4389000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7613302141844876e-05, + "loss": 1.2047, + "step": 4389500 + }, + { + "epoch": 2.63, + "learning_rate": 3.761120217628431e-05, + "loss": 1.24, + "step": 4390000 + }, + { + "epoch": 2.63, + "learning_rate": 3.760910221072374e-05, + "loss": 1.2129, + "step": 4390500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7607006445094296e-05, + "loss": 1.2061, + "step": 4391000 + }, + { + "epoch": 2.63, + "learning_rate": 3.760490647953374e-05, + "loss": 1.1795, + "step": 4391500 + }, + { + "epoch": 2.63, + "learning_rate": 3.760280651397317e-05, + "loss": 1.2074, + "step": 4392000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7600706548412604e-05, + "loss": 1.2085, + "step": 4392500 + }, + { + "epoch": 2.63, + "learning_rate": 3.759861078278316e-05, + "loss": 1.1778, + "step": 4393000 + }, + { + "epoch": 2.63, + "learning_rate": 3.759651501715372e-05, + "loss": 1.2119, + "step": 4393500 + }, + { + "epoch": 2.63, + "learning_rate": 3.759441505159315e-05, + "loss": 1.1892, + "step": 4394000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7592315086032584e-05, + "loss": 1.2175, + "step": 4394500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7590215120472025e-05, + "loss": 1.2312, + "step": 4395000 + }, + { + "epoch": 2.64, + "learning_rate": 3.758811515491146e-05, + "loss": 1.213, + "step": 4395500 + }, + { + "epoch": 2.64, + "learning_rate": 3.758601938928202e-05, + "loss": 1.1981, + "step": 4396000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7583919423721445e-05, + "loss": 1.2325, + "step": 4396500 + }, + { + "epoch": 2.64, + "learning_rate": 3.7581819458160885e-05, + "loss": 1.1897, + "step": 4397000 + }, + { + "epoch": 2.64, + "learning_rate": 3.757971949260032e-05, + "loss": 1.2175, + "step": 4397500 + }, + { + "epoch": 2.64, + "learning_rate": 3.757761952703975e-05, + "loss": 1.1963, + "step": 4398000 + }, + { + "epoch": 2.64, + "learning_rate": 3.757551956147919e-05, + "loss": 1.233, + "step": 4398500 + }, + { + "epoch": 2.64, + "learning_rate": 3.7573419595918626e-05, + "loss": 1.1935, + "step": 4399000 + }, + { + "epoch": 2.64, + "learning_rate": 3.757131963035806e-05, + "loss": 1.2238, + "step": 4399500 + }, + { + "epoch": 2.64, + "learning_rate": 3.75692196647975e-05, + "loss": 1.2251, + "step": 4400000 + }, + { + "epoch": 2.64, + "eval_loss": 1.1657322645187378, + "eval_runtime": 1099.1385, + "eval_samples_per_second": 479.212, + "eval_steps_per_second": 79.869, + "step": 4400000 + }, + { + "epoch": 2.64, + "learning_rate": 3.756711969923693e-05, + "loss": 1.2357, + "step": 4400500 + }, + { + "epoch": 2.64, + "learning_rate": 3.756501973367637e-05, + "loss": 1.2106, + "step": 4401000 + }, + { + "epoch": 2.64, + "learning_rate": 3.756291976811581e-05, + "loss": 1.2053, + "step": 4401500 + }, + { + "epoch": 2.64, + "learning_rate": 3.756082820241748e-05, + "loss": 1.2313, + "step": 4402000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7558728236856914e-05, + "loss": 1.1984, + "step": 4402500 + }, + { + "epoch": 2.64, + "learning_rate": 3.755662827129635e-05, + "loss": 1.1957, + "step": 4403000 + }, + { + "epoch": 2.64, + "learning_rate": 3.755452830573579e-05, + "loss": 1.2084, + "step": 4403500 + }, + { + "epoch": 2.64, + "learning_rate": 3.755242834017522e-05, + "loss": 1.1872, + "step": 4404000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7550328374614655e-05, + "loss": 1.2052, + "step": 4404500 + }, + { + "epoch": 2.64, + "learning_rate": 3.7548228409054095e-05, + "loss": 1.2295, + "step": 4405000 + }, + { + "epoch": 2.64, + "learning_rate": 3.754612844349353e-05, + "loss": 1.2171, + "step": 4405500 + }, + { + "epoch": 2.64, + "learning_rate": 3.754403267786408e-05, + "loss": 1.2185, + "step": 4406000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7541932712303515e-05, + "loss": 1.2056, + "step": 4406500 + }, + { + "epoch": 2.64, + "learning_rate": 3.7539832746742956e-05, + "loss": 1.1981, + "step": 4407000 + }, + { + "epoch": 2.64, + "learning_rate": 3.753773278118239e-05, + "loss": 1.245, + "step": 4407500 + }, + { + "epoch": 2.64, + "learning_rate": 3.753563701555294e-05, + "loss": 1.1932, + "step": 4408000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7533541249923496e-05, + "loss": 1.1845, + "step": 4408500 + }, + { + "epoch": 2.64, + "learning_rate": 3.7531441284362936e-05, + "loss": 1.1873, + "step": 4409000 + }, + { + "epoch": 2.64, + "learning_rate": 3.752934131880237e-05, + "loss": 1.2121, + "step": 4409500 + }, + { + "epoch": 2.64, + "learning_rate": 3.75272413532418e-05, + "loss": 1.2368, + "step": 4410000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7525141387681244e-05, + "loss": 1.2022, + "step": 4410500 + }, + { + "epoch": 2.64, + "learning_rate": 3.752304982198292e-05, + "loss": 1.2097, + "step": 4411000 + }, + { + "epoch": 2.64, + "learning_rate": 3.752094985642236e-05, + "loss": 1.2103, + "step": 4411500 + }, + { + "epoch": 2.65, + "learning_rate": 3.751884989086179e-05, + "loss": 1.2152, + "step": 4412000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7516749925301224e-05, + "loss": 1.2081, + "step": 4412500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7514649959740665e-05, + "loss": 1.192, + "step": 4413000 + }, + { + "epoch": 2.65, + "learning_rate": 3.751254999418009e-05, + "loss": 1.2016, + "step": 4413500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7510450028619525e-05, + "loss": 1.2182, + "step": 4414000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7508350063058965e-05, + "loss": 1.1954, + "step": 4414500 + }, + { + "epoch": 2.65, + "learning_rate": 3.75062500974984e-05, + "loss": 1.2268, + "step": 4415000 + }, + { + "epoch": 2.65, + "learning_rate": 3.750415433186895e-05, + "loss": 1.2096, + "step": 4415500 + }, + { + "epoch": 2.65, + "learning_rate": 3.750205436630839e-05, + "loss": 1.1921, + "step": 4416000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7499954400747826e-05, + "loss": 1.2256, + "step": 4416500 + }, + { + "epoch": 2.65, + "learning_rate": 3.749785443518726e-05, + "loss": 1.1651, + "step": 4417000 + }, + { + "epoch": 2.65, + "learning_rate": 3.749575866955782e-05, + "loss": 1.2143, + "step": 4417500 + }, + { + "epoch": 2.65, + "learning_rate": 3.749365870399725e-05, + "loss": 1.1977, + "step": 4418000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7491558738436686e-05, + "loss": 1.2257, + "step": 4418500 + }, + { + "epoch": 2.65, + "learning_rate": 3.748945877287612e-05, + "loss": 1.2149, + "step": 4419000 + }, + { + "epoch": 2.65, + "learning_rate": 3.748736300724668e-05, + "loss": 1.2131, + "step": 4419500 + }, + { + "epoch": 2.65, + "learning_rate": 3.748526304168612e-05, + "loss": 1.214, + "step": 4420000 + }, + { + "epoch": 2.65, + "learning_rate": 3.748316307612555e-05, + "loss": 1.2003, + "step": 4420500 + }, + { + "epoch": 2.65, + "learning_rate": 3.748106311056498e-05, + "loss": 1.199, + "step": 4421000 + }, + { + "epoch": 2.65, + "learning_rate": 3.747896314500442e-05, + "loss": 1.2201, + "step": 4421500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7476863179443854e-05, + "loss": 1.1687, + "step": 4422000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7474767413814415e-05, + "loss": 1.2092, + "step": 4422500 + }, + { + "epoch": 2.65, + "learning_rate": 3.747266744825385e-05, + "loss": 1.1789, + "step": 4423000 + }, + { + "epoch": 2.65, + "learning_rate": 3.747056748269328e-05, + "loss": 1.2124, + "step": 4423500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7468467517132715e-05, + "loss": 1.2179, + "step": 4424000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7466367551572155e-05, + "loss": 1.2112, + "step": 4424500 + }, + { + "epoch": 2.65, + "learning_rate": 3.746426758601159e-05, + "loss": 1.224, + "step": 4425000 + }, + { + "epoch": 2.65, + "learning_rate": 3.746216762045102e-05, + "loss": 1.1806, + "step": 4425500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7460071854821576e-05, + "loss": 1.1955, + "step": 4426000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7457971889261016e-05, + "loss": 1.1677, + "step": 4426500 + }, + { + "epoch": 2.65, + "learning_rate": 3.745587192370045e-05, + "loss": 1.1853, + "step": 4427000 + }, + { + "epoch": 2.65, + "learning_rate": 3.745377195813988e-05, + "loss": 1.2243, + "step": 4427500 + }, + { + "epoch": 2.65, + "learning_rate": 3.745167199257932e-05, + "loss": 1.2365, + "step": 4428000 + }, + { + "epoch": 2.66, + "learning_rate": 3.744957202701876e-05, + "loss": 1.1986, + "step": 4428500 + }, + { + "epoch": 2.66, + "learning_rate": 3.744747206145819e-05, + "loss": 1.2228, + "step": 4429000 + }, + { + "epoch": 2.66, + "learning_rate": 3.744537209589763e-05, + "loss": 1.1789, + "step": 4429500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7443272130337064e-05, + "loss": 1.2196, + "step": 4430000 + }, + { + "epoch": 2.66, + "learning_rate": 3.74411721647765e-05, + "loss": 1.1968, + "step": 4430500 + }, + { + "epoch": 2.66, + "learning_rate": 3.743907219921593e-05, + "loss": 1.2057, + "step": 4431000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7436972233655364e-05, + "loss": 1.2077, + "step": 4431500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7434876468025925e-05, + "loss": 1.1919, + "step": 4432000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7432776502465365e-05, + "loss": 1.2149, + "step": 4432500 + }, + { + "epoch": 2.66, + "learning_rate": 3.743068073683592e-05, + "loss": 1.2114, + "step": 4433000 + }, + { + "epoch": 2.66, + "learning_rate": 3.742858077127535e-05, + "loss": 1.2244, + "step": 4433500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7426480805714785e-05, + "loss": 1.1967, + "step": 4434000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7424380840154226e-05, + "loss": 1.2102, + "step": 4434500 + }, + { + "epoch": 2.66, + "learning_rate": 3.742228087459366e-05, + "loss": 1.2152, + "step": 4435000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7420180909033086e-05, + "loss": 1.2235, + "step": 4435500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7418080943472526e-05, + "loss": 1.2207, + "step": 4436000 + }, + { + "epoch": 2.66, + "learning_rate": 3.741598097791196e-05, + "loss": 1.2132, + "step": 4436500 + }, + { + "epoch": 2.66, + "learning_rate": 3.741388521228252e-05, + "loss": 1.221, + "step": 4437000 + }, + { + "epoch": 2.66, + "learning_rate": 3.741178524672195e-05, + "loss": 1.2138, + "step": 4437500 + }, + { + "epoch": 2.66, + "learning_rate": 3.740968528116139e-05, + "loss": 1.221, + "step": 4438000 + }, + { + "epoch": 2.66, + "learning_rate": 3.740758951553195e-05, + "loss": 1.2102, + "step": 4438500 + }, + { + "epoch": 2.66, + "learning_rate": 3.740548954997138e-05, + "loss": 1.2038, + "step": 4439000 + }, + { + "epoch": 2.66, + "learning_rate": 3.740338958441082e-05, + "loss": 1.2192, + "step": 4439500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7401289618850254e-05, + "loss": 1.2068, + "step": 4440000 + }, + { + "epoch": 2.66, + "learning_rate": 3.739918965328968e-05, + "loss": 1.2141, + "step": 4440500 + }, + { + "epoch": 2.66, + "learning_rate": 3.739708968772912e-05, + "loss": 1.1971, + "step": 4441000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7394989722168555e-05, + "loss": 1.2089, + "step": 4441500 + }, + { + "epoch": 2.66, + "learning_rate": 3.739288975660799e-05, + "loss": 1.1627, + "step": 4442000 + }, + { + "epoch": 2.66, + "learning_rate": 3.739079399097854e-05, + "loss": 1.2189, + "step": 4442500 + }, + { + "epoch": 2.66, + "learning_rate": 3.738869402541798e-05, + "loss": 1.1798, + "step": 4443000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7386594059857415e-05, + "loss": 1.2182, + "step": 4443500 + }, + { + "epoch": 2.66, + "learning_rate": 3.738449409429685e-05, + "loss": 1.2382, + "step": 4444000 + }, + { + "epoch": 2.66, + "learning_rate": 3.738240252859853e-05, + "loss": 1.2126, + "step": 4444500 + }, + { + "epoch": 2.66, + "learning_rate": 3.738030256303797e-05, + "loss": 1.1834, + "step": 4445000 + }, + { + "epoch": 2.67, + "learning_rate": 3.73782025974774e-05, + "loss": 1.1854, + "step": 4445500 + }, + { + "epoch": 2.67, + "learning_rate": 3.7376102631916836e-05, + "loss": 1.192, + "step": 4446000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7374002666356277e-05, + "loss": 1.2063, + "step": 4446500 + }, + { + "epoch": 2.67, + "learning_rate": 3.737190690072683e-05, + "loss": 1.2056, + "step": 4447000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7369806935166264e-05, + "loss": 1.1957, + "step": 4447500 + }, + { + "epoch": 2.67, + "learning_rate": 3.73677069696057e-05, + "loss": 1.2347, + "step": 4448000 + }, + { + "epoch": 2.67, + "learning_rate": 3.736560700404514e-05, + "loss": 1.2208, + "step": 4448500 + }, + { + "epoch": 2.67, + "learning_rate": 3.736350703848457e-05, + "loss": 1.2232, + "step": 4449000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7361407072924004e-05, + "loss": 1.1973, + "step": 4449500 + }, + { + "epoch": 2.67, + "learning_rate": 3.735930710736344e-05, + "loss": 1.1996, + "step": 4450000 + }, + { + "epoch": 2.67, + "learning_rate": 3.735720714180287e-05, + "loss": 1.2181, + "step": 4450500 + }, + { + "epoch": 2.67, + "learning_rate": 3.735511137617343e-05, + "loss": 1.2032, + "step": 4451000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7353011410612865e-05, + "loss": 1.2197, + "step": 4451500 + }, + { + "epoch": 2.67, + "learning_rate": 3.73509114450523e-05, + "loss": 1.2307, + "step": 4452000 + }, + { + "epoch": 2.67, + "learning_rate": 3.734881147949173e-05, + "loss": 1.2136, + "step": 4452500 + }, + { + "epoch": 2.67, + "learning_rate": 3.734671571386229e-05, + "loss": 1.1857, + "step": 4453000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7344619948232846e-05, + "loss": 1.2111, + "step": 4453500 + }, + { + "epoch": 2.67, + "learning_rate": 3.7342519982672286e-05, + "loss": 1.2055, + "step": 4454000 + }, + { + "epoch": 2.67, + "learning_rate": 3.734042001711172e-05, + "loss": 1.1874, + "step": 4454500 + }, + { + "epoch": 2.67, + "learning_rate": 3.733832005155115e-05, + "loss": 1.2045, + "step": 4455000 + }, + { + "epoch": 2.67, + "learning_rate": 3.733622008599059e-05, + "loss": 1.2073, + "step": 4455500 + }, + { + "epoch": 2.67, + "learning_rate": 3.733412012043003e-05, + "loss": 1.2253, + "step": 4456000 + }, + { + "epoch": 2.67, + "learning_rate": 3.733202435480058e-05, + "loss": 1.1832, + "step": 4456500 + }, + { + "epoch": 2.67, + "learning_rate": 3.7329924389240014e-05, + "loss": 1.2181, + "step": 4457000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7327824423679454e-05, + "loss": 1.181, + "step": 4457500 + }, + { + "epoch": 2.67, + "learning_rate": 3.732572445811889e-05, + "loss": 1.2172, + "step": 4458000 + }, + { + "epoch": 2.67, + "learning_rate": 3.732362449255832e-05, + "loss": 1.2273, + "step": 4458500 + }, + { + "epoch": 2.67, + "learning_rate": 3.732152872692888e-05, + "loss": 1.2111, + "step": 4459000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7319428761368315e-05, + "loss": 1.2224, + "step": 4459500 + }, + { + "epoch": 2.67, + "learning_rate": 3.731732879580775e-05, + "loss": 1.1904, + "step": 4460000 + }, + { + "epoch": 2.67, + "learning_rate": 3.731522883024719e-05, + "loss": 1.2094, + "step": 4460500 + }, + { + "epoch": 2.67, + "learning_rate": 3.731312886468662e-05, + "loss": 1.1945, + "step": 4461000 + }, + { + "epoch": 2.67, + "learning_rate": 3.731102889912605e-05, + "loss": 1.2206, + "step": 4461500 + }, + { + "epoch": 2.68, + "learning_rate": 3.730893313349661e-05, + "loss": 1.1879, + "step": 4462000 + }, + { + "epoch": 2.68, + "learning_rate": 3.730683316793605e-05, + "loss": 1.2016, + "step": 4462500 + }, + { + "epoch": 2.68, + "learning_rate": 3.730473320237548e-05, + "loss": 1.2392, + "step": 4463000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7302637436746036e-05, + "loss": 1.1793, + "step": 4463500 + }, + { + "epoch": 2.68, + "learning_rate": 3.730053747118547e-05, + "loss": 1.1998, + "step": 4464000 + }, + { + "epoch": 2.68, + "learning_rate": 3.729843750562491e-05, + "loss": 1.2018, + "step": 4464500 + }, + { + "epoch": 2.68, + "learning_rate": 3.729633754006434e-05, + "loss": 1.2131, + "step": 4465000 + }, + { + "epoch": 2.68, + "learning_rate": 3.729423757450378e-05, + "loss": 1.1859, + "step": 4465500 + }, + { + "epoch": 2.68, + "learning_rate": 3.729213760894322e-05, + "loss": 1.2183, + "step": 4466000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7290037643382644e-05, + "loss": 1.213, + "step": 4466500 + }, + { + "epoch": 2.68, + "learning_rate": 3.7287937677822084e-05, + "loss": 1.2292, + "step": 4467000 + }, + { + "epoch": 2.68, + "learning_rate": 3.728583771226152e-05, + "loss": 1.19, + "step": 4467500 + }, + { + "epoch": 2.68, + "learning_rate": 3.728373774670095e-05, + "loss": 1.2123, + "step": 4468000 + }, + { + "epoch": 2.68, + "learning_rate": 3.728164198107151e-05, + "loss": 1.2185, + "step": 4468500 + }, + { + "epoch": 2.68, + "learning_rate": 3.7279546215442065e-05, + "loss": 1.2111, + "step": 4469000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7277446249881505e-05, + "loss": 1.2133, + "step": 4469500 + }, + { + "epoch": 2.68, + "learning_rate": 3.727534628432094e-05, + "loss": 1.2369, + "step": 4470000 + }, + { + "epoch": 2.68, + "learning_rate": 3.727324631876037e-05, + "loss": 1.2076, + "step": 4470500 + }, + { + "epoch": 2.68, + "learning_rate": 3.727114635319981e-05, + "loss": 1.1963, + "step": 4471000 + }, + { + "epoch": 2.68, + "learning_rate": 3.726904638763924e-05, + "loss": 1.2016, + "step": 4471500 + }, + { + "epoch": 2.68, + "learning_rate": 3.726694642207867e-05, + "loss": 1.2082, + "step": 4472000 + }, + { + "epoch": 2.68, + "learning_rate": 3.726484645651811e-05, + "loss": 1.2014, + "step": 4472500 + }, + { + "epoch": 2.68, + "learning_rate": 3.726275069088867e-05, + "loss": 1.1942, + "step": 4473000 + }, + { + "epoch": 2.68, + "learning_rate": 3.72606507253281e-05, + "loss": 1.1928, + "step": 4473500 + }, + { + "epoch": 2.68, + "learning_rate": 3.725855075976754e-05, + "loss": 1.2291, + "step": 4474000 + }, + { + "epoch": 2.68, + "learning_rate": 3.725645079420697e-05, + "loss": 1.1859, + "step": 4474500 + }, + { + "epoch": 2.68, + "learning_rate": 3.725435082864641e-05, + "loss": 1.2203, + "step": 4475000 + }, + { + "epoch": 2.68, + "learning_rate": 3.725225086308585e-05, + "loss": 1.2171, + "step": 4475500 + }, + { + "epoch": 2.68, + "learning_rate": 3.72501550974564e-05, + "loss": 1.1794, + "step": 4476000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7248055131895834e-05, + "loss": 1.2169, + "step": 4476500 + }, + { + "epoch": 2.68, + "learning_rate": 3.724595516633527e-05, + "loss": 1.2109, + "step": 4477000 + }, + { + "epoch": 2.68, + "learning_rate": 3.724385520077471e-05, + "loss": 1.197, + "step": 4477500 + }, + { + "epoch": 2.68, + "learning_rate": 3.724175523521414e-05, + "loss": 1.2231, + "step": 4478000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7239659469584695e-05, + "loss": 1.2241, + "step": 4478500 + }, + { + "epoch": 2.69, + "learning_rate": 3.723755950402413e-05, + "loss": 1.2119, + "step": 4479000 + }, + { + "epoch": 2.69, + "learning_rate": 3.723545953846357e-05, + "loss": 1.2237, + "step": 4479500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7233359572903e-05, + "loss": 1.1747, + "step": 4480000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7231259607342435e-05, + "loss": 1.2032, + "step": 4480500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7229159641781875e-05, + "loss": 1.1848, + "step": 4481000 + }, + { + "epoch": 2.69, + "learning_rate": 3.722705967622131e-05, + "loss": 1.1924, + "step": 4481500 + }, + { + "epoch": 2.69, + "learning_rate": 3.722495971066075e-05, + "loss": 1.2005, + "step": 4482000 + }, + { + "epoch": 2.69, + "learning_rate": 3.72228639450313e-05, + "loss": 1.2102, + "step": 4482500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7220763979470736e-05, + "loss": 1.1917, + "step": 4483000 + }, + { + "epoch": 2.69, + "learning_rate": 3.721866401391017e-05, + "loss": 1.1979, + "step": 4483500 + }, + { + "epoch": 2.69, + "learning_rate": 3.721656824828072e-05, + "loss": 1.2182, + "step": 4484000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7214472482651284e-05, + "loss": 1.2239, + "step": 4484500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7212372517090724e-05, + "loss": 1.2424, + "step": 4485000 + }, + { + "epoch": 2.69, + "learning_rate": 3.721027255153015e-05, + "loss": 1.1784, + "step": 4485500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7208172585969584e-05, + "loss": 1.2004, + "step": 4486000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7206072620409024e-05, + "loss": 1.2478, + "step": 4486500 + }, + { + "epoch": 2.69, + "learning_rate": 3.720397265484846e-05, + "loss": 1.234, + "step": 4487000 + }, + { + "epoch": 2.69, + "learning_rate": 3.720187268928789e-05, + "loss": 1.1881, + "step": 4487500 + }, + { + "epoch": 2.69, + "learning_rate": 3.719977272372733e-05, + "loss": 1.1966, + "step": 4488000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7197672758166765e-05, + "loss": 1.1886, + "step": 4488500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7195572792606205e-05, + "loss": 1.2231, + "step": 4489000 + }, + { + "epoch": 2.69, + "learning_rate": 3.719347282704564e-05, + "loss": 1.2004, + "step": 4489500 + }, + { + "epoch": 2.69, + "learning_rate": 3.719137706141619e-05, + "loss": 1.2025, + "step": 4490000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7189277095855626e-05, + "loss": 1.1911, + "step": 4490500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7187177130295066e-05, + "loss": 1.2208, + "step": 4491000 + }, + { + "epoch": 2.69, + "learning_rate": 3.71850771647345e-05, + "loss": 1.1989, + "step": 4491500 + }, + { + "epoch": 2.69, + "learning_rate": 3.718297719917393e-05, + "loss": 1.2078, + "step": 4492000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7180881433544486e-05, + "loss": 1.2043, + "step": 4492500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7178781467983927e-05, + "loss": 1.1965, + "step": 4493000 + }, + { + "epoch": 2.69, + "learning_rate": 3.717668150242336e-05, + "loss": 1.189, + "step": 4493500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7174581536862793e-05, + "loss": 1.2165, + "step": 4494000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7172481571302234e-05, + "loss": 1.2124, + "step": 4494500 + }, + { + "epoch": 2.69, + "learning_rate": 3.717038580567279e-05, + "loss": 1.2135, + "step": 4495000 + }, + { + "epoch": 2.7, + "learning_rate": 3.716828584011222e-05, + "loss": 1.2205, + "step": 4495500 + }, + { + "epoch": 2.7, + "learning_rate": 3.716618587455166e-05, + "loss": 1.2408, + "step": 4496000 + }, + { + "epoch": 2.7, + "learning_rate": 3.7164085908991094e-05, + "loss": 1.2011, + "step": 4496500 + }, + { + "epoch": 2.7, + "learning_rate": 3.716198594343053e-05, + "loss": 1.2201, + "step": 4497000 + }, + { + "epoch": 2.7, + "learning_rate": 3.715988597786997e-05, + "loss": 1.1845, + "step": 4497500 + }, + { + "epoch": 2.7, + "learning_rate": 3.715779021224052e-05, + "loss": 1.2008, + "step": 4498000 + }, + { + "epoch": 2.7, + "learning_rate": 3.7155690246679955e-05, + "loss": 1.204, + "step": 4498500 + }, + { + "epoch": 2.7, + "learning_rate": 3.715359448105051e-05, + "loss": 1.2202, + "step": 4499000 + }, + { + "epoch": 2.7, + "learning_rate": 3.715149451548994e-05, + "loss": 1.2081, + "step": 4499500 + }, + { + "epoch": 2.7, + "learning_rate": 3.714939454992938e-05, + "loss": 1.2204, + "step": 4500000 + }, + { + "epoch": 2.7, + "eval_loss": 1.1660878658294678, + "eval_runtime": 1104.5668, + "eval_samples_per_second": 476.857, + "eval_steps_per_second": 79.476, + "step": 4500000 + }, + { + "epoch": 2.7, + "learning_rate": 3.7147294584368816e-05, + "loss": 1.2168, + "step": 4500500 + }, + { + "epoch": 2.7, + "learning_rate": 3.714519461880825e-05, + "loss": 1.1995, + "step": 4501000 + }, + { + "epoch": 2.7, + "learning_rate": 3.714309465324769e-05, + "loss": 1.1965, + "step": 4501500 + }, + { + "epoch": 2.7, + "learning_rate": 3.714099468768712e-05, + "loss": 1.198, + "step": 4502000 + }, + { + "epoch": 2.7, + "learning_rate": 3.7138894722126556e-05, + "loss": 1.2243, + "step": 4502500 + }, + { + "epoch": 2.7, + "learning_rate": 3.713679895649712e-05, + "loss": 1.2141, + "step": 4503000 + }, + { + "epoch": 2.7, + "learning_rate": 3.713470319086767e-05, + "loss": 1.2205, + "step": 4503500 + }, + { + "epoch": 2.7, + "learning_rate": 3.713260742523823e-05, + "loss": 1.2158, + "step": 4504000 + }, + { + "epoch": 2.7, + "learning_rate": 3.713050745967766e-05, + "loss": 1.2192, + "step": 4504500 + }, + { + "epoch": 2.7, + "learning_rate": 3.712840749411709e-05, + "loss": 1.2056, + "step": 4505000 + }, + { + "epoch": 2.7, + "learning_rate": 3.712630752855653e-05, + "loss": 1.1917, + "step": 4505500 + }, + { + "epoch": 2.7, + "learning_rate": 3.7124207562995965e-05, + "loss": 1.2154, + "step": 4506000 + }, + { + "epoch": 2.7, + "learning_rate": 3.71221075974354e-05, + "loss": 1.2152, + "step": 4506500 + }, + { + "epoch": 2.7, + "learning_rate": 3.712000763187484e-05, + "loss": 1.215, + "step": 4507000 + }, + { + "epoch": 2.7, + "learning_rate": 3.711790766631427e-05, + "loss": 1.2267, + "step": 4507500 + }, + { + "epoch": 2.7, + "learning_rate": 3.7115807700753705e-05, + "loss": 1.1986, + "step": 4508000 + }, + { + "epoch": 2.7, + "learning_rate": 3.7113707735193145e-05, + "loss": 1.1956, + "step": 4508500 + }, + { + "epoch": 2.7, + "learning_rate": 3.711160776963258e-05, + "loss": 1.1994, + "step": 4509000 + }, + { + "epoch": 2.7, + "learning_rate": 3.710950780407201e-05, + "loss": 1.2137, + "step": 4509500 + }, + { + "epoch": 2.7, + "learning_rate": 3.7107407838511446e-05, + "loss": 1.2, + "step": 4510000 + }, + { + "epoch": 2.7, + "learning_rate": 3.7105312072882006e-05, + "loss": 1.2109, + "step": 4510500 + }, + { + "epoch": 2.7, + "learning_rate": 3.710321210732144e-05, + "loss": 1.2096, + "step": 4511000 + }, + { + "epoch": 2.7, + "learning_rate": 3.710111214176088e-05, + "loss": 1.2084, + "step": 4511500 + }, + { + "epoch": 2.71, + "learning_rate": 3.709901217620031e-05, + "loss": 1.2131, + "step": 4512000 + }, + { + "epoch": 2.71, + "learning_rate": 3.709691221063974e-05, + "loss": 1.2246, + "step": 4512500 + }, + { + "epoch": 2.71, + "learning_rate": 3.709481224507918e-05, + "loss": 1.1942, + "step": 4513000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7092712279518614e-05, + "loss": 1.2029, + "step": 4513500 + }, + { + "epoch": 2.71, + "learning_rate": 3.709061231395805e-05, + "loss": 1.2152, + "step": 4514000 + }, + { + "epoch": 2.71, + "learning_rate": 3.708851654832861e-05, + "loss": 1.2066, + "step": 4514500 + }, + { + "epoch": 2.71, + "learning_rate": 3.708641658276804e-05, + "loss": 1.1868, + "step": 4515000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7084316617207474e-05, + "loss": 1.2129, + "step": 4515500 + }, + { + "epoch": 2.71, + "learning_rate": 3.708221665164691e-05, + "loss": 1.1843, + "step": 4516000 + }, + { + "epoch": 2.71, + "learning_rate": 3.708011668608635e-05, + "loss": 1.2288, + "step": 4516500 + }, + { + "epoch": 2.71, + "learning_rate": 3.707801672052578e-05, + "loss": 1.1749, + "step": 4517000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7075916754965215e-05, + "loss": 1.202, + "step": 4517500 + }, + { + "epoch": 2.71, + "learning_rate": 3.7073820989335775e-05, + "loss": 1.2309, + "step": 4518000 + }, + { + "epoch": 2.71, + "learning_rate": 3.707172102377521e-05, + "loss": 1.1955, + "step": 4518500 + }, + { + "epoch": 2.71, + "learning_rate": 3.706962105821464e-05, + "loss": 1.2199, + "step": 4519000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7067525292585196e-05, + "loss": 1.218, + "step": 4519500 + }, + { + "epoch": 2.71, + "learning_rate": 3.7065425327024636e-05, + "loss": 1.2075, + "step": 4520000 + }, + { + "epoch": 2.71, + "learning_rate": 3.706332536146407e-05, + "loss": 1.2114, + "step": 4520500 + }, + { + "epoch": 2.71, + "learning_rate": 3.70612253959035e-05, + "loss": 1.1896, + "step": 4521000 + }, + { + "epoch": 2.71, + "learning_rate": 3.705912543034294e-05, + "loss": 1.2396, + "step": 4521500 + }, + { + "epoch": 2.71, + "learning_rate": 3.705702546478238e-05, + "loss": 1.2188, + "step": 4522000 + }, + { + "epoch": 2.71, + "learning_rate": 3.705492549922181e-05, + "loss": 1.1916, + "step": 4522500 + }, + { + "epoch": 2.71, + "learning_rate": 3.705282553366125e-05, + "loss": 1.2313, + "step": 4523000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7050725568100684e-05, + "loss": 1.2217, + "step": 4523500 + }, + { + "epoch": 2.71, + "learning_rate": 3.704862560254012e-05, + "loss": 1.2234, + "step": 4524000 + }, + { + "epoch": 2.71, + "learning_rate": 3.704652563697956e-05, + "loss": 1.2273, + "step": 4524500 + }, + { + "epoch": 2.71, + "learning_rate": 3.7044425671418984e-05, + "loss": 1.2013, + "step": 4525000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7042329905789545e-05, + "loss": 1.228, + "step": 4525500 + }, + { + "epoch": 2.71, + "learning_rate": 3.704022994022898e-05, + "loss": 1.2217, + "step": 4526000 + }, + { + "epoch": 2.71, + "learning_rate": 3.703812997466842e-05, + "loss": 1.2027, + "step": 4526500 + }, + { + "epoch": 2.71, + "learning_rate": 3.703603000910785e-05, + "loss": 1.1982, + "step": 4527000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7033930043547285e-05, + "loss": 1.1679, + "step": 4527500 + }, + { + "epoch": 2.71, + "learning_rate": 3.703183847784896e-05, + "loss": 1.2017, + "step": 4528000 + }, + { + "epoch": 2.72, + "learning_rate": 3.702974271221952e-05, + "loss": 1.2312, + "step": 4528500 + }, + { + "epoch": 2.72, + "learning_rate": 3.702764274665895e-05, + "loss": 1.2109, + "step": 4529000 + }, + { + "epoch": 2.72, + "learning_rate": 3.7025542781098386e-05, + "loss": 1.2252, + "step": 4529500 + }, + { + "epoch": 2.72, + "learning_rate": 3.702344281553782e-05, + "loss": 1.2068, + "step": 4530000 + }, + { + "epoch": 2.72, + "learning_rate": 3.702134284997726e-05, + "loss": 1.1873, + "step": 4530500 + }, + { + "epoch": 2.72, + "learning_rate": 3.701924288441669e-05, + "loss": 1.2113, + "step": 4531000 + }, + { + "epoch": 2.72, + "learning_rate": 3.701714291885613e-05, + "loss": 1.2069, + "step": 4531500 + }, + { + "epoch": 2.72, + "learning_rate": 3.701504295329557e-05, + "loss": 1.1964, + "step": 4532000 + }, + { + "epoch": 2.72, + "learning_rate": 3.7012942987735e-05, + "loss": 1.2276, + "step": 4532500 + }, + { + "epoch": 2.72, + "learning_rate": 3.7010843022174434e-05, + "loss": 1.2111, + "step": 4533000 + }, + { + "epoch": 2.72, + "learning_rate": 3.7008743056613874e-05, + "loss": 1.1891, + "step": 4533500 + }, + { + "epoch": 2.72, + "learning_rate": 3.700664309105331e-05, + "loss": 1.221, + "step": 4534000 + }, + { + "epoch": 2.72, + "learning_rate": 3.700454732542386e-05, + "loss": 1.2013, + "step": 4534500 + }, + { + "epoch": 2.72, + "learning_rate": 3.7002451559794415e-05, + "loss": 1.2063, + "step": 4535000 + }, + { + "epoch": 2.72, + "learning_rate": 3.7000351594233855e-05, + "loss": 1.214, + "step": 4535500 + }, + { + "epoch": 2.72, + "learning_rate": 3.699825162867329e-05, + "loss": 1.191, + "step": 4536000 + }, + { + "epoch": 2.72, + "learning_rate": 3.699615166311272e-05, + "loss": 1.2092, + "step": 4536500 + }, + { + "epoch": 2.72, + "learning_rate": 3.699405169755216e-05, + "loss": 1.2009, + "step": 4537000 + }, + { + "epoch": 2.72, + "learning_rate": 3.6991955931922716e-05, + "loss": 1.1854, + "step": 4537500 + }, + { + "epoch": 2.72, + "learning_rate": 3.698985596636215e-05, + "loss": 1.2093, + "step": 4538000 + }, + { + "epoch": 2.72, + "learning_rate": 3.698775600080158e-05, + "loss": 1.2152, + "step": 4538500 + }, + { + "epoch": 2.72, + "learning_rate": 3.698566023517214e-05, + "loss": 1.1986, + "step": 4539000 + }, + { + "epoch": 2.72, + "learning_rate": 3.6983560269611576e-05, + "loss": 1.1942, + "step": 4539500 + }, + { + "epoch": 2.72, + "learning_rate": 3.698146030405101e-05, + "loss": 1.2155, + "step": 4540000 + }, + { + "epoch": 2.72, + "learning_rate": 3.697936033849045e-05, + "loss": 1.2098, + "step": 4540500 + }, + { + "epoch": 2.72, + "learning_rate": 3.6977260372929884e-05, + "loss": 1.1877, + "step": 4541000 + }, + { + "epoch": 2.72, + "learning_rate": 3.697516040736932e-05, + "loss": 1.1985, + "step": 4541500 + }, + { + "epoch": 2.72, + "learning_rate": 3.697306044180876e-05, + "loss": 1.2029, + "step": 4542000 + }, + { + "epoch": 2.72, + "learning_rate": 3.697096047624819e-05, + "loss": 1.2008, + "step": 4542500 + }, + { + "epoch": 2.72, + "learning_rate": 3.6968860510687624e-05, + "loss": 1.193, + "step": 4543000 + }, + { + "epoch": 2.72, + "learning_rate": 3.6966760545127064e-05, + "loss": 1.2343, + "step": 4543500 + }, + { + "epoch": 2.72, + "learning_rate": 3.69646605795665e-05, + "loss": 1.2487, + "step": 4544000 + }, + { + "epoch": 2.72, + "learning_rate": 3.696256481393705e-05, + "loss": 1.193, + "step": 4544500 + }, + { + "epoch": 2.72, + "learning_rate": 3.6960464848376485e-05, + "loss": 1.2261, + "step": 4545000 + }, + { + "epoch": 2.73, + "learning_rate": 3.6958364882815925e-05, + "loss": 1.1965, + "step": 4545500 + }, + { + "epoch": 2.73, + "learning_rate": 3.695626491725536e-05, + "loss": 1.2251, + "step": 4546000 + }, + { + "epoch": 2.73, + "learning_rate": 3.6954164951694785e-05, + "loss": 1.2047, + "step": 4546500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6952064986134226e-05, + "loss": 1.2077, + "step": 4547000 + }, + { + "epoch": 2.73, + "learning_rate": 3.694996502057366e-05, + "loss": 1.2017, + "step": 4547500 + }, + { + "epoch": 2.73, + "learning_rate": 3.694786925494422e-05, + "loss": 1.1888, + "step": 4548000 + }, + { + "epoch": 2.73, + "learning_rate": 3.694576928938366e-05, + "loss": 1.2027, + "step": 4548500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6943669323823086e-05, + "loss": 1.1987, + "step": 4549000 + }, + { + "epoch": 2.73, + "learning_rate": 3.694156935826252e-05, + "loss": 1.2177, + "step": 4549500 + }, + { + "epoch": 2.73, + "learning_rate": 3.693947359263308e-05, + "loss": 1.1962, + "step": 4550000 + }, + { + "epoch": 2.73, + "learning_rate": 3.693737362707252e-05, + "loss": 1.17, + "step": 4550500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6935273661511954e-05, + "loss": 1.1927, + "step": 4551000 + }, + { + "epoch": 2.73, + "learning_rate": 3.693317369595138e-05, + "loss": 1.2085, + "step": 4551500 + }, + { + "epoch": 2.73, + "learning_rate": 3.693107373039082e-05, + "loss": 1.2002, + "step": 4552000 + }, + { + "epoch": 2.73, + "learning_rate": 3.692897796476138e-05, + "loss": 1.2013, + "step": 4552500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6926877999200815e-05, + "loss": 1.203, + "step": 4553000 + }, + { + "epoch": 2.73, + "learning_rate": 3.692477803364025e-05, + "loss": 1.223, + "step": 4553500 + }, + { + "epoch": 2.73, + "learning_rate": 3.692267806807968e-05, + "loss": 1.1881, + "step": 4554000 + }, + { + "epoch": 2.73, + "learning_rate": 3.6920578102519115e-05, + "loss": 1.2013, + "step": 4554500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6918482336889675e-05, + "loss": 1.2175, + "step": 4555000 + }, + { + "epoch": 2.73, + "learning_rate": 3.6916382371329115e-05, + "loss": 1.1975, + "step": 4555500 + }, + { + "epoch": 2.73, + "learning_rate": 3.691428240576854e-05, + "loss": 1.2009, + "step": 4556000 + }, + { + "epoch": 2.73, + "learning_rate": 3.6912182440207976e-05, + "loss": 1.2237, + "step": 4556500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6910082474647416e-05, + "loss": 1.2099, + "step": 4557000 + }, + { + "epoch": 2.73, + "learning_rate": 3.690798250908685e-05, + "loss": 1.1489, + "step": 4557500 + }, + { + "epoch": 2.73, + "learning_rate": 3.690588254352628e-05, + "loss": 1.2198, + "step": 4558000 + }, + { + "epoch": 2.73, + "learning_rate": 3.690378257796572e-05, + "loss": 1.1984, + "step": 4558500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6901686812336277e-05, + "loss": 1.2234, + "step": 4559000 + }, + { + "epoch": 2.73, + "learning_rate": 3.689958684677571e-05, + "loss": 1.1995, + "step": 4559500 + }, + { + "epoch": 2.73, + "learning_rate": 3.689749108114627e-05, + "loss": 1.2181, + "step": 4560000 + }, + { + "epoch": 2.73, + "learning_rate": 3.6895391115585704e-05, + "loss": 1.209, + "step": 4560500 + }, + { + "epoch": 2.73, + "learning_rate": 3.689329115002514e-05, + "loss": 1.208, + "step": 4561000 + }, + { + "epoch": 2.73, + "learning_rate": 3.689119118446457e-05, + "loss": 1.1806, + "step": 4561500 + }, + { + "epoch": 2.74, + "learning_rate": 3.688909541883513e-05, + "loss": 1.2011, + "step": 4562000 + }, + { + "epoch": 2.74, + "learning_rate": 3.688699545327457e-05, + "loss": 1.2142, + "step": 4562500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6884895487714005e-05, + "loss": 1.1844, + "step": 4563000 + }, + { + "epoch": 2.74, + "learning_rate": 3.688279552215343e-05, + "loss": 1.2255, + "step": 4563500 + }, + { + "epoch": 2.74, + "learning_rate": 3.688069975652399e-05, + "loss": 1.2144, + "step": 4564000 + }, + { + "epoch": 2.74, + "learning_rate": 3.687859979096343e-05, + "loss": 1.2006, + "step": 4564500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6876499825402866e-05, + "loss": 1.1816, + "step": 4565000 + }, + { + "epoch": 2.74, + "learning_rate": 3.687439985984229e-05, + "loss": 1.2074, + "step": 4565500 + }, + { + "epoch": 2.74, + "learning_rate": 3.687229989428173e-05, + "loss": 1.217, + "step": 4566000 + }, + { + "epoch": 2.74, + "learning_rate": 3.687020412865229e-05, + "loss": 1.174, + "step": 4566500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6868104163091726e-05, + "loss": 1.2064, + "step": 4567000 + }, + { + "epoch": 2.74, + "learning_rate": 3.686600419753116e-05, + "loss": 1.2314, + "step": 4567500 + }, + { + "epoch": 2.74, + "learning_rate": 3.686390423197059e-05, + "loss": 1.1926, + "step": 4568000 + }, + { + "epoch": 2.74, + "learning_rate": 3.686180426641003e-05, + "loss": 1.2192, + "step": 4568500 + }, + { + "epoch": 2.74, + "learning_rate": 3.685970850078059e-05, + "loss": 1.2161, + "step": 4569000 + }, + { + "epoch": 2.74, + "learning_rate": 3.685760853522003e-05, + "loss": 1.1912, + "step": 4569500 + }, + { + "epoch": 2.74, + "learning_rate": 3.685550856965946e-05, + "loss": 1.2363, + "step": 4570000 + }, + { + "epoch": 2.74, + "learning_rate": 3.685340860409889e-05, + "loss": 1.2171, + "step": 4570500 + }, + { + "epoch": 2.74, + "learning_rate": 3.685131283846945e-05, + "loss": 1.1836, + "step": 4571000 + }, + { + "epoch": 2.74, + "learning_rate": 3.684921707284e-05, + "loss": 1.2071, + "step": 4571500 + }, + { + "epoch": 2.74, + "learning_rate": 3.684711710727944e-05, + "loss": 1.1999, + "step": 4572000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6845017141718875e-05, + "loss": 1.2004, + "step": 4572500 + }, + { + "epoch": 2.74, + "learning_rate": 3.684291717615831e-05, + "loss": 1.2007, + "step": 4573000 + }, + { + "epoch": 2.74, + "learning_rate": 3.684081721059775e-05, + "loss": 1.1962, + "step": 4573500 + }, + { + "epoch": 2.74, + "learning_rate": 3.683871724503718e-05, + "loss": 1.2038, + "step": 4574000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6836617279476616e-05, + "loss": 1.2125, + "step": 4574500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6834521513847176e-05, + "loss": 1.1972, + "step": 4575000 + }, + { + "epoch": 2.74, + "learning_rate": 3.683242154828661e-05, + "loss": 1.2274, + "step": 4575500 + }, + { + "epoch": 2.74, + "learning_rate": 3.683032158272604e-05, + "loss": 1.2047, + "step": 4576000 + }, + { + "epoch": 2.74, + "learning_rate": 3.682822161716548e-05, + "loss": 1.2195, + "step": 4576500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6826121651604917e-05, + "loss": 1.2151, + "step": 4577000 + }, + { + "epoch": 2.74, + "learning_rate": 3.682402168604434e-05, + "loss": 1.206, + "step": 4577500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6821925920414904e-05, + "loss": 1.2157, + "step": 4578000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6819825954854344e-05, + "loss": 1.1932, + "step": 4578500 + }, + { + "epoch": 2.75, + "learning_rate": 3.681772598929378e-05, + "loss": 1.202, + "step": 4579000 + }, + { + "epoch": 2.75, + "learning_rate": 3.681562602373321e-05, + "loss": 1.2344, + "step": 4579500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6813526058172644e-05, + "loss": 1.2045, + "step": 4580000 + }, + { + "epoch": 2.75, + "learning_rate": 3.681142609261208e-05, + "loss": 1.2112, + "step": 4580500 + }, + { + "epoch": 2.75, + "learning_rate": 3.680932612705151e-05, + "loss": 1.2218, + "step": 4581000 + }, + { + "epoch": 2.75, + "learning_rate": 3.680722616149095e-05, + "loss": 1.2245, + "step": 4581500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6805126195930385e-05, + "loss": 1.1939, + "step": 4582000 + }, + { + "epoch": 2.75, + "learning_rate": 3.680302623036982e-05, + "loss": 1.1896, + "step": 4582500 + }, + { + "epoch": 2.75, + "learning_rate": 3.680092626480926e-05, + "loss": 1.1903, + "step": 4583000 + }, + { + "epoch": 2.75, + "learning_rate": 3.679882629924869e-05, + "loss": 1.1853, + "step": 4583500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6796730533619246e-05, + "loss": 1.208, + "step": 4584000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6794634767989806e-05, + "loss": 1.2015, + "step": 4584500 + }, + { + "epoch": 2.75, + "learning_rate": 3.679253480242924e-05, + "loss": 1.215, + "step": 4585000 + }, + { + "epoch": 2.75, + "learning_rate": 3.679043483686867e-05, + "loss": 1.2204, + "step": 4585500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6788334871308106e-05, + "loss": 1.2246, + "step": 4586000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6786234905747547e-05, + "loss": 1.1694, + "step": 4586500 + }, + { + "epoch": 2.75, + "learning_rate": 3.67841391401181e-05, + "loss": 1.1844, + "step": 4587000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6782039174557534e-05, + "loss": 1.203, + "step": 4587500 + }, + { + "epoch": 2.75, + "learning_rate": 3.677993920899697e-05, + "loss": 1.1947, + "step": 4588000 + }, + { + "epoch": 2.75, + "learning_rate": 3.677783924343641e-05, + "loss": 1.1909, + "step": 4588500 + }, + { + "epoch": 2.75, + "learning_rate": 3.677573927787584e-05, + "loss": 1.1864, + "step": 4589000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6773643512246394e-05, + "loss": 1.1993, + "step": 4589500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6771547746616955e-05, + "loss": 1.2095, + "step": 4590000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6769447781056395e-05, + "loss": 1.194, + "step": 4590500 + }, + { + "epoch": 2.75, + "learning_rate": 3.676734781549583e-05, + "loss": 1.1733, + "step": 4591000 + }, + { + "epoch": 2.75, + "learning_rate": 3.676524784993526e-05, + "loss": 1.2103, + "step": 4591500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6763147884374695e-05, + "loss": 1.1967, + "step": 4592000 + }, + { + "epoch": 2.75, + "learning_rate": 3.676104791881413e-05, + "loss": 1.1865, + "step": 4592500 + }, + { + "epoch": 2.75, + "learning_rate": 3.675894795325356e-05, + "loss": 1.2106, + "step": 4593000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6756847987693e-05, + "loss": 1.1816, + "step": 4593500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6754748022132436e-05, + "loss": 1.2173, + "step": 4594000 + }, + { + "epoch": 2.75, + "learning_rate": 3.675265225650299e-05, + "loss": 1.2146, + "step": 4594500 + }, + { + "epoch": 2.75, + "learning_rate": 3.675055229094242e-05, + "loss": 1.2085, + "step": 4595000 + }, + { + "epoch": 2.76, + "learning_rate": 3.674845232538186e-05, + "loss": 1.2103, + "step": 4595500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6746352359821297e-05, + "loss": 1.2127, + "step": 4596000 + }, + { + "epoch": 2.76, + "learning_rate": 3.674425239426073e-05, + "loss": 1.2178, + "step": 4596500 + }, + { + "epoch": 2.76, + "learning_rate": 3.674215242870017e-05, + "loss": 1.1744, + "step": 4597000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6740056663070724e-05, + "loss": 1.2118, + "step": 4597500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6737960897441284e-05, + "loss": 1.193, + "step": 4598000 + }, + { + "epoch": 2.76, + "learning_rate": 3.673586093188072e-05, + "loss": 1.1942, + "step": 4598500 + }, + { + "epoch": 2.76, + "learning_rate": 3.673376096632015e-05, + "loss": 1.2093, + "step": 4599000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6731661000759585e-05, + "loss": 1.1747, + "step": 4599500 + }, + { + "epoch": 2.76, + "learning_rate": 3.672956103519902e-05, + "loss": 1.2163, + "step": 4600000 + }, + { + "epoch": 2.76, + "eval_loss": 1.163139820098877, + "eval_runtime": 1103.2733, + "eval_samples_per_second": 477.416, + "eval_steps_per_second": 79.57, + "step": 4600000 + }, + { + "epoch": 2.76, + "learning_rate": 3.672746106963846e-05, + "loss": 1.2211, + "step": 4600500 + }, + { + "epoch": 2.76, + "learning_rate": 3.672536110407789e-05, + "loss": 1.204, + "step": 4601000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6723261138517325e-05, + "loss": 1.2014, + "step": 4601500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6721161172956765e-05, + "loss": 1.2161, + "step": 4602000 + }, + { + "epoch": 2.76, + "learning_rate": 3.671906540732732e-05, + "loss": 1.2148, + "step": 4602500 + }, + { + "epoch": 2.76, + "learning_rate": 3.671696544176675e-05, + "loss": 1.1951, + "step": 4603000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6714865476206186e-05, + "loss": 1.2089, + "step": 4603500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6712765510645626e-05, + "loss": 1.2359, + "step": 4604000 + }, + { + "epoch": 2.76, + "learning_rate": 3.671066554508506e-05, + "loss": 1.1791, + "step": 4604500 + }, + { + "epoch": 2.76, + "learning_rate": 3.670856557952449e-05, + "loss": 1.2236, + "step": 4605000 + }, + { + "epoch": 2.76, + "learning_rate": 3.670646561396393e-05, + "loss": 1.1952, + "step": 4605500 + }, + { + "epoch": 2.76, + "learning_rate": 3.670436564840337e-05, + "loss": 1.1927, + "step": 4606000 + }, + { + "epoch": 2.76, + "learning_rate": 3.670226988277392e-05, + "loss": 1.1981, + "step": 4606500 + }, + { + "epoch": 2.76, + "learning_rate": 3.670016991721336e-05, + "loss": 1.1933, + "step": 4607000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6698069951652794e-05, + "loss": 1.215, + "step": 4607500 + }, + { + "epoch": 2.76, + "learning_rate": 3.669596998609223e-05, + "loss": 1.2234, + "step": 4608000 + }, + { + "epoch": 2.76, + "learning_rate": 3.66938784203939e-05, + "loss": 1.1973, + "step": 4608500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6691778454833335e-05, + "loss": 1.2188, + "step": 4609000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6689678489272775e-05, + "loss": 1.1961, + "step": 4609500 + }, + { + "epoch": 2.76, + "learning_rate": 3.668757852371221e-05, + "loss": 1.2124, + "step": 4610000 + }, + { + "epoch": 2.76, + "learning_rate": 3.668547855815164e-05, + "loss": 1.1954, + "step": 4610500 + }, + { + "epoch": 2.76, + "learning_rate": 3.66833827925222e-05, + "loss": 1.217, + "step": 4611000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6681282826961636e-05, + "loss": 1.2008, + "step": 4611500 + }, + { + "epoch": 2.77, + "learning_rate": 3.667918286140107e-05, + "loss": 1.199, + "step": 4612000 + }, + { + "epoch": 2.77, + "learning_rate": 3.667708289584051e-05, + "loss": 1.1947, + "step": 4612500 + }, + { + "epoch": 2.77, + "learning_rate": 3.667498293027994e-05, + "loss": 1.1845, + "step": 4613000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6672882964719376e-05, + "loss": 1.2102, + "step": 4613500 + }, + { + "epoch": 2.77, + "learning_rate": 3.667078719908993e-05, + "loss": 1.2148, + "step": 4614000 + }, + { + "epoch": 2.77, + "learning_rate": 3.666868723352937e-05, + "loss": 1.1992, + "step": 4614500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6666587267968804e-05, + "loss": 1.1759, + "step": 4615000 + }, + { + "epoch": 2.77, + "learning_rate": 3.666448730240824e-05, + "loss": 1.1837, + "step": 4615500 + }, + { + "epoch": 2.77, + "learning_rate": 3.666238733684768e-05, + "loss": 1.202, + "step": 4616000 + }, + { + "epoch": 2.77, + "learning_rate": 3.666028737128711e-05, + "loss": 1.1811, + "step": 4616500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6658187405726544e-05, + "loss": 1.1783, + "step": 4617000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6656087440165984e-05, + "loss": 1.1965, + "step": 4617500 + }, + { + "epoch": 2.77, + "learning_rate": 3.665399167453654e-05, + "loss": 1.2006, + "step": 4618000 + }, + { + "epoch": 2.77, + "learning_rate": 3.665189170897597e-05, + "loss": 1.2456, + "step": 4618500 + }, + { + "epoch": 2.77, + "learning_rate": 3.664979174341541e-05, + "loss": 1.1703, + "step": 4619000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6647691777854845e-05, + "loss": 1.1583, + "step": 4619500 + }, + { + "epoch": 2.77, + "learning_rate": 3.66455960122254e-05, + "loss": 1.2215, + "step": 4620000 + }, + { + "epoch": 2.77, + "learning_rate": 3.664349604666483e-05, + "loss": 1.2196, + "step": 4620500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6641400281035386e-05, + "loss": 1.1872, + "step": 4621000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6639300315474826e-05, + "loss": 1.2154, + "step": 4621500 + }, + { + "epoch": 2.77, + "learning_rate": 3.663720034991426e-05, + "loss": 1.2243, + "step": 4622000 + }, + { + "epoch": 2.77, + "learning_rate": 3.663510038435369e-05, + "loss": 1.2142, + "step": 4622500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6633004618724246e-05, + "loss": 1.193, + "step": 4623000 + }, + { + "epoch": 2.77, + "learning_rate": 3.663090465316369e-05, + "loss": 1.2032, + "step": 4623500 + }, + { + "epoch": 2.77, + "learning_rate": 3.662880468760312e-05, + "loss": 1.2135, + "step": 4624000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6626704722042554e-05, + "loss": 1.2345, + "step": 4624500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6624604756481994e-05, + "loss": 1.2111, + "step": 4625000 + }, + { + "epoch": 2.77, + "learning_rate": 3.662250899085255e-05, + "loss": 1.2073, + "step": 4625500 + }, + { + "epoch": 2.77, + "learning_rate": 3.662040902529198e-05, + "loss": 1.2208, + "step": 4626000 + }, + { + "epoch": 2.77, + "learning_rate": 3.661830905973142e-05, + "loss": 1.1991, + "step": 4626500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6616209094170855e-05, + "loss": 1.1907, + "step": 4627000 + }, + { + "epoch": 2.77, + "learning_rate": 3.661410912861029e-05, + "loss": 1.209, + "step": 4627500 + }, + { + "epoch": 2.77, + "learning_rate": 3.661200916304973e-05, + "loss": 1.1826, + "step": 4628000 + }, + { + "epoch": 2.77, + "learning_rate": 3.660990919748916e-05, + "loss": 1.2014, + "step": 4628500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6607809231928595e-05, + "loss": 1.2212, + "step": 4629000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6605717666230276e-05, + "loss": 1.1959, + "step": 4629500 + }, + { + "epoch": 2.78, + "learning_rate": 3.66036177006697e-05, + "loss": 1.2147, + "step": 4630000 + }, + { + "epoch": 2.78, + "learning_rate": 3.660151773510914e-05, + "loss": 1.2333, + "step": 4630500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6599417769548576e-05, + "loss": 1.2151, + "step": 4631000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6597322003919136e-05, + "loss": 1.2335, + "step": 4631500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6595222038358577e-05, + "loss": 1.2113, + "step": 4632000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6593122072798e-05, + "loss": 1.2189, + "step": 4632500 + }, + { + "epoch": 2.78, + "learning_rate": 3.659102210723744e-05, + "loss": 1.2152, + "step": 4633000 + }, + { + "epoch": 2.78, + "learning_rate": 3.658892214167688e-05, + "loss": 1.1874, + "step": 4633500 + }, + { + "epoch": 2.78, + "learning_rate": 3.658682217611631e-05, + "loss": 1.1624, + "step": 4634000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6584722210555744e-05, + "loss": 1.1837, + "step": 4634500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6582622244995184e-05, + "loss": 1.2007, + "step": 4635000 + }, + { + "epoch": 2.78, + "learning_rate": 3.658052227943462e-05, + "loss": 1.1904, + "step": 4635500 + }, + { + "epoch": 2.78, + "learning_rate": 3.657842231387405e-05, + "loss": 1.1769, + "step": 4636000 + }, + { + "epoch": 2.78, + "learning_rate": 3.657633074817573e-05, + "loss": 1.2046, + "step": 4636500 + }, + { + "epoch": 2.78, + "learning_rate": 3.657423078261516e-05, + "loss": 1.1649, + "step": 4637000 + }, + { + "epoch": 2.78, + "learning_rate": 3.65721308170546e-05, + "loss": 1.1793, + "step": 4637500 + }, + { + "epoch": 2.78, + "learning_rate": 3.657003085149403e-05, + "loss": 1.2177, + "step": 4638000 + }, + { + "epoch": 2.78, + "learning_rate": 3.656793088593347e-05, + "loss": 1.161, + "step": 4638500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6565830920372906e-05, + "loss": 1.192, + "step": 4639000 + }, + { + "epoch": 2.78, + "learning_rate": 3.656373095481234e-05, + "loss": 1.205, + "step": 4639500 + }, + { + "epoch": 2.78, + "learning_rate": 3.656163098925178e-05, + "loss": 1.181, + "step": 4640000 + }, + { + "epoch": 2.78, + "learning_rate": 3.655953102369121e-05, + "loss": 1.1877, + "step": 4640500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6557435258061766e-05, + "loss": 1.2009, + "step": 4641000 + }, + { + "epoch": 2.78, + "learning_rate": 3.65553352925012e-05, + "loss": 1.2092, + "step": 4641500 + }, + { + "epoch": 2.78, + "learning_rate": 3.655323532694064e-05, + "loss": 1.1991, + "step": 4642000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6551135361380073e-05, + "loss": 1.1978, + "step": 4642500 + }, + { + "epoch": 2.78, + "learning_rate": 3.654903539581951e-05, + "loss": 1.1945, + "step": 4643000 + }, + { + "epoch": 2.78, + "learning_rate": 3.654693543025895e-05, + "loss": 1.186, + "step": 4643500 + }, + { + "epoch": 2.78, + "learning_rate": 3.654483546469838e-05, + "loss": 1.1879, + "step": 4644000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6542735499137814e-05, + "loss": 1.1983, + "step": 4644500 + }, + { + "epoch": 2.78, + "learning_rate": 3.654063973350837e-05, + "loss": 1.2077, + "step": 4645000 + }, + { + "epoch": 2.79, + "learning_rate": 3.653854396787893e-05, + "loss": 1.2137, + "step": 4645500 + }, + { + "epoch": 2.79, + "learning_rate": 3.653644400231836e-05, + "loss": 1.2147, + "step": 4646000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6534344036757795e-05, + "loss": 1.2019, + "step": 4646500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6532244071197235e-05, + "loss": 1.2145, + "step": 4647000 + }, + { + "epoch": 2.79, + "learning_rate": 3.653014410563667e-05, + "loss": 1.2013, + "step": 4647500 + }, + { + "epoch": 2.79, + "learning_rate": 3.65280441400761e-05, + "loss": 1.1908, + "step": 4648000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6525948374446656e-05, + "loss": 1.2172, + "step": 4648500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6523848408886096e-05, + "loss": 1.207, + "step": 4649000 + }, + { + "epoch": 2.79, + "learning_rate": 3.652174844332553e-05, + "loss": 1.2157, + "step": 4649500 + }, + { + "epoch": 2.79, + "learning_rate": 3.651965267769608e-05, + "loss": 1.2491, + "step": 4650000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6517552712135516e-05, + "loss": 1.2084, + "step": 4650500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6515452746574957e-05, + "loss": 1.1814, + "step": 4651000 + }, + { + "epoch": 2.79, + "learning_rate": 3.651335278101439e-05, + "loss": 1.1819, + "step": 4651500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6511252815453824e-05, + "loss": 1.2158, + "step": 4652000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6509152849893264e-05, + "loss": 1.1715, + "step": 4652500 + }, + { + "epoch": 2.79, + "learning_rate": 3.65070528843327e-05, + "loss": 1.1916, + "step": 4653000 + }, + { + "epoch": 2.79, + "learning_rate": 3.650495291877213e-05, + "loss": 1.251, + "step": 4653500 + }, + { + "epoch": 2.79, + "learning_rate": 3.650285295321157e-05, + "loss": 1.1896, + "step": 4654000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6500761387513245e-05, + "loss": 1.1931, + "step": 4654500 + }, + { + "epoch": 2.79, + "learning_rate": 3.649866142195268e-05, + "loss": 1.199, + "step": 4655000 + }, + { + "epoch": 2.79, + "learning_rate": 3.649656145639211e-05, + "loss": 1.2088, + "step": 4655500 + }, + { + "epoch": 2.79, + "learning_rate": 3.649446149083155e-05, + "loss": 1.1944, + "step": 4656000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6492361525270985e-05, + "loss": 1.2331, + "step": 4656500 + }, + { + "epoch": 2.79, + "learning_rate": 3.649026155971042e-05, + "loss": 1.1861, + "step": 4657000 + }, + { + "epoch": 2.79, + "learning_rate": 3.648816159414986e-05, + "loss": 1.1927, + "step": 4657500 + }, + { + "epoch": 2.79, + "learning_rate": 3.648606582852041e-05, + "loss": 1.248, + "step": 4658000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6483965862959846e-05, + "loss": 1.2187, + "step": 4658500 + }, + { + "epoch": 2.79, + "learning_rate": 3.648186589739928e-05, + "loss": 1.1919, + "step": 4659000 + }, + { + "epoch": 2.79, + "learning_rate": 3.647976593183872e-05, + "loss": 1.1589, + "step": 4659500 + }, + { + "epoch": 2.79, + "learning_rate": 3.647766596627815e-05, + "loss": 1.2003, + "step": 4660000 + }, + { + "epoch": 2.79, + "learning_rate": 3.647557020064871e-05, + "loss": 1.1919, + "step": 4660500 + }, + { + "epoch": 2.79, + "learning_rate": 3.647347023508815e-05, + "loss": 1.2178, + "step": 4661000 + }, + { + "epoch": 2.79, + "learning_rate": 3.647137026952758e-05, + "loss": 1.2156, + "step": 4661500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6469270303967014e-05, + "loss": 1.2119, + "step": 4662000 + }, + { + "epoch": 2.8, + "learning_rate": 3.6467170338406454e-05, + "loss": 1.1729, + "step": 4662500 + }, + { + "epoch": 2.8, + "learning_rate": 3.646507037284589e-05, + "loss": 1.1975, + "step": 4663000 + }, + { + "epoch": 2.8, + "learning_rate": 3.646297040728532e-05, + "loss": 1.2193, + "step": 4663500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6460870441724754e-05, + "loss": 1.1868, + "step": 4664000 + }, + { + "epoch": 2.8, + "learning_rate": 3.645877047616419e-05, + "loss": 1.2178, + "step": 4664500 + }, + { + "epoch": 2.8, + "learning_rate": 3.645667051060362e-05, + "loss": 1.1859, + "step": 4665000 + }, + { + "epoch": 2.8, + "learning_rate": 3.645457054504306e-05, + "loss": 1.2217, + "step": 4665500 + }, + { + "epoch": 2.8, + "learning_rate": 3.645247477941362e-05, + "loss": 1.1866, + "step": 4666000 + }, + { + "epoch": 2.8, + "learning_rate": 3.645037481385305e-05, + "loss": 1.2174, + "step": 4666500 + }, + { + "epoch": 2.8, + "learning_rate": 3.644827484829248e-05, + "loss": 1.1947, + "step": 4667000 + }, + { + "epoch": 2.8, + "learning_rate": 3.644617488273192e-05, + "loss": 1.1833, + "step": 4667500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6444074917171356e-05, + "loss": 1.182, + "step": 4668000 + }, + { + "epoch": 2.8, + "learning_rate": 3.6441979151541916e-05, + "loss": 1.2013, + "step": 4668500 + }, + { + "epoch": 2.8, + "learning_rate": 3.643987918598135e-05, + "loss": 1.1957, + "step": 4669000 + }, + { + "epoch": 2.8, + "learning_rate": 3.643778342035191e-05, + "loss": 1.2004, + "step": 4669500 + }, + { + "epoch": 2.8, + "learning_rate": 3.643568345479134e-05, + "loss": 1.2074, + "step": 4670000 + }, + { + "epoch": 2.8, + "learning_rate": 3.643358348923078e-05, + "loss": 1.1961, + "step": 4670500 + }, + { + "epoch": 2.8, + "learning_rate": 3.643148352367022e-05, + "loss": 1.1886, + "step": 4671000 + }, + { + "epoch": 2.8, + "learning_rate": 3.6429383558109644e-05, + "loss": 1.1983, + "step": 4671500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6427287792480204e-05, + "loss": 1.2227, + "step": 4672000 + }, + { + "epoch": 2.8, + "learning_rate": 3.642518782691964e-05, + "loss": 1.2107, + "step": 4672500 + }, + { + "epoch": 2.8, + "learning_rate": 3.642308786135908e-05, + "loss": 1.2124, + "step": 4673000 + }, + { + "epoch": 2.8, + "learning_rate": 3.6420987895798504e-05, + "loss": 1.2263, + "step": 4673500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6418892130169065e-05, + "loss": 1.2343, + "step": 4674000 + }, + { + "epoch": 2.8, + "learning_rate": 3.64167921646085e-05, + "loss": 1.189, + "step": 4674500 + }, + { + "epoch": 2.8, + "learning_rate": 3.641469219904794e-05, + "loss": 1.22, + "step": 4675000 + }, + { + "epoch": 2.8, + "learning_rate": 3.641259223348737e-05, + "loss": 1.2115, + "step": 4675500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6410492267926805e-05, + "loss": 1.2095, + "step": 4676000 + }, + { + "epoch": 2.8, + "learning_rate": 3.640839230236624e-05, + "loss": 1.2128, + "step": 4676500 + }, + { + "epoch": 2.8, + "learning_rate": 3.640629233680567e-05, + "loss": 1.2167, + "step": 4677000 + }, + { + "epoch": 2.8, + "learning_rate": 3.640419237124511e-05, + "loss": 1.2097, + "step": 4677500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6402092405684546e-05, + "loss": 1.1971, + "step": 4678000 + }, + { + "epoch": 2.8, + "learning_rate": 3.639999244012398e-05, + "loss": 1.1878, + "step": 4678500 + }, + { + "epoch": 2.81, + "learning_rate": 3.639789247456342e-05, + "loss": 1.2122, + "step": 4679000 + }, + { + "epoch": 2.81, + "learning_rate": 3.639579250900285e-05, + "loss": 1.1921, + "step": 4679500 + }, + { + "epoch": 2.81, + "learning_rate": 3.639369254344229e-05, + "loss": 1.2129, + "step": 4680000 + }, + { + "epoch": 2.81, + "learning_rate": 3.639159257788173e-05, + "loss": 1.1916, + "step": 4680500 + }, + { + "epoch": 2.81, + "learning_rate": 3.638949261232116e-05, + "loss": 1.1941, + "step": 4681000 + }, + { + "epoch": 2.81, + "learning_rate": 3.638739264676059e-05, + "loss": 1.202, + "step": 4681500 + }, + { + "epoch": 2.81, + "learning_rate": 3.638529688113115e-05, + "loss": 1.2154, + "step": 4682000 + }, + { + "epoch": 2.81, + "learning_rate": 3.638319691557059e-05, + "loss": 1.2032, + "step": 4682500 + }, + { + "epoch": 2.81, + "learning_rate": 3.638109695001002e-05, + "loss": 1.1855, + "step": 4683000 + }, + { + "epoch": 2.81, + "learning_rate": 3.6378996984449455e-05, + "loss": 1.1944, + "step": 4683500 + }, + { + "epoch": 2.81, + "learning_rate": 3.637690121882001e-05, + "loss": 1.2142, + "step": 4684000 + }, + { + "epoch": 2.81, + "learning_rate": 3.637480125325945e-05, + "loss": 1.204, + "step": 4684500 + }, + { + "epoch": 2.81, + "learning_rate": 3.637270128769888e-05, + "loss": 1.1987, + "step": 4685000 + }, + { + "epoch": 2.81, + "learning_rate": 3.637060132213832e-05, + "loss": 1.2046, + "step": 4685500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6368501356577756e-05, + "loss": 1.202, + "step": 4686000 + }, + { + "epoch": 2.81, + "learning_rate": 3.636640559094831e-05, + "loss": 1.2092, + "step": 4686500 + }, + { + "epoch": 2.81, + "learning_rate": 3.636430562538774e-05, + "loss": 1.1874, + "step": 4687000 + }, + { + "epoch": 2.81, + "learning_rate": 3.636220565982718e-05, + "loss": 1.2251, + "step": 4687500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6360105694266616e-05, + "loss": 1.1958, + "step": 4688000 + }, + { + "epoch": 2.81, + "learning_rate": 3.635800992863717e-05, + "loss": 1.2091, + "step": 4688500 + }, + { + "epoch": 2.81, + "learning_rate": 3.63559099630766e-05, + "loss": 1.1761, + "step": 4689000 + }, + { + "epoch": 2.81, + "learning_rate": 3.6353809997516044e-05, + "loss": 1.1953, + "step": 4689500 + }, + { + "epoch": 2.81, + "learning_rate": 3.635171003195548e-05, + "loss": 1.2074, + "step": 4690000 + }, + { + "epoch": 2.81, + "learning_rate": 3.634961006639491e-05, + "loss": 1.2042, + "step": 4690500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6347510100834344e-05, + "loss": 1.2107, + "step": 4691000 + }, + { + "epoch": 2.81, + "learning_rate": 3.634541013527378e-05, + "loss": 1.196, + "step": 4691500 + }, + { + "epoch": 2.81, + "learning_rate": 3.634331016971322e-05, + "loss": 1.1701, + "step": 4692000 + }, + { + "epoch": 2.81, + "learning_rate": 3.634121020415265e-05, + "loss": 1.2213, + "step": 4692500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6339110238592084e-05, + "loss": 1.2067, + "step": 4693000 + }, + { + "epoch": 2.81, + "learning_rate": 3.6337010273031525e-05, + "loss": 1.2223, + "step": 4693500 + }, + { + "epoch": 2.81, + "learning_rate": 3.633491450740208e-05, + "loss": 1.2028, + "step": 4694000 + }, + { + "epoch": 2.81, + "learning_rate": 3.633281454184151e-05, + "loss": 1.2097, + "step": 4694500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6330714576280945e-05, + "loss": 1.1858, + "step": 4695000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6328614610720385e-05, + "loss": 1.2022, + "step": 4695500 + }, + { + "epoch": 2.82, + "learning_rate": 3.632651464515982e-05, + "loss": 1.218, + "step": 4696000 + }, + { + "epoch": 2.82, + "learning_rate": 3.632441467959925e-05, + "loss": 1.1727, + "step": 4696500 + }, + { + "epoch": 2.82, + "learning_rate": 3.632231471403869e-05, + "loss": 1.1869, + "step": 4697000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6320214748478126e-05, + "loss": 1.191, + "step": 4697500 + }, + { + "epoch": 2.82, + "learning_rate": 3.631811898284868e-05, + "loss": 1.2257, + "step": 4698000 + }, + { + "epoch": 2.82, + "learning_rate": 3.631601901728811e-05, + "loss": 1.2039, + "step": 4698500 + }, + { + "epoch": 2.82, + "learning_rate": 3.631391905172755e-05, + "loss": 1.1765, + "step": 4699000 + }, + { + "epoch": 2.82, + "learning_rate": 3.631181908616699e-05, + "loss": 1.2188, + "step": 4699500 + }, + { + "epoch": 2.82, + "learning_rate": 3.630971912060642e-05, + "loss": 1.1825, + "step": 4700000 + }, + { + "epoch": 2.82, + "eval_loss": 1.1633455753326416, + "eval_runtime": 1101.2926, + "eval_samples_per_second": 478.274, + "eval_steps_per_second": 79.713, + "step": 4700000 + }, + { + "epoch": 2.82, + "learning_rate": 3.630761915504586e-05, + "loss": 1.2054, + "step": 4700500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6305519189485294e-05, + "loss": 1.188, + "step": 4701000 + }, + { + "epoch": 2.82, + "learning_rate": 3.630341922392473e-05, + "loss": 1.1919, + "step": 4701500 + }, + { + "epoch": 2.82, + "learning_rate": 3.630132345829529e-05, + "loss": 1.2154, + "step": 4702000 + }, + { + "epoch": 2.82, + "learning_rate": 3.629922349273472e-05, + "loss": 1.2176, + "step": 4702500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6297123527174155e-05, + "loss": 1.1711, + "step": 4703000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6295023561613595e-05, + "loss": 1.1888, + "step": 4703500 + }, + { + "epoch": 2.82, + "learning_rate": 3.629292779598415e-05, + "loss": 1.2028, + "step": 4704000 + }, + { + "epoch": 2.82, + "learning_rate": 3.62908320303547e-05, + "loss": 1.1784, + "step": 4704500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6288732064794136e-05, + "loss": 1.2093, + "step": 4705000 + }, + { + "epoch": 2.82, + "learning_rate": 3.628663209923357e-05, + "loss": 1.2052, + "step": 4705500 + }, + { + "epoch": 2.82, + "learning_rate": 3.628453213367301e-05, + "loss": 1.2177, + "step": 4706000 + }, + { + "epoch": 2.82, + "learning_rate": 3.628243216811244e-05, + "loss": 1.1742, + "step": 4706500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6280332202551876e-05, + "loss": 1.2079, + "step": 4707000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6278232236991316e-05, + "loss": 1.2041, + "step": 4707500 + }, + { + "epoch": 2.82, + "learning_rate": 3.627613227143075e-05, + "loss": 1.2185, + "step": 4708000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6274036505801303e-05, + "loss": 1.2061, + "step": 4708500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6271936540240744e-05, + "loss": 1.1878, + "step": 4709000 + }, + { + "epoch": 2.82, + "learning_rate": 3.626983657468018e-05, + "loss": 1.2126, + "step": 4709500 + }, + { + "epoch": 2.82, + "learning_rate": 3.626773660911961e-05, + "loss": 1.196, + "step": 4710000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6265640843490164e-05, + "loss": 1.1912, + "step": 4710500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6263540877929604e-05, + "loss": 1.2203, + "step": 4711000 + }, + { + "epoch": 2.82, + "learning_rate": 3.626144091236904e-05, + "loss": 1.1876, + "step": 4711500 + }, + { + "epoch": 2.83, + "learning_rate": 3.625934094680847e-05, + "loss": 1.2069, + "step": 4712000 + }, + { + "epoch": 2.83, + "learning_rate": 3.625724098124791e-05, + "loss": 1.2107, + "step": 4712500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6255141015687345e-05, + "loss": 1.1891, + "step": 4713000 + }, + { + "epoch": 2.83, + "learning_rate": 3.625304105012677e-05, + "loss": 1.1799, + "step": 4713500 + }, + { + "epoch": 2.83, + "learning_rate": 3.625094108456621e-05, + "loss": 1.2006, + "step": 4714000 + }, + { + "epoch": 2.83, + "learning_rate": 3.624884531893677e-05, + "loss": 1.1825, + "step": 4714500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6246745353376206e-05, + "loss": 1.2411, + "step": 4715000 + }, + { + "epoch": 2.83, + "learning_rate": 3.624464538781564e-05, + "loss": 1.1987, + "step": 4715500 + }, + { + "epoch": 2.83, + "learning_rate": 3.624254542225507e-05, + "loss": 1.2261, + "step": 4716000 + }, + { + "epoch": 2.83, + "learning_rate": 3.6240445456694506e-05, + "loss": 1.1952, + "step": 4716500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6238345491133946e-05, + "loss": 1.195, + "step": 4717000 + }, + { + "epoch": 2.83, + "learning_rate": 3.623624552557338e-05, + "loss": 1.2201, + "step": 4717500 + }, + { + "epoch": 2.83, + "learning_rate": 3.623414556001281e-05, + "loss": 1.2103, + "step": 4718000 + }, + { + "epoch": 2.83, + "learning_rate": 3.6232053994314494e-05, + "loss": 1.2044, + "step": 4718500 + }, + { + "epoch": 2.83, + "learning_rate": 3.622995402875393e-05, + "loss": 1.2071, + "step": 4719000 + }, + { + "epoch": 2.83, + "learning_rate": 3.622785406319337e-05, + "loss": 1.1848, + "step": 4719500 + }, + { + "epoch": 2.83, + "learning_rate": 3.62257540976328e-05, + "loss": 1.2019, + "step": 4720000 + }, + { + "epoch": 2.83, + "learning_rate": 3.622365413207223e-05, + "loss": 1.1901, + "step": 4720500 + }, + { + "epoch": 2.83, + "learning_rate": 3.622155416651167e-05, + "loss": 1.1973, + "step": 4721000 + }, + { + "epoch": 2.83, + "learning_rate": 3.62194542009511e-05, + "loss": 1.1691, + "step": 4721500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6217354235390535e-05, + "loss": 1.2154, + "step": 4722000 + }, + { + "epoch": 2.83, + "learning_rate": 3.6215258469761095e-05, + "loss": 1.2039, + "step": 4722500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6213162704131655e-05, + "loss": 1.2102, + "step": 4723000 + }, + { + "epoch": 2.83, + "learning_rate": 3.621106273857109e-05, + "loss": 1.1888, + "step": 4723500 + }, + { + "epoch": 2.83, + "learning_rate": 3.620896277301052e-05, + "loss": 1.1894, + "step": 4724000 + }, + { + "epoch": 2.83, + "learning_rate": 3.620686280744996e-05, + "loss": 1.1901, + "step": 4724500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6204762841889396e-05, + "loss": 1.2517, + "step": 4725000 + }, + { + "epoch": 2.83, + "learning_rate": 3.620266707625995e-05, + "loss": 1.1845, + "step": 4725500 + }, + { + "epoch": 2.83, + "learning_rate": 3.62005713106305e-05, + "loss": 1.1993, + "step": 4726000 + }, + { + "epoch": 2.83, + "learning_rate": 3.619847134506994e-05, + "loss": 1.1984, + "step": 4726500 + }, + { + "epoch": 2.83, + "learning_rate": 3.619637137950938e-05, + "loss": 1.194, + "step": 4727000 + }, + { + "epoch": 2.83, + "learning_rate": 3.619427141394881e-05, + "loss": 1.2029, + "step": 4727500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6192171448388244e-05, + "loss": 1.2284, + "step": 4728000 + }, + { + "epoch": 2.83, + "learning_rate": 3.6190071482827684e-05, + "loss": 1.2056, + "step": 4728500 + }, + { + "epoch": 2.84, + "learning_rate": 3.618797151726712e-05, + "loss": 1.1984, + "step": 4729000 + }, + { + "epoch": 2.84, + "learning_rate": 3.618587155170655e-05, + "loss": 1.1771, + "step": 4729500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6183771586145984e-05, + "loss": 1.1913, + "step": 4730000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6181675820516545e-05, + "loss": 1.2038, + "step": 4730500 + }, + { + "epoch": 2.84, + "learning_rate": 3.617957585495598e-05, + "loss": 1.2057, + "step": 4731000 + }, + { + "epoch": 2.84, + "learning_rate": 3.617747588939542e-05, + "loss": 1.2085, + "step": 4731500 + }, + { + "epoch": 2.84, + "learning_rate": 3.617537592383485e-05, + "loss": 1.2051, + "step": 4732000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6173280158205405e-05, + "loss": 1.2227, + "step": 4732500 + }, + { + "epoch": 2.84, + "learning_rate": 3.617118439257596e-05, + "loss": 1.203, + "step": 4733000 + }, + { + "epoch": 2.84, + "learning_rate": 3.616908442701539e-05, + "loss": 1.2091, + "step": 4733500 + }, + { + "epoch": 2.84, + "learning_rate": 3.616698446145483e-05, + "loss": 1.2071, + "step": 4734000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6164884495894266e-05, + "loss": 1.2114, + "step": 4734500 + }, + { + "epoch": 2.84, + "learning_rate": 3.61627845303337e-05, + "loss": 1.1817, + "step": 4735000 + }, + { + "epoch": 2.84, + "learning_rate": 3.616068456477314e-05, + "loss": 1.2095, + "step": 4735500 + }, + { + "epoch": 2.84, + "learning_rate": 3.615858459921257e-05, + "loss": 1.1924, + "step": 4736000 + }, + { + "epoch": 2.84, + "learning_rate": 3.615648463365201e-05, + "loss": 1.199, + "step": 4736500 + }, + { + "epoch": 2.84, + "learning_rate": 3.615438886802257e-05, + "loss": 1.1851, + "step": 4737000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6152288902462e-05, + "loss": 1.2288, + "step": 4737500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6150188936901434e-05, + "loss": 1.1873, + "step": 4738000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6148088971340874e-05, + "loss": 1.2014, + "step": 4738500 + }, + { + "epoch": 2.84, + "learning_rate": 3.614598900578031e-05, + "loss": 1.2026, + "step": 4739000 + }, + { + "epoch": 2.84, + "learning_rate": 3.614389324015086e-05, + "loss": 1.1921, + "step": 4739500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6141793274590295e-05, + "loss": 1.1905, + "step": 4740000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6139693309029735e-05, + "loss": 1.2072, + "step": 4740500 + }, + { + "epoch": 2.84, + "learning_rate": 3.613759334346917e-05, + "loss": 1.2167, + "step": 4741000 + }, + { + "epoch": 2.84, + "learning_rate": 3.613549757783972e-05, + "loss": 1.207, + "step": 4741500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6133397612279156e-05, + "loss": 1.2005, + "step": 4742000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6131297646718596e-05, + "loss": 1.2083, + "step": 4742500 + }, + { + "epoch": 2.84, + "learning_rate": 3.612919768115803e-05, + "loss": 1.2005, + "step": 4743000 + }, + { + "epoch": 2.84, + "learning_rate": 3.612709771559747e-05, + "loss": 1.2197, + "step": 4743500 + }, + { + "epoch": 2.84, + "learning_rate": 3.612500194996802e-05, + "loss": 1.2311, + "step": 4744000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6122901984407456e-05, + "loss": 1.1843, + "step": 4744500 + }, + { + "epoch": 2.84, + "learning_rate": 3.612080201884689e-05, + "loss": 1.1958, + "step": 4745000 + }, + { + "epoch": 2.85, + "learning_rate": 3.611870205328633e-05, + "loss": 1.219, + "step": 4745500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6116602087725764e-05, + "loss": 1.2087, + "step": 4746000 + }, + { + "epoch": 2.85, + "learning_rate": 3.611450632209632e-05, + "loss": 1.1806, + "step": 4746500 + }, + { + "epoch": 2.85, + "learning_rate": 3.611240635653575e-05, + "loss": 1.2002, + "step": 4747000 + }, + { + "epoch": 2.85, + "learning_rate": 3.611030639097519e-05, + "loss": 1.2056, + "step": 4747500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6108206425414624e-05, + "loss": 1.199, + "step": 4748000 + }, + { + "epoch": 2.85, + "learning_rate": 3.610610645985406e-05, + "loss": 1.1788, + "step": 4748500 + }, + { + "epoch": 2.85, + "learning_rate": 3.610400649429349e-05, + "loss": 1.2023, + "step": 4749000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6101906528732925e-05, + "loss": 1.2092, + "step": 4749500 + }, + { + "epoch": 2.85, + "learning_rate": 3.609980656317236e-05, + "loss": 1.2067, + "step": 4750000 + }, + { + "epoch": 2.85, + "learning_rate": 3.60977065976118e-05, + "loss": 1.1919, + "step": 4750500 + }, + { + "epoch": 2.85, + "learning_rate": 3.609561083198236e-05, + "loss": 1.218, + "step": 4751000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6093510866421785e-05, + "loss": 1.197, + "step": 4751500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6091410900861226e-05, + "loss": 1.2201, + "step": 4752000 + }, + { + "epoch": 2.85, + "learning_rate": 3.608931093530066e-05, + "loss": 1.1732, + "step": 4752500 + }, + { + "epoch": 2.85, + "learning_rate": 3.608721096974009e-05, + "loss": 1.1765, + "step": 4753000 + }, + { + "epoch": 2.85, + "learning_rate": 3.608511100417953e-05, + "loss": 1.1958, + "step": 4753500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6083015238550086e-05, + "loss": 1.2067, + "step": 4754000 + }, + { + "epoch": 2.85, + "learning_rate": 3.608091527298952e-05, + "loss": 1.1921, + "step": 4754500 + }, + { + "epoch": 2.85, + "learning_rate": 3.607881950736008e-05, + "loss": 1.1672, + "step": 4755000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6076719541799514e-05, + "loss": 1.1818, + "step": 4755500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6074619576238954e-05, + "loss": 1.2246, + "step": 4756000 + }, + { + "epoch": 2.85, + "learning_rate": 3.607251961067838e-05, + "loss": 1.1816, + "step": 4756500 + }, + { + "epoch": 2.85, + "learning_rate": 3.607041964511782e-05, + "loss": 1.2222, + "step": 4757000 + }, + { + "epoch": 2.85, + "learning_rate": 3.606832387948838e-05, + "loss": 1.2449, + "step": 4757500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6066223913927815e-05, + "loss": 1.1944, + "step": 4758000 + }, + { + "epoch": 2.85, + "learning_rate": 3.606412394836724e-05, + "loss": 1.2011, + "step": 4758500 + }, + { + "epoch": 2.85, + "learning_rate": 3.606202398280668e-05, + "loss": 1.1835, + "step": 4759000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6059924017246115e-05, + "loss": 1.1727, + "step": 4759500 + }, + { + "epoch": 2.85, + "learning_rate": 3.605782405168555e-05, + "loss": 1.2028, + "step": 4760000 + }, + { + "epoch": 2.85, + "learning_rate": 3.605572828605611e-05, + "loss": 1.2084, + "step": 4760500 + }, + { + "epoch": 2.85, + "learning_rate": 3.605362832049554e-05, + "loss": 1.2158, + "step": 4761000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6051528354934976e-05, + "loss": 1.1973, + "step": 4761500 + }, + { + "epoch": 2.86, + "learning_rate": 3.604942838937441e-05, + "loss": 1.1967, + "step": 4762000 + }, + { + "epoch": 2.86, + "learning_rate": 3.604732842381385e-05, + "loss": 1.1897, + "step": 4762500 + }, + { + "epoch": 2.86, + "learning_rate": 3.604522845825328e-05, + "loss": 1.2448, + "step": 4763000 + }, + { + "epoch": 2.86, + "learning_rate": 3.6043132692623837e-05, + "loss": 1.2157, + "step": 4763500 + }, + { + "epoch": 2.86, + "learning_rate": 3.604103272706328e-05, + "loss": 1.1874, + "step": 4764000 + }, + { + "epoch": 2.86, + "learning_rate": 3.603893276150271e-05, + "loss": 1.2273, + "step": 4764500 + }, + { + "epoch": 2.86, + "learning_rate": 3.6036832795942144e-05, + "loss": 1.228, + "step": 4765000 + }, + { + "epoch": 2.86, + "learning_rate": 3.6034732830381584e-05, + "loss": 1.1743, + "step": 4765500 + }, + { + "epoch": 2.86, + "learning_rate": 3.603263706475214e-05, + "loss": 1.175, + "step": 4766000 + }, + { + "epoch": 2.86, + "learning_rate": 3.603053709919157e-05, + "loss": 1.1941, + "step": 4766500 + }, + { + "epoch": 2.86, + "learning_rate": 3.6028437133631004e-05, + "loss": 1.1667, + "step": 4767000 + }, + { + "epoch": 2.86, + "learning_rate": 3.6026337168070445e-05, + "loss": 1.2261, + "step": 4767500 + }, + { + "epoch": 2.86, + "learning_rate": 3.602423720250988e-05, + "loss": 1.2095, + "step": 4768000 + }, + { + "epoch": 2.86, + "learning_rate": 3.602213723694931e-05, + "loss": 1.2033, + "step": 4768500 + }, + { + "epoch": 2.86, + "learning_rate": 3.6020041471319865e-05, + "loss": 1.2201, + "step": 4769000 + }, + { + "epoch": 2.86, + "learning_rate": 3.6017941505759305e-05, + "loss": 1.1929, + "step": 4769500 + }, + { + "epoch": 2.86, + "learning_rate": 3.601584154019874e-05, + "loss": 1.189, + "step": 4770000 + }, + { + "epoch": 2.86, + "learning_rate": 3.601374157463817e-05, + "loss": 1.204, + "step": 4770500 + }, + { + "epoch": 2.86, + "learning_rate": 3.601164160907761e-05, + "loss": 1.2056, + "step": 4771000 + }, + { + "epoch": 2.86, + "learning_rate": 3.6009541643517046e-05, + "loss": 1.2071, + "step": 4771500 + }, + { + "epoch": 2.86, + "learning_rate": 3.600744167795648e-05, + "loss": 1.2204, + "step": 4772000 + }, + { + "epoch": 2.86, + "learning_rate": 3.600534171239592e-05, + "loss": 1.2162, + "step": 4772500 + }, + { + "epoch": 2.86, + "learning_rate": 3.600324174683535e-05, + "loss": 1.1889, + "step": 4773000 + }, + { + "epoch": 2.86, + "learning_rate": 3.6001141781274787e-05, + "loss": 1.1886, + "step": 4773500 + }, + { + "epoch": 2.86, + "learning_rate": 3.599904181571422e-05, + "loss": 1.2125, + "step": 4774000 + }, + { + "epoch": 2.86, + "learning_rate": 3.5996941850153653e-05, + "loss": 1.1845, + "step": 4774500 + }, + { + "epoch": 2.86, + "learning_rate": 3.5994846084524214e-05, + "loss": 1.1981, + "step": 4775000 + }, + { + "epoch": 2.86, + "learning_rate": 3.5992746118963654e-05, + "loss": 1.2056, + "step": 4775500 + }, + { + "epoch": 2.86, + "learning_rate": 3.599065035333421e-05, + "loss": 1.1964, + "step": 4776000 + }, + { + "epoch": 2.86, + "learning_rate": 3.598855038777364e-05, + "loss": 1.1839, + "step": 4776500 + }, + { + "epoch": 2.86, + "learning_rate": 3.5986450422213075e-05, + "loss": 1.2048, + "step": 4777000 + }, + { + "epoch": 2.86, + "learning_rate": 3.5984350456652515e-05, + "loss": 1.202, + "step": 4777500 + }, + { + "epoch": 2.86, + "learning_rate": 3.598225049109195e-05, + "loss": 1.2022, + "step": 4778000 + }, + { + "epoch": 2.86, + "learning_rate": 3.59801547254625e-05, + "loss": 1.1963, + "step": 4778500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5978054759901935e-05, + "loss": 1.2098, + "step": 4779000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5975954794341376e-05, + "loss": 1.1974, + "step": 4779500 + }, + { + "epoch": 2.87, + "learning_rate": 3.597385482878081e-05, + "loss": 1.2121, + "step": 4780000 + }, + { + "epoch": 2.87, + "learning_rate": 3.597175486322024e-05, + "loss": 1.2044, + "step": 4780500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5969654897659676e-05, + "loss": 1.2116, + "step": 4781000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5967559132030236e-05, + "loss": 1.2026, + "step": 4781500 + }, + { + "epoch": 2.87, + "learning_rate": 3.596545916646967e-05, + "loss": 1.2008, + "step": 4782000 + }, + { + "epoch": 2.87, + "learning_rate": 3.596335920090911e-05, + "loss": 1.2317, + "step": 4782500 + }, + { + "epoch": 2.87, + "learning_rate": 3.596125923534854e-05, + "loss": 1.1985, + "step": 4783000 + }, + { + "epoch": 2.87, + "learning_rate": 3.59591634697191e-05, + "loss": 1.1721, + "step": 4783500 + }, + { + "epoch": 2.87, + "learning_rate": 3.595706350415853e-05, + "loss": 1.1958, + "step": 4784000 + }, + { + "epoch": 2.87, + "learning_rate": 3.595496353859797e-05, + "loss": 1.2187, + "step": 4784500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5952863573037404e-05, + "loss": 1.1704, + "step": 4785000 + }, + { + "epoch": 2.87, + "learning_rate": 3.595076780740796e-05, + "loss": 1.2295, + "step": 4785500 + }, + { + "epoch": 2.87, + "learning_rate": 3.594866784184739e-05, + "loss": 1.1823, + "step": 4786000 + }, + { + "epoch": 2.87, + "learning_rate": 3.594656787628683e-05, + "loss": 1.2088, + "step": 4786500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5944467910726265e-05, + "loss": 1.1953, + "step": 4787000 + }, + { + "epoch": 2.87, + "learning_rate": 3.594237214509682e-05, + "loss": 1.2237, + "step": 4787500 + }, + { + "epoch": 2.87, + "learning_rate": 3.594027217953626e-05, + "loss": 1.2132, + "step": 4788000 + }, + { + "epoch": 2.87, + "learning_rate": 3.593817221397569e-05, + "loss": 1.1921, + "step": 4788500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5936072248415126e-05, + "loss": 1.236, + "step": 4789000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5933972282854566e-05, + "loss": 1.2194, + "step": 4789500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5931872317294e-05, + "loss": 1.2034, + "step": 4790000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5929772351733426e-05, + "loss": 1.2056, + "step": 4790500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5927672386172866e-05, + "loss": 1.1931, + "step": 4791000 + }, + { + "epoch": 2.87, + "learning_rate": 3.59255724206123e-05, + "loss": 1.1875, + "step": 4791500 + }, + { + "epoch": 2.87, + "learning_rate": 3.592347245505173e-05, + "loss": 1.2071, + "step": 4792000 + }, + { + "epoch": 2.87, + "learning_rate": 3.592137248949117e-05, + "loss": 1.2045, + "step": 4792500 + }, + { + "epoch": 2.87, + "learning_rate": 3.591927252393061e-05, + "loss": 1.1994, + "step": 4793000 + }, + { + "epoch": 2.87, + "learning_rate": 3.591717255837004e-05, + "loss": 1.1904, + "step": 4793500 + }, + { + "epoch": 2.87, + "learning_rate": 3.591507259280948e-05, + "loss": 1.1726, + "step": 4794000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5912976827180034e-05, + "loss": 1.2033, + "step": 4794500 + }, + { + "epoch": 2.87, + "learning_rate": 3.591087686161947e-05, + "loss": 1.212, + "step": 4795000 + }, + { + "epoch": 2.88, + "learning_rate": 3.59087768960589e-05, + "loss": 1.2119, + "step": 4795500 + }, + { + "epoch": 2.88, + "learning_rate": 3.590667693049834e-05, + "loss": 1.216, + "step": 4796000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5904576964937775e-05, + "loss": 1.1873, + "step": 4796500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5902476999377215e-05, + "loss": 1.1738, + "step": 4797000 + }, + { + "epoch": 2.88, + "learning_rate": 3.590037703381665e-05, + "loss": 1.1922, + "step": 4797500 + }, + { + "epoch": 2.88, + "learning_rate": 3.58982812681872e-05, + "loss": 1.216, + "step": 4798000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5896181302626635e-05, + "loss": 1.2084, + "step": 4798500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5894081337066076e-05, + "loss": 1.1966, + "step": 4799000 + }, + { + "epoch": 2.88, + "learning_rate": 3.589198137150551e-05, + "loss": 1.2035, + "step": 4799500 + }, + { + "epoch": 2.88, + "learning_rate": 3.588988560587606e-05, + "loss": 1.1797, + "step": 4800000 + }, + { + "epoch": 2.88, + "eval_loss": 1.1569877862930298, + "eval_runtime": 1104.5998, + "eval_samples_per_second": 476.842, + "eval_steps_per_second": 79.474, + "step": 4800000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5887785640315496e-05, + "loss": 1.2247, + "step": 4800500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5885685674754936e-05, + "loss": 1.205, + "step": 4801000 + }, + { + "epoch": 2.88, + "learning_rate": 3.588358570919437e-05, + "loss": 1.1895, + "step": 4801500 + }, + { + "epoch": 2.88, + "learning_rate": 3.58814857436338e-05, + "loss": 1.1901, + "step": 4802000 + }, + { + "epoch": 2.88, + "learning_rate": 3.587938997800436e-05, + "loss": 1.2086, + "step": 4802500 + }, + { + "epoch": 2.88, + "learning_rate": 3.58772900124438e-05, + "loss": 1.1996, + "step": 4803000 + }, + { + "epoch": 2.88, + "learning_rate": 3.587519424681435e-05, + "loss": 1.1872, + "step": 4803500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5873094281253784e-05, + "loss": 1.2036, + "step": 4804000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5870994315693224e-05, + "loss": 1.203, + "step": 4804500 + }, + { + "epoch": 2.88, + "learning_rate": 3.586889855006378e-05, + "loss": 1.1776, + "step": 4805000 + }, + { + "epoch": 2.88, + "learning_rate": 3.586679858450321e-05, + "loss": 1.218, + "step": 4805500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5864698618942645e-05, + "loss": 1.1744, + "step": 4806000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5862598653382085e-05, + "loss": 1.2032, + "step": 4806500 + }, + { + "epoch": 2.88, + "learning_rate": 3.586049868782152e-05, + "loss": 1.2093, + "step": 4807000 + }, + { + "epoch": 2.88, + "learning_rate": 3.585839872226095e-05, + "loss": 1.1724, + "step": 4807500 + }, + { + "epoch": 2.88, + "learning_rate": 3.585629875670039e-05, + "loss": 1.2006, + "step": 4808000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5854198791139826e-05, + "loss": 1.1943, + "step": 4808500 + }, + { + "epoch": 2.88, + "learning_rate": 3.585209882557926e-05, + "loss": 1.2294, + "step": 4809000 + }, + { + "epoch": 2.88, + "learning_rate": 3.58499988600187e-05, + "loss": 1.1754, + "step": 4809500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5847898894458126e-05, + "loss": 1.2126, + "step": 4810000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5845798928897566e-05, + "loss": 1.2136, + "step": 4810500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5843698963337e-05, + "loss": 1.1973, + "step": 4811000 + }, + { + "epoch": 2.88, + "learning_rate": 3.584159899777643e-05, + "loss": 1.2096, + "step": 4811500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5839499032215874e-05, + "loss": 1.1865, + "step": 4812000 + }, + { + "epoch": 2.89, + "learning_rate": 3.583739906665531e-05, + "loss": 1.1923, + "step": 4812500 + }, + { + "epoch": 2.89, + "learning_rate": 3.583530330102586e-05, + "loss": 1.1776, + "step": 4813000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5833203335465294e-05, + "loss": 1.2029, + "step": 4813500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5831103369904734e-05, + "loss": 1.1947, + "step": 4814000 + }, + { + "epoch": 2.89, + "learning_rate": 3.582900340434417e-05, + "loss": 1.189, + "step": 4814500 + }, + { + "epoch": 2.89, + "learning_rate": 3.582690763871472e-05, + "loss": 1.2078, + "step": 4815000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5824807673154155e-05, + "loss": 1.1927, + "step": 4815500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5822707707593595e-05, + "loss": 1.2251, + "step": 4816000 + }, + { + "epoch": 2.89, + "learning_rate": 3.582060774203303e-05, + "loss": 1.2161, + "step": 4816500 + }, + { + "epoch": 2.89, + "learning_rate": 3.581850777647246e-05, + "loss": 1.19, + "step": 4817000 + }, + { + "epoch": 2.89, + "learning_rate": 3.581641201084302e-05, + "loss": 1.2019, + "step": 4817500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5814312045282456e-05, + "loss": 1.1848, + "step": 4818000 + }, + { + "epoch": 2.89, + "learning_rate": 3.581221207972189e-05, + "loss": 1.1792, + "step": 4818500 + }, + { + "epoch": 2.89, + "learning_rate": 3.581011631409245e-05, + "loss": 1.1943, + "step": 4819000 + }, + { + "epoch": 2.89, + "learning_rate": 3.580801634853188e-05, + "loss": 1.2053, + "step": 4819500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5805916382971316e-05, + "loss": 1.1727, + "step": 4820000 + }, + { + "epoch": 2.89, + "learning_rate": 3.580381641741075e-05, + "loss": 1.1908, + "step": 4820500 + }, + { + "epoch": 2.89, + "learning_rate": 3.580171645185019e-05, + "loss": 1.1763, + "step": 4821000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5799616486289624e-05, + "loss": 1.1968, + "step": 4821500 + }, + { + "epoch": 2.89, + "learning_rate": 3.579751652072906e-05, + "loss": 1.2089, + "step": 4822000 + }, + { + "epoch": 2.89, + "learning_rate": 3.57954165551685e-05, + "loss": 1.1851, + "step": 4822500 + }, + { + "epoch": 2.89, + "learning_rate": 3.579331658960793e-05, + "loss": 1.2242, + "step": 4823000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5791216624047364e-05, + "loss": 1.1793, + "step": 4823500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5789116658486804e-05, + "loss": 1.1819, + "step": 4824000 + }, + { + "epoch": 2.89, + "learning_rate": 3.578701669292624e-05, + "loss": 1.174, + "step": 4824500 + }, + { + "epoch": 2.89, + "learning_rate": 3.578492092729679e-05, + "loss": 1.1942, + "step": 4825000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5782820961736225e-05, + "loss": 1.2153, + "step": 4825500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5780720996175665e-05, + "loss": 1.195, + "step": 4826000 + }, + { + "epoch": 2.89, + "learning_rate": 3.57786210306151e-05, + "loss": 1.2042, + "step": 4826500 + }, + { + "epoch": 2.89, + "learning_rate": 3.577652526498565e-05, + "loss": 1.2042, + "step": 4827000 + }, + { + "epoch": 2.89, + "learning_rate": 3.577442529942509e-05, + "loss": 1.1934, + "step": 4827500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5772325333864526e-05, + "loss": 1.1855, + "step": 4828000 + }, + { + "epoch": 2.89, + "learning_rate": 3.577022536830396e-05, + "loss": 1.2214, + "step": 4828500 + }, + { + "epoch": 2.9, + "learning_rate": 3.576812960267451e-05, + "loss": 1.175, + "step": 4829000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5766033837045066e-05, + "loss": 1.1773, + "step": 4829500 + }, + { + "epoch": 2.9, + "learning_rate": 3.576393387148451e-05, + "loss": 1.2022, + "step": 4830000 + }, + { + "epoch": 2.9, + "learning_rate": 3.576183390592394e-05, + "loss": 1.1988, + "step": 4830500 + }, + { + "epoch": 2.9, + "learning_rate": 3.5759733940363374e-05, + "loss": 1.2069, + "step": 4831000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5757633974802814e-05, + "loss": 1.1969, + "step": 4831500 + }, + { + "epoch": 2.9, + "learning_rate": 3.575553820917337e-05, + "loss": 1.2188, + "step": 4832000 + }, + { + "epoch": 2.9, + "learning_rate": 3.57534382436128e-05, + "loss": 1.2256, + "step": 4832500 + }, + { + "epoch": 2.9, + "learning_rate": 3.575133827805224e-05, + "loss": 1.2124, + "step": 4833000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5749238312491675e-05, + "loss": 1.1965, + "step": 4833500 + }, + { + "epoch": 2.9, + "learning_rate": 3.574713834693111e-05, + "loss": 1.2095, + "step": 4834000 + }, + { + "epoch": 2.9, + "learning_rate": 3.574503838137055e-05, + "loss": 1.1893, + "step": 4834500 + }, + { + "epoch": 2.9, + "learning_rate": 3.574293841580998e-05, + "loss": 1.1957, + "step": 4835000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5740838450249415e-05, + "loss": 1.206, + "step": 4835500 + }, + { + "epoch": 2.9, + "learning_rate": 3.573874268461997e-05, + "loss": 1.185, + "step": 4836000 + }, + { + "epoch": 2.9, + "learning_rate": 3.573664271905941e-05, + "loss": 1.1827, + "step": 4836500 + }, + { + "epoch": 2.9, + "learning_rate": 3.573454275349884e-05, + "loss": 1.1985, + "step": 4837000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5732442787938276e-05, + "loss": 1.1878, + "step": 4837500 + }, + { + "epoch": 2.9, + "learning_rate": 3.5730342822377716e-05, + "loss": 1.2003, + "step": 4838000 + }, + { + "epoch": 2.9, + "learning_rate": 3.572824285681715e-05, + "loss": 1.1847, + "step": 4838500 + }, + { + "epoch": 2.9, + "learning_rate": 3.572614289125658e-05, + "loss": 1.1819, + "step": 4839000 + }, + { + "epoch": 2.9, + "learning_rate": 3.572404712562714e-05, + "loss": 1.194, + "step": 4839500 + }, + { + "epoch": 2.9, + "learning_rate": 3.572194716006658e-05, + "loss": 1.1941, + "step": 4840000 + }, + { + "epoch": 2.9, + "learning_rate": 3.571984719450601e-05, + "loss": 1.2302, + "step": 4840500 + }, + { + "epoch": 2.9, + "learning_rate": 3.5717747228945444e-05, + "loss": 1.1904, + "step": 4841000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5715647263384884e-05, + "loss": 1.2105, + "step": 4841500 + }, + { + "epoch": 2.9, + "learning_rate": 3.571354729782431e-05, + "loss": 1.2244, + "step": 4842000 + }, + { + "epoch": 2.9, + "learning_rate": 3.571145573212599e-05, + "loss": 1.212, + "step": 4842500 + }, + { + "epoch": 2.9, + "learning_rate": 3.5709355766565425e-05, + "loss": 1.2059, + "step": 4843000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5707255801004865e-05, + "loss": 1.1899, + "step": 4843500 + }, + { + "epoch": 2.9, + "learning_rate": 3.57051558354443e-05, + "loss": 1.1849, + "step": 4844000 + }, + { + "epoch": 2.9, + "learning_rate": 3.570305586988373e-05, + "loss": 1.1731, + "step": 4844500 + }, + { + "epoch": 2.9, + "learning_rate": 3.570095590432317e-05, + "loss": 1.2052, + "step": 4845000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5698855938762606e-05, + "loss": 1.2027, + "step": 4845500 + }, + { + "epoch": 2.91, + "learning_rate": 3.569675597320204e-05, + "loss": 1.2156, + "step": 4846000 + }, + { + "epoch": 2.91, + "learning_rate": 3.569466020757259e-05, + "loss": 1.1996, + "step": 4846500 + }, + { + "epoch": 2.91, + "learning_rate": 3.569256024201203e-05, + "loss": 1.1731, + "step": 4847000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5690460276451466e-05, + "loss": 1.1668, + "step": 4847500 + }, + { + "epoch": 2.91, + "learning_rate": 3.56883603108909e-05, + "loss": 1.1802, + "step": 4848000 + }, + { + "epoch": 2.91, + "learning_rate": 3.568626034533034e-05, + "loss": 1.2018, + "step": 4848500 + }, + { + "epoch": 2.91, + "learning_rate": 3.5684160379769767e-05, + "loss": 1.1728, + "step": 4849000 + }, + { + "epoch": 2.91, + "learning_rate": 3.568206041420921e-05, + "loss": 1.1795, + "step": 4849500 + }, + { + "epoch": 2.91, + "learning_rate": 3.567996044864864e-05, + "loss": 1.2172, + "step": 4850000 + }, + { + "epoch": 2.91, + "learning_rate": 3.56778646830192e-05, + "loss": 1.2268, + "step": 4850500 + }, + { + "epoch": 2.91, + "learning_rate": 3.5675764717458634e-05, + "loss": 1.2204, + "step": 4851000 + }, + { + "epoch": 2.91, + "learning_rate": 3.567366475189807e-05, + "loss": 1.1781, + "step": 4851500 + }, + { + "epoch": 2.91, + "learning_rate": 3.56715647863375e-05, + "loss": 1.1988, + "step": 4852000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5669464820776934e-05, + "loss": 1.2125, + "step": 4852500 + }, + { + "epoch": 2.91, + "learning_rate": 3.5667364855216375e-05, + "loss": 1.2356, + "step": 4853000 + }, + { + "epoch": 2.91, + "learning_rate": 3.566526488965581e-05, + "loss": 1.1736, + "step": 4853500 + }, + { + "epoch": 2.91, + "learning_rate": 3.566316492409524e-05, + "loss": 1.1886, + "step": 4854000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5661069158465795e-05, + "loss": 1.2477, + "step": 4854500 + }, + { + "epoch": 2.91, + "learning_rate": 3.5658969192905235e-05, + "loss": 1.165, + "step": 4855000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5656873427275796e-05, + "loss": 1.1912, + "step": 4855500 + }, + { + "epoch": 2.91, + "learning_rate": 3.565477346171522e-05, + "loss": 1.2064, + "step": 4856000 + }, + { + "epoch": 2.91, + "learning_rate": 3.565267349615466e-05, + "loss": 1.17, + "step": 4856500 + }, + { + "epoch": 2.91, + "learning_rate": 3.5650573530594096e-05, + "loss": 1.2205, + "step": 4857000 + }, + { + "epoch": 2.91, + "learning_rate": 3.564847356503353e-05, + "loss": 1.2147, + "step": 4857500 + }, + { + "epoch": 2.91, + "learning_rate": 3.564637359947297e-05, + "loss": 1.1853, + "step": 4858000 + }, + { + "epoch": 2.91, + "learning_rate": 3.56442736339124e-05, + "loss": 1.2118, + "step": 4858500 + }, + { + "epoch": 2.91, + "learning_rate": 3.564217786828296e-05, + "loss": 1.2127, + "step": 4859000 + }, + { + "epoch": 2.91, + "learning_rate": 3.564007790272239e-05, + "loss": 1.2163, + "step": 4859500 + }, + { + "epoch": 2.91, + "learning_rate": 3.563797793716183e-05, + "loss": 1.2155, + "step": 4860000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5635877971601264e-05, + "loss": 1.2094, + "step": 4860500 + }, + { + "epoch": 2.91, + "learning_rate": 3.56337780060407e-05, + "loss": 1.192, + "step": 4861000 + }, + { + "epoch": 2.91, + "learning_rate": 3.563167804048014e-05, + "loss": 1.2084, + "step": 4861500 + }, + { + "epoch": 2.91, + "learning_rate": 3.562957807491957e-05, + "loss": 1.1808, + "step": 4862000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5627478109359005e-05, + "loss": 1.1785, + "step": 4862500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5625382343729565e-05, + "loss": 1.1689, + "step": 4863000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5623282378169e-05, + "loss": 1.198, + "step": 4863500 + }, + { + "epoch": 2.92, + "learning_rate": 3.562118661253955e-05, + "loss": 1.2205, + "step": 4864000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5619086646978986e-05, + "loss": 1.1966, + "step": 4864500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5616986681418426e-05, + "loss": 1.21, + "step": 4865000 + }, + { + "epoch": 2.92, + "learning_rate": 3.561488671585786e-05, + "loss": 1.2158, + "step": 4865500 + }, + { + "epoch": 2.92, + "learning_rate": 3.561278675029729e-05, + "loss": 1.2127, + "step": 4866000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5610690984667846e-05, + "loss": 1.2204, + "step": 4866500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5608591019107286e-05, + "loss": 1.1964, + "step": 4867000 + }, + { + "epoch": 2.92, + "learning_rate": 3.560649105354672e-05, + "loss": 1.1677, + "step": 4867500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5604391087986153e-05, + "loss": 1.1937, + "step": 4868000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5602291122425594e-05, + "loss": 1.2094, + "step": 4868500 + }, + { + "epoch": 2.92, + "learning_rate": 3.560019115686503e-05, + "loss": 1.2223, + "step": 4869000 + }, + { + "epoch": 2.92, + "learning_rate": 3.559809539123558e-05, + "loss": 1.2166, + "step": 4869500 + }, + { + "epoch": 2.92, + "learning_rate": 3.559599542567502e-05, + "loss": 1.2035, + "step": 4870000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5593895460114454e-05, + "loss": 1.2044, + "step": 4870500 + }, + { + "epoch": 2.92, + "learning_rate": 3.559179549455389e-05, + "loss": 1.1768, + "step": 4871000 + }, + { + "epoch": 2.92, + "learning_rate": 3.558969552899333e-05, + "loss": 1.2329, + "step": 4871500 + }, + { + "epoch": 2.92, + "learning_rate": 3.558759556343276e-05, + "loss": 1.1919, + "step": 4872000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5585495597872195e-05, + "loss": 1.2393, + "step": 4872500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5583395632311635e-05, + "loss": 1.1783, + "step": 4873000 + }, + { + "epoch": 2.92, + "learning_rate": 3.558129986668219e-05, + "loss": 1.2029, + "step": 4873500 + }, + { + "epoch": 2.92, + "learning_rate": 3.557920410105274e-05, + "loss": 1.197, + "step": 4874000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5577104135492176e-05, + "loss": 1.1909, + "step": 4874500 + }, + { + "epoch": 2.92, + "learning_rate": 3.557500416993161e-05, + "loss": 1.2012, + "step": 4875000 + }, + { + "epoch": 2.92, + "learning_rate": 3.557290420437105e-05, + "loss": 1.2581, + "step": 4875500 + }, + { + "epoch": 2.92, + "learning_rate": 3.557080423881048e-05, + "loss": 1.1802, + "step": 4876000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5568704273249916e-05, + "loss": 1.2095, + "step": 4876500 + }, + { + "epoch": 2.92, + "learning_rate": 3.556660850762048e-05, + "loss": 1.1637, + "step": 4877000 + }, + { + "epoch": 2.92, + "learning_rate": 3.556450854205991e-05, + "loss": 1.1899, + "step": 4877500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5562408576499344e-05, + "loss": 1.1925, + "step": 4878000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5560308610938784e-05, + "loss": 1.2125, + "step": 4878500 + }, + { + "epoch": 2.93, + "learning_rate": 3.555820864537822e-05, + "loss": 1.1718, + "step": 4879000 + }, + { + "epoch": 2.93, + "learning_rate": 3.555610867981765e-05, + "loss": 1.2255, + "step": 4879500 + }, + { + "epoch": 2.93, + "learning_rate": 3.555400871425709e-05, + "loss": 1.1551, + "step": 4880000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5551908748696525e-05, + "loss": 1.2055, + "step": 4880500 + }, + { + "epoch": 2.93, + "learning_rate": 3.554981298306708e-05, + "loss": 1.1939, + "step": 4881000 + }, + { + "epoch": 2.93, + "learning_rate": 3.554771301750651e-05, + "loss": 1.188, + "step": 4881500 + }, + { + "epoch": 2.93, + "learning_rate": 3.554561305194595e-05, + "loss": 1.2225, + "step": 4882000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5543517286316505e-05, + "loss": 1.169, + "step": 4882500 + }, + { + "epoch": 2.93, + "learning_rate": 3.554141732075594e-05, + "loss": 1.1823, + "step": 4883000 + }, + { + "epoch": 2.93, + "learning_rate": 3.553931735519537e-05, + "loss": 1.1724, + "step": 4883500 + }, + { + "epoch": 2.93, + "learning_rate": 3.553721738963481e-05, + "loss": 1.1869, + "step": 4884000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5535117424074246e-05, + "loss": 1.2344, + "step": 4884500 + }, + { + "epoch": 2.93, + "learning_rate": 3.55330216584448e-05, + "loss": 1.1936, + "step": 4885000 + }, + { + "epoch": 2.93, + "learning_rate": 3.553092169288424e-05, + "loss": 1.1978, + "step": 4885500 + }, + { + "epoch": 2.93, + "learning_rate": 3.552882172732367e-05, + "loss": 1.1808, + "step": 4886000 + }, + { + "epoch": 2.93, + "learning_rate": 3.552672176176311e-05, + "loss": 1.2096, + "step": 4886500 + }, + { + "epoch": 2.93, + "learning_rate": 3.552462179620255e-05, + "loss": 1.1803, + "step": 4887000 + }, + { + "epoch": 2.93, + "learning_rate": 3.552252183064198e-05, + "loss": 1.1873, + "step": 4887500 + }, + { + "epoch": 2.93, + "learning_rate": 3.552042186508141e-05, + "loss": 1.1726, + "step": 4888000 + }, + { + "epoch": 2.93, + "learning_rate": 3.551832609945197e-05, + "loss": 1.1765, + "step": 4888500 + }, + { + "epoch": 2.93, + "learning_rate": 3.551622613389141e-05, + "loss": 1.157, + "step": 4889000 + }, + { + "epoch": 2.93, + "learning_rate": 3.551412616833084e-05, + "loss": 1.1899, + "step": 4889500 + }, + { + "epoch": 2.93, + "learning_rate": 3.5512026202770275e-05, + "loss": 1.2315, + "step": 4890000 + }, + { + "epoch": 2.93, + "learning_rate": 3.550992623720971e-05, + "loss": 1.201, + "step": 4890500 + }, + { + "epoch": 2.93, + "learning_rate": 3.550782627164914e-05, + "loss": 1.2075, + "step": 4891000 + }, + { + "epoch": 2.93, + "learning_rate": 3.55057305060197e-05, + "loss": 1.1848, + "step": 4891500 + }, + { + "epoch": 2.93, + "learning_rate": 3.5503634740390255e-05, + "loss": 1.1803, + "step": 4892000 + }, + { + "epoch": 2.93, + "learning_rate": 3.550153897476081e-05, + "loss": 1.177, + "step": 4892500 + }, + { + "epoch": 2.93, + "learning_rate": 3.549943900920025e-05, + "loss": 1.2364, + "step": 4893000 + }, + { + "epoch": 2.93, + "learning_rate": 3.549733904363968e-05, + "loss": 1.2193, + "step": 4893500 + }, + { + "epoch": 2.93, + "learning_rate": 3.5495239078079116e-05, + "loss": 1.1924, + "step": 4894000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5493139112518556e-05, + "loss": 1.1939, + "step": 4894500 + }, + { + "epoch": 2.93, + "learning_rate": 3.549103914695799e-05, + "loss": 1.1952, + "step": 4895000 + }, + { + "epoch": 2.94, + "learning_rate": 3.548893918139742e-05, + "loss": 1.2072, + "step": 4895500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5486839215836864e-05, + "loss": 1.1993, + "step": 4896000 + }, + { + "epoch": 2.94, + "learning_rate": 3.548474345020742e-05, + "loss": 1.2296, + "step": 4896500 + }, + { + "epoch": 2.94, + "learning_rate": 3.548264348464685e-05, + "loss": 1.2044, + "step": 4897000 + }, + { + "epoch": 2.94, + "learning_rate": 3.5480543519086284e-05, + "loss": 1.2146, + "step": 4897500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5478443553525724e-05, + "loss": 1.213, + "step": 4898000 + }, + { + "epoch": 2.94, + "learning_rate": 3.547634358796516e-05, + "loss": 1.1903, + "step": 4898500 + }, + { + "epoch": 2.94, + "learning_rate": 3.547424362240459e-05, + "loss": 1.1772, + "step": 4899000 + }, + { + "epoch": 2.94, + "learning_rate": 3.547214365684403e-05, + "loss": 1.2092, + "step": 4899500 + }, + { + "epoch": 2.94, + "learning_rate": 3.547004369128346e-05, + "loss": 1.1931, + "step": 4900000 + }, + { + "epoch": 2.94, + "eval_loss": 1.150070071220398, + "eval_runtime": 1102.0417, + "eval_samples_per_second": 477.949, + "eval_steps_per_second": 79.659, + "step": 4900000 + }, + { + "epoch": 2.94, + "learning_rate": 3.546794792565402e-05, + "loss": 1.1686, + "step": 4900500 + }, + { + "epoch": 2.94, + "learning_rate": 3.546584796009346e-05, + "loss": 1.1786, + "step": 4901000 + }, + { + "epoch": 2.94, + "learning_rate": 3.546374799453289e-05, + "loss": 1.1834, + "step": 4901500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5461648028972326e-05, + "loss": 1.1955, + "step": 4902000 + }, + { + "epoch": 2.94, + "learning_rate": 3.545954806341176e-05, + "loss": 1.2104, + "step": 4902500 + }, + { + "epoch": 2.94, + "learning_rate": 3.545744809785119e-05, + "loss": 1.1799, + "step": 4903000 + }, + { + "epoch": 2.94, + "learning_rate": 3.5455348132290626e-05, + "loss": 1.1846, + "step": 4903500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5453248166730066e-05, + "loss": 1.1922, + "step": 4904000 + }, + { + "epoch": 2.94, + "learning_rate": 3.545115240110062e-05, + "loss": 1.1906, + "step": 4904500 + }, + { + "epoch": 2.94, + "learning_rate": 3.544905243554005e-05, + "loss": 1.1742, + "step": 4905000 + }, + { + "epoch": 2.94, + "learning_rate": 3.544695246997949e-05, + "loss": 1.19, + "step": 4905500 + }, + { + "epoch": 2.94, + "learning_rate": 3.544485250441893e-05, + "loss": 1.1771, + "step": 4906000 + }, + { + "epoch": 2.94, + "learning_rate": 3.544275253885836e-05, + "loss": 1.2039, + "step": 4906500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5440656773228914e-05, + "loss": 1.2081, + "step": 4907000 + }, + { + "epoch": 2.94, + "learning_rate": 3.5438556807668354e-05, + "loss": 1.1953, + "step": 4907500 + }, + { + "epoch": 2.94, + "learning_rate": 3.543645684210779e-05, + "loss": 1.1965, + "step": 4908000 + }, + { + "epoch": 2.94, + "learning_rate": 3.543436107647835e-05, + "loss": 1.1947, + "step": 4908500 + }, + { + "epoch": 2.94, + "learning_rate": 3.543226111091778e-05, + "loss": 1.176, + "step": 4909000 + }, + { + "epoch": 2.94, + "learning_rate": 3.5430161145357215e-05, + "loss": 1.1858, + "step": 4909500 + }, + { + "epoch": 2.94, + "learning_rate": 3.542806117979665e-05, + "loss": 1.2069, + "step": 4910000 + }, + { + "epoch": 2.94, + "learning_rate": 3.542596121423608e-05, + "loss": 1.1935, + "step": 4910500 + }, + { + "epoch": 2.94, + "learning_rate": 3.542386124867552e-05, + "loss": 1.2113, + "step": 4911000 + }, + { + "epoch": 2.94, + "learning_rate": 3.542176548304608e-05, + "loss": 1.1929, + "step": 4911500 + }, + { + "epoch": 2.94, + "learning_rate": 3.541966551748551e-05, + "loss": 1.1827, + "step": 4912000 + }, + { + "epoch": 2.95, + "learning_rate": 3.541756555192494e-05, + "loss": 1.204, + "step": 4912500 + }, + { + "epoch": 2.95, + "learning_rate": 3.541546558636438e-05, + "loss": 1.1612, + "step": 4913000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5413365620803816e-05, + "loss": 1.1853, + "step": 4913500 + }, + { + "epoch": 2.95, + "learning_rate": 3.541126565524325e-05, + "loss": 1.1911, + "step": 4914000 + }, + { + "epoch": 2.95, + "learning_rate": 3.540916568968269e-05, + "loss": 1.2116, + "step": 4914500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5407065724122123e-05, + "loss": 1.1766, + "step": 4915000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5404965758561564e-05, + "loss": 1.1856, + "step": 4915500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5402865793001e-05, + "loss": 1.1894, + "step": 4916000 + }, + { + "epoch": 2.95, + "learning_rate": 3.540076582744043e-05, + "loss": 1.1785, + "step": 4916500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5398670061810984e-05, + "loss": 1.1615, + "step": 4917000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5396570096250424e-05, + "loss": 1.1646, + "step": 4917500 + }, + { + "epoch": 2.95, + "learning_rate": 3.539447013068986e-05, + "loss": 1.1889, + "step": 4918000 + }, + { + "epoch": 2.95, + "learning_rate": 3.539237016512929e-05, + "loss": 1.1881, + "step": 4918500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5390274399499845e-05, + "loss": 1.1956, + "step": 4919000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5388174433939285e-05, + "loss": 1.1853, + "step": 4919500 + }, + { + "epoch": 2.95, + "learning_rate": 3.538607446837872e-05, + "loss": 1.2042, + "step": 4920000 + }, + { + "epoch": 2.95, + "learning_rate": 3.538397450281815e-05, + "loss": 1.1773, + "step": 4920500 + }, + { + "epoch": 2.95, + "learning_rate": 3.538187453725759e-05, + "loss": 1.2138, + "step": 4921000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5379774571697026e-05, + "loss": 1.1997, + "step": 4921500 + }, + { + "epoch": 2.95, + "learning_rate": 3.537767880606758e-05, + "loss": 1.2098, + "step": 4922000 + }, + { + "epoch": 2.95, + "learning_rate": 3.537557884050702e-05, + "loss": 1.1951, + "step": 4922500 + }, + { + "epoch": 2.95, + "learning_rate": 3.537347887494645e-05, + "loss": 1.1966, + "step": 4923000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5371378909385887e-05, + "loss": 1.2012, + "step": 4923500 + }, + { + "epoch": 2.95, + "learning_rate": 3.536927894382533e-05, + "loss": 1.1754, + "step": 4924000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5367178978264753e-05, + "loss": 1.1853, + "step": 4924500 + }, + { + "epoch": 2.95, + "learning_rate": 3.536507901270419e-05, + "loss": 1.1625, + "step": 4925000 + }, + { + "epoch": 2.95, + "learning_rate": 3.536297904714363e-05, + "loss": 1.2095, + "step": 4925500 + }, + { + "epoch": 2.95, + "learning_rate": 3.536087908158306e-05, + "loss": 1.1875, + "step": 4926000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5358779116022494e-05, + "loss": 1.1759, + "step": 4926500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5356679150461934e-05, + "loss": 1.1836, + "step": 4927000 + }, + { + "epoch": 2.95, + "learning_rate": 3.535457918490137e-05, + "loss": 1.1699, + "step": 4927500 + }, + { + "epoch": 2.95, + "learning_rate": 3.535248341927192e-05, + "loss": 1.1825, + "step": 4928000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5350383453711355e-05, + "loss": 1.1832, + "step": 4928500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5348283488150795e-05, + "loss": 1.1735, + "step": 4929000 + }, + { + "epoch": 2.96, + "learning_rate": 3.534618352259023e-05, + "loss": 1.2161, + "step": 4929500 + }, + { + "epoch": 2.96, + "learning_rate": 3.534408775696078e-05, + "loss": 1.1926, + "step": 4930000 + }, + { + "epoch": 2.96, + "learning_rate": 3.534198779140022e-05, + "loss": 1.1985, + "step": 4930500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5339887825839656e-05, + "loss": 1.1791, + "step": 4931000 + }, + { + "epoch": 2.96, + "learning_rate": 3.533779206021021e-05, + "loss": 1.2029, + "step": 4931500 + }, + { + "epoch": 2.96, + "learning_rate": 3.533569209464964e-05, + "loss": 1.2021, + "step": 4932000 + }, + { + "epoch": 2.96, + "learning_rate": 3.533359212908908e-05, + "loss": 1.1804, + "step": 4932500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5331492163528516e-05, + "loss": 1.2075, + "step": 4933000 + }, + { + "epoch": 2.96, + "learning_rate": 3.532939639789908e-05, + "loss": 1.2154, + "step": 4933500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5327296432338503e-05, + "loss": 1.1871, + "step": 4934000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5325196466777944e-05, + "loss": 1.1961, + "step": 4934500 + }, + { + "epoch": 2.96, + "learning_rate": 3.532309650121738e-05, + "loss": 1.1789, + "step": 4935000 + }, + { + "epoch": 2.96, + "learning_rate": 3.532099653565681e-05, + "loss": 1.1838, + "step": 4935500 + }, + { + "epoch": 2.96, + "learning_rate": 3.531889657009625e-05, + "loss": 1.1975, + "step": 4936000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5316796604535684e-05, + "loss": 1.1985, + "step": 4936500 + }, + { + "epoch": 2.96, + "learning_rate": 3.531469663897512e-05, + "loss": 1.1751, + "step": 4937000 + }, + { + "epoch": 2.96, + "learning_rate": 3.531259667341456e-05, + "loss": 1.1921, + "step": 4937500 + }, + { + "epoch": 2.96, + "learning_rate": 3.531049670785399e-05, + "loss": 1.1814, + "step": 4938000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5308400942224545e-05, + "loss": 1.1779, + "step": 4938500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5306300976663985e-05, + "loss": 1.2068, + "step": 4939000 + }, + { + "epoch": 2.96, + "learning_rate": 3.530420101110342e-05, + "loss": 1.1964, + "step": 4939500 + }, + { + "epoch": 2.96, + "learning_rate": 3.530210524547397e-05, + "loss": 1.1794, + "step": 4940000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5300005279913406e-05, + "loss": 1.2067, + "step": 4940500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5297905314352846e-05, + "loss": 1.203, + "step": 4941000 + }, + { + "epoch": 2.96, + "learning_rate": 3.529580534879228e-05, + "loss": 1.1975, + "step": 4941500 + }, + { + "epoch": 2.96, + "learning_rate": 3.529370538323171e-05, + "loss": 1.1813, + "step": 4942000 + }, + { + "epoch": 2.96, + "learning_rate": 3.529160541767115e-05, + "loss": 1.1987, + "step": 4942500 + }, + { + "epoch": 2.96, + "learning_rate": 3.528950965204171e-05, + "loss": 1.1871, + "step": 4943000 + }, + { + "epoch": 2.96, + "learning_rate": 3.528740968648114e-05, + "loss": 1.1964, + "step": 4943500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5285309720920574e-05, + "loss": 1.1983, + "step": 4944000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5283209755360014e-05, + "loss": 1.1786, + "step": 4944500 + }, + { + "epoch": 2.96, + "learning_rate": 3.528110978979945e-05, + "loss": 1.1886, + "step": 4945000 + }, + { + "epoch": 2.97, + "learning_rate": 3.527900982423888e-05, + "loss": 1.1916, + "step": 4945500 + }, + { + "epoch": 2.97, + "learning_rate": 3.527690985867832e-05, + "loss": 1.2012, + "step": 4946000 + }, + { + "epoch": 2.97, + "learning_rate": 3.527480989311775e-05, + "loss": 1.1637, + "step": 4946500 + }, + { + "epoch": 2.97, + "learning_rate": 3.527270992755719e-05, + "loss": 1.2104, + "step": 4947000 + }, + { + "epoch": 2.97, + "learning_rate": 3.527061416192775e-05, + "loss": 1.1895, + "step": 4947500 + }, + { + "epoch": 2.97, + "learning_rate": 3.526851419636718e-05, + "loss": 1.2104, + "step": 4948000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5266414230806615e-05, + "loss": 1.1835, + "step": 4948500 + }, + { + "epoch": 2.97, + "learning_rate": 3.526431426524605e-05, + "loss": 1.1696, + "step": 4949000 + }, + { + "epoch": 2.97, + "learning_rate": 3.526221429968548e-05, + "loss": 1.1902, + "step": 4949500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5260114334124916e-05, + "loss": 1.1867, + "step": 4950000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5258014368564356e-05, + "loss": 1.1919, + "step": 4950500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5255918602934916e-05, + "loss": 1.1933, + "step": 4951000 + }, + { + "epoch": 2.97, + "learning_rate": 3.525381863737434e-05, + "loss": 1.1714, + "step": 4951500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5251718671813776e-05, + "loss": 1.2231, + "step": 4952000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5249618706253217e-05, + "loss": 1.1904, + "step": 4952500 + }, + { + "epoch": 2.97, + "learning_rate": 3.524751874069265e-05, + "loss": 1.2286, + "step": 4953000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5245418775132084e-05, + "loss": 1.2117, + "step": 4953500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5243318809571524e-05, + "loss": 1.1688, + "step": 4954000 + }, + { + "epoch": 2.97, + "learning_rate": 3.524122304394208e-05, + "loss": 1.1621, + "step": 4954500 + }, + { + "epoch": 2.97, + "learning_rate": 3.523912307838151e-05, + "loss": 1.2109, + "step": 4955000 + }, + { + "epoch": 2.97, + "learning_rate": 3.523702311282095e-05, + "loss": 1.2456, + "step": 4955500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5234923147260384e-05, + "loss": 1.1945, + "step": 4956000 + }, + { + "epoch": 2.97, + "learning_rate": 3.523282738163094e-05, + "loss": 1.1965, + "step": 4956500 + }, + { + "epoch": 2.97, + "learning_rate": 3.523072741607037e-05, + "loss": 1.1922, + "step": 4957000 + }, + { + "epoch": 2.97, + "learning_rate": 3.522862745050981e-05, + "loss": 1.1867, + "step": 4957500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5226527484949245e-05, + "loss": 1.2077, + "step": 4958000 + }, + { + "epoch": 2.97, + "learning_rate": 3.522442751938868e-05, + "loss": 1.2082, + "step": 4958500 + }, + { + "epoch": 2.97, + "learning_rate": 3.522232755382812e-05, + "loss": 1.1866, + "step": 4959000 + }, + { + "epoch": 2.97, + "learning_rate": 3.522022758826755e-05, + "loss": 1.1989, + "step": 4959500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5218131822638106e-05, + "loss": 1.1908, + "step": 4960000 + }, + { + "epoch": 2.97, + "learning_rate": 3.521603185707754e-05, + "loss": 1.2024, + "step": 4960500 + }, + { + "epoch": 2.97, + "learning_rate": 3.521393189151698e-05, + "loss": 1.2216, + "step": 4961000 + }, + { + "epoch": 2.97, + "learning_rate": 3.521183192595641e-05, + "loss": 1.1952, + "step": 4961500 + }, + { + "epoch": 2.97, + "learning_rate": 3.520973196039585e-05, + "loss": 1.1668, + "step": 4962000 + }, + { + "epoch": 2.98, + "learning_rate": 3.520763619476641e-05, + "loss": 1.2139, + "step": 4962500 + }, + { + "epoch": 2.98, + "learning_rate": 3.520553622920584e-05, + "loss": 1.2309, + "step": 4963000 + }, + { + "epoch": 2.98, + "learning_rate": 3.5203436263645274e-05, + "loss": 1.1917, + "step": 4963500 + }, + { + "epoch": 2.98, + "learning_rate": 3.5201336298084714e-05, + "loss": 1.1939, + "step": 4964000 + }, + { + "epoch": 2.98, + "learning_rate": 3.519923633252415e-05, + "loss": 1.1804, + "step": 4964500 + }, + { + "epoch": 2.98, + "learning_rate": 3.519713636696358e-05, + "loss": 1.1879, + "step": 4965000 + }, + { + "epoch": 2.98, + "learning_rate": 3.519503640140302e-05, + "loss": 1.1878, + "step": 4965500 + }, + { + "epoch": 2.98, + "learning_rate": 3.5192940635773575e-05, + "loss": 1.1847, + "step": 4966000 + }, + { + "epoch": 2.98, + "learning_rate": 3.519084067021301e-05, + "loss": 1.227, + "step": 4966500 + }, + { + "epoch": 2.98, + "learning_rate": 3.518874070465244e-05, + "loss": 1.1889, + "step": 4967000 + }, + { + "epoch": 2.98, + "learning_rate": 3.518664073909188e-05, + "loss": 1.1746, + "step": 4967500 + }, + { + "epoch": 2.98, + "learning_rate": 3.5184540773531315e-05, + "loss": 1.1893, + "step": 4968000 + }, + { + "epoch": 2.98, + "learning_rate": 3.518244500790187e-05, + "loss": 1.1907, + "step": 4968500 + }, + { + "epoch": 2.98, + "learning_rate": 3.518034504234131e-05, + "loss": 1.1976, + "step": 4969000 + }, + { + "epoch": 2.98, + "learning_rate": 3.517824507678074e-05, + "loss": 1.1862, + "step": 4969500 + }, + { + "epoch": 2.98, + "learning_rate": 3.5176145111220176e-05, + "loss": 1.2249, + "step": 4970000 + }, + { + "epoch": 2.98, + "learning_rate": 3.5174045145659616e-05, + "loss": 1.1959, + "step": 4970500 + }, + { + "epoch": 2.98, + "learning_rate": 3.517194518009905e-05, + "loss": 1.1911, + "step": 4971000 + }, + { + "epoch": 2.98, + "learning_rate": 3.5169845214538476e-05, + "loss": 1.1862, + "step": 4971500 + }, + { + "epoch": 2.98, + "learning_rate": 3.516774524897792e-05, + "loss": 1.1583, + "step": 4972000 + }, + { + "epoch": 2.98, + "learning_rate": 3.516564948334848e-05, + "loss": 1.1852, + "step": 4972500 + }, + { + "epoch": 2.98, + "learning_rate": 3.516354951778791e-05, + "loss": 1.165, + "step": 4973000 + }, + { + "epoch": 2.98, + "learning_rate": 3.516144955222734e-05, + "loss": 1.1931, + "step": 4973500 + }, + { + "epoch": 2.98, + "learning_rate": 3.515934958666678e-05, + "loss": 1.1914, + "step": 4974000 + }, + { + "epoch": 2.98, + "learning_rate": 3.515725802096846e-05, + "loss": 1.1932, + "step": 4974500 + }, + { + "epoch": 2.98, + "learning_rate": 3.515515805540789e-05, + "loss": 1.2081, + "step": 4975000 + }, + { + "epoch": 2.98, + "learning_rate": 3.5153058089847325e-05, + "loss": 1.172, + "step": 4975500 + }, + { + "epoch": 2.98, + "learning_rate": 3.5150958124286765e-05, + "loss": 1.1788, + "step": 4976000 + }, + { + "epoch": 2.98, + "learning_rate": 3.51488581587262e-05, + "loss": 1.213, + "step": 4976500 + }, + { + "epoch": 2.98, + "learning_rate": 3.514675819316563e-05, + "loss": 1.1947, + "step": 4977000 + }, + { + "epoch": 2.98, + "learning_rate": 3.514465822760507e-05, + "loss": 1.2347, + "step": 4977500 + }, + { + "epoch": 2.98, + "learning_rate": 3.5142558262044506e-05, + "loss": 1.2143, + "step": 4978000 + }, + { + "epoch": 2.98, + "learning_rate": 3.514046249641506e-05, + "loss": 1.1915, + "step": 4978500 + }, + { + "epoch": 2.99, + "learning_rate": 3.513836253085449e-05, + "loss": 1.195, + "step": 4979000 + }, + { + "epoch": 2.99, + "learning_rate": 3.513626256529393e-05, + "loss": 1.1789, + "step": 4979500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5134162599733366e-05, + "loss": 1.2034, + "step": 4980000 + }, + { + "epoch": 2.99, + "learning_rate": 3.51320626341728e-05, + "loss": 1.1813, + "step": 4980500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5129966868543353e-05, + "loss": 1.2187, + "step": 4981000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5127866902982794e-05, + "loss": 1.1764, + "step": 4981500 + }, + { + "epoch": 2.99, + "learning_rate": 3.512577113735335e-05, + "loss": 1.1802, + "step": 4982000 + }, + { + "epoch": 2.99, + "learning_rate": 3.512367117179278e-05, + "loss": 1.1749, + "step": 4982500 + }, + { + "epoch": 2.99, + "learning_rate": 3.512157120623222e-05, + "loss": 1.1856, + "step": 4983000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5119471240671654e-05, + "loss": 1.173, + "step": 4983500 + }, + { + "epoch": 2.99, + "learning_rate": 3.511737127511109e-05, + "loss": 1.1714, + "step": 4984000 + }, + { + "epoch": 2.99, + "learning_rate": 3.511527130955053e-05, + "loss": 1.1896, + "step": 4984500 + }, + { + "epoch": 2.99, + "learning_rate": 3.511317134398996e-05, + "loss": 1.2135, + "step": 4985000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5111075578360515e-05, + "loss": 1.1937, + "step": 4985500 + }, + { + "epoch": 2.99, + "learning_rate": 3.510897561279995e-05, + "loss": 1.2188, + "step": 4986000 + }, + { + "epoch": 2.99, + "learning_rate": 3.510687564723939e-05, + "loss": 1.235, + "step": 4986500 + }, + { + "epoch": 2.99, + "learning_rate": 3.510477568167882e-05, + "loss": 1.2255, + "step": 4987000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5102675716118256e-05, + "loss": 1.2107, + "step": 4987500 + }, + { + "epoch": 2.99, + "learning_rate": 3.510057575055769e-05, + "loss": 1.2078, + "step": 4988000 + }, + { + "epoch": 2.99, + "learning_rate": 3.509847578499712e-05, + "loss": 1.2072, + "step": 4988500 + }, + { + "epoch": 2.99, + "learning_rate": 3.509638001936768e-05, + "loss": 1.1822, + "step": 4989000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5094280053807116e-05, + "loss": 1.1902, + "step": 4989500 + }, + { + "epoch": 2.99, + "learning_rate": 3.509218008824656e-05, + "loss": 1.2092, + "step": 4990000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5090080122685983e-05, + "loss": 1.1944, + "step": 4990500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5087980157125424e-05, + "loss": 1.2213, + "step": 4991000 + }, + { + "epoch": 2.99, + "learning_rate": 3.508588019156486e-05, + "loss": 1.2173, + "step": 4991500 + }, + { + "epoch": 2.99, + "learning_rate": 3.508378022600429e-05, + "loss": 1.1726, + "step": 4992000 + }, + { + "epoch": 2.99, + "learning_rate": 3.508168026044373e-05, + "loss": 1.2283, + "step": 4992500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5079580294883164e-05, + "loss": 1.2095, + "step": 4993000 + }, + { + "epoch": 2.99, + "learning_rate": 3.507748452925372e-05, + "loss": 1.2043, + "step": 4993500 + }, + { + "epoch": 2.99, + "learning_rate": 3.507538456369315e-05, + "loss": 1.1754, + "step": 4994000 + }, + { + "epoch": 2.99, + "learning_rate": 3.507328459813259e-05, + "loss": 1.2093, + "step": 4994500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5071184632572025e-05, + "loss": 1.2137, + "step": 4995000 + }, + { + "epoch": 3.0, + "learning_rate": 3.506908886694258e-05, + "loss": 1.234, + "step": 4995500 + }, + { + "epoch": 3.0, + "learning_rate": 3.506699310131314e-05, + "loss": 1.1872, + "step": 4996000 + }, + { + "epoch": 3.0, + "learning_rate": 3.506489313575257e-05, + "loss": 1.1619, + "step": 4996500 + }, + { + "epoch": 3.0, + "learning_rate": 3.506279317019201e-05, + "loss": 1.2051, + "step": 4997000 + }, + { + "epoch": 3.0, + "learning_rate": 3.506069320463144e-05, + "loss": 1.2021, + "step": 4997500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5058597439002e-05, + "loss": 1.1838, + "step": 4998000 + }, + { + "epoch": 3.0, + "learning_rate": 3.505649747344144e-05, + "loss": 1.1752, + "step": 4998500 + }, + { + "epoch": 3.0, + "learning_rate": 3.505439750788087e-05, + "loss": 1.2044, + "step": 4999000 + }, + { + "epoch": 3.0, + "learning_rate": 3.505229754232031e-05, + "loss": 1.1751, + "step": 4999500 + }, + { + "epoch": 3.0, + "learning_rate": 3.505019757675974e-05, + "loss": 1.2131, + "step": 5000000 + }, + { + "epoch": 3.0, + "eval_loss": 1.148377776145935, + "eval_runtime": 1098.3919, + "eval_samples_per_second": 479.537, + "eval_steps_per_second": 79.923, + "step": 5000000 + }, + { + "epoch": 3.0, + "learning_rate": 3.5048097611199174e-05, + "loss": 1.1856, + "step": 5000500 + }, + { + "epoch": 3.0, + "learning_rate": 3.504599764563861e-05, + "loss": 1.2012, + "step": 5001000 + }, + { + "epoch": 3.0, + "learning_rate": 3.504389768007805e-05, + "loss": 1.2196, + "step": 5001500 + }, + { + "epoch": 3.0, + "learning_rate": 3.504180191444861e-05, + "loss": 1.194, + "step": 5002000 + }, + { + "epoch": 3.0, + "learning_rate": 3.5039701948888034e-05, + "loss": 1.1778, + "step": 5002500 + }, + { + "epoch": 3.0, + "learning_rate": 3.503760198332747e-05, + "loss": 1.1772, + "step": 5003000 + }, + { + "epoch": 3.0, + "learning_rate": 3.503550201776691e-05, + "loss": 1.185, + "step": 5003500 + }, + { + "epoch": 3.0, + "learning_rate": 3.503340205220634e-05, + "loss": 1.2015, + "step": 5004000 + }, + { + "epoch": 3.0, + "learning_rate": 3.5031302086645775e-05, + "loss": 1.1376, + "step": 5004500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5029202121085215e-05, + "loss": 1.1188, + "step": 5005000 + }, + { + "epoch": 3.0, + "learning_rate": 3.502710215552465e-05, + "loss": 1.139, + "step": 5005500 + }, + { + "epoch": 3.0, + "learning_rate": 3.502500218996408e-05, + "loss": 1.1583, + "step": 5006000 + }, + { + "epoch": 3.0, + "learning_rate": 3.502290222440352e-05, + "loss": 1.1542, + "step": 5006500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5020802258842956e-05, + "loss": 1.1757, + "step": 5007000 + }, + { + "epoch": 3.0, + "learning_rate": 3.5018702293282396e-05, + "loss": 1.1799, + "step": 5007500 + }, + { + "epoch": 3.0, + "learning_rate": 3.501660232772182e-05, + "loss": 1.1484, + "step": 5008000 + }, + { + "epoch": 3.0, + "learning_rate": 3.5014502362161256e-05, + "loss": 1.1543, + "step": 5008500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5012402396600696e-05, + "loss": 1.125, + "step": 5009000 + }, + { + "epoch": 3.0, + "learning_rate": 3.501030243104013e-05, + "loss": 1.147, + "step": 5009500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5008206665410684e-05, + "loss": 1.1666, + "step": 5010000 + }, + { + "epoch": 3.0, + "learning_rate": 3.500610669985012e-05, + "loss": 1.1843, + "step": 5010500 + }, + { + "epoch": 3.0, + "learning_rate": 3.500400673428956e-05, + "loss": 1.1633, + "step": 5011000 + }, + { + "epoch": 3.0, + "learning_rate": 3.500190676872899e-05, + "loss": 1.1559, + "step": 5011500 + }, + { + "epoch": 3.0, + "learning_rate": 3.499981100309955e-05, + "loss": 1.1695, + "step": 5012000 + }, + { + "epoch": 3.01, + "learning_rate": 3.499771103753898e-05, + "loss": 1.1516, + "step": 5012500 + }, + { + "epoch": 3.01, + "learning_rate": 3.499561107197842e-05, + "loss": 1.1696, + "step": 5013000 + }, + { + "epoch": 3.01, + "learning_rate": 3.499351110641785e-05, + "loss": 1.1501, + "step": 5013500 + }, + { + "epoch": 3.01, + "learning_rate": 3.499141534078841e-05, + "loss": 1.152, + "step": 5014000 + }, + { + "epoch": 3.01, + "learning_rate": 3.498931537522785e-05, + "loss": 1.1424, + "step": 5014500 + }, + { + "epoch": 3.01, + "learning_rate": 3.498721540966728e-05, + "loss": 1.1464, + "step": 5015000 + }, + { + "epoch": 3.01, + "learning_rate": 3.498511544410671e-05, + "loss": 1.1582, + "step": 5015500 + }, + { + "epoch": 3.01, + "learning_rate": 3.498301547854615e-05, + "loss": 1.1566, + "step": 5016000 + }, + { + "epoch": 3.01, + "learning_rate": 3.498091971291671e-05, + "loss": 1.1515, + "step": 5016500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4978819747356146e-05, + "loss": 1.1634, + "step": 5017000 + }, + { + "epoch": 3.01, + "learning_rate": 3.497671978179557e-05, + "loss": 1.1652, + "step": 5017500 + }, + { + "epoch": 3.01, + "learning_rate": 3.497462401616613e-05, + "loss": 1.1473, + "step": 5018000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4972524050605573e-05, + "loss": 1.1453, + "step": 5018500 + }, + { + "epoch": 3.01, + "learning_rate": 3.497042408504501e-05, + "loss": 1.161, + "step": 5019000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4968324119484434e-05, + "loss": 1.18, + "step": 5019500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4966224153923874e-05, + "loss": 1.1652, + "step": 5020000 + }, + { + "epoch": 3.01, + "learning_rate": 3.496412418836331e-05, + "loss": 1.1181, + "step": 5020500 + }, + { + "epoch": 3.01, + "learning_rate": 3.496202422280275e-05, + "loss": 1.143, + "step": 5021000 + }, + { + "epoch": 3.01, + "learning_rate": 3.495992425724218e-05, + "loss": 1.1664, + "step": 5021500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4957824291681614e-05, + "loss": 1.1568, + "step": 5022000 + }, + { + "epoch": 3.01, + "learning_rate": 3.495572852605217e-05, + "loss": 1.1728, + "step": 5022500 + }, + { + "epoch": 3.01, + "learning_rate": 3.495362856049161e-05, + "loss": 1.134, + "step": 5023000 + }, + { + "epoch": 3.01, + "learning_rate": 3.495152859493104e-05, + "loss": 1.1872, + "step": 5023500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4949428629370475e-05, + "loss": 1.1649, + "step": 5024000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4947337063672156e-05, + "loss": 1.1377, + "step": 5024500 + }, + { + "epoch": 3.01, + "learning_rate": 3.494523709811159e-05, + "loss": 1.1376, + "step": 5025000 + }, + { + "epoch": 3.01, + "learning_rate": 3.494313713255103e-05, + "loss": 1.1705, + "step": 5025500 + }, + { + "epoch": 3.01, + "learning_rate": 3.494103716699046e-05, + "loss": 1.1507, + "step": 5026000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4938937201429896e-05, + "loss": 1.1481, + "step": 5026500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4936841435800457e-05, + "loss": 1.1766, + "step": 5027000 + }, + { + "epoch": 3.01, + "learning_rate": 3.493474147023989e-05, + "loss": 1.1501, + "step": 5027500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4932641504679324e-05, + "loss": 1.1493, + "step": 5028000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4930541539118764e-05, + "loss": 1.1341, + "step": 5028500 + }, + { + "epoch": 3.02, + "learning_rate": 3.492844157355819e-05, + "loss": 1.1606, + "step": 5029000 + }, + { + "epoch": 3.02, + "learning_rate": 3.492634580792875e-05, + "loss": 1.1669, + "step": 5029500 + }, + { + "epoch": 3.02, + "learning_rate": 3.4924245842368184e-05, + "loss": 1.1753, + "step": 5030000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4922145876807624e-05, + "loss": 1.17, + "step": 5030500 + }, + { + "epoch": 3.02, + "learning_rate": 3.492004591124706e-05, + "loss": 1.1479, + "step": 5031000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4917945945686485e-05, + "loss": 1.1393, + "step": 5031500 + }, + { + "epoch": 3.02, + "learning_rate": 3.4915845980125925e-05, + "loss": 1.1538, + "step": 5032000 + }, + { + "epoch": 3.02, + "learning_rate": 3.491374601456536e-05, + "loss": 1.149, + "step": 5032500 + }, + { + "epoch": 3.02, + "learning_rate": 3.491164604900479e-05, + "loss": 1.1827, + "step": 5033000 + }, + { + "epoch": 3.02, + "learning_rate": 3.490955028337535e-05, + "loss": 1.1527, + "step": 5033500 + }, + { + "epoch": 3.02, + "learning_rate": 3.4907450317814786e-05, + "loss": 1.1285, + "step": 5034000 + }, + { + "epoch": 3.02, + "learning_rate": 3.490535035225422e-05, + "loss": 1.1637, + "step": 5034500 + }, + { + "epoch": 3.02, + "learning_rate": 3.490325038669366e-05, + "loss": 1.1503, + "step": 5035000 + }, + { + "epoch": 3.02, + "learning_rate": 3.490115042113309e-05, + "loss": 1.1693, + "step": 5035500 + }, + { + "epoch": 3.02, + "learning_rate": 3.489905465550365e-05, + "loss": 1.1443, + "step": 5036000 + }, + { + "epoch": 3.02, + "learning_rate": 3.489695468994308e-05, + "loss": 1.188, + "step": 5036500 + }, + { + "epoch": 3.02, + "learning_rate": 3.489485472438252e-05, + "loss": 1.1658, + "step": 5037000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4892754758821953e-05, + "loss": 1.1719, + "step": 5037500 + }, + { + "epoch": 3.02, + "learning_rate": 3.489065479326139e-05, + "loss": 1.1546, + "step": 5038000 + }, + { + "epoch": 3.02, + "learning_rate": 3.488855482770083e-05, + "loss": 1.158, + "step": 5038500 + }, + { + "epoch": 3.02, + "learning_rate": 3.488645486214026e-05, + "loss": 1.1637, + "step": 5039000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4884354896579694e-05, + "loss": 1.1619, + "step": 5039500 + }, + { + "epoch": 3.02, + "learning_rate": 3.488225913095025e-05, + "loss": 1.1549, + "step": 5040000 + }, + { + "epoch": 3.02, + "learning_rate": 3.488015916538969e-05, + "loss": 1.1661, + "step": 5040500 + }, + { + "epoch": 3.02, + "learning_rate": 3.487806339976024e-05, + "loss": 1.1758, + "step": 5041000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4875963434199675e-05, + "loss": 1.1814, + "step": 5041500 + }, + { + "epoch": 3.02, + "learning_rate": 3.4873863468639115e-05, + "loss": 1.1791, + "step": 5042000 + }, + { + "epoch": 3.02, + "learning_rate": 3.487176350307855e-05, + "loss": 1.1868, + "step": 5042500 + }, + { + "epoch": 3.02, + "learning_rate": 3.486966353751798e-05, + "loss": 1.1497, + "step": 5043000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4867567771888536e-05, + "loss": 1.1821, + "step": 5043500 + }, + { + "epoch": 3.02, + "learning_rate": 3.4865467806327976e-05, + "loss": 1.1656, + "step": 5044000 + }, + { + "epoch": 3.02, + "learning_rate": 3.486336784076741e-05, + "loss": 1.1687, + "step": 5044500 + }, + { + "epoch": 3.02, + "learning_rate": 3.486126787520684e-05, + "loss": 1.1724, + "step": 5045000 + }, + { + "epoch": 3.02, + "learning_rate": 3.485916790964628e-05, + "loss": 1.1566, + "step": 5045500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4857067944085716e-05, + "loss": 1.1816, + "step": 5046000 + }, + { + "epoch": 3.03, + "learning_rate": 3.485496797852515e-05, + "loss": 1.1953, + "step": 5046500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4852872212895704e-05, + "loss": 1.1385, + "step": 5047000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4850772247335144e-05, + "loss": 1.1679, + "step": 5047500 + }, + { + "epoch": 3.03, + "learning_rate": 3.484867228177458e-05, + "loss": 1.1657, + "step": 5048000 + }, + { + "epoch": 3.03, + "learning_rate": 3.484657231621401e-05, + "loss": 1.143, + "step": 5048500 + }, + { + "epoch": 3.03, + "learning_rate": 3.484447235065345e-05, + "loss": 1.1685, + "step": 5049000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4842372385092884e-05, + "loss": 1.1395, + "step": 5049500 + }, + { + "epoch": 3.03, + "learning_rate": 3.484027241953232e-05, + "loss": 1.1605, + "step": 5050000 + }, + { + "epoch": 3.03, + "learning_rate": 3.483817245397176e-05, + "loss": 1.1602, + "step": 5050500 + }, + { + "epoch": 3.03, + "learning_rate": 3.483607668834231e-05, + "loss": 1.1565, + "step": 5051000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4833976722781745e-05, + "loss": 1.1485, + "step": 5051500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4831876757221185e-05, + "loss": 1.1394, + "step": 5052000 + }, + { + "epoch": 3.03, + "learning_rate": 3.482977679166062e-05, + "loss": 1.1427, + "step": 5052500 + }, + { + "epoch": 3.03, + "learning_rate": 3.482768102603117e-05, + "loss": 1.1716, + "step": 5053000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4825581060470606e-05, + "loss": 1.1655, + "step": 5053500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4823481094910046e-05, + "loss": 1.1793, + "step": 5054000 + }, + { + "epoch": 3.03, + "learning_rate": 3.482138112934948e-05, + "loss": 1.1777, + "step": 5054500 + }, + { + "epoch": 3.03, + "learning_rate": 3.481928536372003e-05, + "loss": 1.1851, + "step": 5055000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4817185398159467e-05, + "loss": 1.1471, + "step": 5055500 + }, + { + "epoch": 3.03, + "learning_rate": 3.481508963253003e-05, + "loss": 1.1726, + "step": 5056000 + }, + { + "epoch": 3.03, + "learning_rate": 3.481298966696946e-05, + "loss": 1.1588, + "step": 5056500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4810889701408894e-05, + "loss": 1.1643, + "step": 5057000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4808789735848334e-05, + "loss": 1.1906, + "step": 5057500 + }, + { + "epoch": 3.03, + "learning_rate": 3.480668977028777e-05, + "loss": 1.1519, + "step": 5058000 + }, + { + "epoch": 3.03, + "learning_rate": 3.48045898047272e-05, + "loss": 1.1751, + "step": 5058500 + }, + { + "epoch": 3.03, + "learning_rate": 3.480248983916664e-05, + "loss": 1.1854, + "step": 5059000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4800389873606075e-05, + "loss": 1.1599, + "step": 5059500 + }, + { + "epoch": 3.03, + "learning_rate": 3.479829410797663e-05, + "loss": 1.1621, + "step": 5060000 + }, + { + "epoch": 3.03, + "learning_rate": 3.479619834234718e-05, + "loss": 1.1936, + "step": 5060500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4794098376786615e-05, + "loss": 1.1335, + "step": 5061000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4791998411226056e-05, + "loss": 1.1487, + "step": 5061500 + }, + { + "epoch": 3.03, + "learning_rate": 3.478989844566549e-05, + "loss": 1.1465, + "step": 5062000 + }, + { + "epoch": 3.04, + "learning_rate": 3.478779848010492e-05, + "loss": 1.1591, + "step": 5062500 + }, + { + "epoch": 3.04, + "learning_rate": 3.478569851454436e-05, + "loss": 1.1636, + "step": 5063000 + }, + { + "epoch": 3.04, + "learning_rate": 3.4783598548983796e-05, + "loss": 1.145, + "step": 5063500 + }, + { + "epoch": 3.04, + "learning_rate": 3.478149858342323e-05, + "loss": 1.1814, + "step": 5064000 + }, + { + "epoch": 3.04, + "learning_rate": 3.477940281779379e-05, + "loss": 1.1652, + "step": 5064500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4777307052164344e-05, + "loss": 1.1713, + "step": 5065000 + }, + { + "epoch": 3.04, + "learning_rate": 3.477520708660378e-05, + "loss": 1.1264, + "step": 5065500 + }, + { + "epoch": 3.04, + "learning_rate": 3.477310712104321e-05, + "loss": 1.1617, + "step": 5066000 + }, + { + "epoch": 3.04, + "learning_rate": 3.477100715548265e-05, + "loss": 1.1768, + "step": 5066500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4768907189922084e-05, + "loss": 1.1809, + "step": 5067000 + }, + { + "epoch": 3.04, + "learning_rate": 3.476680722436152e-05, + "loss": 1.1635, + "step": 5067500 + }, + { + "epoch": 3.04, + "learning_rate": 3.476470725880096e-05, + "loss": 1.1727, + "step": 5068000 + }, + { + "epoch": 3.04, + "learning_rate": 3.476261149317151e-05, + "loss": 1.185, + "step": 5068500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4760511527610945e-05, + "loss": 1.1826, + "step": 5069000 + }, + { + "epoch": 3.04, + "learning_rate": 3.475841156205038e-05, + "loss": 1.1794, + "step": 5069500 + }, + { + "epoch": 3.04, + "learning_rate": 3.475631579642094e-05, + "loss": 1.1416, + "step": 5070000 + }, + { + "epoch": 3.04, + "learning_rate": 3.475421583086037e-05, + "loss": 1.1545, + "step": 5070500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4752115865299806e-05, + "loss": 1.1677, + "step": 5071000 + }, + { + "epoch": 3.04, + "learning_rate": 3.4750015899739246e-05, + "loss": 1.1722, + "step": 5071500 + }, + { + "epoch": 3.04, + "learning_rate": 3.474791593417868e-05, + "loss": 1.1362, + "step": 5072000 + }, + { + "epoch": 3.04, + "learning_rate": 3.474581596861811e-05, + "loss": 1.1738, + "step": 5072500 + }, + { + "epoch": 3.04, + "learning_rate": 3.474371600305755e-05, + "loss": 1.1662, + "step": 5073000 + }, + { + "epoch": 3.04, + "learning_rate": 3.4741616037496986e-05, + "loss": 1.1414, + "step": 5073500 + }, + { + "epoch": 3.04, + "learning_rate": 3.473951607193642e-05, + "loss": 1.1692, + "step": 5074000 + }, + { + "epoch": 3.04, + "learning_rate": 3.473741610637586e-05, + "loss": 1.163, + "step": 5074500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4735316140815294e-05, + "loss": 1.1498, + "step": 5075000 + }, + { + "epoch": 3.04, + "learning_rate": 3.473321617525472e-05, + "loss": 1.1321, + "step": 5075500 + }, + { + "epoch": 3.04, + "learning_rate": 3.473112040962528e-05, + "loss": 1.1773, + "step": 5076000 + }, + { + "epoch": 3.04, + "learning_rate": 3.4729024643995834e-05, + "loss": 1.1429, + "step": 5076500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4726928878366395e-05, + "loss": 1.1393, + "step": 5077000 + }, + { + "epoch": 3.04, + "learning_rate": 3.472482891280583e-05, + "loss": 1.1425, + "step": 5077500 + }, + { + "epoch": 3.04, + "learning_rate": 3.472272894724526e-05, + "loss": 1.159, + "step": 5078000 + }, + { + "epoch": 3.04, + "learning_rate": 3.47206289816847e-05, + "loss": 1.1996, + "step": 5078500 + }, + { + "epoch": 3.05, + "learning_rate": 3.4718529016124135e-05, + "loss": 1.1572, + "step": 5079000 + }, + { + "epoch": 3.05, + "learning_rate": 3.471642905056357e-05, + "loss": 1.1635, + "step": 5079500 + }, + { + "epoch": 3.05, + "learning_rate": 3.471432908500301e-05, + "loss": 1.1472, + "step": 5080000 + }, + { + "epoch": 3.05, + "learning_rate": 3.471222911944244e-05, + "loss": 1.1643, + "step": 5080500 + }, + { + "epoch": 3.05, + "learning_rate": 3.4710129153881876e-05, + "loss": 1.1504, + "step": 5081000 + }, + { + "epoch": 3.05, + "learning_rate": 3.4708029188321316e-05, + "loss": 1.1789, + "step": 5081500 + }, + { + "epoch": 3.05, + "learning_rate": 3.470593342269187e-05, + "loss": 1.1777, + "step": 5082000 + }, + { + "epoch": 3.05, + "learning_rate": 3.47038334571313e-05, + "loss": 1.1615, + "step": 5082500 + }, + { + "epoch": 3.05, + "learning_rate": 3.4701733491570736e-05, + "loss": 1.178, + "step": 5083000 + }, + { + "epoch": 3.05, + "learning_rate": 3.469963352601018e-05, + "loss": 1.1391, + "step": 5083500 + }, + { + "epoch": 3.05, + "learning_rate": 3.469753356044961e-05, + "loss": 1.178, + "step": 5084000 + }, + { + "epoch": 3.05, + "learning_rate": 3.4695433594889044e-05, + "loss": 1.1655, + "step": 5084500 + }, + { + "epoch": 3.05, + "learning_rate": 3.469333362932848e-05, + "loss": 1.1338, + "step": 5085000 + }, + { + "epoch": 3.05, + "learning_rate": 3.469123366376791e-05, + "loss": 1.1757, + "step": 5085500 + }, + { + "epoch": 3.05, + "learning_rate": 3.468913789813847e-05, + "loss": 1.1656, + "step": 5086000 + }, + { + "epoch": 3.05, + "learning_rate": 3.468703793257791e-05, + "loss": 1.1735, + "step": 5086500 + }, + { + "epoch": 3.05, + "learning_rate": 3.468493796701734e-05, + "loss": 1.1662, + "step": 5087000 + }, + { + "epoch": 3.05, + "learning_rate": 3.468283800145677e-05, + "loss": 1.1587, + "step": 5087500 + }, + { + "epoch": 3.05, + "learning_rate": 3.468074223582733e-05, + "loss": 1.1637, + "step": 5088000 + }, + { + "epoch": 3.05, + "learning_rate": 3.4678646470197885e-05, + "loss": 1.156, + "step": 5088500 + }, + { + "epoch": 3.05, + "learning_rate": 3.4676546504637325e-05, + "loss": 1.1744, + "step": 5089000 + }, + { + "epoch": 3.05, + "learning_rate": 3.467444653907676e-05, + "loss": 1.1842, + "step": 5089500 + }, + { + "epoch": 3.05, + "learning_rate": 3.467234657351619e-05, + "loss": 1.1882, + "step": 5090000 + }, + { + "epoch": 3.05, + "learning_rate": 3.467024660795563e-05, + "loss": 1.1722, + "step": 5090500 + }, + { + "epoch": 3.05, + "learning_rate": 3.4668146642395066e-05, + "loss": 1.1785, + "step": 5091000 + }, + { + "epoch": 3.05, + "learning_rate": 3.466605087676562e-05, + "loss": 1.1747, + "step": 5091500 + }, + { + "epoch": 3.05, + "learning_rate": 3.466395091120505e-05, + "loss": 1.1917, + "step": 5092000 + }, + { + "epoch": 3.05, + "learning_rate": 3.466185094564449e-05, + "loss": 1.1643, + "step": 5092500 + }, + { + "epoch": 3.05, + "learning_rate": 3.465975098008393e-05, + "loss": 1.1629, + "step": 5093000 + }, + { + "epoch": 3.05, + "learning_rate": 3.465765101452337e-05, + "loss": 1.1315, + "step": 5093500 + }, + { + "epoch": 3.05, + "learning_rate": 3.46555510489628e-05, + "loss": 1.1755, + "step": 5094000 + }, + { + "epoch": 3.05, + "learning_rate": 3.4653455283333354e-05, + "loss": 1.1576, + "step": 5094500 + }, + { + "epoch": 3.05, + "learning_rate": 3.465135531777279e-05, + "loss": 1.1908, + "step": 5095000 + }, + { + "epoch": 3.05, + "learning_rate": 3.464925535221223e-05, + "loss": 1.1754, + "step": 5095500 + }, + { + "epoch": 3.06, + "learning_rate": 3.464715538665166e-05, + "loss": 1.1544, + "step": 5096000 + }, + { + "epoch": 3.06, + "learning_rate": 3.4645059621022215e-05, + "loss": 1.1655, + "step": 5096500 + }, + { + "epoch": 3.06, + "learning_rate": 3.464295965546165e-05, + "loss": 1.1512, + "step": 5097000 + }, + { + "epoch": 3.06, + "learning_rate": 3.464085968990109e-05, + "loss": 1.1611, + "step": 5097500 + }, + { + "epoch": 3.06, + "learning_rate": 3.463875972434052e-05, + "loss": 1.1394, + "step": 5098000 + }, + { + "epoch": 3.06, + "learning_rate": 3.4636659758779955e-05, + "loss": 1.159, + "step": 5098500 + }, + { + "epoch": 3.06, + "learning_rate": 3.463455979321939e-05, + "loss": 1.1599, + "step": 5099000 + }, + { + "epoch": 3.06, + "learning_rate": 3.463246402758995e-05, + "loss": 1.1619, + "step": 5099500 + }, + { + "epoch": 3.06, + "learning_rate": 3.463036406202938e-05, + "loss": 1.1291, + "step": 5100000 + }, + { + "epoch": 3.06, + "eval_loss": 1.1477725505828857, + "eval_runtime": 1108.108, + "eval_samples_per_second": 475.333, + "eval_steps_per_second": 79.222, + "step": 5100000 + }, + { + "epoch": 3.06, + "learning_rate": 3.462826409646882e-05, + "loss": 1.1777, + "step": 5100500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4626164130908256e-05, + "loss": 1.151, + "step": 5101000 + }, + { + "epoch": 3.06, + "learning_rate": 3.462406416534768e-05, + "loss": 1.1536, + "step": 5101500 + }, + { + "epoch": 3.06, + "learning_rate": 3.462196419978712e-05, + "loss": 1.1529, + "step": 5102000 + }, + { + "epoch": 3.06, + "learning_rate": 3.461986423422656e-05, + "loss": 1.1782, + "step": 5102500 + }, + { + "epoch": 3.06, + "learning_rate": 3.461776426866599e-05, + "loss": 1.1513, + "step": 5103000 + }, + { + "epoch": 3.06, + "learning_rate": 3.461566850303655e-05, + "loss": 1.1653, + "step": 5103500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4613568537475984e-05, + "loss": 1.1866, + "step": 5104000 + }, + { + "epoch": 3.06, + "learning_rate": 3.4611472771846544e-05, + "loss": 1.1638, + "step": 5104500 + }, + { + "epoch": 3.06, + "learning_rate": 3.460937280628598e-05, + "loss": 1.1641, + "step": 5105000 + }, + { + "epoch": 3.06, + "learning_rate": 3.460727284072541e-05, + "loss": 1.1569, + "step": 5105500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4605172875164845e-05, + "loss": 1.1469, + "step": 5106000 + }, + { + "epoch": 3.06, + "learning_rate": 3.460307290960428e-05, + "loss": 1.1823, + "step": 5106500 + }, + { + "epoch": 3.06, + "learning_rate": 3.460097294404372e-05, + "loss": 1.1686, + "step": 5107000 + }, + { + "epoch": 3.06, + "learning_rate": 3.459887297848315e-05, + "loss": 1.158, + "step": 5107500 + }, + { + "epoch": 3.06, + "learning_rate": 3.459677721285371e-05, + "loss": 1.1699, + "step": 5108000 + }, + { + "epoch": 3.06, + "learning_rate": 3.459467724729314e-05, + "loss": 1.1607, + "step": 5108500 + }, + { + "epoch": 3.06, + "learning_rate": 3.459257728173258e-05, + "loss": 1.1454, + "step": 5109000 + }, + { + "epoch": 3.06, + "learning_rate": 3.459047731617201e-05, + "loss": 1.1684, + "step": 5109500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4588377350611446e-05, + "loss": 1.162, + "step": 5110000 + }, + { + "epoch": 3.06, + "learning_rate": 3.4586277385050886e-05, + "loss": 1.1588, + "step": 5110500 + }, + { + "epoch": 3.06, + "learning_rate": 3.458417741949032e-05, + "loss": 1.1503, + "step": 5111000 + }, + { + "epoch": 3.06, + "learning_rate": 3.458207745392975e-05, + "loss": 1.1632, + "step": 5111500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4579985888231434e-05, + "loss": 1.1492, + "step": 5112000 + }, + { + "epoch": 3.07, + "learning_rate": 3.457788592267087e-05, + "loss": 1.1504, + "step": 5112500 + }, + { + "epoch": 3.07, + "learning_rate": 3.457578595711031e-05, + "loss": 1.1795, + "step": 5113000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4573685991549734e-05, + "loss": 1.1777, + "step": 5113500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4571586025989174e-05, + "loss": 1.1716, + "step": 5114000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4569490260359735e-05, + "loss": 1.1592, + "step": 5114500 + }, + { + "epoch": 3.07, + "learning_rate": 3.456739029479917e-05, + "loss": 1.1643, + "step": 5115000 + }, + { + "epoch": 3.07, + "learning_rate": 3.45652903292386e-05, + "loss": 1.1944, + "step": 5115500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4563190363678035e-05, + "loss": 1.1755, + "step": 5116000 + }, + { + "epoch": 3.07, + "learning_rate": 3.456109039811747e-05, + "loss": 1.1651, + "step": 5116500 + }, + { + "epoch": 3.07, + "learning_rate": 3.45589904325569e-05, + "loss": 1.1571, + "step": 5117000 + }, + { + "epoch": 3.07, + "learning_rate": 3.455689046699634e-05, + "loss": 1.1635, + "step": 5117500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4554794701366896e-05, + "loss": 1.1843, + "step": 5118000 + }, + { + "epoch": 3.07, + "learning_rate": 3.455269473580633e-05, + "loss": 1.1745, + "step": 5118500 + }, + { + "epoch": 3.07, + "learning_rate": 3.455059477024576e-05, + "loss": 1.1586, + "step": 5119000 + }, + { + "epoch": 3.07, + "learning_rate": 3.45484948046852e-05, + "loss": 1.1568, + "step": 5119500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4546394839124636e-05, + "loss": 1.1885, + "step": 5120000 + }, + { + "epoch": 3.07, + "learning_rate": 3.454429487356407e-05, + "loss": 1.1882, + "step": 5120500 + }, + { + "epoch": 3.07, + "learning_rate": 3.454219490800351e-05, + "loss": 1.1538, + "step": 5121000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4540094942442944e-05, + "loss": 1.1836, + "step": 5121500 + }, + { + "epoch": 3.07, + "learning_rate": 3.45379991768135e-05, + "loss": 1.1663, + "step": 5122000 + }, + { + "epoch": 3.07, + "learning_rate": 3.453590341118406e-05, + "loss": 1.118, + "step": 5122500 + }, + { + "epoch": 3.07, + "learning_rate": 3.453380344562349e-05, + "loss": 1.1574, + "step": 5123000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4531703480062924e-05, + "loss": 1.1688, + "step": 5123500 + }, + { + "epoch": 3.07, + "learning_rate": 3.452960351450236e-05, + "loss": 1.1644, + "step": 5124000 + }, + { + "epoch": 3.07, + "learning_rate": 3.45275035489418e-05, + "loss": 1.1635, + "step": 5124500 + }, + { + "epoch": 3.07, + "learning_rate": 3.452540358338123e-05, + "loss": 1.149, + "step": 5125000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4523307817751785e-05, + "loss": 1.1669, + "step": 5125500 + }, + { + "epoch": 3.07, + "learning_rate": 3.452120785219122e-05, + "loss": 1.1293, + "step": 5126000 + }, + { + "epoch": 3.07, + "learning_rate": 3.451910788663066e-05, + "loss": 1.1639, + "step": 5126500 + }, + { + "epoch": 3.07, + "learning_rate": 3.451700792107009e-05, + "loss": 1.1415, + "step": 5127000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4514907955509526e-05, + "loss": 1.1787, + "step": 5127500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4512807989948966e-05, + "loss": 1.1856, + "step": 5128000 + }, + { + "epoch": 3.07, + "learning_rate": 3.45107080243884e-05, + "loss": 1.1218, + "step": 5128500 + }, + { + "epoch": 3.08, + "learning_rate": 3.450860805882783e-05, + "loss": 1.1869, + "step": 5129000 + }, + { + "epoch": 3.08, + "learning_rate": 3.450651229319839e-05, + "loss": 1.1676, + "step": 5129500 + }, + { + "epoch": 3.08, + "learning_rate": 3.450441652756895e-05, + "loss": 1.1703, + "step": 5130000 + }, + { + "epoch": 3.08, + "learning_rate": 3.450231656200838e-05, + "loss": 1.1621, + "step": 5130500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4500216596447814e-05, + "loss": 1.1605, + "step": 5131000 + }, + { + "epoch": 3.08, + "learning_rate": 3.4498116630887254e-05, + "loss": 1.1544, + "step": 5131500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4496020865257814e-05, + "loss": 1.1684, + "step": 5132000 + }, + { + "epoch": 3.08, + "learning_rate": 3.449392089969724e-05, + "loss": 1.1565, + "step": 5132500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4491820934136674e-05, + "loss": 1.1775, + "step": 5133000 + }, + { + "epoch": 3.08, + "learning_rate": 3.4489720968576115e-05, + "loss": 1.1334, + "step": 5133500 + }, + { + "epoch": 3.08, + "learning_rate": 3.448762100301555e-05, + "loss": 1.1533, + "step": 5134000 + }, + { + "epoch": 3.08, + "learning_rate": 3.448552103745498e-05, + "loss": 1.1779, + "step": 5134500 + }, + { + "epoch": 3.08, + "learning_rate": 3.448342107189442e-05, + "loss": 1.1855, + "step": 5135000 + }, + { + "epoch": 3.08, + "learning_rate": 3.4481321106333855e-05, + "loss": 1.1363, + "step": 5135500 + }, + { + "epoch": 3.08, + "learning_rate": 3.447922534070441e-05, + "loss": 1.1588, + "step": 5136000 + }, + { + "epoch": 3.08, + "learning_rate": 3.447712957507497e-05, + "loss": 1.1472, + "step": 5136500 + }, + { + "epoch": 3.08, + "learning_rate": 3.44750296095144e-05, + "loss": 1.1904, + "step": 5137000 + }, + { + "epoch": 3.08, + "learning_rate": 3.4472929643953836e-05, + "loss": 1.1813, + "step": 5137500 + }, + { + "epoch": 3.08, + "learning_rate": 3.447082967839327e-05, + "loss": 1.1553, + "step": 5138000 + }, + { + "epoch": 3.08, + "learning_rate": 3.446872971283271e-05, + "loss": 1.1512, + "step": 5138500 + }, + { + "epoch": 3.08, + "learning_rate": 3.446662974727214e-05, + "loss": 1.186, + "step": 5139000 + }, + { + "epoch": 3.08, + "learning_rate": 3.446452978171158e-05, + "loss": 1.1542, + "step": 5139500 + }, + { + "epoch": 3.08, + "learning_rate": 3.446242981615102e-05, + "loss": 1.1547, + "step": 5140000 + }, + { + "epoch": 3.08, + "learning_rate": 3.446033405052157e-05, + "loss": 1.164, + "step": 5140500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4458234084961004e-05, + "loss": 1.1528, + "step": 5141000 + }, + { + "epoch": 3.08, + "learning_rate": 3.4456138319331564e-05, + "loss": 1.172, + "step": 5141500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4454038353771e-05, + "loss": 1.1634, + "step": 5142000 + }, + { + "epoch": 3.08, + "learning_rate": 3.445193838821043e-05, + "loss": 1.1785, + "step": 5142500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4449838422649865e-05, + "loss": 1.1635, + "step": 5143000 + }, + { + "epoch": 3.08, + "learning_rate": 3.4447738457089305e-05, + "loss": 1.1579, + "step": 5143500 + }, + { + "epoch": 3.08, + "learning_rate": 3.444563849152874e-05, + "loss": 1.1812, + "step": 5144000 + }, + { + "epoch": 3.08, + "learning_rate": 3.444353852596817e-05, + "loss": 1.2038, + "step": 5144500 + }, + { + "epoch": 3.08, + "learning_rate": 3.444143856040761e-05, + "loss": 1.1697, + "step": 5145000 + }, + { + "epoch": 3.08, + "learning_rate": 3.4439342794778166e-05, + "loss": 1.1301, + "step": 5145500 + }, + { + "epoch": 3.09, + "learning_rate": 3.44372428292176e-05, + "loss": 1.1288, + "step": 5146000 + }, + { + "epoch": 3.09, + "learning_rate": 3.443514286365703e-05, + "loss": 1.1832, + "step": 5146500 + }, + { + "epoch": 3.09, + "learning_rate": 3.443304289809647e-05, + "loss": 1.1595, + "step": 5147000 + }, + { + "epoch": 3.09, + "learning_rate": 3.4430947132467026e-05, + "loss": 1.178, + "step": 5147500 + }, + { + "epoch": 3.09, + "learning_rate": 3.442884716690646e-05, + "loss": 1.1671, + "step": 5148000 + }, + { + "epoch": 3.09, + "learning_rate": 3.442674720134589e-05, + "loss": 1.1416, + "step": 5148500 + }, + { + "epoch": 3.09, + "learning_rate": 3.4424647235785334e-05, + "loss": 1.163, + "step": 5149000 + }, + { + "epoch": 3.09, + "learning_rate": 3.442255147015589e-05, + "loss": 1.1784, + "step": 5149500 + }, + { + "epoch": 3.09, + "learning_rate": 3.442045150459532e-05, + "loss": 1.1777, + "step": 5150000 + }, + { + "epoch": 3.09, + "learning_rate": 3.441835153903476e-05, + "loss": 1.1762, + "step": 5150500 + }, + { + "epoch": 3.09, + "learning_rate": 3.441625577340532e-05, + "loss": 1.1757, + "step": 5151000 + }, + { + "epoch": 3.09, + "learning_rate": 3.441415580784475e-05, + "loss": 1.1626, + "step": 5151500 + }, + { + "epoch": 3.09, + "learning_rate": 3.441205584228418e-05, + "loss": 1.1287, + "step": 5152000 + }, + { + "epoch": 3.09, + "learning_rate": 3.440995587672362e-05, + "loss": 1.1468, + "step": 5152500 + }, + { + "epoch": 3.09, + "learning_rate": 3.4407855911163055e-05, + "loss": 1.1755, + "step": 5153000 + }, + { + "epoch": 3.09, + "learning_rate": 3.440575594560249e-05, + "loss": 1.1621, + "step": 5153500 + }, + { + "epoch": 3.09, + "learning_rate": 3.440365598004193e-05, + "loss": 1.1269, + "step": 5154000 + }, + { + "epoch": 3.09, + "learning_rate": 3.440156021441248e-05, + "loss": 1.1528, + "step": 5154500 + }, + { + "epoch": 3.09, + "learning_rate": 3.4399460248851916e-05, + "loss": 1.1714, + "step": 5155000 + }, + { + "epoch": 3.09, + "learning_rate": 3.439736028329135e-05, + "loss": 1.2, + "step": 5155500 + }, + { + "epoch": 3.09, + "learning_rate": 3.439526031773079e-05, + "loss": 1.1606, + "step": 5156000 + }, + { + "epoch": 3.09, + "learning_rate": 3.439316035217022e-05, + "loss": 1.1981, + "step": 5156500 + }, + { + "epoch": 3.09, + "learning_rate": 3.4391060386609656e-05, + "loss": 1.1692, + "step": 5157000 + }, + { + "epoch": 3.09, + "learning_rate": 3.43889604210491e-05, + "loss": 1.1747, + "step": 5157500 + }, + { + "epoch": 3.09, + "learning_rate": 3.438686045548853e-05, + "loss": 1.1719, + "step": 5158000 + }, + { + "epoch": 3.09, + "learning_rate": 3.4384768889790204e-05, + "loss": 1.1672, + "step": 5158500 + }, + { + "epoch": 3.09, + "learning_rate": 3.438266892422964e-05, + "loss": 1.1738, + "step": 5159000 + }, + { + "epoch": 3.09, + "learning_rate": 3.438056895866908e-05, + "loss": 1.1766, + "step": 5159500 + }, + { + "epoch": 3.09, + "learning_rate": 3.437846899310851e-05, + "loss": 1.1598, + "step": 5160000 + }, + { + "epoch": 3.09, + "learning_rate": 3.437637322747907e-05, + "loss": 1.1275, + "step": 5160500 + }, + { + "epoch": 3.09, + "learning_rate": 3.43742732619185e-05, + "loss": 1.1568, + "step": 5161000 + }, + { + "epoch": 3.09, + "learning_rate": 3.437217329635794e-05, + "loss": 1.1494, + "step": 5161500 + }, + { + "epoch": 3.09, + "learning_rate": 3.437007333079737e-05, + "loss": 1.1419, + "step": 5162000 + }, + { + "epoch": 3.1, + "learning_rate": 3.4367973365236805e-05, + "loss": 1.1695, + "step": 5162500 + }, + { + "epoch": 3.1, + "learning_rate": 3.4365873399676245e-05, + "loss": 1.1782, + "step": 5163000 + }, + { + "epoch": 3.1, + "learning_rate": 3.436377343411568e-05, + "loss": 1.1773, + "step": 5163500 + }, + { + "epoch": 3.1, + "learning_rate": 3.436167346855512e-05, + "loss": 1.1414, + "step": 5164000 + }, + { + "epoch": 3.1, + "learning_rate": 3.435957350299455e-05, + "loss": 1.1794, + "step": 5164500 + }, + { + "epoch": 3.1, + "learning_rate": 3.4357473537433986e-05, + "loss": 1.1836, + "step": 5165000 + }, + { + "epoch": 3.1, + "learning_rate": 3.435537777180454e-05, + "loss": 1.1359, + "step": 5165500 + }, + { + "epoch": 3.1, + "learning_rate": 3.435327780624398e-05, + "loss": 1.1512, + "step": 5166000 + }, + { + "epoch": 3.1, + "learning_rate": 3.435117784068341e-05, + "loss": 1.1679, + "step": 5166500 + }, + { + "epoch": 3.1, + "learning_rate": 3.434907787512285e-05, + "loss": 1.1509, + "step": 5167000 + }, + { + "epoch": 3.1, + "learning_rate": 3.434697790956229e-05, + "loss": 1.1544, + "step": 5167500 + }, + { + "epoch": 3.1, + "learning_rate": 3.434488214393284e-05, + "loss": 1.1652, + "step": 5168000 + }, + { + "epoch": 3.1, + "learning_rate": 3.4342782178372274e-05, + "loss": 1.1348, + "step": 5168500 + }, + { + "epoch": 3.1, + "learning_rate": 3.434068221281171e-05, + "loss": 1.1794, + "step": 5169000 + }, + { + "epoch": 3.1, + "learning_rate": 3.433858224725115e-05, + "loss": 1.1466, + "step": 5169500 + }, + { + "epoch": 3.1, + "learning_rate": 3.433648228169058e-05, + "loss": 1.1726, + "step": 5170000 + }, + { + "epoch": 3.1, + "learning_rate": 3.4334386516061135e-05, + "loss": 1.165, + "step": 5170500 + }, + { + "epoch": 3.1, + "learning_rate": 3.4332286550500575e-05, + "loss": 1.1513, + "step": 5171000 + }, + { + "epoch": 3.1, + "learning_rate": 3.433018658494001e-05, + "loss": 1.1536, + "step": 5171500 + }, + { + "epoch": 3.1, + "learning_rate": 3.432808661937944e-05, + "loss": 1.1542, + "step": 5172000 + }, + { + "epoch": 3.1, + "learning_rate": 3.432598665381888e-05, + "loss": 1.1494, + "step": 5172500 + }, + { + "epoch": 3.1, + "learning_rate": 3.432389508812055e-05, + "loss": 1.1518, + "step": 5173000 + }, + { + "epoch": 3.1, + "learning_rate": 3.432179512255999e-05, + "loss": 1.1486, + "step": 5173500 + }, + { + "epoch": 3.1, + "learning_rate": 3.431969515699942e-05, + "loss": 1.1897, + "step": 5174000 + }, + { + "epoch": 3.1, + "learning_rate": 3.4317595191438856e-05, + "loss": 1.1523, + "step": 5174500 + }, + { + "epoch": 3.1, + "learning_rate": 3.4315495225878296e-05, + "loss": 1.1794, + "step": 5175000 + }, + { + "epoch": 3.1, + "learning_rate": 3.431339526031773e-05, + "loss": 1.1795, + "step": 5175500 + }, + { + "epoch": 3.1, + "learning_rate": 3.431129529475716e-05, + "loss": 1.151, + "step": 5176000 + }, + { + "epoch": 3.1, + "learning_rate": 3.4309195329196604e-05, + "loss": 1.1613, + "step": 5176500 + }, + { + "epoch": 3.1, + "learning_rate": 3.430709536363604e-05, + "loss": 1.1612, + "step": 5177000 + }, + { + "epoch": 3.1, + "learning_rate": 3.430499539807547e-05, + "loss": 1.1539, + "step": 5177500 + }, + { + "epoch": 3.1, + "learning_rate": 3.430289543251491e-05, + "loss": 1.1579, + "step": 5178000 + }, + { + "epoch": 3.1, + "learning_rate": 3.430079546695434e-05, + "loss": 1.1498, + "step": 5178500 + }, + { + "epoch": 3.11, + "learning_rate": 3.42986997013249e-05, + "loss": 1.136, + "step": 5179000 + }, + { + "epoch": 3.11, + "learning_rate": 3.429659973576434e-05, + "loss": 1.169, + "step": 5179500 + }, + { + "epoch": 3.11, + "learning_rate": 3.429449977020377e-05, + "loss": 1.1862, + "step": 5180000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4292399804643205e-05, + "loss": 1.1693, + "step": 5180500 + }, + { + "epoch": 3.11, + "learning_rate": 3.429030403901376e-05, + "loss": 1.1988, + "step": 5181000 + }, + { + "epoch": 3.11, + "learning_rate": 3.42882040734532e-05, + "loss": 1.1741, + "step": 5181500 + }, + { + "epoch": 3.11, + "learning_rate": 3.428610830782375e-05, + "loss": 1.1554, + "step": 5182000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4284008342263186e-05, + "loss": 1.1465, + "step": 5182500 + }, + { + "epoch": 3.11, + "learning_rate": 3.428190837670262e-05, + "loss": 1.168, + "step": 5183000 + }, + { + "epoch": 3.11, + "learning_rate": 3.427980841114206e-05, + "loss": 1.1677, + "step": 5183500 + }, + { + "epoch": 3.11, + "learning_rate": 3.427770844558149e-05, + "loss": 1.1748, + "step": 5184000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4275608480020926e-05, + "loss": 1.1878, + "step": 5184500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4273508514460367e-05, + "loss": 1.1437, + "step": 5185000 + }, + { + "epoch": 3.11, + "learning_rate": 3.427141274883092e-05, + "loss": 1.1731, + "step": 5185500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4269312783270354e-05, + "loss": 1.1823, + "step": 5186000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4267212817709794e-05, + "loss": 1.1693, + "step": 5186500 + }, + { + "epoch": 3.11, + "learning_rate": 3.426511285214923e-05, + "loss": 1.1903, + "step": 5187000 + }, + { + "epoch": 3.11, + "learning_rate": 3.426301288658866e-05, + "loss": 1.1629, + "step": 5187500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4260912921028094e-05, + "loss": 1.1494, + "step": 5188000 + }, + { + "epoch": 3.11, + "learning_rate": 3.425881295546753e-05, + "loss": 1.155, + "step": 5188500 + }, + { + "epoch": 3.11, + "learning_rate": 3.425671298990696e-05, + "loss": 1.1768, + "step": 5189000 + }, + { + "epoch": 3.11, + "learning_rate": 3.425461722427752e-05, + "loss": 1.1818, + "step": 5189500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4252521458648075e-05, + "loss": 1.157, + "step": 5190000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4250421493087515e-05, + "loss": 1.1508, + "step": 5190500 + }, + { + "epoch": 3.11, + "learning_rate": 3.424832572745807e-05, + "loss": 1.1913, + "step": 5191000 + }, + { + "epoch": 3.11, + "learning_rate": 3.42462257618975e-05, + "loss": 1.1816, + "step": 5191500 + }, + { + "epoch": 3.11, + "learning_rate": 3.424412579633694e-05, + "loss": 1.1617, + "step": 5192000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4242025830776376e-05, + "loss": 1.154, + "step": 5192500 + }, + { + "epoch": 3.11, + "learning_rate": 3.423993006514693e-05, + "loss": 1.1697, + "step": 5193000 + }, + { + "epoch": 3.11, + "learning_rate": 3.423783009958636e-05, + "loss": 1.1489, + "step": 5193500 + }, + { + "epoch": 3.11, + "learning_rate": 3.42357301340258e-05, + "loss": 1.1716, + "step": 5194000 + }, + { + "epoch": 3.11, + "learning_rate": 3.423363436839636e-05, + "loss": 1.1734, + "step": 5194500 + }, + { + "epoch": 3.11, + "learning_rate": 3.423153440283579e-05, + "loss": 1.1538, + "step": 5195000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4229434437275224e-05, + "loss": 1.1546, + "step": 5195500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4227334471714664e-05, + "loss": 1.1936, + "step": 5196000 + }, + { + "epoch": 3.12, + "learning_rate": 3.42252345061541e-05, + "loss": 1.1827, + "step": 5196500 + }, + { + "epoch": 3.12, + "learning_rate": 3.422313454059353e-05, + "loss": 1.1716, + "step": 5197000 + }, + { + "epoch": 3.12, + "learning_rate": 3.422103457503297e-05, + "loss": 1.1482, + "step": 5197500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4218934609472405e-05, + "loss": 1.1445, + "step": 5198000 + }, + { + "epoch": 3.12, + "learning_rate": 3.421683464391184e-05, + "loss": 1.1744, + "step": 5198500 + }, + { + "epoch": 3.12, + "learning_rate": 3.421473467835128e-05, + "loss": 1.1511, + "step": 5199000 + }, + { + "epoch": 3.12, + "learning_rate": 3.421263471279071e-05, + "loss": 1.1544, + "step": 5199500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4210534747230145e-05, + "loss": 1.1808, + "step": 5200000 + }, + { + "epoch": 3.12, + "eval_loss": 1.1460847854614258, + "eval_runtime": 1102.6522, + "eval_samples_per_second": 477.685, + "eval_steps_per_second": 79.614, + "step": 5200000 + }, + { + "epoch": 3.12, + "learning_rate": 3.420843478166958e-05, + "loss": 1.1751, + "step": 5200500 + }, + { + "epoch": 3.12, + "learning_rate": 3.420633481610901e-05, + "loss": 1.1673, + "step": 5201000 + }, + { + "epoch": 3.12, + "learning_rate": 3.420423485054845e-05, + "loss": 1.1478, + "step": 5201500 + }, + { + "epoch": 3.12, + "learning_rate": 3.420213908491901e-05, + "loss": 1.1684, + "step": 5202000 + }, + { + "epoch": 3.12, + "learning_rate": 3.420003911935844e-05, + "loss": 1.1746, + "step": 5202500 + }, + { + "epoch": 3.12, + "learning_rate": 3.419793915379787e-05, + "loss": 1.1453, + "step": 5203000 + }, + { + "epoch": 3.12, + "learning_rate": 3.419583918823731e-05, + "loss": 1.1699, + "step": 5203500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4193739222676747e-05, + "loss": 1.1739, + "step": 5204000 + }, + { + "epoch": 3.12, + "learning_rate": 3.41916434570473e-05, + "loss": 1.1912, + "step": 5204500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4189543491486734e-05, + "loss": 1.1381, + "step": 5205000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4187443525926174e-05, + "loss": 1.1931, + "step": 5205500 + }, + { + "epoch": 3.12, + "learning_rate": 3.418534356036561e-05, + "loss": 1.1767, + "step": 5206000 + }, + { + "epoch": 3.12, + "learning_rate": 3.418324359480504e-05, + "loss": 1.169, + "step": 5206500 + }, + { + "epoch": 3.12, + "learning_rate": 3.418114362924448e-05, + "loss": 1.1641, + "step": 5207000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4179043663683914e-05, + "loss": 1.1464, + "step": 5207500 + }, + { + "epoch": 3.12, + "learning_rate": 3.417694369812335e-05, + "loss": 1.1557, + "step": 5208000 + }, + { + "epoch": 3.12, + "learning_rate": 3.417484373256279e-05, + "loss": 1.1831, + "step": 5208500 + }, + { + "epoch": 3.12, + "learning_rate": 3.417274796693334e-05, + "loss": 1.1928, + "step": 5209000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4170652201303895e-05, + "loss": 1.1676, + "step": 5209500 + }, + { + "epoch": 3.12, + "learning_rate": 3.416855223574333e-05, + "loss": 1.1631, + "step": 5210000 + }, + { + "epoch": 3.12, + "learning_rate": 3.416645227018277e-05, + "loss": 1.1481, + "step": 5210500 + }, + { + "epoch": 3.12, + "learning_rate": 3.41643523046222e-05, + "loss": 1.1631, + "step": 5211000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4162252339061636e-05, + "loss": 1.1601, + "step": 5211500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4160152373501076e-05, + "loss": 1.1651, + "step": 5212000 + }, + { + "epoch": 3.13, + "learning_rate": 3.415805240794051e-05, + "loss": 1.1779, + "step": 5212500 + }, + { + "epoch": 3.13, + "learning_rate": 3.415595664231106e-05, + "loss": 1.1369, + "step": 5213000 + }, + { + "epoch": 3.13, + "learning_rate": 3.41538566767505e-05, + "loss": 1.1697, + "step": 5213500 + }, + { + "epoch": 3.13, + "learning_rate": 3.415175671118994e-05, + "loss": 1.1403, + "step": 5214000 + }, + { + "epoch": 3.13, + "learning_rate": 3.414966094556049e-05, + "loss": 1.1545, + "step": 5214500 + }, + { + "epoch": 3.13, + "learning_rate": 3.4147560979999924e-05, + "loss": 1.1466, + "step": 5215000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4145461014439364e-05, + "loss": 1.1723, + "step": 5215500 + }, + { + "epoch": 3.13, + "learning_rate": 3.41433610488788e-05, + "loss": 1.1806, + "step": 5216000 + }, + { + "epoch": 3.13, + "learning_rate": 3.414126108331823e-05, + "loss": 1.1873, + "step": 5216500 + }, + { + "epoch": 3.13, + "learning_rate": 3.413916111775767e-05, + "loss": 1.1938, + "step": 5217000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4137065352128225e-05, + "loss": 1.2079, + "step": 5217500 + }, + { + "epoch": 3.13, + "learning_rate": 3.413496538656766e-05, + "loss": 1.1327, + "step": 5218000 + }, + { + "epoch": 3.13, + "learning_rate": 3.413286542100709e-05, + "loss": 1.2024, + "step": 5218500 + }, + { + "epoch": 3.13, + "learning_rate": 3.413076545544653e-05, + "loss": 1.1843, + "step": 5219000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4128665489885965e-05, + "loss": 1.146, + "step": 5219500 + }, + { + "epoch": 3.13, + "learning_rate": 3.41265655243254e-05, + "loss": 1.1853, + "step": 5220000 + }, + { + "epoch": 3.13, + "learning_rate": 3.412446555876484e-05, + "loss": 1.1709, + "step": 5220500 + }, + { + "epoch": 3.13, + "learning_rate": 3.412236559320427e-05, + "loss": 1.1425, + "step": 5221000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4120265627643706e-05, + "loss": 1.1484, + "step": 5221500 + }, + { + "epoch": 3.13, + "learning_rate": 3.411816986201426e-05, + "loss": 1.1402, + "step": 5222000 + }, + { + "epoch": 3.13, + "learning_rate": 3.41160698964537e-05, + "loss": 1.1471, + "step": 5222500 + }, + { + "epoch": 3.13, + "learning_rate": 3.411396993089313e-05, + "loss": 1.1571, + "step": 5223000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4111869965332574e-05, + "loss": 1.1678, + "step": 5223500 + }, + { + "epoch": 3.13, + "learning_rate": 3.410976999977201e-05, + "loss": 1.16, + "step": 5224000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4107670034211434e-05, + "loss": 1.1952, + "step": 5224500 + }, + { + "epoch": 3.13, + "learning_rate": 3.4105570068650874e-05, + "loss": 1.1712, + "step": 5225000 + }, + { + "epoch": 3.13, + "learning_rate": 3.410347010309031e-05, + "loss": 1.1833, + "step": 5225500 + }, + { + "epoch": 3.13, + "learning_rate": 3.410137433746087e-05, + "loss": 1.1686, + "step": 5226000 + }, + { + "epoch": 3.13, + "learning_rate": 3.40992743719003e-05, + "loss": 1.1937, + "step": 5226500 + }, + { + "epoch": 3.13, + "learning_rate": 3.4097178606270855e-05, + "loss": 1.183, + "step": 5227000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4095078640710295e-05, + "loss": 1.181, + "step": 5227500 + }, + { + "epoch": 3.13, + "learning_rate": 3.409297867514973e-05, + "loss": 1.1785, + "step": 5228000 + }, + { + "epoch": 3.13, + "learning_rate": 3.409087870958916e-05, + "loss": 1.148, + "step": 5228500 + }, + { + "epoch": 3.13, + "learning_rate": 3.40887787440286e-05, + "loss": 1.2019, + "step": 5229000 + }, + { + "epoch": 3.14, + "learning_rate": 3.408667877846803e-05, + "loss": 1.1529, + "step": 5229500 + }, + { + "epoch": 3.14, + "learning_rate": 3.408457881290747e-05, + "loss": 1.1653, + "step": 5230000 + }, + { + "epoch": 3.14, + "learning_rate": 3.40824788473469e-05, + "loss": 1.1553, + "step": 5230500 + }, + { + "epoch": 3.14, + "learning_rate": 3.408038308171746e-05, + "loss": 1.1844, + "step": 5231000 + }, + { + "epoch": 3.14, + "learning_rate": 3.407828311615689e-05, + "loss": 1.1875, + "step": 5231500 + }, + { + "epoch": 3.14, + "learning_rate": 3.407618735052745e-05, + "loss": 1.1693, + "step": 5232000 + }, + { + "epoch": 3.14, + "learning_rate": 3.407408738496689e-05, + "loss": 1.1507, + "step": 5232500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4071987419406324e-05, + "loss": 1.1529, + "step": 5233000 + }, + { + "epoch": 3.14, + "learning_rate": 3.406988745384576e-05, + "loss": 1.1726, + "step": 5233500 + }, + { + "epoch": 3.14, + "learning_rate": 3.406778748828519e-05, + "loss": 1.1601, + "step": 5234000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4065687522724624e-05, + "loss": 1.1722, + "step": 5234500 + }, + { + "epoch": 3.14, + "learning_rate": 3.406358755716406e-05, + "loss": 1.14, + "step": 5235000 + }, + { + "epoch": 3.14, + "learning_rate": 3.40614875916035e-05, + "loss": 1.1924, + "step": 5235500 + }, + { + "epoch": 3.14, + "learning_rate": 3.405939182597406e-05, + "loss": 1.1706, + "step": 5236000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4057291860413485e-05, + "loss": 1.1802, + "step": 5236500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4055191894852925e-05, + "loss": 1.1847, + "step": 5237000 + }, + { + "epoch": 3.14, + "learning_rate": 3.405309192929236e-05, + "loss": 1.1825, + "step": 5237500 + }, + { + "epoch": 3.14, + "learning_rate": 3.405099616366292e-05, + "loss": 1.1495, + "step": 5238000 + }, + { + "epoch": 3.14, + "learning_rate": 3.404890039803347e-05, + "loss": 1.1652, + "step": 5238500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4046800432472906e-05, + "loss": 1.1546, + "step": 5239000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4044700466912346e-05, + "loss": 1.1809, + "step": 5239500 + }, + { + "epoch": 3.14, + "learning_rate": 3.404260050135178e-05, + "loss": 1.1423, + "step": 5240000 + }, + { + "epoch": 3.14, + "learning_rate": 3.404050053579121e-05, + "loss": 1.1882, + "step": 5240500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4038404770161767e-05, + "loss": 1.1722, + "step": 5241000 + }, + { + "epoch": 3.14, + "learning_rate": 3.403630480460121e-05, + "loss": 1.1693, + "step": 5241500 + }, + { + "epoch": 3.14, + "learning_rate": 3.403420483904064e-05, + "loss": 1.1449, + "step": 5242000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4032104873480074e-05, + "loss": 1.2002, + "step": 5242500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4030009107850634e-05, + "loss": 1.1715, + "step": 5243000 + }, + { + "epoch": 3.14, + "learning_rate": 3.402790914229007e-05, + "loss": 1.1691, + "step": 5243500 + }, + { + "epoch": 3.14, + "learning_rate": 3.40258091767295e-05, + "loss": 1.1815, + "step": 5244000 + }, + { + "epoch": 3.14, + "learning_rate": 3.402370921116894e-05, + "loss": 1.1443, + "step": 5244500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4021609245608375e-05, + "loss": 1.1581, + "step": 5245000 + }, + { + "epoch": 3.14, + "learning_rate": 3.401950928004781e-05, + "loss": 1.1545, + "step": 5245500 + }, + { + "epoch": 3.15, + "learning_rate": 3.401740931448724e-05, + "loss": 1.1413, + "step": 5246000 + }, + { + "epoch": 3.15, + "learning_rate": 3.40153135488578e-05, + "loss": 1.1474, + "step": 5246500 + }, + { + "epoch": 3.15, + "learning_rate": 3.4013213583297235e-05, + "loss": 1.1267, + "step": 5247000 + }, + { + "epoch": 3.15, + "learning_rate": 3.401111361773667e-05, + "loss": 1.1732, + "step": 5247500 + }, + { + "epoch": 3.15, + "learning_rate": 3.400901365217611e-05, + "loss": 1.1621, + "step": 5248000 + }, + { + "epoch": 3.15, + "learning_rate": 3.4006913686615536e-05, + "loss": 1.1766, + "step": 5248500 + }, + { + "epoch": 3.15, + "learning_rate": 3.400481372105497e-05, + "loss": 1.1352, + "step": 5249000 + }, + { + "epoch": 3.15, + "learning_rate": 3.400271375549441e-05, + "loss": 1.1602, + "step": 5249500 + }, + { + "epoch": 3.15, + "learning_rate": 3.400061378993384e-05, + "loss": 1.1443, + "step": 5250000 + }, + { + "epoch": 3.15, + "learning_rate": 3.39985180243044e-05, + "loss": 1.198, + "step": 5250500 + }, + { + "epoch": 3.15, + "learning_rate": 3.399641805874384e-05, + "loss": 1.1858, + "step": 5251000 + }, + { + "epoch": 3.15, + "learning_rate": 3.399431809318327e-05, + "loss": 1.1506, + "step": 5251500 + }, + { + "epoch": 3.15, + "learning_rate": 3.399222232755383e-05, + "loss": 1.1801, + "step": 5252000 + }, + { + "epoch": 3.15, + "learning_rate": 3.3990122361993264e-05, + "loss": 1.1862, + "step": 5252500 + }, + { + "epoch": 3.15, + "learning_rate": 3.39880223964327e-05, + "loss": 1.1771, + "step": 5253000 + }, + { + "epoch": 3.15, + "learning_rate": 3.398592243087213e-05, + "loss": 1.181, + "step": 5253500 + }, + { + "epoch": 3.15, + "learning_rate": 3.3983822465311564e-05, + "loss": 1.1685, + "step": 5254000 + }, + { + "epoch": 3.15, + "learning_rate": 3.3981726699682125e-05, + "loss": 1.1605, + "step": 5254500 + }, + { + "epoch": 3.15, + "learning_rate": 3.3979626734121565e-05, + "loss": 1.166, + "step": 5255000 + }, + { + "epoch": 3.15, + "learning_rate": 3.397752676856099e-05, + "loss": 1.1554, + "step": 5255500 + }, + { + "epoch": 3.15, + "learning_rate": 3.3975426803000425e-05, + "loss": 1.1752, + "step": 5256000 + }, + { + "epoch": 3.15, + "learning_rate": 3.3973331037370985e-05, + "loss": 1.171, + "step": 5256500 + }, + { + "epoch": 3.15, + "learning_rate": 3.3971231071810426e-05, + "loss": 1.1568, + "step": 5257000 + }, + { + "epoch": 3.15, + "learning_rate": 3.396913110624986e-05, + "loss": 1.1652, + "step": 5257500 + }, + { + "epoch": 3.15, + "learning_rate": 3.396703114068929e-05, + "loss": 1.1686, + "step": 5258000 + }, + { + "epoch": 3.15, + "learning_rate": 3.396493537505985e-05, + "loss": 1.1715, + "step": 5258500 + }, + { + "epoch": 3.15, + "learning_rate": 3.3962835409499286e-05, + "loss": 1.1795, + "step": 5259000 + }, + { + "epoch": 3.15, + "learning_rate": 3.396073544393872e-05, + "loss": 1.178, + "step": 5259500 + }, + { + "epoch": 3.15, + "learning_rate": 3.395863547837816e-05, + "loss": 1.148, + "step": 5260000 + }, + { + "epoch": 3.15, + "learning_rate": 3.395653551281759e-05, + "loss": 1.1626, + "step": 5260500 + }, + { + "epoch": 3.15, + "learning_rate": 3.395443554725702e-05, + "loss": 1.176, + "step": 5261000 + }, + { + "epoch": 3.15, + "learning_rate": 3.395233558169646e-05, + "loss": 1.172, + "step": 5261500 + }, + { + "epoch": 3.15, + "learning_rate": 3.3950235616135894e-05, + "loss": 1.1641, + "step": 5262000 + }, + { + "epoch": 3.16, + "learning_rate": 3.394813985050645e-05, + "loss": 1.1709, + "step": 5262500 + }, + { + "epoch": 3.16, + "learning_rate": 3.394603988494588e-05, + "loss": 1.1976, + "step": 5263000 + }, + { + "epoch": 3.16, + "learning_rate": 3.394393991938532e-05, + "loss": 1.179, + "step": 5263500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3941839953824755e-05, + "loss": 1.2022, + "step": 5264000 + }, + { + "epoch": 3.16, + "learning_rate": 3.393973998826419e-05, + "loss": 1.1834, + "step": 5264500 + }, + { + "epoch": 3.16, + "learning_rate": 3.393764002270363e-05, + "loss": 1.1586, + "step": 5265000 + }, + { + "epoch": 3.16, + "learning_rate": 3.393554425707418e-05, + "loss": 1.1665, + "step": 5265500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3933444291513615e-05, + "loss": 1.1882, + "step": 5266000 + }, + { + "epoch": 3.16, + "learning_rate": 3.3931344325953056e-05, + "loss": 1.155, + "step": 5266500 + }, + { + "epoch": 3.16, + "learning_rate": 3.392924436039249e-05, + "loss": 1.1745, + "step": 5267000 + }, + { + "epoch": 3.16, + "learning_rate": 3.392714439483192e-05, + "loss": 1.1598, + "step": 5267500 + }, + { + "epoch": 3.16, + "learning_rate": 3.392504442927136e-05, + "loss": 1.1596, + "step": 5268000 + }, + { + "epoch": 3.16, + "learning_rate": 3.3922944463710796e-05, + "loss": 1.174, + "step": 5268500 + }, + { + "epoch": 3.16, + "learning_rate": 3.392084449815023e-05, + "loss": 1.1552, + "step": 5269000 + }, + { + "epoch": 3.16, + "learning_rate": 3.391874873252078e-05, + "loss": 1.1933, + "step": 5269500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3916648766960224e-05, + "loss": 1.1477, + "step": 5270000 + }, + { + "epoch": 3.16, + "learning_rate": 3.39145572012619e-05, + "loss": 1.1744, + "step": 5270500 + }, + { + "epoch": 3.16, + "learning_rate": 3.391245723570134e-05, + "loss": 1.1997, + "step": 5271000 + }, + { + "epoch": 3.16, + "learning_rate": 3.391035727014077e-05, + "loss": 1.154, + "step": 5271500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3908257304580204e-05, + "loss": 1.1838, + "step": 5272000 + }, + { + "epoch": 3.16, + "learning_rate": 3.390615733901964e-05, + "loss": 1.1571, + "step": 5272500 + }, + { + "epoch": 3.16, + "learning_rate": 3.390405737345907e-05, + "loss": 1.1455, + "step": 5273000 + }, + { + "epoch": 3.16, + "learning_rate": 3.390195740789851e-05, + "loss": 1.1389, + "step": 5273500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3899857442337945e-05, + "loss": 1.1551, + "step": 5274000 + }, + { + "epoch": 3.16, + "learning_rate": 3.389775747677738e-05, + "loss": 1.1707, + "step": 5274500 + }, + { + "epoch": 3.16, + "learning_rate": 3.389565751121682e-05, + "loss": 1.1959, + "step": 5275000 + }, + { + "epoch": 3.16, + "learning_rate": 3.389355754565625e-05, + "loss": 1.19, + "step": 5275500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3891457580095686e-05, + "loss": 1.1814, + "step": 5276000 + }, + { + "epoch": 3.16, + "learning_rate": 3.388936181446624e-05, + "loss": 1.1413, + "step": 5276500 + }, + { + "epoch": 3.16, + "learning_rate": 3.388726604883679e-05, + "loss": 1.171, + "step": 5277000 + }, + { + "epoch": 3.16, + "learning_rate": 3.388516608327623e-05, + "loss": 1.1502, + "step": 5277500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3883066117715666e-05, + "loss": 1.1542, + "step": 5278000 + }, + { + "epoch": 3.16, + "learning_rate": 3.38809661521551e-05, + "loss": 1.1897, + "step": 5278500 + }, + { + "epoch": 3.16, + "learning_rate": 3.387886618659454e-05, + "loss": 1.1769, + "step": 5279000 + }, + { + "epoch": 3.17, + "learning_rate": 3.3876770420965094e-05, + "loss": 1.1686, + "step": 5279500 + }, + { + "epoch": 3.17, + "learning_rate": 3.387467045540453e-05, + "loss": 1.1454, + "step": 5280000 + }, + { + "epoch": 3.17, + "learning_rate": 3.387257048984397e-05, + "loss": 1.1846, + "step": 5280500 + }, + { + "epoch": 3.17, + "learning_rate": 3.38704705242834e-05, + "loss": 1.1737, + "step": 5281000 + }, + { + "epoch": 3.17, + "learning_rate": 3.3868370558722834e-05, + "loss": 1.1612, + "step": 5281500 + }, + { + "epoch": 3.17, + "learning_rate": 3.3866270593162275e-05, + "loss": 1.1786, + "step": 5282000 + }, + { + "epoch": 3.17, + "learning_rate": 3.386417062760171e-05, + "loss": 1.1634, + "step": 5282500 + }, + { + "epoch": 3.17, + "learning_rate": 3.386207066204114e-05, + "loss": 1.1766, + "step": 5283000 + }, + { + "epoch": 3.17, + "learning_rate": 3.385997069648058e-05, + "loss": 1.1729, + "step": 5283500 + }, + { + "epoch": 3.17, + "learning_rate": 3.3857870730920015e-05, + "loss": 1.1702, + "step": 5284000 + }, + { + "epoch": 3.17, + "learning_rate": 3.385577496529057e-05, + "loss": 1.158, + "step": 5284500 + }, + { + "epoch": 3.17, + "learning_rate": 3.385367499973e-05, + "loss": 1.1882, + "step": 5285000 + }, + { + "epoch": 3.17, + "learning_rate": 3.385157503416944e-05, + "loss": 1.1644, + "step": 5285500 + }, + { + "epoch": 3.17, + "learning_rate": 3.3849475068608876e-05, + "loss": 1.1724, + "step": 5286000 + }, + { + "epoch": 3.17, + "learning_rate": 3.384737510304831e-05, + "loss": 1.1693, + "step": 5286500 + }, + { + "epoch": 3.17, + "learning_rate": 3.384527513748775e-05, + "loss": 1.1537, + "step": 5287000 + }, + { + "epoch": 3.17, + "learning_rate": 3.38431793718583e-05, + "loss": 1.1891, + "step": 5287500 + }, + { + "epoch": 3.17, + "learning_rate": 3.384107940629774e-05, + "loss": 1.1749, + "step": 5288000 + }, + { + "epoch": 3.17, + "learning_rate": 3.383897944073718e-05, + "loss": 1.1731, + "step": 5288500 + }, + { + "epoch": 3.17, + "learning_rate": 3.383687947517661e-05, + "loss": 1.152, + "step": 5289000 + }, + { + "epoch": 3.17, + "learning_rate": 3.383477950961604e-05, + "loss": 1.1807, + "step": 5289500 + }, + { + "epoch": 3.17, + "learning_rate": 3.383267954405548e-05, + "loss": 1.1774, + "step": 5290000 + }, + { + "epoch": 3.17, + "learning_rate": 3.383058377842604e-05, + "loss": 1.1496, + "step": 5290500 + }, + { + "epoch": 3.17, + "learning_rate": 3.382848381286547e-05, + "loss": 1.1536, + "step": 5291000 + }, + { + "epoch": 3.17, + "learning_rate": 3.3826383847304905e-05, + "loss": 1.2075, + "step": 5291500 + }, + { + "epoch": 3.17, + "learning_rate": 3.382428388174434e-05, + "loss": 1.1642, + "step": 5292000 + }, + { + "epoch": 3.17, + "learning_rate": 3.382218391618377e-05, + "loss": 1.1535, + "step": 5292500 + }, + { + "epoch": 3.17, + "learning_rate": 3.382008815055433e-05, + "loss": 1.1653, + "step": 5293000 + }, + { + "epoch": 3.17, + "learning_rate": 3.3817988184993765e-05, + "loss": 1.1669, + "step": 5293500 + }, + { + "epoch": 3.17, + "learning_rate": 3.3815888219433205e-05, + "loss": 1.1386, + "step": 5294000 + }, + { + "epoch": 3.17, + "learning_rate": 3.381378825387263e-05, + "loss": 1.1548, + "step": 5294500 + }, + { + "epoch": 3.17, + "learning_rate": 3.3811688288312066e-05, + "loss": 1.1754, + "step": 5295000 + }, + { + "epoch": 3.17, + "learning_rate": 3.3809588322751506e-05, + "loss": 1.1608, + "step": 5295500 + }, + { + "epoch": 3.18, + "learning_rate": 3.380748835719094e-05, + "loss": 1.1903, + "step": 5296000 + }, + { + "epoch": 3.18, + "learning_rate": 3.380538839163038e-05, + "loss": 1.1513, + "step": 5296500 + }, + { + "epoch": 3.18, + "learning_rate": 3.380329682593205e-05, + "loss": 1.1523, + "step": 5297000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3801196860371493e-05, + "loss": 1.1539, + "step": 5297500 + }, + { + "epoch": 3.18, + "learning_rate": 3.379909689481093e-05, + "loss": 1.1538, + "step": 5298000 + }, + { + "epoch": 3.18, + "learning_rate": 3.379699692925036e-05, + "loss": 1.15, + "step": 5298500 + }, + { + "epoch": 3.18, + "learning_rate": 3.3794896963689794e-05, + "loss": 1.1573, + "step": 5299000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3792801198060354e-05, + "loss": 1.1431, + "step": 5299500 + }, + { + "epoch": 3.18, + "learning_rate": 3.379070123249979e-05, + "loss": 1.1464, + "step": 5300000 + }, + { + "epoch": 3.18, + "eval_loss": 1.144875407218933, + "eval_runtime": 1110.2882, + "eval_samples_per_second": 474.399, + "eval_steps_per_second": 79.067, + "step": 5300000 + }, + { + "epoch": 3.18, + "learning_rate": 3.378860546687034e-05, + "loss": 1.1742, + "step": 5300500 + }, + { + "epoch": 3.18, + "learning_rate": 3.3786505501309775e-05, + "loss": 1.2002, + "step": 5301000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3784405535749215e-05, + "loss": 1.1369, + "step": 5301500 + }, + { + "epoch": 3.18, + "learning_rate": 3.378230557018865e-05, + "loss": 1.1692, + "step": 5302000 + }, + { + "epoch": 3.18, + "learning_rate": 3.378020560462809e-05, + "loss": 1.1997, + "step": 5302500 + }, + { + "epoch": 3.18, + "learning_rate": 3.377810563906752e-05, + "loss": 1.1565, + "step": 5303000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3776005673506956e-05, + "loss": 1.1792, + "step": 5303500 + }, + { + "epoch": 3.18, + "learning_rate": 3.377390570794639e-05, + "loss": 1.1762, + "step": 5304000 + }, + { + "epoch": 3.18, + "learning_rate": 3.377180574238582e-05, + "loss": 1.179, + "step": 5304500 + }, + { + "epoch": 3.18, + "learning_rate": 3.376970997675638e-05, + "loss": 1.1768, + "step": 5305000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3767610011195816e-05, + "loss": 1.1532, + "step": 5305500 + }, + { + "epoch": 3.18, + "learning_rate": 3.3765510045635257e-05, + "loss": 1.1553, + "step": 5306000 + }, + { + "epoch": 3.18, + "learning_rate": 3.376341008007468e-05, + "loss": 1.1361, + "step": 5306500 + }, + { + "epoch": 3.18, + "learning_rate": 3.376131011451412e-05, + "loss": 1.1716, + "step": 5307000 + }, + { + "epoch": 3.18, + "learning_rate": 3.375921434888468e-05, + "loss": 1.147, + "step": 5307500 + }, + { + "epoch": 3.18, + "learning_rate": 3.375711438332412e-05, + "loss": 1.174, + "step": 5308000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3755014417763544e-05, + "loss": 1.1645, + "step": 5308500 + }, + { + "epoch": 3.18, + "learning_rate": 3.3752914452202984e-05, + "loss": 1.1649, + "step": 5309000 + }, + { + "epoch": 3.18, + "learning_rate": 3.375081448664242e-05, + "loss": 1.1613, + "step": 5309500 + }, + { + "epoch": 3.18, + "learning_rate": 3.374871452108185e-05, + "loss": 1.1791, + "step": 5310000 + }, + { + "epoch": 3.18, + "learning_rate": 3.374661455552129e-05, + "loss": 1.1696, + "step": 5310500 + }, + { + "epoch": 3.18, + "learning_rate": 3.3744518789891845e-05, + "loss": 1.167, + "step": 5311000 + }, + { + "epoch": 3.18, + "learning_rate": 3.374241882433128e-05, + "loss": 1.1543, + "step": 5311500 + }, + { + "epoch": 3.18, + "learning_rate": 3.374031885877071e-05, + "loss": 1.1633, + "step": 5312000 + }, + { + "epoch": 3.19, + "learning_rate": 3.373821889321015e-05, + "loss": 1.1657, + "step": 5312500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3736118927649586e-05, + "loss": 1.1861, + "step": 5313000 + }, + { + "epoch": 3.19, + "learning_rate": 3.373402316202014e-05, + "loss": 1.2088, + "step": 5313500 + }, + { + "epoch": 3.19, + "learning_rate": 3.373192319645957e-05, + "loss": 1.1784, + "step": 5314000 + }, + { + "epoch": 3.19, + "learning_rate": 3.372982323089901e-05, + "loss": 1.1717, + "step": 5314500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3727723265338446e-05, + "loss": 1.1704, + "step": 5315000 + }, + { + "epoch": 3.19, + "learning_rate": 3.372562329977788e-05, + "loss": 1.1454, + "step": 5315500 + }, + { + "epoch": 3.19, + "learning_rate": 3.372352333421732e-05, + "loss": 1.1564, + "step": 5316000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3721423368656753e-05, + "loss": 1.1685, + "step": 5316500 + }, + { + "epoch": 3.19, + "learning_rate": 3.371932760302731e-05, + "loss": 1.1875, + "step": 5317000 + }, + { + "epoch": 3.19, + "learning_rate": 3.371722763746675e-05, + "loss": 1.1784, + "step": 5317500 + }, + { + "epoch": 3.19, + "learning_rate": 3.37151318718373e-05, + "loss": 1.1826, + "step": 5318000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3713031906276734e-05, + "loss": 1.1569, + "step": 5318500 + }, + { + "epoch": 3.19, + "learning_rate": 3.371093194071617e-05, + "loss": 1.1666, + "step": 5319000 + }, + { + "epoch": 3.19, + "learning_rate": 3.370883197515561e-05, + "loss": 1.1509, + "step": 5319500 + }, + { + "epoch": 3.19, + "learning_rate": 3.370673200959504e-05, + "loss": 1.1687, + "step": 5320000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3704632044034475e-05, + "loss": 1.1747, + "step": 5320500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3702532078473915e-05, + "loss": 1.1714, + "step": 5321000 + }, + { + "epoch": 3.19, + "learning_rate": 3.370043211291335e-05, + "loss": 1.1413, + "step": 5321500 + }, + { + "epoch": 3.19, + "learning_rate": 3.369833214735278e-05, + "loss": 1.1586, + "step": 5322000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3696236381723336e-05, + "loss": 1.168, + "step": 5322500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3694136416162776e-05, + "loss": 1.1707, + "step": 5323000 + }, + { + "epoch": 3.19, + "learning_rate": 3.369203645060221e-05, + "loss": 1.2057, + "step": 5323500 + }, + { + "epoch": 3.19, + "learning_rate": 3.368993648504164e-05, + "loss": 1.1536, + "step": 5324000 + }, + { + "epoch": 3.19, + "learning_rate": 3.368783651948108e-05, + "loss": 1.1697, + "step": 5324500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3685736553920516e-05, + "loss": 1.1584, + "step": 5325000 + }, + { + "epoch": 3.19, + "learning_rate": 3.368363658835995e-05, + "loss": 1.1843, + "step": 5325500 + }, + { + "epoch": 3.19, + "learning_rate": 3.368153662279938e-05, + "loss": 1.1776, + "step": 5326000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3679440857169944e-05, + "loss": 1.1261, + "step": 5326500 + }, + { + "epoch": 3.19, + "learning_rate": 3.367734089160938e-05, + "loss": 1.198, + "step": 5327000 + }, + { + "epoch": 3.19, + "learning_rate": 3.367524512597993e-05, + "loss": 1.1932, + "step": 5327500 + }, + { + "epoch": 3.19, + "learning_rate": 3.367314516041937e-05, + "loss": 1.167, + "step": 5328000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3671049394789925e-05, + "loss": 1.1424, + "step": 5328500 + }, + { + "epoch": 3.19, + "learning_rate": 3.366894942922936e-05, + "loss": 1.1492, + "step": 5329000 + }, + { + "epoch": 3.2, + "learning_rate": 3.366684946366879e-05, + "loss": 1.1446, + "step": 5329500 + }, + { + "epoch": 3.2, + "learning_rate": 3.366474949810823e-05, + "loss": 1.1653, + "step": 5330000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3662649532547665e-05, + "loss": 1.1483, + "step": 5330500 + }, + { + "epoch": 3.2, + "learning_rate": 3.36605495669871e-05, + "loss": 1.163, + "step": 5331000 + }, + { + "epoch": 3.2, + "learning_rate": 3.365844960142654e-05, + "loss": 1.1611, + "step": 5331500 + }, + { + "epoch": 3.2, + "learning_rate": 3.365634963586597e-05, + "loss": 1.1659, + "step": 5332000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3654249670305406e-05, + "loss": 1.1664, + "step": 5332500 + }, + { + "epoch": 3.2, + "learning_rate": 3.3652149704744846e-05, + "loss": 1.2027, + "step": 5333000 + }, + { + "epoch": 3.2, + "learning_rate": 3.365004973918427e-05, + "loss": 1.1881, + "step": 5333500 + }, + { + "epoch": 3.2, + "learning_rate": 3.364794977362371e-05, + "loss": 1.1847, + "step": 5334000 + }, + { + "epoch": 3.2, + "learning_rate": 3.364585400799427e-05, + "loss": 1.1891, + "step": 5334500 + }, + { + "epoch": 3.2, + "learning_rate": 3.364375824236483e-05, + "loss": 1.1615, + "step": 5335000 + }, + { + "epoch": 3.2, + "learning_rate": 3.364165827680426e-05, + "loss": 1.1784, + "step": 5335500 + }, + { + "epoch": 3.2, + "learning_rate": 3.3639558311243694e-05, + "loss": 1.1849, + "step": 5336000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3637458345683134e-05, + "loss": 1.1648, + "step": 5336500 + }, + { + "epoch": 3.2, + "learning_rate": 3.363535838012257e-05, + "loss": 1.1437, + "step": 5337000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3633258414562e-05, + "loss": 1.1634, + "step": 5337500 + }, + { + "epoch": 3.2, + "learning_rate": 3.3631162648932554e-05, + "loss": 1.1636, + "step": 5338000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3629062683371995e-05, + "loss": 1.1512, + "step": 5338500 + }, + { + "epoch": 3.2, + "learning_rate": 3.362696271781143e-05, + "loss": 1.1552, + "step": 5339000 + }, + { + "epoch": 3.2, + "learning_rate": 3.362486275225086e-05, + "loss": 1.1736, + "step": 5339500 + }, + { + "epoch": 3.2, + "learning_rate": 3.362276698662142e-05, + "loss": 1.1628, + "step": 5340000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3620667021060855e-05, + "loss": 1.2001, + "step": 5340500 + }, + { + "epoch": 3.2, + "learning_rate": 3.361856705550029e-05, + "loss": 1.1654, + "step": 5341000 + }, + { + "epoch": 3.2, + "learning_rate": 3.361646708993973e-05, + "loss": 1.1589, + "step": 5341500 + }, + { + "epoch": 3.2, + "learning_rate": 3.361436712437916e-05, + "loss": 1.1443, + "step": 5342000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3612267158818596e-05, + "loss": 1.1866, + "step": 5342500 + }, + { + "epoch": 3.2, + "learning_rate": 3.361016719325803e-05, + "loss": 1.1784, + "step": 5343000 + }, + { + "epoch": 3.2, + "learning_rate": 3.360806722769746e-05, + "loss": 1.1663, + "step": 5343500 + }, + { + "epoch": 3.2, + "learning_rate": 3.360597146206802e-05, + "loss": 1.1434, + "step": 5344000 + }, + { + "epoch": 3.2, + "learning_rate": 3.360387149650746e-05, + "loss": 1.1535, + "step": 5344500 + }, + { + "epoch": 3.2, + "learning_rate": 3.360177153094689e-05, + "loss": 1.1903, + "step": 5345000 + }, + { + "epoch": 3.2, + "learning_rate": 3.359967576531745e-05, + "loss": 1.1557, + "step": 5345500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3597575799756884e-05, + "loss": 1.1887, + "step": 5346000 + }, + { + "epoch": 3.21, + "learning_rate": 3.359547583419632e-05, + "loss": 1.1675, + "step": 5346500 + }, + { + "epoch": 3.21, + "learning_rate": 3.359337586863576e-05, + "loss": 1.1825, + "step": 5347000 + }, + { + "epoch": 3.21, + "learning_rate": 3.3591275903075184e-05, + "loss": 1.181, + "step": 5347500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3589175937514625e-05, + "loss": 1.1513, + "step": 5348000 + }, + { + "epoch": 3.21, + "learning_rate": 3.358707597195406e-05, + "loss": 1.1662, + "step": 5348500 + }, + { + "epoch": 3.21, + "learning_rate": 3.358497600639349e-05, + "loss": 1.1656, + "step": 5349000 + }, + { + "epoch": 3.21, + "learning_rate": 3.358288024076405e-05, + "loss": 1.1638, + "step": 5349500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3580780275203485e-05, + "loss": 1.1872, + "step": 5350000 + }, + { + "epoch": 3.21, + "learning_rate": 3.357868030964292e-05, + "loss": 1.1764, + "step": 5350500 + }, + { + "epoch": 3.21, + "learning_rate": 3.357658034408235e-05, + "loss": 1.1776, + "step": 5351000 + }, + { + "epoch": 3.21, + "learning_rate": 3.357448037852179e-05, + "loss": 1.146, + "step": 5351500 + }, + { + "epoch": 3.21, + "learning_rate": 3.357238461289235e-05, + "loss": 1.1664, + "step": 5352000 + }, + { + "epoch": 3.21, + "learning_rate": 3.357028464733178e-05, + "loss": 1.183, + "step": 5352500 + }, + { + "epoch": 3.21, + "learning_rate": 3.356818468177121e-05, + "loss": 1.1656, + "step": 5353000 + }, + { + "epoch": 3.21, + "learning_rate": 3.356608471621065e-05, + "loss": 1.1752, + "step": 5353500 + }, + { + "epoch": 3.21, + "learning_rate": 3.356398475065009e-05, + "loss": 1.1802, + "step": 5354000 + }, + { + "epoch": 3.21, + "learning_rate": 3.356188478508953e-05, + "loss": 1.158, + "step": 5354500 + }, + { + "epoch": 3.21, + "learning_rate": 3.355978481952896e-05, + "loss": 1.1878, + "step": 5355000 + }, + { + "epoch": 3.21, + "learning_rate": 3.3557684853968394e-05, + "loss": 1.1831, + "step": 5355500 + }, + { + "epoch": 3.21, + "learning_rate": 3.355558908833895e-05, + "loss": 1.1679, + "step": 5356000 + }, + { + "epoch": 3.21, + "learning_rate": 3.355348912277839e-05, + "loss": 1.1638, + "step": 5356500 + }, + { + "epoch": 3.21, + "learning_rate": 3.355138915721782e-05, + "loss": 1.1638, + "step": 5357000 + }, + { + "epoch": 3.21, + "learning_rate": 3.3549289191657255e-05, + "loss": 1.1934, + "step": 5357500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3547189226096695e-05, + "loss": 1.1778, + "step": 5358000 + }, + { + "epoch": 3.21, + "learning_rate": 3.354508926053613e-05, + "loss": 1.1576, + "step": 5358500 + }, + { + "epoch": 3.21, + "learning_rate": 3.354298929497556e-05, + "loss": 1.1737, + "step": 5359000 + }, + { + "epoch": 3.21, + "learning_rate": 3.3540889329415e-05, + "loss": 1.1674, + "step": 5359500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3538793563785556e-05, + "loss": 1.17, + "step": 5360000 + }, + { + "epoch": 3.21, + "learning_rate": 3.353669359822499e-05, + "loss": 1.1683, + "step": 5360500 + }, + { + "epoch": 3.21, + "learning_rate": 3.353459783259554e-05, + "loss": 1.1624, + "step": 5361000 + }, + { + "epoch": 3.21, + "learning_rate": 3.353249786703498e-05, + "loss": 1.1736, + "step": 5361500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3530397901474416e-05, + "loss": 1.1524, + "step": 5362000 + }, + { + "epoch": 3.22, + "learning_rate": 3.352829793591385e-05, + "loss": 1.1611, + "step": 5362500 + }, + { + "epoch": 3.22, + "learning_rate": 3.352619797035329e-05, + "loss": 1.1708, + "step": 5363000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3524098004792723e-05, + "loss": 1.1678, + "step": 5363500 + }, + { + "epoch": 3.22, + "learning_rate": 3.352199803923216e-05, + "loss": 1.1549, + "step": 5364000 + }, + { + "epoch": 3.22, + "learning_rate": 3.35198980736716e-05, + "loss": 1.2398, + "step": 5364500 + }, + { + "epoch": 3.22, + "learning_rate": 3.351780230804215e-05, + "loss": 1.1568, + "step": 5365000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3515702342481584e-05, + "loss": 1.1711, + "step": 5365500 + }, + { + "epoch": 3.22, + "learning_rate": 3.351360237692102e-05, + "loss": 1.1801, + "step": 5366000 + }, + { + "epoch": 3.22, + "learning_rate": 3.351150241136046e-05, + "loss": 1.1759, + "step": 5366500 + }, + { + "epoch": 3.22, + "learning_rate": 3.350940664573101e-05, + "loss": 1.157, + "step": 5367000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3507306680170445e-05, + "loss": 1.1495, + "step": 5367500 + }, + { + "epoch": 3.22, + "learning_rate": 3.350520671460988e-05, + "loss": 1.1261, + "step": 5368000 + }, + { + "epoch": 3.22, + "learning_rate": 3.350310674904932e-05, + "loss": 1.1927, + "step": 5368500 + }, + { + "epoch": 3.22, + "learning_rate": 3.350101098341987e-05, + "loss": 1.1402, + "step": 5369000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3498911017859306e-05, + "loss": 1.159, + "step": 5369500 + }, + { + "epoch": 3.22, + "learning_rate": 3.3496811052298746e-05, + "loss": 1.1661, + "step": 5370000 + }, + { + "epoch": 3.22, + "learning_rate": 3.349471108673818e-05, + "loss": 1.178, + "step": 5370500 + }, + { + "epoch": 3.22, + "learning_rate": 3.349261112117761e-05, + "loss": 1.1467, + "step": 5371000 + }, + { + "epoch": 3.22, + "learning_rate": 3.349051115561705e-05, + "loss": 1.1741, + "step": 5371500 + }, + { + "epoch": 3.22, + "learning_rate": 3.348841119005648e-05, + "loss": 1.1815, + "step": 5372000 + }, + { + "epoch": 3.22, + "learning_rate": 3.348631122449591e-05, + "loss": 1.1874, + "step": 5372500 + }, + { + "epoch": 3.22, + "learning_rate": 3.3484215458866474e-05, + "loss": 1.1452, + "step": 5373000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3482115493305914e-05, + "loss": 1.1737, + "step": 5373500 + }, + { + "epoch": 3.22, + "learning_rate": 3.348001552774535e-05, + "loss": 1.1563, + "step": 5374000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3477915562184774e-05, + "loss": 1.169, + "step": 5374500 + }, + { + "epoch": 3.22, + "learning_rate": 3.3475815596624214e-05, + "loss": 1.1668, + "step": 5375000 + }, + { + "epoch": 3.22, + "learning_rate": 3.347371563106365e-05, + "loss": 1.1901, + "step": 5375500 + }, + { + "epoch": 3.22, + "learning_rate": 3.347161566550308e-05, + "loss": 1.1438, + "step": 5376000 + }, + { + "epoch": 3.22, + "learning_rate": 3.346951569994252e-05, + "loss": 1.1613, + "step": 5376500 + }, + { + "epoch": 3.22, + "learning_rate": 3.3467419934313075e-05, + "loss": 1.1709, + "step": 5377000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3465324168683635e-05, + "loss": 1.178, + "step": 5377500 + }, + { + "epoch": 3.22, + "learning_rate": 3.346322420312307e-05, + "loss": 1.167, + "step": 5378000 + }, + { + "epoch": 3.22, + "learning_rate": 3.346112423756251e-05, + "loss": 1.1412, + "step": 5378500 + }, + { + "epoch": 3.22, + "learning_rate": 3.345902427200194e-05, + "loss": 1.1629, + "step": 5379000 + }, + { + "epoch": 3.23, + "learning_rate": 3.345692430644137e-05, + "loss": 1.1608, + "step": 5379500 + }, + { + "epoch": 3.23, + "learning_rate": 3.345482854081193e-05, + "loss": 1.1547, + "step": 5380000 + }, + { + "epoch": 3.23, + "learning_rate": 3.345272857525137e-05, + "loss": 1.1811, + "step": 5380500 + }, + { + "epoch": 3.23, + "learning_rate": 3.34506286096908e-05, + "loss": 1.1707, + "step": 5381000 + }, + { + "epoch": 3.23, + "learning_rate": 3.344853284406136e-05, + "loss": 1.158, + "step": 5381500 + }, + { + "epoch": 3.23, + "learning_rate": 3.344643287850079e-05, + "loss": 1.1686, + "step": 5382000 + }, + { + "epoch": 3.23, + "learning_rate": 3.344433291294023e-05, + "loss": 1.1531, + "step": 5382500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3442232947379664e-05, + "loss": 1.1236, + "step": 5383000 + }, + { + "epoch": 3.23, + "learning_rate": 3.34401329818191e-05, + "loss": 1.1588, + "step": 5383500 + }, + { + "epoch": 3.23, + "learning_rate": 3.343803301625853e-05, + "loss": 1.1649, + "step": 5384000 + }, + { + "epoch": 3.23, + "learning_rate": 3.3435933050697964e-05, + "loss": 1.1502, + "step": 5384500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3433833085137404e-05, + "loss": 1.1575, + "step": 5385000 + }, + { + "epoch": 3.23, + "learning_rate": 3.3431737319507965e-05, + "loss": 1.1721, + "step": 5385500 + }, + { + "epoch": 3.23, + "learning_rate": 3.342964155387852e-05, + "loss": 1.1654, + "step": 5386000 + }, + { + "epoch": 3.23, + "learning_rate": 3.342754158831795e-05, + "loss": 1.1513, + "step": 5386500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3425441622757385e-05, + "loss": 1.1709, + "step": 5387000 + }, + { + "epoch": 3.23, + "learning_rate": 3.3423341657196826e-05, + "loss": 1.1836, + "step": 5387500 + }, + { + "epoch": 3.23, + "learning_rate": 3.342124169163626e-05, + "loss": 1.1345, + "step": 5388000 + }, + { + "epoch": 3.23, + "learning_rate": 3.341914172607569e-05, + "loss": 1.197, + "step": 5388500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3417041760515126e-05, + "loss": 1.165, + "step": 5389000 + }, + { + "epoch": 3.23, + "learning_rate": 3.341494179495456e-05, + "loss": 1.14, + "step": 5389500 + }, + { + "epoch": 3.23, + "learning_rate": 3.341284182939399e-05, + "loss": 1.1837, + "step": 5390000 + }, + { + "epoch": 3.23, + "learning_rate": 3.341074186383343e-05, + "loss": 1.1634, + "step": 5390500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3408641898272867e-05, + "loss": 1.183, + "step": 5391000 + }, + { + "epoch": 3.23, + "learning_rate": 3.34065419327123e-05, + "loss": 1.1713, + "step": 5391500 + }, + { + "epoch": 3.23, + "learning_rate": 3.340444616708286e-05, + "loss": 1.1559, + "step": 5392000 + }, + { + "epoch": 3.23, + "learning_rate": 3.340235040145342e-05, + "loss": 1.1913, + "step": 5392500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3400250435892854e-05, + "loss": 1.1692, + "step": 5393000 + }, + { + "epoch": 3.23, + "learning_rate": 3.339815047033228e-05, + "loss": 1.1881, + "step": 5393500 + }, + { + "epoch": 3.23, + "learning_rate": 3.339605050477172e-05, + "loss": 1.1612, + "step": 5394000 + }, + { + "epoch": 3.23, + "learning_rate": 3.3393950539211155e-05, + "loss": 1.1806, + "step": 5394500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3391854773581715e-05, + "loss": 1.1496, + "step": 5395000 + }, + { + "epoch": 3.23, + "learning_rate": 3.338975480802115e-05, + "loss": 1.1743, + "step": 5395500 + }, + { + "epoch": 3.24, + "learning_rate": 3.338765484246058e-05, + "loss": 1.1724, + "step": 5396000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3385554876900015e-05, + "loss": 1.1768, + "step": 5396500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3383459111270576e-05, + "loss": 1.1587, + "step": 5397000 + }, + { + "epoch": 3.24, + "learning_rate": 3.338135914571001e-05, + "loss": 1.1746, + "step": 5397500 + }, + { + "epoch": 3.24, + "learning_rate": 3.337925918014945e-05, + "loss": 1.1671, + "step": 5398000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3377159214588876e-05, + "loss": 1.187, + "step": 5398500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3375059249028316e-05, + "loss": 1.1635, + "step": 5399000 + }, + { + "epoch": 3.24, + "learning_rate": 3.337295928346775e-05, + "loss": 1.2195, + "step": 5399500 + }, + { + "epoch": 3.24, + "learning_rate": 3.337085931790718e-05, + "loss": 1.1819, + "step": 5400000 + }, + { + "epoch": 3.24, + "eval_loss": 1.1395820379257202, + "eval_runtime": 1108.0224, + "eval_samples_per_second": 475.369, + "eval_steps_per_second": 79.229, + "step": 5400000 + }, + { + "epoch": 3.24, + "learning_rate": 3.336875935234662e-05, + "loss": 1.1601, + "step": 5400500 + }, + { + "epoch": 3.24, + "learning_rate": 3.336665938678606e-05, + "loss": 1.1496, + "step": 5401000 + }, + { + "epoch": 3.24, + "learning_rate": 3.336456362115661e-05, + "loss": 1.1535, + "step": 5401500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3362463655596044e-05, + "loss": 1.205, + "step": 5402000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3360363690035484e-05, + "loss": 1.1644, + "step": 5402500 + }, + { + "epoch": 3.24, + "learning_rate": 3.335826372447492e-05, + "loss": 1.1973, + "step": 5403000 + }, + { + "epoch": 3.24, + "learning_rate": 3.335616375891435e-05, + "loss": 1.1611, + "step": 5403500 + }, + { + "epoch": 3.24, + "learning_rate": 3.335406379335379e-05, + "loss": 1.1469, + "step": 5404000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3351968027724345e-05, + "loss": 1.1849, + "step": 5404500 + }, + { + "epoch": 3.24, + "learning_rate": 3.334986806216378e-05, + "loss": 1.1697, + "step": 5405000 + }, + { + "epoch": 3.24, + "learning_rate": 3.334776809660321e-05, + "loss": 1.1596, + "step": 5405500 + }, + { + "epoch": 3.24, + "learning_rate": 3.334566813104265e-05, + "loss": 1.1605, + "step": 5406000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3343572365413206e-05, + "loss": 1.1731, + "step": 5406500 + }, + { + "epoch": 3.24, + "learning_rate": 3.334147239985264e-05, + "loss": 1.1637, + "step": 5407000 + }, + { + "epoch": 3.24, + "learning_rate": 3.333937243429208e-05, + "loss": 1.1695, + "step": 5407500 + }, + { + "epoch": 3.24, + "learning_rate": 3.333727246873151e-05, + "loss": 1.1654, + "step": 5408000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3335172503170946e-05, + "loss": 1.1595, + "step": 5408500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3333072537610386e-05, + "loss": 1.1599, + "step": 5409000 + }, + { + "epoch": 3.24, + "learning_rate": 3.333097257204982e-05, + "loss": 1.1603, + "step": 5409500 + }, + { + "epoch": 3.24, + "learning_rate": 3.332887260648925e-05, + "loss": 1.1606, + "step": 5410000 + }, + { + "epoch": 3.24, + "learning_rate": 3.332677684085981e-05, + "loss": 1.1638, + "step": 5410500 + }, + { + "epoch": 3.24, + "learning_rate": 3.332467687529925e-05, + "loss": 1.161, + "step": 5411000 + }, + { + "epoch": 3.24, + "learning_rate": 3.332257690973868e-05, + "loss": 1.18, + "step": 5411500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3320476944178114e-05, + "loss": 1.1594, + "step": 5412000 + }, + { + "epoch": 3.25, + "learning_rate": 3.331838117854867e-05, + "loss": 1.1587, + "step": 5412500 + }, + { + "epoch": 3.25, + "learning_rate": 3.331628121298811e-05, + "loss": 1.1667, + "step": 5413000 + }, + { + "epoch": 3.25, + "learning_rate": 3.331418544735866e-05, + "loss": 1.1327, + "step": 5413500 + }, + { + "epoch": 3.25, + "learning_rate": 3.3312085481798095e-05, + "loss": 1.167, + "step": 5414000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3309985516237535e-05, + "loss": 1.155, + "step": 5414500 + }, + { + "epoch": 3.25, + "learning_rate": 3.330788555067697e-05, + "loss": 1.1407, + "step": 5415000 + }, + { + "epoch": 3.25, + "learning_rate": 3.330578978504752e-05, + "loss": 1.1831, + "step": 5415500 + }, + { + "epoch": 3.25, + "learning_rate": 3.3303689819486956e-05, + "loss": 1.1503, + "step": 5416000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3301589853926396e-05, + "loss": 1.1606, + "step": 5416500 + }, + { + "epoch": 3.25, + "learning_rate": 3.329948988836583e-05, + "loss": 1.1233, + "step": 5417000 + }, + { + "epoch": 3.25, + "learning_rate": 3.329738992280526e-05, + "loss": 1.159, + "step": 5417500 + }, + { + "epoch": 3.25, + "learning_rate": 3.32952899572447e-05, + "loss": 1.1773, + "step": 5418000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3293194191615257e-05, + "loss": 1.1834, + "step": 5418500 + }, + { + "epoch": 3.25, + "learning_rate": 3.329109422605469e-05, + "loss": 1.1522, + "step": 5419000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3288994260494123e-05, + "loss": 1.1777, + "step": 5419500 + }, + { + "epoch": 3.25, + "learning_rate": 3.3286894294933564e-05, + "loss": 1.1646, + "step": 5420000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3284794329373e-05, + "loss": 1.1515, + "step": 5420500 + }, + { + "epoch": 3.25, + "learning_rate": 3.328269856374355e-05, + "loss": 1.1313, + "step": 5421000 + }, + { + "epoch": 3.25, + "learning_rate": 3.328059859818299e-05, + "loss": 1.1284, + "step": 5421500 + }, + { + "epoch": 3.25, + "learning_rate": 3.3278498632622424e-05, + "loss": 1.1759, + "step": 5422000 + }, + { + "epoch": 3.25, + "learning_rate": 3.327639866706186e-05, + "loss": 1.1636, + "step": 5422500 + }, + { + "epoch": 3.25, + "learning_rate": 3.32742987015013e-05, + "loss": 1.1684, + "step": 5423000 + }, + { + "epoch": 3.25, + "learning_rate": 3.327219873594073e-05, + "loss": 1.1606, + "step": 5423500 + }, + { + "epoch": 3.25, + "learning_rate": 3.3270098770380165e-05, + "loss": 1.1744, + "step": 5424000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3267998804819605e-05, + "loss": 1.182, + "step": 5424500 + }, + { + "epoch": 3.25, + "learning_rate": 3.326590303919016e-05, + "loss": 1.1715, + "step": 5425000 + }, + { + "epoch": 3.25, + "learning_rate": 3.326380307362959e-05, + "loss": 1.1513, + "step": 5425500 + }, + { + "epoch": 3.25, + "learning_rate": 3.3261703108069026e-05, + "loss": 1.147, + "step": 5426000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3259603142508466e-05, + "loss": 1.1498, + "step": 5426500 + }, + { + "epoch": 3.25, + "learning_rate": 3.325750737687902e-05, + "loss": 1.1705, + "step": 5427000 + }, + { + "epoch": 3.25, + "learning_rate": 3.325540741131845e-05, + "loss": 1.1551, + "step": 5427500 + }, + { + "epoch": 3.25, + "learning_rate": 3.325330744575789e-05, + "loss": 1.1418, + "step": 5428000 + }, + { + "epoch": 3.25, + "learning_rate": 3.325120748019733e-05, + "loss": 1.1837, + "step": 5428500 + }, + { + "epoch": 3.25, + "learning_rate": 3.324911171456788e-05, + "loss": 1.1873, + "step": 5429000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3247011749007314e-05, + "loss": 1.1557, + "step": 5429500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3244911783446754e-05, + "loss": 1.1508, + "step": 5430000 + }, + { + "epoch": 3.26, + "learning_rate": 3.324281181788619e-05, + "loss": 1.1694, + "step": 5430500 + }, + { + "epoch": 3.26, + "learning_rate": 3.324071605225674e-05, + "loss": 1.1638, + "step": 5431000 + }, + { + "epoch": 3.26, + "learning_rate": 3.32386202866273e-05, + "loss": 1.161, + "step": 5431500 + }, + { + "epoch": 3.26, + "learning_rate": 3.323652032106673e-05, + "loss": 1.2012, + "step": 5432000 + }, + { + "epoch": 3.26, + "learning_rate": 3.323442035550617e-05, + "loss": 1.1718, + "step": 5432500 + }, + { + "epoch": 3.26, + "learning_rate": 3.32323203899456e-05, + "loss": 1.1588, + "step": 5433000 + }, + { + "epoch": 3.26, + "learning_rate": 3.323022042438504e-05, + "loss": 1.1767, + "step": 5433500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3228124658755596e-05, + "loss": 1.1446, + "step": 5434000 + }, + { + "epoch": 3.26, + "learning_rate": 3.322602469319503e-05, + "loss": 1.1679, + "step": 5434500 + }, + { + "epoch": 3.26, + "learning_rate": 3.322392472763446e-05, + "loss": 1.1593, + "step": 5435000 + }, + { + "epoch": 3.26, + "learning_rate": 3.32218247620739e-05, + "loss": 1.1671, + "step": 5435500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3219724796513336e-05, + "loss": 1.1591, + "step": 5436000 + }, + { + "epoch": 3.26, + "learning_rate": 3.321762483095277e-05, + "loss": 1.1673, + "step": 5436500 + }, + { + "epoch": 3.26, + "learning_rate": 3.321552486539221e-05, + "loss": 1.1723, + "step": 5437000 + }, + { + "epoch": 3.26, + "learning_rate": 3.321342489983164e-05, + "loss": 1.1452, + "step": 5437500 + }, + { + "epoch": 3.26, + "learning_rate": 3.321132493427108e-05, + "loss": 1.1548, + "step": 5438000 + }, + { + "epoch": 3.26, + "learning_rate": 3.320922916864163e-05, + "loss": 1.1962, + "step": 5438500 + }, + { + "epoch": 3.26, + "learning_rate": 3.320712920308107e-05, + "loss": 1.1682, + "step": 5439000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3205033437451624e-05, + "loss": 1.1735, + "step": 5439500 + }, + { + "epoch": 3.26, + "learning_rate": 3.320293347189106e-05, + "loss": 1.1659, + "step": 5440000 + }, + { + "epoch": 3.26, + "learning_rate": 3.32008335063305e-05, + "loss": 1.1713, + "step": 5440500 + }, + { + "epoch": 3.26, + "learning_rate": 3.319873354076993e-05, + "loss": 1.1762, + "step": 5441000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3196633575209365e-05, + "loss": 1.1634, + "step": 5441500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3194533609648805e-05, + "loss": 1.1536, + "step": 5442000 + }, + { + "epoch": 3.26, + "learning_rate": 3.319243364408824e-05, + "loss": 1.1683, + "step": 5442500 + }, + { + "epoch": 3.26, + "learning_rate": 3.319033367852767e-05, + "loss": 1.1876, + "step": 5443000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3188237912898226e-05, + "loss": 1.1797, + "step": 5443500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3186137947337666e-05, + "loss": 1.1658, + "step": 5444000 + }, + { + "epoch": 3.26, + "learning_rate": 3.318404218170822e-05, + "loss": 1.1528, + "step": 5444500 + }, + { + "epoch": 3.26, + "learning_rate": 3.318194221614765e-05, + "loss": 1.1678, + "step": 5445000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3179842250587086e-05, + "loss": 1.1561, + "step": 5445500 + }, + { + "epoch": 3.27, + "learning_rate": 3.3177742285026526e-05, + "loss": 1.1711, + "step": 5446000 + }, + { + "epoch": 3.27, + "learning_rate": 3.317564231946596e-05, + "loss": 1.1713, + "step": 5446500 + }, + { + "epoch": 3.27, + "learning_rate": 3.3173542353905393e-05, + "loss": 1.1589, + "step": 5447000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3171446588275954e-05, + "loss": 1.1698, + "step": 5447500 + }, + { + "epoch": 3.27, + "learning_rate": 3.316934662271539e-05, + "loss": 1.1669, + "step": 5448000 + }, + { + "epoch": 3.27, + "learning_rate": 3.316724665715482e-05, + "loss": 1.1636, + "step": 5448500 + }, + { + "epoch": 3.27, + "learning_rate": 3.316514669159426e-05, + "loss": 1.1744, + "step": 5449000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3163046726033694e-05, + "loss": 1.1552, + "step": 5449500 + }, + { + "epoch": 3.27, + "learning_rate": 3.316094676047313e-05, + "loss": 1.1609, + "step": 5450000 + }, + { + "epoch": 3.27, + "learning_rate": 3.315885099484368e-05, + "loss": 1.1952, + "step": 5450500 + }, + { + "epoch": 3.27, + "learning_rate": 3.315675102928312e-05, + "loss": 1.1671, + "step": 5451000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3154651063722555e-05, + "loss": 1.1823, + "step": 5451500 + }, + { + "epoch": 3.27, + "learning_rate": 3.315255529809311e-05, + "loss": 1.172, + "step": 5452000 + }, + { + "epoch": 3.27, + "learning_rate": 3.315045533253254e-05, + "loss": 1.1717, + "step": 5452500 + }, + { + "epoch": 3.27, + "learning_rate": 3.314835536697198e-05, + "loss": 1.1687, + "step": 5453000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3146255401411416e-05, + "loss": 1.1762, + "step": 5453500 + }, + { + "epoch": 3.27, + "learning_rate": 3.314415543585085e-05, + "loss": 1.1514, + "step": 5454000 + }, + { + "epoch": 3.27, + "learning_rate": 3.314205547029029e-05, + "loss": 1.1831, + "step": 5454500 + }, + { + "epoch": 3.27, + "learning_rate": 3.313995550472972e-05, + "loss": 1.1613, + "step": 5455000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3137855539169156e-05, + "loss": 1.1873, + "step": 5455500 + }, + { + "epoch": 3.27, + "learning_rate": 3.31357555736086e-05, + "loss": 1.1413, + "step": 5456000 + }, + { + "epoch": 3.27, + "learning_rate": 3.313365980797915e-05, + "loss": 1.1679, + "step": 5456500 + }, + { + "epoch": 3.27, + "learning_rate": 3.3131559842418584e-05, + "loss": 1.1777, + "step": 5457000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3129459876858024e-05, + "loss": 1.1392, + "step": 5457500 + }, + { + "epoch": 3.27, + "learning_rate": 3.312735991129746e-05, + "loss": 1.2143, + "step": 5458000 + }, + { + "epoch": 3.27, + "learning_rate": 3.312526414566801e-05, + "loss": 1.1879, + "step": 5458500 + }, + { + "epoch": 3.27, + "learning_rate": 3.3123164180107444e-05, + "loss": 1.1868, + "step": 5459000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3121064214546885e-05, + "loss": 1.185, + "step": 5459500 + }, + { + "epoch": 3.27, + "learning_rate": 3.311896424898632e-05, + "loss": 1.1529, + "step": 5460000 + }, + { + "epoch": 3.27, + "learning_rate": 3.311686428342575e-05, + "loss": 1.1518, + "step": 5460500 + }, + { + "epoch": 3.27, + "learning_rate": 3.3114764317865185e-05, + "loss": 1.188, + "step": 5461000 + }, + { + "epoch": 3.27, + "learning_rate": 3.311266435230462e-05, + "loss": 1.1941, + "step": 5461500 + }, + { + "epoch": 3.27, + "learning_rate": 3.311056438674405e-05, + "loss": 1.1534, + "step": 5462000 + }, + { + "epoch": 3.27, + "learning_rate": 3.310847282104573e-05, + "loss": 1.1342, + "step": 5462500 + }, + { + "epoch": 3.28, + "learning_rate": 3.310637285548517e-05, + "loss": 1.1565, + "step": 5463000 + }, + { + "epoch": 3.28, + "learning_rate": 3.3104272889924606e-05, + "loss": 1.1423, + "step": 5463500 + }, + { + "epoch": 3.28, + "learning_rate": 3.310217292436404e-05, + "loss": 1.1779, + "step": 5464000 + }, + { + "epoch": 3.28, + "learning_rate": 3.310007295880348e-05, + "loss": 1.1334, + "step": 5464500 + }, + { + "epoch": 3.28, + "learning_rate": 3.309797299324291e-05, + "loss": 1.1437, + "step": 5465000 + }, + { + "epoch": 3.28, + "learning_rate": 3.309587722761347e-05, + "loss": 1.1339, + "step": 5465500 + }, + { + "epoch": 3.28, + "learning_rate": 3.30937772620529e-05, + "loss": 1.1452, + "step": 5466000 + }, + { + "epoch": 3.28, + "learning_rate": 3.309167729649234e-05, + "loss": 1.1541, + "step": 5466500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3089577330931774e-05, + "loss": 1.1505, + "step": 5467000 + }, + { + "epoch": 3.28, + "learning_rate": 3.308747736537121e-05, + "loss": 1.1438, + "step": 5467500 + }, + { + "epoch": 3.28, + "learning_rate": 3.308537739981065e-05, + "loss": 1.1637, + "step": 5468000 + }, + { + "epoch": 3.28, + "learning_rate": 3.30832816341812e-05, + "loss": 1.1919, + "step": 5468500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3081181668620635e-05, + "loss": 1.1632, + "step": 5469000 + }, + { + "epoch": 3.28, + "learning_rate": 3.307908170306007e-05, + "loss": 1.1246, + "step": 5469500 + }, + { + "epoch": 3.28, + "learning_rate": 3.307698173749951e-05, + "loss": 1.1809, + "step": 5470000 + }, + { + "epoch": 3.28, + "learning_rate": 3.3074881771938935e-05, + "loss": 1.1424, + "step": 5470500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3072786006309495e-05, + "loss": 1.1798, + "step": 5471000 + }, + { + "epoch": 3.28, + "learning_rate": 3.3070686040748936e-05, + "loss": 1.155, + "step": 5471500 + }, + { + "epoch": 3.28, + "learning_rate": 3.306858607518837e-05, + "loss": 1.1303, + "step": 5472000 + }, + { + "epoch": 3.28, + "learning_rate": 3.30664861096278e-05, + "loss": 1.1962, + "step": 5472500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3064386144067236e-05, + "loss": 1.1701, + "step": 5473000 + }, + { + "epoch": 3.28, + "learning_rate": 3.306228617850667e-05, + "loss": 1.1438, + "step": 5473500 + }, + { + "epoch": 3.28, + "learning_rate": 3.30601862129461e-05, + "loss": 1.1357, + "step": 5474000 + }, + { + "epoch": 3.28, + "learning_rate": 3.305808624738554e-05, + "loss": 1.1799, + "step": 5474500 + }, + { + "epoch": 3.28, + "learning_rate": 3.305598628182498e-05, + "loss": 1.1823, + "step": 5475000 + }, + { + "epoch": 3.28, + "learning_rate": 3.305389051619553e-05, + "loss": 1.1611, + "step": 5475500 + }, + { + "epoch": 3.28, + "learning_rate": 3.305179895049721e-05, + "loss": 1.2026, + "step": 5476000 + }, + { + "epoch": 3.28, + "learning_rate": 3.3049698984936644e-05, + "loss": 1.1814, + "step": 5476500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3047599019376084e-05, + "loss": 1.1593, + "step": 5477000 + }, + { + "epoch": 3.28, + "learning_rate": 3.304549905381552e-05, + "loss": 1.1712, + "step": 5477500 + }, + { + "epoch": 3.28, + "learning_rate": 3.304339908825495e-05, + "loss": 1.1595, + "step": 5478000 + }, + { + "epoch": 3.28, + "learning_rate": 3.304129912269439e-05, + "loss": 1.1467, + "step": 5478500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3039199157133825e-05, + "loss": 1.1679, + "step": 5479000 + }, + { + "epoch": 3.29, + "learning_rate": 3.303709919157326e-05, + "loss": 1.1644, + "step": 5479500 + }, + { + "epoch": 3.29, + "learning_rate": 3.303499922601269e-05, + "loss": 1.1744, + "step": 5480000 + }, + { + "epoch": 3.29, + "learning_rate": 3.303290346038325e-05, + "loss": 1.1404, + "step": 5480500 + }, + { + "epoch": 3.29, + "learning_rate": 3.3030803494822686e-05, + "loss": 1.1551, + "step": 5481000 + }, + { + "epoch": 3.29, + "learning_rate": 3.302870352926212e-05, + "loss": 1.199, + "step": 5481500 + }, + { + "epoch": 3.29, + "learning_rate": 3.302660356370156e-05, + "loss": 1.1867, + "step": 5482000 + }, + { + "epoch": 3.29, + "learning_rate": 3.3024503598140986e-05, + "loss": 1.1575, + "step": 5482500 + }, + { + "epoch": 3.29, + "learning_rate": 3.3022407832511547e-05, + "loss": 1.1857, + "step": 5483000 + }, + { + "epoch": 3.29, + "learning_rate": 3.302030786695098e-05, + "loss": 1.1904, + "step": 5483500 + }, + { + "epoch": 3.29, + "learning_rate": 3.301820790139042e-05, + "loss": 1.1542, + "step": 5484000 + }, + { + "epoch": 3.29, + "learning_rate": 3.3016107935829854e-05, + "loss": 1.1606, + "step": 5484500 + }, + { + "epoch": 3.29, + "learning_rate": 3.301400797026929e-05, + "loss": 1.185, + "step": 5485000 + }, + { + "epoch": 3.29, + "learning_rate": 3.301190800470872e-05, + "loss": 1.1759, + "step": 5485500 + }, + { + "epoch": 3.29, + "learning_rate": 3.300981223907928e-05, + "loss": 1.1896, + "step": 5486000 + }, + { + "epoch": 3.29, + "learning_rate": 3.3007712273518714e-05, + "loss": 1.1442, + "step": 5486500 + }, + { + "epoch": 3.29, + "learning_rate": 3.3005612307958155e-05, + "loss": 1.1471, + "step": 5487000 + }, + { + "epoch": 3.29, + "learning_rate": 3.300351234239758e-05, + "loss": 1.1923, + "step": 5487500 + }, + { + "epoch": 3.29, + "learning_rate": 3.300141657676814e-05, + "loss": 1.2072, + "step": 5488000 + }, + { + "epoch": 3.29, + "learning_rate": 3.2999316611207575e-05, + "loss": 1.1648, + "step": 5488500 + }, + { + "epoch": 3.29, + "learning_rate": 3.2997216645647015e-05, + "loss": 1.1729, + "step": 5489000 + }, + { + "epoch": 3.29, + "learning_rate": 3.299511668008644e-05, + "loss": 1.1857, + "step": 5489500 + }, + { + "epoch": 3.29, + "learning_rate": 3.2993016714525875e-05, + "loss": 1.1502, + "step": 5490000 + }, + { + "epoch": 3.29, + "learning_rate": 3.2990916748965316e-05, + "loss": 1.1833, + "step": 5490500 + }, + { + "epoch": 3.29, + "learning_rate": 3.298881678340475e-05, + "loss": 1.1546, + "step": 5491000 + }, + { + "epoch": 3.29, + "learning_rate": 3.298671681784418e-05, + "loss": 1.1611, + "step": 5491500 + }, + { + "epoch": 3.29, + "learning_rate": 3.298462105221474e-05, + "loss": 1.1693, + "step": 5492000 + }, + { + "epoch": 3.29, + "learning_rate": 3.2982521086654176e-05, + "loss": 1.1526, + "step": 5492500 + }, + { + "epoch": 3.29, + "learning_rate": 3.298042112109361e-05, + "loss": 1.1717, + "step": 5493000 + }, + { + "epoch": 3.29, + "learning_rate": 3.297832115553305e-05, + "loss": 1.1482, + "step": 5493500 + }, + { + "epoch": 3.29, + "learning_rate": 3.2976221189972484e-05, + "loss": 1.1426, + "step": 5494000 + }, + { + "epoch": 3.29, + "learning_rate": 3.297412542434304e-05, + "loss": 1.1362, + "step": 5494500 + }, + { + "epoch": 3.29, + "learning_rate": 3.297202545878247e-05, + "loss": 1.1979, + "step": 5495000 + }, + { + "epoch": 3.29, + "learning_rate": 3.296992549322191e-05, + "loss": 1.1556, + "step": 5495500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2967825527661344e-05, + "loss": 1.1764, + "step": 5496000 + }, + { + "epoch": 3.3, + "learning_rate": 3.296572556210078e-05, + "loss": 1.1845, + "step": 5496500 + }, + { + "epoch": 3.3, + "learning_rate": 3.296362979647133e-05, + "loss": 1.1422, + "step": 5497000 + }, + { + "epoch": 3.3, + "learning_rate": 3.296152983091077e-05, + "loss": 1.1756, + "step": 5497500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2959429865350205e-05, + "loss": 1.1572, + "step": 5498000 + }, + { + "epoch": 3.3, + "learning_rate": 3.2957329899789645e-05, + "loss": 1.167, + "step": 5498500 + }, + { + "epoch": 3.3, + "learning_rate": 3.295522993422908e-05, + "loss": 1.1462, + "step": 5499000 + }, + { + "epoch": 3.3, + "learning_rate": 3.295312996866851e-05, + "loss": 1.1485, + "step": 5499500 + }, + { + "epoch": 3.3, + "learning_rate": 3.295103000310795e-05, + "loss": 1.1524, + "step": 5500000 + }, + { + "epoch": 3.3, + "eval_loss": 1.1378905773162842, + "eval_runtime": 1099.047, + "eval_samples_per_second": 479.252, + "eval_steps_per_second": 79.876, + "step": 5500000 + }, + { + "epoch": 3.3, + "learning_rate": 3.2948930037547386e-05, + "loss": 1.1625, + "step": 5500500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2946838471849066e-05, + "loss": 1.1708, + "step": 5501000 + }, + { + "epoch": 3.3, + "learning_rate": 3.294473850628849e-05, + "loss": 1.1493, + "step": 5501500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2942638540727927e-05, + "loss": 1.1854, + "step": 5502000 + }, + { + "epoch": 3.3, + "learning_rate": 3.294053857516737e-05, + "loss": 1.1913, + "step": 5502500 + }, + { + "epoch": 3.3, + "learning_rate": 3.29384386096068e-05, + "loss": 1.1578, + "step": 5503000 + }, + { + "epoch": 3.3, + "learning_rate": 3.2936338644046234e-05, + "loss": 1.1537, + "step": 5503500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2934238678485674e-05, + "loss": 1.1585, + "step": 5504000 + }, + { + "epoch": 3.3, + "learning_rate": 3.293213871292511e-05, + "loss": 1.1688, + "step": 5504500 + }, + { + "epoch": 3.3, + "learning_rate": 3.293003874736454e-05, + "loss": 1.1186, + "step": 5505000 + }, + { + "epoch": 3.3, + "learning_rate": 3.29279429817351e-05, + "loss": 1.1879, + "step": 5505500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2925843016174535e-05, + "loss": 1.1895, + "step": 5506000 + }, + { + "epoch": 3.3, + "learning_rate": 3.292374725054509e-05, + "loss": 1.1507, + "step": 5506500 + }, + { + "epoch": 3.3, + "learning_rate": 3.292164728498452e-05, + "loss": 1.1481, + "step": 5507000 + }, + { + "epoch": 3.3, + "learning_rate": 3.291954731942396e-05, + "loss": 1.1713, + "step": 5507500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2917447353863395e-05, + "loss": 1.138, + "step": 5508000 + }, + { + "epoch": 3.3, + "learning_rate": 3.291534738830283e-05, + "loss": 1.1811, + "step": 5508500 + }, + { + "epoch": 3.3, + "learning_rate": 3.291324742274227e-05, + "loss": 1.133, + "step": 5509000 + }, + { + "epoch": 3.3, + "learning_rate": 3.29111474571817e-05, + "loss": 1.155, + "step": 5509500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2909047491621136e-05, + "loss": 1.1687, + "step": 5510000 + }, + { + "epoch": 3.3, + "learning_rate": 3.290695172599169e-05, + "loss": 1.1658, + "step": 5510500 + }, + { + "epoch": 3.3, + "learning_rate": 3.290485176043113e-05, + "loss": 1.1992, + "step": 5511000 + }, + { + "epoch": 3.3, + "learning_rate": 3.290275179487056e-05, + "loss": 1.1878, + "step": 5511500 + }, + { + "epoch": 3.3, + "learning_rate": 3.290065182931e-05, + "loss": 1.1614, + "step": 5512000 + }, + { + "epoch": 3.3, + "learning_rate": 3.289855606368056e-05, + "loss": 1.145, + "step": 5512500 + }, + { + "epoch": 3.31, + "learning_rate": 3.289646029805112e-05, + "loss": 1.1661, + "step": 5513000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2894360332490544e-05, + "loss": 1.17, + "step": 5513500 + }, + { + "epoch": 3.31, + "learning_rate": 3.289226036692998e-05, + "loss": 1.1862, + "step": 5514000 + }, + { + "epoch": 3.31, + "learning_rate": 3.289016040136942e-05, + "loss": 1.186, + "step": 5514500 + }, + { + "epoch": 3.31, + "learning_rate": 3.288806043580885e-05, + "loss": 1.1473, + "step": 5515000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2885960470248285e-05, + "loss": 1.1498, + "step": 5515500 + }, + { + "epoch": 3.31, + "learning_rate": 3.288386470461884e-05, + "loss": 1.1522, + "step": 5516000 + }, + { + "epoch": 3.31, + "learning_rate": 3.288176473905828e-05, + "loss": 1.1552, + "step": 5516500 + }, + { + "epoch": 3.31, + "learning_rate": 3.287966477349771e-05, + "loss": 1.1527, + "step": 5517000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2877564807937145e-05, + "loss": 1.1367, + "step": 5517500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2875464842376586e-05, + "loss": 1.1411, + "step": 5518000 + }, + { + "epoch": 3.31, + "learning_rate": 3.287336907674714e-05, + "loss": 1.1738, + "step": 5518500 + }, + { + "epoch": 3.31, + "learning_rate": 3.287126911118657e-05, + "loss": 1.1583, + "step": 5519000 + }, + { + "epoch": 3.31, + "learning_rate": 3.286916914562601e-05, + "loss": 1.1476, + "step": 5519500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2867069180065446e-05, + "loss": 1.1541, + "step": 5520000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2864973414436e-05, + "loss": 1.1742, + "step": 5520500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2862873448875433e-05, + "loss": 1.1569, + "step": 5521000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2860773483314874e-05, + "loss": 1.1619, + "step": 5521500 + }, + { + "epoch": 3.31, + "learning_rate": 3.285867351775431e-05, + "loss": 1.1665, + "step": 5522000 + }, + { + "epoch": 3.31, + "learning_rate": 3.285657355219374e-05, + "loss": 1.141, + "step": 5522500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2854477786564294e-05, + "loss": 1.14, + "step": 5523000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2852377821003734e-05, + "loss": 1.1496, + "step": 5523500 + }, + { + "epoch": 3.31, + "learning_rate": 3.285027785544317e-05, + "loss": 1.1462, + "step": 5524000 + }, + { + "epoch": 3.31, + "learning_rate": 3.28481778898826e-05, + "loss": 1.2024, + "step": 5524500 + }, + { + "epoch": 3.31, + "learning_rate": 3.284607792432204e-05, + "loss": 1.1859, + "step": 5525000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2843977958761475e-05, + "loss": 1.153, + "step": 5525500 + }, + { + "epoch": 3.31, + "learning_rate": 3.284187799320091e-05, + "loss": 1.1873, + "step": 5526000 + }, + { + "epoch": 3.31, + "learning_rate": 3.283977802764035e-05, + "loss": 1.1425, + "step": 5526500 + }, + { + "epoch": 3.31, + "learning_rate": 3.283768646194203e-05, + "loss": 1.1748, + "step": 5527000 + }, + { + "epoch": 3.31, + "learning_rate": 3.283558649638146e-05, + "loss": 1.1748, + "step": 5527500 + }, + { + "epoch": 3.31, + "learning_rate": 3.283348653082089e-05, + "loss": 1.1708, + "step": 5528000 + }, + { + "epoch": 3.31, + "learning_rate": 3.283138656526033e-05, + "loss": 1.1719, + "step": 5528500 + }, + { + "epoch": 3.31, + "learning_rate": 3.282928659969976e-05, + "loss": 1.1561, + "step": 5529000 + }, + { + "epoch": 3.32, + "learning_rate": 3.282719083407032e-05, + "loss": 1.1466, + "step": 5529500 + }, + { + "epoch": 3.32, + "learning_rate": 3.282509086850976e-05, + "loss": 1.1463, + "step": 5530000 + }, + { + "epoch": 3.32, + "learning_rate": 3.282299090294919e-05, + "loss": 1.1693, + "step": 5530500 + }, + { + "epoch": 3.32, + "learning_rate": 3.2820890937388624e-05, + "loss": 1.1917, + "step": 5531000 + }, + { + "epoch": 3.32, + "learning_rate": 3.281879097182806e-05, + "loss": 1.1586, + "step": 5531500 + }, + { + "epoch": 3.32, + "learning_rate": 3.28166910062675e-05, + "loss": 1.1624, + "step": 5532000 + }, + { + "epoch": 3.32, + "learning_rate": 3.281459104070693e-05, + "loss": 1.1395, + "step": 5532500 + }, + { + "epoch": 3.32, + "learning_rate": 3.2812491075146364e-05, + "loss": 1.1747, + "step": 5533000 + }, + { + "epoch": 3.32, + "learning_rate": 3.2810395309516925e-05, + "loss": 1.1776, + "step": 5533500 + }, + { + "epoch": 3.32, + "learning_rate": 3.2808299543887485e-05, + "loss": 1.1549, + "step": 5534000 + }, + { + "epoch": 3.32, + "learning_rate": 3.280619957832692e-05, + "loss": 1.156, + "step": 5534500 + }, + { + "epoch": 3.32, + "learning_rate": 3.2804099612766345e-05, + "loss": 1.1574, + "step": 5535000 + }, + { + "epoch": 3.32, + "learning_rate": 3.2801999647205785e-05, + "loss": 1.1571, + "step": 5535500 + }, + { + "epoch": 3.32, + "learning_rate": 3.279989968164522e-05, + "loss": 1.1835, + "step": 5536000 + }, + { + "epoch": 3.32, + "learning_rate": 3.279779971608465e-05, + "loss": 1.1816, + "step": 5536500 + }, + { + "epoch": 3.32, + "learning_rate": 3.279569975052409e-05, + "loss": 1.1394, + "step": 5537000 + }, + { + "epoch": 3.32, + "learning_rate": 3.2793599784963526e-05, + "loss": 1.131, + "step": 5537500 + }, + { + "epoch": 3.32, + "learning_rate": 3.279149981940296e-05, + "loss": 1.1544, + "step": 5538000 + }, + { + "epoch": 3.32, + "learning_rate": 3.27893998538424e-05, + "loss": 1.182, + "step": 5538500 + }, + { + "epoch": 3.32, + "learning_rate": 3.278729988828183e-05, + "loss": 1.1708, + "step": 5539000 + }, + { + "epoch": 3.32, + "learning_rate": 3.278519992272127e-05, + "loss": 1.1644, + "step": 5539500 + }, + { + "epoch": 3.32, + "learning_rate": 3.278310415709182e-05, + "loss": 1.1766, + "step": 5540000 + }, + { + "epoch": 3.32, + "learning_rate": 3.278100419153126e-05, + "loss": 1.1715, + "step": 5540500 + }, + { + "epoch": 3.32, + "learning_rate": 3.2778904225970694e-05, + "loss": 1.1449, + "step": 5541000 + }, + { + "epoch": 3.32, + "learning_rate": 3.277680846034125e-05, + "loss": 1.1597, + "step": 5541500 + }, + { + "epoch": 3.32, + "learning_rate": 3.277470849478069e-05, + "loss": 1.1782, + "step": 5542000 + }, + { + "epoch": 3.32, + "learning_rate": 3.277260852922012e-05, + "loss": 1.1908, + "step": 5542500 + }, + { + "epoch": 3.32, + "learning_rate": 3.2770508563659555e-05, + "loss": 1.1709, + "step": 5543000 + }, + { + "epoch": 3.32, + "learning_rate": 3.2768408598098995e-05, + "loss": 1.1774, + "step": 5543500 + }, + { + "epoch": 3.32, + "learning_rate": 3.276631283246955e-05, + "loss": 1.1743, + "step": 5544000 + }, + { + "epoch": 3.32, + "learning_rate": 3.276421286690898e-05, + "loss": 1.1763, + "step": 5544500 + }, + { + "epoch": 3.32, + "learning_rate": 3.2762117101279535e-05, + "loss": 1.1776, + "step": 5545000 + }, + { + "epoch": 3.32, + "learning_rate": 3.276001713571897e-05, + "loss": 1.1648, + "step": 5545500 + }, + { + "epoch": 3.33, + "learning_rate": 3.275791717015841e-05, + "loss": 1.1566, + "step": 5546000 + }, + { + "epoch": 3.33, + "learning_rate": 3.275581720459784e-05, + "loss": 1.172, + "step": 5546500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2753717239037276e-05, + "loss": 1.1489, + "step": 5547000 + }, + { + "epoch": 3.33, + "learning_rate": 3.2751617273476716e-05, + "loss": 1.181, + "step": 5547500 + }, + { + "epoch": 3.33, + "learning_rate": 3.274951730791615e-05, + "loss": 1.1685, + "step": 5548000 + }, + { + "epoch": 3.33, + "learning_rate": 3.274741734235558e-05, + "loss": 1.1771, + "step": 5548500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2745321576726144e-05, + "loss": 1.1511, + "step": 5549000 + }, + { + "epoch": 3.33, + "learning_rate": 3.274322161116558e-05, + "loss": 1.1519, + "step": 5549500 + }, + { + "epoch": 3.33, + "learning_rate": 3.274112164560501e-05, + "loss": 1.1419, + "step": 5550000 + }, + { + "epoch": 3.33, + "learning_rate": 3.273902168004445e-05, + "loss": 1.1502, + "step": 5550500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2736921714483884e-05, + "loss": 1.1489, + "step": 5551000 + }, + { + "epoch": 3.33, + "learning_rate": 3.273482174892332e-05, + "loss": 1.1766, + "step": 5551500 + }, + { + "epoch": 3.33, + "learning_rate": 3.273272598329387e-05, + "loss": 1.1568, + "step": 5552000 + }, + { + "epoch": 3.33, + "learning_rate": 3.2730630217664425e-05, + "loss": 1.1706, + "step": 5552500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2728530252103865e-05, + "loss": 1.1486, + "step": 5553000 + }, + { + "epoch": 3.33, + "learning_rate": 3.27264302865433e-05, + "loss": 1.1537, + "step": 5553500 + }, + { + "epoch": 3.33, + "learning_rate": 3.272433032098273e-05, + "loss": 1.1729, + "step": 5554000 + }, + { + "epoch": 3.33, + "learning_rate": 3.272223035542217e-05, + "loss": 1.1485, + "step": 5554500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2720130389861606e-05, + "loss": 1.1878, + "step": 5555000 + }, + { + "epoch": 3.33, + "learning_rate": 3.271803042430104e-05, + "loss": 1.1427, + "step": 5555500 + }, + { + "epoch": 3.33, + "learning_rate": 3.271593045874048e-05, + "loss": 1.1504, + "step": 5556000 + }, + { + "epoch": 3.33, + "learning_rate": 3.271383049317991e-05, + "loss": 1.1673, + "step": 5556500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2711730527619346e-05, + "loss": 1.1653, + "step": 5557000 + }, + { + "epoch": 3.33, + "learning_rate": 3.270963476198991e-05, + "loss": 1.1439, + "step": 5557500 + }, + { + "epoch": 3.33, + "learning_rate": 3.270753479642934e-05, + "loss": 1.1659, + "step": 5558000 + }, + { + "epoch": 3.33, + "learning_rate": 3.2705434830868774e-05, + "loss": 1.1871, + "step": 5558500 + }, + { + "epoch": 3.33, + "learning_rate": 3.270333906523933e-05, + "loss": 1.1778, + "step": 5559000 + }, + { + "epoch": 3.33, + "learning_rate": 3.270123909967877e-05, + "loss": 1.1815, + "step": 5559500 + }, + { + "epoch": 3.33, + "learning_rate": 3.26991391341182e-05, + "loss": 1.1466, + "step": 5560000 + }, + { + "epoch": 3.33, + "learning_rate": 3.2697039168557634e-05, + "loss": 1.1849, + "step": 5560500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2694939202997075e-05, + "loss": 1.1925, + "step": 5561000 + }, + { + "epoch": 3.33, + "learning_rate": 3.269283923743651e-05, + "loss": 1.1572, + "step": 5561500 + }, + { + "epoch": 3.33, + "learning_rate": 3.269074347180706e-05, + "loss": 1.1872, + "step": 5562000 + }, + { + "epoch": 3.33, + "learning_rate": 3.2688643506246495e-05, + "loss": 1.1712, + "step": 5562500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2686543540685935e-05, + "loss": 1.1812, + "step": 5563000 + }, + { + "epoch": 3.34, + "learning_rate": 3.268444357512537e-05, + "loss": 1.1597, + "step": 5563500 + }, + { + "epoch": 3.34, + "learning_rate": 3.268234360956481e-05, + "loss": 1.1551, + "step": 5564000 + }, + { + "epoch": 3.34, + "learning_rate": 3.2680243644004236e-05, + "loss": 1.184, + "step": 5564500 + }, + { + "epoch": 3.34, + "learning_rate": 3.267814367844367e-05, + "loss": 1.1797, + "step": 5565000 + }, + { + "epoch": 3.34, + "learning_rate": 3.267604371288311e-05, + "loss": 1.1994, + "step": 5565500 + }, + { + "epoch": 3.34, + "learning_rate": 3.267394374732254e-05, + "loss": 1.1719, + "step": 5566000 + }, + { + "epoch": 3.34, + "learning_rate": 3.26718479816931e-05, + "loss": 1.1334, + "step": 5566500 + }, + { + "epoch": 3.34, + "learning_rate": 3.266974801613253e-05, + "loss": 1.1589, + "step": 5567000 + }, + { + "epoch": 3.34, + "learning_rate": 3.266764805057197e-05, + "loss": 1.1703, + "step": 5567500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2665548085011403e-05, + "loss": 1.1446, + "step": 5568000 + }, + { + "epoch": 3.34, + "learning_rate": 3.266344811945084e-05, + "loss": 1.1726, + "step": 5568500 + }, + { + "epoch": 3.34, + "learning_rate": 3.266135235382139e-05, + "loss": 1.1584, + "step": 5569000 + }, + { + "epoch": 3.34, + "learning_rate": 3.265925238826083e-05, + "loss": 1.1766, + "step": 5569500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2657152422700264e-05, + "loss": 1.172, + "step": 5570000 + }, + { + "epoch": 3.34, + "learning_rate": 3.2655052457139704e-05, + "loss": 1.176, + "step": 5570500 + }, + { + "epoch": 3.34, + "learning_rate": 3.265295249157914e-05, + "loss": 1.161, + "step": 5571000 + }, + { + "epoch": 3.34, + "learning_rate": 3.265085252601857e-05, + "loss": 1.1801, + "step": 5571500 + }, + { + "epoch": 3.34, + "learning_rate": 3.264875256045801e-05, + "loss": 1.1811, + "step": 5572000 + }, + { + "epoch": 3.34, + "learning_rate": 3.2646656794828565e-05, + "loss": 1.1385, + "step": 5572500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2644561029199126e-05, + "loss": 1.1526, + "step": 5573000 + }, + { + "epoch": 3.34, + "learning_rate": 3.264246106363856e-05, + "loss": 1.1776, + "step": 5573500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2640361098077986e-05, + "loss": 1.1654, + "step": 5574000 + }, + { + "epoch": 3.34, + "learning_rate": 3.2638261132517426e-05, + "loss": 1.1677, + "step": 5574500 + }, + { + "epoch": 3.34, + "learning_rate": 3.263616116695686e-05, + "loss": 1.1366, + "step": 5575000 + }, + { + "epoch": 3.34, + "learning_rate": 3.263406120139629e-05, + "loss": 1.1738, + "step": 5575500 + }, + { + "epoch": 3.34, + "learning_rate": 3.263196123583573e-05, + "loss": 1.1653, + "step": 5576000 + }, + { + "epoch": 3.34, + "learning_rate": 3.2629861270275167e-05, + "loss": 1.1197, + "step": 5576500 + }, + { + "epoch": 3.34, + "learning_rate": 3.26277613047146e-05, + "loss": 1.1872, + "step": 5577000 + }, + { + "epoch": 3.34, + "learning_rate": 3.262566133915404e-05, + "loss": 1.1891, + "step": 5577500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2623565573524594e-05, + "loss": 1.1761, + "step": 5578000 + }, + { + "epoch": 3.34, + "learning_rate": 3.262146560796403e-05, + "loss": 1.173, + "step": 5578500 + }, + { + "epoch": 3.34, + "learning_rate": 3.261936564240347e-05, + "loss": 1.2092, + "step": 5579000 + }, + { + "epoch": 3.35, + "learning_rate": 3.26172656768429e-05, + "loss": 1.1165, + "step": 5579500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2615165711282334e-05, + "loss": 1.174, + "step": 5580000 + }, + { + "epoch": 3.35, + "learning_rate": 3.2613065745721775e-05, + "loss": 1.1497, + "step": 5580500 + }, + { + "epoch": 3.35, + "learning_rate": 3.261096578016121e-05, + "loss": 1.1227, + "step": 5581000 + }, + { + "epoch": 3.35, + "learning_rate": 3.260886581460064e-05, + "loss": 1.1727, + "step": 5581500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2606774248902315e-05, + "loss": 1.1553, + "step": 5582000 + }, + { + "epoch": 3.35, + "learning_rate": 3.260467428334175e-05, + "loss": 1.1526, + "step": 5582500 + }, + { + "epoch": 3.35, + "learning_rate": 3.260257431778119e-05, + "loss": 1.1357, + "step": 5583000 + }, + { + "epoch": 3.35, + "learning_rate": 3.260047435222062e-05, + "loss": 1.1499, + "step": 5583500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2598374386660056e-05, + "loss": 1.1476, + "step": 5584000 + }, + { + "epoch": 3.35, + "learning_rate": 3.2596274421099496e-05, + "loss": 1.177, + "step": 5584500 + }, + { + "epoch": 3.35, + "learning_rate": 3.259417445553893e-05, + "loss": 1.1596, + "step": 5585000 + }, + { + "epoch": 3.35, + "learning_rate": 3.259207868990948e-05, + "loss": 1.1741, + "step": 5585500 + }, + { + "epoch": 3.35, + "learning_rate": 3.258997872434892e-05, + "loss": 1.1765, + "step": 5586000 + }, + { + "epoch": 3.35, + "learning_rate": 3.258787875878836e-05, + "loss": 1.1748, + "step": 5586500 + }, + { + "epoch": 3.35, + "learning_rate": 3.258577879322779e-05, + "loss": 1.1551, + "step": 5587000 + }, + { + "epoch": 3.35, + "learning_rate": 3.258367882766723e-05, + "loss": 1.1721, + "step": 5587500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2581578862106664e-05, + "loss": 1.1569, + "step": 5588000 + }, + { + "epoch": 3.35, + "learning_rate": 3.25794788965461e-05, + "loss": 1.1801, + "step": 5588500 + }, + { + "epoch": 3.35, + "learning_rate": 3.257737893098553e-05, + "loss": 1.1581, + "step": 5589000 + }, + { + "epoch": 3.35, + "learning_rate": 3.2575278965424964e-05, + "loss": 1.1685, + "step": 5589500 + }, + { + "epoch": 3.35, + "learning_rate": 3.25731789998644e-05, + "loss": 1.1671, + "step": 5590000 + }, + { + "epoch": 3.35, + "learning_rate": 3.257107903430384e-05, + "loss": 1.181, + "step": 5590500 + }, + { + "epoch": 3.35, + "learning_rate": 3.256897906874327e-05, + "loss": 1.1481, + "step": 5591000 + }, + { + "epoch": 3.35, + "learning_rate": 3.2566883303113825e-05, + "loss": 1.1585, + "step": 5591500 + }, + { + "epoch": 3.35, + "learning_rate": 3.256478333755326e-05, + "loss": 1.1532, + "step": 5592000 + }, + { + "epoch": 3.35, + "learning_rate": 3.256268757192382e-05, + "loss": 1.1392, + "step": 5592500 + }, + { + "epoch": 3.35, + "learning_rate": 3.256058760636326e-05, + "loss": 1.1482, + "step": 5593000 + }, + { + "epoch": 3.35, + "learning_rate": 3.2558487640802686e-05, + "loss": 1.1802, + "step": 5593500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2556387675242126e-05, + "loss": 1.1745, + "step": 5594000 + }, + { + "epoch": 3.35, + "learning_rate": 3.255428770968156e-05, + "loss": 1.1544, + "step": 5594500 + }, + { + "epoch": 3.35, + "learning_rate": 3.255219194405212e-05, + "loss": 1.177, + "step": 5595000 + }, + { + "epoch": 3.35, + "learning_rate": 3.255009197849155e-05, + "loss": 1.1666, + "step": 5595500 + }, + { + "epoch": 3.36, + "learning_rate": 3.254799201293099e-05, + "loss": 1.149, + "step": 5596000 + }, + { + "epoch": 3.36, + "learning_rate": 3.254589204737042e-05, + "loss": 1.1711, + "step": 5596500 + }, + { + "epoch": 3.36, + "learning_rate": 3.2543792081809854e-05, + "loss": 1.1619, + "step": 5597000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2541692116249294e-05, + "loss": 1.1795, + "step": 5597500 + }, + { + "epoch": 3.36, + "learning_rate": 3.253959215068873e-05, + "loss": 1.1767, + "step": 5598000 + }, + { + "epoch": 3.36, + "learning_rate": 3.253749218512816e-05, + "loss": 1.156, + "step": 5598500 + }, + { + "epoch": 3.36, + "learning_rate": 3.2535396419498714e-05, + "loss": 1.1811, + "step": 5599000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2533296453938155e-05, + "loss": 1.1904, + "step": 5599500 + }, + { + "epoch": 3.36, + "learning_rate": 3.253119648837759e-05, + "loss": 1.1792, + "step": 5600000 + }, + { + "epoch": 3.36, + "eval_loss": 1.133358359336853, + "eval_runtime": 1102.5182, + "eval_samples_per_second": 477.743, + "eval_steps_per_second": 79.624, + "step": 5600000 + }, + { + "epoch": 3.36, + "learning_rate": 3.252909652281702e-05, + "loss": 1.1808, + "step": 5600500 + }, + { + "epoch": 3.36, + "learning_rate": 3.252699655725646e-05, + "loss": 1.1619, + "step": 5601000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2524896591695895e-05, + "loss": 1.1489, + "step": 5601500 + }, + { + "epoch": 3.36, + "learning_rate": 3.252279662613533e-05, + "loss": 1.1419, + "step": 5602000 + }, + { + "epoch": 3.36, + "learning_rate": 3.252069666057477e-05, + "loss": 1.1486, + "step": 5602500 + }, + { + "epoch": 3.36, + "learning_rate": 3.251860509487645e-05, + "loss": 1.1613, + "step": 5603000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2516505129315876e-05, + "loss": 1.1704, + "step": 5603500 + }, + { + "epoch": 3.36, + "learning_rate": 3.251440516375531e-05, + "loss": 1.1719, + "step": 5604000 + }, + { + "epoch": 3.36, + "learning_rate": 3.251230519819475e-05, + "loss": 1.1553, + "step": 5604500 + }, + { + "epoch": 3.36, + "learning_rate": 3.251020943256531e-05, + "loss": 1.1717, + "step": 5605000 + }, + { + "epoch": 3.36, + "learning_rate": 3.250810946700474e-05, + "loss": 1.1541, + "step": 5605500 + }, + { + "epoch": 3.36, + "learning_rate": 3.250600950144417e-05, + "loss": 1.1557, + "step": 5606000 + }, + { + "epoch": 3.36, + "learning_rate": 3.250390953588361e-05, + "loss": 1.1617, + "step": 5606500 + }, + { + "epoch": 3.36, + "learning_rate": 3.2501809570323044e-05, + "loss": 1.1977, + "step": 5607000 + }, + { + "epoch": 3.36, + "learning_rate": 3.249970960476248e-05, + "loss": 1.1225, + "step": 5607500 + }, + { + "epoch": 3.36, + "learning_rate": 3.249761383913304e-05, + "loss": 1.1355, + "step": 5608000 + }, + { + "epoch": 3.36, + "learning_rate": 3.249551387357247e-05, + "loss": 1.2044, + "step": 5608500 + }, + { + "epoch": 3.36, + "learning_rate": 3.2493413908011905e-05, + "loss": 1.1625, + "step": 5609000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2491313942451345e-05, + "loss": 1.218, + "step": 5609500 + }, + { + "epoch": 3.36, + "learning_rate": 3.248921397689078e-05, + "loss": 1.1813, + "step": 5610000 + }, + { + "epoch": 3.36, + "learning_rate": 3.248711401133021e-05, + "loss": 1.1655, + "step": 5610500 + }, + { + "epoch": 3.36, + "learning_rate": 3.248501404576965e-05, + "loss": 1.1774, + "step": 5611000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2482914080209086e-05, + "loss": 1.1804, + "step": 5611500 + }, + { + "epoch": 3.36, + "learning_rate": 3.248081411464852e-05, + "loss": 1.1701, + "step": 5612000 + }, + { + "epoch": 3.36, + "learning_rate": 3.247871414908796e-05, + "loss": 1.1715, + "step": 5612500 + }, + { + "epoch": 3.37, + "learning_rate": 3.247661418352739e-05, + "loss": 1.1438, + "step": 5613000 + }, + { + "epoch": 3.37, + "learning_rate": 3.247451421796682e-05, + "loss": 1.1704, + "step": 5613500 + }, + { + "epoch": 3.37, + "learning_rate": 3.247241845233738e-05, + "loss": 1.1741, + "step": 5614000 + }, + { + "epoch": 3.37, + "learning_rate": 3.247032268670793e-05, + "loss": 1.1767, + "step": 5614500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2468222721147374e-05, + "loss": 1.1701, + "step": 5615000 + }, + { + "epoch": 3.37, + "learning_rate": 3.246612275558681e-05, + "loss": 1.1704, + "step": 5615500 + }, + { + "epoch": 3.37, + "learning_rate": 3.246402279002624e-05, + "loss": 1.1816, + "step": 5616000 + }, + { + "epoch": 3.37, + "learning_rate": 3.246192282446568e-05, + "loss": 1.1832, + "step": 5616500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2459822858905114e-05, + "loss": 1.1316, + "step": 5617000 + }, + { + "epoch": 3.37, + "learning_rate": 3.2457722893344554e-05, + "loss": 1.1756, + "step": 5617500 + }, + { + "epoch": 3.37, + "learning_rate": 3.245562292778399e-05, + "loss": 1.1507, + "step": 5618000 + }, + { + "epoch": 3.37, + "learning_rate": 3.245352716215454e-05, + "loss": 1.1894, + "step": 5618500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2451427196593975e-05, + "loss": 1.1887, + "step": 5619000 + }, + { + "epoch": 3.37, + "learning_rate": 3.2449327231033415e-05, + "loss": 1.1716, + "step": 5619500 + }, + { + "epoch": 3.37, + "learning_rate": 3.244722726547285e-05, + "loss": 1.1675, + "step": 5620000 + }, + { + "epoch": 3.37, + "learning_rate": 3.24451314998434e-05, + "loss": 1.162, + "step": 5620500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2443035734213956e-05, + "loss": 1.1518, + "step": 5621000 + }, + { + "epoch": 3.37, + "learning_rate": 3.244093576865339e-05, + "loss": 1.1753, + "step": 5621500 + }, + { + "epoch": 3.37, + "learning_rate": 3.243883580309283e-05, + "loss": 1.174, + "step": 5622000 + }, + { + "epoch": 3.37, + "learning_rate": 3.243673583753226e-05, + "loss": 1.1792, + "step": 5622500 + }, + { + "epoch": 3.37, + "learning_rate": 3.24346358719717e-05, + "loss": 1.1855, + "step": 5623000 + }, + { + "epoch": 3.37, + "learning_rate": 3.2432535906411137e-05, + "loss": 1.1751, + "step": 5623500 + }, + { + "epoch": 3.37, + "learning_rate": 3.243043594085057e-05, + "loss": 1.1527, + "step": 5624000 + }, + { + "epoch": 3.37, + "learning_rate": 3.242833597529001e-05, + "loss": 1.1513, + "step": 5624500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2426240209660564e-05, + "loss": 1.1684, + "step": 5625000 + }, + { + "epoch": 3.37, + "learning_rate": 3.242414444403112e-05, + "loss": 1.1693, + "step": 5625500 + }, + { + "epoch": 3.37, + "learning_rate": 3.242204447847055e-05, + "loss": 1.1837, + "step": 5626000 + }, + { + "epoch": 3.37, + "learning_rate": 3.2419944512909984e-05, + "loss": 1.1661, + "step": 5626500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2417844547349425e-05, + "loss": 1.1463, + "step": 5627000 + }, + { + "epoch": 3.37, + "learning_rate": 3.241574878171998e-05, + "loss": 1.1676, + "step": 5627500 + }, + { + "epoch": 3.37, + "learning_rate": 3.241364881615941e-05, + "loss": 1.1473, + "step": 5628000 + }, + { + "epoch": 3.37, + "learning_rate": 3.2411548850598845e-05, + "loss": 1.1347, + "step": 5628500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2409448885038285e-05, + "loss": 1.1763, + "step": 5629000 + }, + { + "epoch": 3.38, + "learning_rate": 3.240734891947772e-05, + "loss": 1.1725, + "step": 5629500 + }, + { + "epoch": 3.38, + "learning_rate": 3.240524895391716e-05, + "loss": 1.1698, + "step": 5630000 + }, + { + "epoch": 3.38, + "learning_rate": 3.240315318828771e-05, + "loss": 1.179, + "step": 5630500 + }, + { + "epoch": 3.38, + "learning_rate": 3.2401053222727146e-05, + "loss": 1.1706, + "step": 5631000 + }, + { + "epoch": 3.38, + "learning_rate": 3.239895325716658e-05, + "loss": 1.1759, + "step": 5631500 + }, + { + "epoch": 3.38, + "learning_rate": 3.239685329160602e-05, + "loss": 1.1692, + "step": 5632000 + }, + { + "epoch": 3.38, + "learning_rate": 3.239475332604545e-05, + "loss": 1.1353, + "step": 5632500 + }, + { + "epoch": 3.38, + "learning_rate": 3.239265336048489e-05, + "loss": 1.1444, + "step": 5633000 + }, + { + "epoch": 3.38, + "learning_rate": 3.239055759485544e-05, + "loss": 1.1692, + "step": 5633500 + }, + { + "epoch": 3.38, + "learning_rate": 3.238845762929488e-05, + "loss": 1.1608, + "step": 5634000 + }, + { + "epoch": 3.38, + "learning_rate": 3.2386357663734314e-05, + "loss": 1.1699, + "step": 5634500 + }, + { + "epoch": 3.38, + "learning_rate": 3.238425769817375e-05, + "loss": 1.1614, + "step": 5635000 + }, + { + "epoch": 3.38, + "learning_rate": 3.238216193254431e-05, + "loss": 1.15, + "step": 5635500 + }, + { + "epoch": 3.38, + "learning_rate": 3.238006196698374e-05, + "loss": 1.1875, + "step": 5636000 + }, + { + "epoch": 3.38, + "learning_rate": 3.2377962001423175e-05, + "loss": 1.1752, + "step": 5636500 + }, + { + "epoch": 3.38, + "learning_rate": 3.2375862035862615e-05, + "loss": 1.1718, + "step": 5637000 + }, + { + "epoch": 3.38, + "learning_rate": 3.237376207030205e-05, + "loss": 1.1659, + "step": 5637500 + }, + { + "epoch": 3.38, + "learning_rate": 3.23716663046726e-05, + "loss": 1.1794, + "step": 5638000 + }, + { + "epoch": 3.38, + "learning_rate": 3.2369566339112035e-05, + "loss": 1.1792, + "step": 5638500 + }, + { + "epoch": 3.38, + "learning_rate": 3.2367466373551476e-05, + "loss": 1.1934, + "step": 5639000 + }, + { + "epoch": 3.38, + "learning_rate": 3.236537060792203e-05, + "loss": 1.1655, + "step": 5639500 + }, + { + "epoch": 3.38, + "learning_rate": 3.236327064236146e-05, + "loss": 1.1692, + "step": 5640000 + }, + { + "epoch": 3.38, + "learning_rate": 3.2361170676800896e-05, + "loss": 1.1593, + "step": 5640500 + }, + { + "epoch": 3.38, + "learning_rate": 3.2359070711240336e-05, + "loss": 1.1541, + "step": 5641000 + }, + { + "epoch": 3.38, + "learning_rate": 3.235697074567977e-05, + "loss": 1.1514, + "step": 5641500 + }, + { + "epoch": 3.38, + "learning_rate": 3.23548707801192e-05, + "loss": 1.1655, + "step": 5642000 + }, + { + "epoch": 3.38, + "learning_rate": 3.2352770814558644e-05, + "loss": 1.1697, + "step": 5642500 + }, + { + "epoch": 3.38, + "learning_rate": 3.235067084899808e-05, + "loss": 1.1728, + "step": 5643000 + }, + { + "epoch": 3.38, + "learning_rate": 3.234857088343751e-05, + "loss": 1.1941, + "step": 5643500 + }, + { + "epoch": 3.38, + "learning_rate": 3.234647091787695e-05, + "loss": 1.1786, + "step": 5644000 + }, + { + "epoch": 3.38, + "learning_rate": 3.234437095231638e-05, + "loss": 1.1452, + "step": 5644500 + }, + { + "epoch": 3.38, + "learning_rate": 3.234227098675582e-05, + "loss": 1.167, + "step": 5645000 + }, + { + "epoch": 3.38, + "learning_rate": 3.234017522112638e-05, + "loss": 1.1921, + "step": 5645500 + }, + { + "epoch": 3.39, + "learning_rate": 3.233807525556581e-05, + "loss": 1.174, + "step": 5646000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2335979489936365e-05, + "loss": 1.1794, + "step": 5646500 + }, + { + "epoch": 3.39, + "learning_rate": 3.23338795243758e-05, + "loss": 1.1822, + "step": 5647000 + }, + { + "epoch": 3.39, + "learning_rate": 3.233177955881524e-05, + "loss": 1.1585, + "step": 5647500 + }, + { + "epoch": 3.39, + "learning_rate": 3.232967959325467e-05, + "loss": 1.1533, + "step": 5648000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2327579627694106e-05, + "loss": 1.2131, + "step": 5648500 + }, + { + "epoch": 3.39, + "learning_rate": 3.2325479662133546e-05, + "loss": 1.171, + "step": 5649000 + }, + { + "epoch": 3.39, + "learning_rate": 3.23233838965041e-05, + "loss": 1.1628, + "step": 5649500 + }, + { + "epoch": 3.39, + "learning_rate": 3.232128813087465e-05, + "loss": 1.1329, + "step": 5650000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2319188165314086e-05, + "loss": 1.1574, + "step": 5650500 + }, + { + "epoch": 3.39, + "learning_rate": 3.231708819975353e-05, + "loss": 1.1651, + "step": 5651000 + }, + { + "epoch": 3.39, + "learning_rate": 3.231498823419296e-05, + "loss": 1.1865, + "step": 5651500 + }, + { + "epoch": 3.39, + "learning_rate": 3.2312888268632394e-05, + "loss": 1.185, + "step": 5652000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2310788303071834e-05, + "loss": 1.1538, + "step": 5652500 + }, + { + "epoch": 3.39, + "learning_rate": 3.230868833751127e-05, + "loss": 1.1288, + "step": 5653000 + }, + { + "epoch": 3.39, + "learning_rate": 3.23065883719507e-05, + "loss": 1.1766, + "step": 5653500 + }, + { + "epoch": 3.39, + "learning_rate": 3.2304492606321254e-05, + "loss": 1.1806, + "step": 5654000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2302392640760695e-05, + "loss": 1.126, + "step": 5654500 + }, + { + "epoch": 3.39, + "learning_rate": 3.230029267520013e-05, + "loss": 1.2017, + "step": 5655000 + }, + { + "epoch": 3.39, + "learning_rate": 3.229819270963956e-05, + "loss": 1.1871, + "step": 5655500 + }, + { + "epoch": 3.39, + "learning_rate": 3.2296092744079e-05, + "loss": 1.156, + "step": 5656000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2293996978449555e-05, + "loss": 1.2061, + "step": 5656500 + }, + { + "epoch": 3.39, + "learning_rate": 3.229189701288899e-05, + "loss": 1.1497, + "step": 5657000 + }, + { + "epoch": 3.39, + "learning_rate": 3.228979704732842e-05, + "loss": 1.1775, + "step": 5657500 + }, + { + "epoch": 3.39, + "learning_rate": 3.228769708176786e-05, + "loss": 1.1785, + "step": 5658000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2285597116207296e-05, + "loss": 1.1697, + "step": 5658500 + }, + { + "epoch": 3.39, + "learning_rate": 3.228349715064673e-05, + "loss": 1.144, + "step": 5659000 + }, + { + "epoch": 3.39, + "learning_rate": 3.228140138501729e-05, + "loss": 1.1763, + "step": 5659500 + }, + { + "epoch": 3.39, + "learning_rate": 3.227930141945672e-05, + "loss": 1.1717, + "step": 5660000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2277201453896157e-05, + "loss": 1.1606, + "step": 5660500 + }, + { + "epoch": 3.39, + "learning_rate": 3.227510148833559e-05, + "loss": 1.1757, + "step": 5661000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2273001522775024e-05, + "loss": 1.1664, + "step": 5661500 + }, + { + "epoch": 3.39, + "learning_rate": 3.227090155721446e-05, + "loss": 1.1787, + "step": 5662000 + }, + { + "epoch": 3.39, + "learning_rate": 3.22688015916539e-05, + "loss": 1.1451, + "step": 5662500 + }, + { + "epoch": 3.4, + "learning_rate": 3.226670582602446e-05, + "loss": 1.1424, + "step": 5663000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2264605860463884e-05, + "loss": 1.1706, + "step": 5663500 + }, + { + "epoch": 3.4, + "learning_rate": 3.226250589490332e-05, + "loss": 1.1625, + "step": 5664000 + }, + { + "epoch": 3.4, + "learning_rate": 3.226040592934276e-05, + "loss": 1.1533, + "step": 5664500 + }, + { + "epoch": 3.4, + "learning_rate": 3.225830596378219e-05, + "loss": 1.1547, + "step": 5665000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2256205998221625e-05, + "loss": 1.1701, + "step": 5665500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2254110232592185e-05, + "loss": 1.1818, + "step": 5666000 + }, + { + "epoch": 3.4, + "learning_rate": 3.225201026703162e-05, + "loss": 1.161, + "step": 5666500 + }, + { + "epoch": 3.4, + "learning_rate": 3.224991030147105e-05, + "loss": 1.1827, + "step": 5667000 + }, + { + "epoch": 3.4, + "learning_rate": 3.224781033591049e-05, + "loss": 1.1575, + "step": 5667500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2245710370349926e-05, + "loss": 1.1496, + "step": 5668000 + }, + { + "epoch": 3.4, + "learning_rate": 3.224361040478936e-05, + "loss": 1.1928, + "step": 5668500 + }, + { + "epoch": 3.4, + "learning_rate": 3.224151463915991e-05, + "loss": 1.1721, + "step": 5669000 + }, + { + "epoch": 3.4, + "learning_rate": 3.223941467359935e-05, + "loss": 1.1539, + "step": 5669500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2237314708038787e-05, + "loss": 1.1492, + "step": 5670000 + }, + { + "epoch": 3.4, + "learning_rate": 3.223521474247822e-05, + "loss": 1.1445, + "step": 5670500 + }, + { + "epoch": 3.4, + "learning_rate": 3.223311477691766e-05, + "loss": 1.1622, + "step": 5671000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2231019011288214e-05, + "loss": 1.165, + "step": 5671500 + }, + { + "epoch": 3.4, + "learning_rate": 3.222891904572765e-05, + "loss": 1.1525, + "step": 5672000 + }, + { + "epoch": 3.4, + "learning_rate": 3.222682328009821e-05, + "loss": 1.175, + "step": 5672500 + }, + { + "epoch": 3.4, + "learning_rate": 3.222472331453764e-05, + "loss": 1.1524, + "step": 5673000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2222623348977075e-05, + "loss": 1.1751, + "step": 5673500 + }, + { + "epoch": 3.4, + "learning_rate": 3.222052338341651e-05, + "loss": 1.1471, + "step": 5674000 + }, + { + "epoch": 3.4, + "learning_rate": 3.221842341785595e-05, + "loss": 1.185, + "step": 5674500 + }, + { + "epoch": 3.4, + "learning_rate": 3.221632345229538e-05, + "loss": 1.1564, + "step": 5675000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2214223486734815e-05, + "loss": 1.1444, + "step": 5675500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2212123521174255e-05, + "loss": 1.1681, + "step": 5676000 + }, + { + "epoch": 3.4, + "learning_rate": 3.221002355561369e-05, + "loss": 1.1551, + "step": 5676500 + }, + { + "epoch": 3.4, + "learning_rate": 3.220792359005312e-05, + "loss": 1.1717, + "step": 5677000 + }, + { + "epoch": 3.4, + "learning_rate": 3.220582362449256e-05, + "loss": 1.1558, + "step": 5677500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2203727858863116e-05, + "loss": 1.1918, + "step": 5678000 + }, + { + "epoch": 3.4, + "learning_rate": 3.220162789330255e-05, + "loss": 1.1825, + "step": 5678500 + }, + { + "epoch": 3.4, + "learning_rate": 3.219952792774198e-05, + "loss": 1.1828, + "step": 5679000 + }, + { + "epoch": 3.41, + "learning_rate": 3.219742796218142e-05, + "loss": 1.1944, + "step": 5679500 + }, + { + "epoch": 3.41, + "learning_rate": 3.219533219655198e-05, + "loss": 1.1802, + "step": 5680000 + }, + { + "epoch": 3.41, + "learning_rate": 3.219323223099141e-05, + "loss": 1.1518, + "step": 5680500 + }, + { + "epoch": 3.41, + "learning_rate": 3.2191132265430844e-05, + "loss": 1.1576, + "step": 5681000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2189032299870284e-05, + "loss": 1.1639, + "step": 5681500 + }, + { + "epoch": 3.41, + "learning_rate": 3.218693233430972e-05, + "loss": 1.1385, + "step": 5682000 + }, + { + "epoch": 3.41, + "learning_rate": 3.218483236874916e-05, + "loss": 1.1536, + "step": 5682500 + }, + { + "epoch": 3.41, + "learning_rate": 3.218273240318859e-05, + "loss": 1.1451, + "step": 5683000 + }, + { + "epoch": 3.41, + "learning_rate": 3.218063243762802e-05, + "loss": 1.1563, + "step": 5683500 + }, + { + "epoch": 3.41, + "learning_rate": 3.217853667199858e-05, + "loss": 1.1711, + "step": 5684000 + }, + { + "epoch": 3.41, + "learning_rate": 3.217643670643802e-05, + "loss": 1.168, + "step": 5684500 + }, + { + "epoch": 3.41, + "learning_rate": 3.217434094080857e-05, + "loss": 1.1343, + "step": 5685000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2172240975248005e-05, + "loss": 1.1616, + "step": 5685500 + }, + { + "epoch": 3.41, + "learning_rate": 3.217014100968744e-05, + "loss": 1.1852, + "step": 5686000 + }, + { + "epoch": 3.41, + "learning_rate": 3.216804104412688e-05, + "loss": 1.1697, + "step": 5686500 + }, + { + "epoch": 3.41, + "learning_rate": 3.216594527849743e-05, + "loss": 1.1901, + "step": 5687000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2163845312936866e-05, + "loss": 1.139, + "step": 5687500 + }, + { + "epoch": 3.41, + "learning_rate": 3.2161745347376306e-05, + "loss": 1.1687, + "step": 5688000 + }, + { + "epoch": 3.41, + "learning_rate": 3.215964538181574e-05, + "loss": 1.1308, + "step": 5688500 + }, + { + "epoch": 3.41, + "learning_rate": 3.215754541625517e-05, + "loss": 1.1691, + "step": 5689000 + }, + { + "epoch": 3.41, + "learning_rate": 3.215544965062573e-05, + "loss": 1.1597, + "step": 5689500 + }, + { + "epoch": 3.41, + "learning_rate": 3.215334968506517e-05, + "loss": 1.1738, + "step": 5690000 + }, + { + "epoch": 3.41, + "learning_rate": 3.21512497195046e-05, + "loss": 1.1635, + "step": 5690500 + }, + { + "epoch": 3.41, + "learning_rate": 3.2149153953875154e-05, + "loss": 1.1641, + "step": 5691000 + }, + { + "epoch": 3.41, + "learning_rate": 3.214705398831459e-05, + "loss": 1.1459, + "step": 5691500 + }, + { + "epoch": 3.41, + "learning_rate": 3.214495402275403e-05, + "loss": 1.1648, + "step": 5692000 + }, + { + "epoch": 3.41, + "learning_rate": 3.214285405719346e-05, + "loss": 1.1644, + "step": 5692500 + }, + { + "epoch": 3.41, + "learning_rate": 3.2140754091632895e-05, + "loss": 1.1855, + "step": 5693000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2138654126072335e-05, + "loss": 1.1784, + "step": 5693500 + }, + { + "epoch": 3.41, + "learning_rate": 3.213655416051177e-05, + "loss": 1.1485, + "step": 5694000 + }, + { + "epoch": 3.41, + "learning_rate": 3.21344541949512e-05, + "loss": 1.1618, + "step": 5694500 + }, + { + "epoch": 3.41, + "learning_rate": 3.213235422939064e-05, + "loss": 1.1597, + "step": 5695000 + }, + { + "epoch": 3.41, + "learning_rate": 3.213025426383007e-05, + "loss": 1.131, + "step": 5695500 + }, + { + "epoch": 3.41, + "learning_rate": 3.212815429826951e-05, + "loss": 1.1581, + "step": 5696000 + }, + { + "epoch": 3.42, + "learning_rate": 3.212605433270894e-05, + "loss": 1.1535, + "step": 5696500 + }, + { + "epoch": 3.42, + "learning_rate": 3.21239585670795e-05, + "loss": 1.1674, + "step": 5697000 + }, + { + "epoch": 3.42, + "learning_rate": 3.212185860151893e-05, + "loss": 1.1664, + "step": 5697500 + }, + { + "epoch": 3.42, + "learning_rate": 3.211975863595837e-05, + "loss": 1.1804, + "step": 5698000 + }, + { + "epoch": 3.42, + "learning_rate": 3.211766287032893e-05, + "loss": 1.1701, + "step": 5698500 + }, + { + "epoch": 3.42, + "learning_rate": 3.2115562904768364e-05, + "loss": 1.1658, + "step": 5699000 + }, + { + "epoch": 3.42, + "learning_rate": 3.21134629392078e-05, + "loss": 1.1671, + "step": 5699500 + }, + { + "epoch": 3.42, + "learning_rate": 3.211136297364723e-05, + "loss": 1.1312, + "step": 5700000 + }, + { + "epoch": 3.42, + "eval_loss": 1.132497787475586, + "eval_runtime": 1107.8438, + "eval_samples_per_second": 475.446, + "eval_steps_per_second": 79.241, + "step": 5700000 + }, + { + "epoch": 3.42, + "learning_rate": 3.210926720801779e-05, + "loss": 1.1698, + "step": 5700500 + }, + { + "epoch": 3.42, + "learning_rate": 3.2107167242457224e-05, + "loss": 1.141, + "step": 5701000 + }, + { + "epoch": 3.42, + "learning_rate": 3.210506727689666e-05, + "loss": 1.1997, + "step": 5701500 + }, + { + "epoch": 3.42, + "learning_rate": 3.21029673113361e-05, + "loss": 1.1503, + "step": 5702000 + }, + { + "epoch": 3.42, + "learning_rate": 3.2100867345775525e-05, + "loss": 1.1567, + "step": 5702500 + }, + { + "epoch": 3.42, + "learning_rate": 3.2098767380214965e-05, + "loss": 1.1632, + "step": 5703000 + }, + { + "epoch": 3.42, + "learning_rate": 3.20966674146544e-05, + "loss": 1.1549, + "step": 5703500 + }, + { + "epoch": 3.42, + "learning_rate": 3.209456744909383e-05, + "loss": 1.1559, + "step": 5704000 + }, + { + "epoch": 3.42, + "learning_rate": 3.209247168346439e-05, + "loss": 1.1703, + "step": 5704500 + }, + { + "epoch": 3.42, + "learning_rate": 3.2090371717903826e-05, + "loss": 1.1443, + "step": 5705000 + }, + { + "epoch": 3.42, + "learning_rate": 3.208827175234326e-05, + "loss": 1.1661, + "step": 5705500 + }, + { + "epoch": 3.42, + "learning_rate": 3.208617178678269e-05, + "loss": 1.1596, + "step": 5706000 + }, + { + "epoch": 3.42, + "learning_rate": 3.208407602115325e-05, + "loss": 1.1798, + "step": 5706500 + }, + { + "epoch": 3.42, + "learning_rate": 3.208197605559269e-05, + "loss": 1.1757, + "step": 5707000 + }, + { + "epoch": 3.42, + "learning_rate": 3.207988028996325e-05, + "loss": 1.1434, + "step": 5707500 + }, + { + "epoch": 3.42, + "learning_rate": 3.207778032440268e-05, + "loss": 1.161, + "step": 5708000 + }, + { + "epoch": 3.42, + "learning_rate": 3.2075680358842114e-05, + "loss": 1.1387, + "step": 5708500 + }, + { + "epoch": 3.42, + "learning_rate": 3.2073580393281554e-05, + "loss": 1.1582, + "step": 5709000 + }, + { + "epoch": 3.42, + "learning_rate": 3.207148042772098e-05, + "loss": 1.1697, + "step": 5709500 + }, + { + "epoch": 3.42, + "learning_rate": 3.206938046216042e-05, + "loss": 1.1606, + "step": 5710000 + }, + { + "epoch": 3.42, + "learning_rate": 3.2067280496599854e-05, + "loss": 1.1694, + "step": 5710500 + }, + { + "epoch": 3.42, + "learning_rate": 3.206518053103929e-05, + "loss": 1.1898, + "step": 5711000 + }, + { + "epoch": 3.42, + "learning_rate": 3.206308056547873e-05, + "loss": 1.1401, + "step": 5711500 + }, + { + "epoch": 3.42, + "learning_rate": 3.206098479984928e-05, + "loss": 1.1683, + "step": 5712000 + }, + { + "epoch": 3.42, + "learning_rate": 3.2058884834288715e-05, + "loss": 1.1466, + "step": 5712500 + }, + { + "epoch": 3.43, + "learning_rate": 3.205678486872815e-05, + "loss": 1.1438, + "step": 5713000 + }, + { + "epoch": 3.43, + "learning_rate": 3.205468490316759e-05, + "loss": 1.1322, + "step": 5713500 + }, + { + "epoch": 3.43, + "learning_rate": 3.205259333746926e-05, + "loss": 1.1568, + "step": 5714000 + }, + { + "epoch": 3.43, + "learning_rate": 3.20504933719087e-05, + "loss": 1.1858, + "step": 5714500 + }, + { + "epoch": 3.43, + "learning_rate": 3.2048397606279256e-05, + "loss": 1.1724, + "step": 5715000 + }, + { + "epoch": 3.43, + "learning_rate": 3.204629764071869e-05, + "loss": 1.1717, + "step": 5715500 + }, + { + "epoch": 3.43, + "learning_rate": 3.204419767515813e-05, + "loss": 1.1658, + "step": 5716000 + }, + { + "epoch": 3.43, + "learning_rate": 3.2042097709597563e-05, + "loss": 1.1671, + "step": 5716500 + }, + { + "epoch": 3.43, + "learning_rate": 3.204000194396812e-05, + "loss": 1.1767, + "step": 5717000 + }, + { + "epoch": 3.43, + "learning_rate": 3.203790197840755e-05, + "loss": 1.1672, + "step": 5717500 + }, + { + "epoch": 3.43, + "learning_rate": 3.203580201284699e-05, + "loss": 1.1223, + "step": 5718000 + }, + { + "epoch": 3.43, + "learning_rate": 3.2033702047286424e-05, + "loss": 1.1336, + "step": 5718500 + }, + { + "epoch": 3.43, + "learning_rate": 3.203160208172586e-05, + "loss": 1.1758, + "step": 5719000 + }, + { + "epoch": 3.43, + "learning_rate": 3.20295021161653e-05, + "loss": 1.1549, + "step": 5719500 + }, + { + "epoch": 3.43, + "learning_rate": 3.202740215060473e-05, + "loss": 1.1436, + "step": 5720000 + }, + { + "epoch": 3.43, + "learning_rate": 3.2025302185044165e-05, + "loss": 1.1579, + "step": 5720500 + }, + { + "epoch": 3.43, + "learning_rate": 3.2023202219483605e-05, + "loss": 1.158, + "step": 5721000 + }, + { + "epoch": 3.43, + "learning_rate": 3.202110225392303e-05, + "loss": 1.1989, + "step": 5721500 + }, + { + "epoch": 3.43, + "learning_rate": 3.2019002288362465e-05, + "loss": 1.1657, + "step": 5722000 + }, + { + "epoch": 3.43, + "learning_rate": 3.2016902322801905e-05, + "loss": 1.1528, + "step": 5722500 + }, + { + "epoch": 3.43, + "learning_rate": 3.201480235724134e-05, + "loss": 1.1252, + "step": 5723000 + }, + { + "epoch": 3.43, + "learning_rate": 3.20127065916119e-05, + "loss": 1.1798, + "step": 5723500 + }, + { + "epoch": 3.43, + "learning_rate": 3.201060662605133e-05, + "loss": 1.1683, + "step": 5724000 + }, + { + "epoch": 3.43, + "learning_rate": 3.2008506660490766e-05, + "loss": 1.1565, + "step": 5724500 + }, + { + "epoch": 3.43, + "learning_rate": 3.2006410894861326e-05, + "loss": 1.1366, + "step": 5725000 + }, + { + "epoch": 3.43, + "learning_rate": 3.200431092930076e-05, + "loss": 1.1843, + "step": 5725500 + }, + { + "epoch": 3.43, + "learning_rate": 3.20022109637402e-05, + "loss": 1.1941, + "step": 5726000 + }, + { + "epoch": 3.43, + "learning_rate": 3.200011099817963e-05, + "loss": 1.1581, + "step": 5726500 + }, + { + "epoch": 3.43, + "learning_rate": 3.199801103261906e-05, + "loss": 1.1518, + "step": 5727000 + }, + { + "epoch": 3.43, + "learning_rate": 3.19959110670585e-05, + "loss": 1.1554, + "step": 5727500 + }, + { + "epoch": 3.43, + "learning_rate": 3.1993811101497934e-05, + "loss": 1.1484, + "step": 5728000 + }, + { + "epoch": 3.43, + "learning_rate": 3.199171113593737e-05, + "loss": 1.1363, + "step": 5728500 + }, + { + "epoch": 3.43, + "learning_rate": 3.198961117037681e-05, + "loss": 1.1542, + "step": 5729000 + }, + { + "epoch": 3.44, + "learning_rate": 3.198751120481624e-05, + "loss": 1.1512, + "step": 5729500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1985411239255675e-05, + "loss": 1.156, + "step": 5730000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1983311273695115e-05, + "loss": 1.1595, + "step": 5730500 + }, + { + "epoch": 3.44, + "learning_rate": 3.198121550806567e-05, + "loss": 1.1697, + "step": 5731000 + }, + { + "epoch": 3.44, + "learning_rate": 3.19791155425051e-05, + "loss": 1.1599, + "step": 5731500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1977015576944535e-05, + "loss": 1.164, + "step": 5732000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1974915611383976e-05, + "loss": 1.1451, + "step": 5732500 + }, + { + "epoch": 3.44, + "learning_rate": 3.197281984575453e-05, + "loss": 1.1526, + "step": 5733000 + }, + { + "epoch": 3.44, + "learning_rate": 3.197072408012508e-05, + "loss": 1.1653, + "step": 5733500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1968624114564516e-05, + "loss": 1.1628, + "step": 5734000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1966524149003956e-05, + "loss": 1.1765, + "step": 5734500 + }, + { + "epoch": 3.44, + "learning_rate": 3.196442418344339e-05, + "loss": 1.1748, + "step": 5735000 + }, + { + "epoch": 3.44, + "learning_rate": 3.196232421788282e-05, + "loss": 1.1739, + "step": 5735500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1960224252322264e-05, + "loss": 1.1745, + "step": 5736000 + }, + { + "epoch": 3.44, + "learning_rate": 3.19581242867617e-05, + "loss": 1.1649, + "step": 5736500 + }, + { + "epoch": 3.44, + "learning_rate": 3.195602432120113e-05, + "loss": 1.1575, + "step": 5737000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1953928555571684e-05, + "loss": 1.1695, + "step": 5737500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1951828590011124e-05, + "loss": 1.1413, + "step": 5738000 + }, + { + "epoch": 3.44, + "learning_rate": 3.194972862445056e-05, + "loss": 1.173, + "step": 5738500 + }, + { + "epoch": 3.44, + "learning_rate": 3.194762865888999e-05, + "loss": 1.1741, + "step": 5739000 + }, + { + "epoch": 3.44, + "learning_rate": 3.194552869332943e-05, + "loss": 1.1698, + "step": 5739500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1943428727768865e-05, + "loss": 1.1832, + "step": 5740000 + }, + { + "epoch": 3.44, + "learning_rate": 3.19413287622083e-05, + "loss": 1.166, + "step": 5740500 + }, + { + "epoch": 3.44, + "learning_rate": 3.193922879664774e-05, + "loss": 1.1473, + "step": 5741000 + }, + { + "epoch": 3.44, + "learning_rate": 3.193713723094941e-05, + "loss": 1.1404, + "step": 5741500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1935037265388846e-05, + "loss": 1.1378, + "step": 5742000 + }, + { + "epoch": 3.44, + "learning_rate": 3.193293729982828e-05, + "loss": 1.1562, + "step": 5742500 + }, + { + "epoch": 3.44, + "learning_rate": 3.193083733426772e-05, + "loss": 1.1554, + "step": 5743000 + }, + { + "epoch": 3.44, + "learning_rate": 3.192873736870715e-05, + "loss": 1.1599, + "step": 5743500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1926637403146586e-05, + "loss": 1.1747, + "step": 5744000 + }, + { + "epoch": 3.44, + "learning_rate": 3.192454163751714e-05, + "loss": 1.1836, + "step": 5744500 + }, + { + "epoch": 3.44, + "learning_rate": 3.192244167195658e-05, + "loss": 1.1401, + "step": 5745000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1920341706396014e-05, + "loss": 1.185, + "step": 5745500 + }, + { + "epoch": 3.44, + "learning_rate": 3.191824174083545e-05, + "loss": 1.1379, + "step": 5746000 + }, + { + "epoch": 3.45, + "learning_rate": 3.191614177527489e-05, + "loss": 1.16, + "step": 5746500 + }, + { + "epoch": 3.45, + "learning_rate": 3.191404180971432e-05, + "loss": 1.1609, + "step": 5747000 + }, + { + "epoch": 3.45, + "learning_rate": 3.191194184415376e-05, + "loss": 1.1223, + "step": 5747500 + }, + { + "epoch": 3.45, + "learning_rate": 3.1909841878593194e-05, + "loss": 1.1856, + "step": 5748000 + }, + { + "epoch": 3.45, + "learning_rate": 3.190774611296375e-05, + "loss": 1.1863, + "step": 5748500 + }, + { + "epoch": 3.45, + "learning_rate": 3.190564614740318e-05, + "loss": 1.1641, + "step": 5749000 + }, + { + "epoch": 3.45, + "learning_rate": 3.190354618184262e-05, + "loss": 1.159, + "step": 5749500 + }, + { + "epoch": 3.45, + "learning_rate": 3.1901446216282055e-05, + "loss": 1.16, + "step": 5750000 + }, + { + "epoch": 3.45, + "learning_rate": 3.189935045065261e-05, + "loss": 1.1428, + "step": 5750500 + }, + { + "epoch": 3.45, + "learning_rate": 3.189725048509204e-05, + "loss": 1.1544, + "step": 5751000 + }, + { + "epoch": 3.45, + "learning_rate": 3.189515051953148e-05, + "loss": 1.1539, + "step": 5751500 + }, + { + "epoch": 3.45, + "learning_rate": 3.1893050553970916e-05, + "loss": 1.1577, + "step": 5752000 + }, + { + "epoch": 3.45, + "learning_rate": 3.189095478834147e-05, + "loss": 1.1709, + "step": 5752500 + }, + { + "epoch": 3.45, + "learning_rate": 3.188885902271202e-05, + "loss": 1.1955, + "step": 5753000 + }, + { + "epoch": 3.45, + "learning_rate": 3.188675905715146e-05, + "loss": 1.1875, + "step": 5753500 + }, + { + "epoch": 3.45, + "learning_rate": 3.18846590915909e-05, + "loss": 1.1712, + "step": 5754000 + }, + { + "epoch": 3.45, + "learning_rate": 3.188255912603033e-05, + "loss": 1.1749, + "step": 5754500 + }, + { + "epoch": 3.45, + "learning_rate": 3.188045916046977e-05, + "loss": 1.1689, + "step": 5755000 + }, + { + "epoch": 3.45, + "learning_rate": 3.1878359194909204e-05, + "loss": 1.1436, + "step": 5755500 + }, + { + "epoch": 3.45, + "learning_rate": 3.187625922934864e-05, + "loss": 1.1434, + "step": 5756000 + }, + { + "epoch": 3.45, + "learning_rate": 3.187415926378808e-05, + "loss": 1.1472, + "step": 5756500 + }, + { + "epoch": 3.45, + "learning_rate": 3.187206349815863e-05, + "loss": 1.1759, + "step": 5757000 + }, + { + "epoch": 3.45, + "learning_rate": 3.1869963532598065e-05, + "loss": 1.1698, + "step": 5757500 + }, + { + "epoch": 3.45, + "learning_rate": 3.18678635670375e-05, + "loss": 1.1382, + "step": 5758000 + }, + { + "epoch": 3.45, + "learning_rate": 3.186576360147694e-05, + "loss": 1.1299, + "step": 5758500 + }, + { + "epoch": 3.45, + "learning_rate": 3.186366363591637e-05, + "loss": 1.1768, + "step": 5759000 + }, + { + "epoch": 3.45, + "learning_rate": 3.1861563670355805e-05, + "loss": 1.1664, + "step": 5759500 + }, + { + "epoch": 3.45, + "learning_rate": 3.1859463704795245e-05, + "loss": 1.1634, + "step": 5760000 + }, + { + "epoch": 3.45, + "learning_rate": 3.185736373923467e-05, + "loss": 1.1438, + "step": 5760500 + }, + { + "epoch": 3.45, + "learning_rate": 3.185526797360523e-05, + "loss": 1.1761, + "step": 5761000 + }, + { + "epoch": 3.45, + "learning_rate": 3.1853172207975786e-05, + "loss": 1.1601, + "step": 5761500 + }, + { + "epoch": 3.45, + "learning_rate": 3.1851072242415226e-05, + "loss": 1.1417, + "step": 5762000 + }, + { + "epoch": 3.45, + "learning_rate": 3.184897227685466e-05, + "loss": 1.1427, + "step": 5762500 + }, + { + "epoch": 3.46, + "learning_rate": 3.184687231129409e-05, + "loss": 1.1951, + "step": 5763000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1844772345733533e-05, + "loss": 1.2043, + "step": 5763500 + }, + { + "epoch": 3.46, + "learning_rate": 3.184267238017297e-05, + "loss": 1.1861, + "step": 5764000 + }, + { + "epoch": 3.46, + "learning_rate": 3.18405724146124e-05, + "loss": 1.1932, + "step": 5764500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1838472449051834e-05, + "loss": 1.1597, + "step": 5765000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1836376683422394e-05, + "loss": 1.1548, + "step": 5765500 + }, + { + "epoch": 3.46, + "learning_rate": 3.183427671786183e-05, + "loss": 1.1686, + "step": 5766000 + }, + { + "epoch": 3.46, + "learning_rate": 3.183217675230126e-05, + "loss": 1.169, + "step": 5766500 + }, + { + "epoch": 3.46, + "learning_rate": 3.18300767867407e-05, + "loss": 1.1618, + "step": 5767000 + }, + { + "epoch": 3.46, + "learning_rate": 3.182797682118013e-05, + "loss": 1.1584, + "step": 5767500 + }, + { + "epoch": 3.46, + "learning_rate": 3.182587685561957e-05, + "loss": 1.1554, + "step": 5768000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1823776890059e-05, + "loss": 1.1216, + "step": 5768500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1821676924498435e-05, + "loss": 1.1736, + "step": 5769000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1819581158868996e-05, + "loss": 1.1407, + "step": 5769500 + }, + { + "epoch": 3.46, + "learning_rate": 3.181748119330843e-05, + "loss": 1.1424, + "step": 5770000 + }, + { + "epoch": 3.46, + "learning_rate": 3.181538122774786e-05, + "loss": 1.1856, + "step": 5770500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1813281262187296e-05, + "loss": 1.1468, + "step": 5771000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1811185496557856e-05, + "loss": 1.1634, + "step": 5771500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1809085530997296e-05, + "loss": 1.181, + "step": 5772000 + }, + { + "epoch": 3.46, + "learning_rate": 3.180698556543672e-05, + "loss": 1.1913, + "step": 5772500 + }, + { + "epoch": 3.46, + "learning_rate": 3.180488559987616e-05, + "loss": 1.1863, + "step": 5773000 + }, + { + "epoch": 3.46, + "learning_rate": 3.180278983424672e-05, + "loss": 1.1734, + "step": 5773500 + }, + { + "epoch": 3.46, + "learning_rate": 3.180068986868616e-05, + "loss": 1.141, + "step": 5774000 + }, + { + "epoch": 3.46, + "learning_rate": 3.179859410305671e-05, + "loss": 1.1709, + "step": 5774500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1796494137496144e-05, + "loss": 1.1674, + "step": 5775000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1794394171935584e-05, + "loss": 1.1371, + "step": 5775500 + }, + { + "epoch": 3.46, + "learning_rate": 3.179229420637502e-05, + "loss": 1.1782, + "step": 5776000 + }, + { + "epoch": 3.46, + "learning_rate": 3.179019424081445e-05, + "loss": 1.1681, + "step": 5776500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1788094275253885e-05, + "loss": 1.1641, + "step": 5777000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1785998509624445e-05, + "loss": 1.1706, + "step": 5777500 + }, + { + "epoch": 3.46, + "learning_rate": 3.178389854406388e-05, + "loss": 1.1547, + "step": 5778000 + }, + { + "epoch": 3.46, + "learning_rate": 3.178179857850331e-05, + "loss": 1.1442, + "step": 5778500 + }, + { + "epoch": 3.46, + "learning_rate": 3.177969861294275e-05, + "loss": 1.1858, + "step": 5779000 + }, + { + "epoch": 3.47, + "learning_rate": 3.177759864738218e-05, + "loss": 1.183, + "step": 5779500 + }, + { + "epoch": 3.47, + "learning_rate": 3.177550288175274e-05, + "loss": 1.1728, + "step": 5780000 + }, + { + "epoch": 3.47, + "learning_rate": 3.177340291619217e-05, + "loss": 1.1735, + "step": 5780500 + }, + { + "epoch": 3.47, + "learning_rate": 3.177130295063161e-05, + "loss": 1.1416, + "step": 5781000 + }, + { + "epoch": 3.47, + "learning_rate": 3.1769202985071047e-05, + "loss": 1.1453, + "step": 5781500 + }, + { + "epoch": 3.47, + "learning_rate": 3.176710301951048e-05, + "loss": 1.186, + "step": 5782000 + }, + { + "epoch": 3.47, + "learning_rate": 3.176500725388104e-05, + "loss": 1.1518, + "step": 5782500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1762911488251594e-05, + "loss": 1.1559, + "step": 5783000 + }, + { + "epoch": 3.47, + "learning_rate": 3.176081152269103e-05, + "loss": 1.1769, + "step": 5783500 + }, + { + "epoch": 3.47, + "learning_rate": 3.175871155713046e-05, + "loss": 1.1566, + "step": 5784000 + }, + { + "epoch": 3.47, + "learning_rate": 3.17566115915699e-05, + "loss": 1.1915, + "step": 5784500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1754511626009335e-05, + "loss": 1.1506, + "step": 5785000 + }, + { + "epoch": 3.47, + "learning_rate": 3.175241166044877e-05, + "loss": 1.1684, + "step": 5785500 + }, + { + "epoch": 3.47, + "learning_rate": 3.175031169488821e-05, + "loss": 1.1573, + "step": 5786000 + }, + { + "epoch": 3.47, + "learning_rate": 3.1748211729327635e-05, + "loss": 1.1638, + "step": 5786500 + }, + { + "epoch": 3.47, + "learning_rate": 3.174611176376707e-05, + "loss": 1.1645, + "step": 5787000 + }, + { + "epoch": 3.47, + "learning_rate": 3.174401599813763e-05, + "loss": 1.1609, + "step": 5787500 + }, + { + "epoch": 3.47, + "learning_rate": 3.174191603257707e-05, + "loss": 1.1569, + "step": 5788000 + }, + { + "epoch": 3.47, + "learning_rate": 3.17398160670165e-05, + "loss": 1.1569, + "step": 5788500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1737716101455936e-05, + "loss": 1.1638, + "step": 5789000 + }, + { + "epoch": 3.47, + "learning_rate": 3.1735620335826496e-05, + "loss": 1.1536, + "step": 5789500 + }, + { + "epoch": 3.47, + "learning_rate": 3.173352037026593e-05, + "loss": 1.1527, + "step": 5790000 + }, + { + "epoch": 3.47, + "learning_rate": 3.173142040470536e-05, + "loss": 1.1528, + "step": 5790500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1729320439144803e-05, + "loss": 1.1453, + "step": 5791000 + }, + { + "epoch": 3.47, + "learning_rate": 3.172722047358423e-05, + "loss": 1.1826, + "step": 5791500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1725120508023664e-05, + "loss": 1.1846, + "step": 5792000 + }, + { + "epoch": 3.47, + "learning_rate": 3.1723020542463104e-05, + "loss": 1.1534, + "step": 5792500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1720924776833664e-05, + "loss": 1.1646, + "step": 5793000 + }, + { + "epoch": 3.47, + "learning_rate": 3.17188248112731e-05, + "loss": 1.1722, + "step": 5793500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1716724845712524e-05, + "loss": 1.1695, + "step": 5794000 + }, + { + "epoch": 3.47, + "learning_rate": 3.1714624880151965e-05, + "loss": 1.1833, + "step": 5794500 + }, + { + "epoch": 3.47, + "learning_rate": 3.17125249145914e-05, + "loss": 1.1601, + "step": 5795000 + }, + { + "epoch": 3.47, + "learning_rate": 3.171042494903083e-05, + "loss": 1.1613, + "step": 5795500 + }, + { + "epoch": 3.47, + "learning_rate": 3.170833338333251e-05, + "loss": 1.1725, + "step": 5796000 + }, + { + "epoch": 3.48, + "learning_rate": 3.170623341777195e-05, + "loss": 1.1689, + "step": 5796500 + }, + { + "epoch": 3.48, + "learning_rate": 3.1704133452211386e-05, + "loss": 1.152, + "step": 5797000 + }, + { + "epoch": 3.48, + "learning_rate": 3.170203348665082e-05, + "loss": 1.1679, + "step": 5797500 + }, + { + "epoch": 3.48, + "learning_rate": 3.169993352109026e-05, + "loss": 1.1857, + "step": 5798000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1697833555529686e-05, + "loss": 1.1694, + "step": 5798500 + }, + { + "epoch": 3.48, + "learning_rate": 3.169573358996912e-05, + "loss": 1.1209, + "step": 5799000 + }, + { + "epoch": 3.48, + "learning_rate": 3.169363362440856e-05, + "loss": 1.157, + "step": 5799500 + }, + { + "epoch": 3.48, + "learning_rate": 3.169153365884799e-05, + "loss": 1.1589, + "step": 5800000 + }, + { + "epoch": 3.48, + "eval_loss": 1.126969337463379, + "eval_runtime": 1104.9848, + "eval_samples_per_second": 476.676, + "eval_steps_per_second": 79.446, + "step": 5800000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1689437893218553e-05, + "loss": 1.1474, + "step": 5800500 + }, + { + "epoch": 3.48, + "learning_rate": 3.168733792765798e-05, + "loss": 1.1733, + "step": 5801000 + }, + { + "epoch": 3.48, + "learning_rate": 3.168523796209742e-05, + "loss": 1.1411, + "step": 5801500 + }, + { + "epoch": 3.48, + "learning_rate": 3.1683137996536854e-05, + "loss": 1.1551, + "step": 5802000 + }, + { + "epoch": 3.48, + "learning_rate": 3.168103803097629e-05, + "loss": 1.1822, + "step": 5802500 + }, + { + "epoch": 3.48, + "learning_rate": 3.167893806541573e-05, + "loss": 1.151, + "step": 5803000 + }, + { + "epoch": 3.48, + "learning_rate": 3.167683809985516e-05, + "loss": 1.1747, + "step": 5803500 + }, + { + "epoch": 3.48, + "learning_rate": 3.1674738134294594e-05, + "loss": 1.136, + "step": 5804000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1672638168734035e-05, + "loss": 1.1617, + "step": 5804500 + }, + { + "epoch": 3.48, + "learning_rate": 3.167054240310459e-05, + "loss": 1.1403, + "step": 5805000 + }, + { + "epoch": 3.48, + "learning_rate": 3.166844243754402e-05, + "loss": 1.1473, + "step": 5805500 + }, + { + "epoch": 3.48, + "learning_rate": 3.166634247198346e-05, + "loss": 1.155, + "step": 5806000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1664242506422895e-05, + "loss": 1.1345, + "step": 5806500 + }, + { + "epoch": 3.48, + "learning_rate": 3.166214674079345e-05, + "loss": 1.176, + "step": 5807000 + }, + { + "epoch": 3.48, + "learning_rate": 3.166004677523288e-05, + "loss": 1.179, + "step": 5807500 + }, + { + "epoch": 3.48, + "learning_rate": 3.165794680967232e-05, + "loss": 1.1442, + "step": 5808000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1655851044042876e-05, + "loss": 1.1426, + "step": 5808500 + }, + { + "epoch": 3.48, + "learning_rate": 3.165375107848231e-05, + "loss": 1.1678, + "step": 5809000 + }, + { + "epoch": 3.48, + "learning_rate": 3.165165531285287e-05, + "loss": 1.1509, + "step": 5809500 + }, + { + "epoch": 3.48, + "learning_rate": 3.1649555347292304e-05, + "loss": 1.1512, + "step": 5810000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1647459581662864e-05, + "loss": 1.1695, + "step": 5810500 + }, + { + "epoch": 3.48, + "learning_rate": 3.16453596161023e-05, + "loss": 1.1764, + "step": 5811000 + }, + { + "epoch": 3.48, + "learning_rate": 3.164325965054173e-05, + "loss": 1.1493, + "step": 5811500 + }, + { + "epoch": 3.48, + "learning_rate": 3.164115968498117e-05, + "loss": 1.1406, + "step": 5812000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1639059719420604e-05, + "loss": 1.1742, + "step": 5812500 + }, + { + "epoch": 3.49, + "learning_rate": 3.163696395379116e-05, + "loss": 1.1853, + "step": 5813000 + }, + { + "epoch": 3.49, + "learning_rate": 3.163486398823059e-05, + "loss": 1.1598, + "step": 5813500 + }, + { + "epoch": 3.49, + "learning_rate": 3.163276402267003e-05, + "loss": 1.1819, + "step": 5814000 + }, + { + "epoch": 3.49, + "learning_rate": 3.1630664057109465e-05, + "loss": 1.1759, + "step": 5814500 + }, + { + "epoch": 3.49, + "learning_rate": 3.16285640915489e-05, + "loss": 1.1665, + "step": 5815000 + }, + { + "epoch": 3.49, + "learning_rate": 3.162646412598833e-05, + "loss": 1.1531, + "step": 5815500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1624364160427766e-05, + "loss": 1.1687, + "step": 5816000 + }, + { + "epoch": 3.49, + "learning_rate": 3.16222641948672e-05, + "loss": 1.1561, + "step": 5816500 + }, + { + "epoch": 3.49, + "learning_rate": 3.162016422930664e-05, + "loss": 1.1491, + "step": 5817000 + }, + { + "epoch": 3.49, + "learning_rate": 3.161806426374607e-05, + "loss": 1.1724, + "step": 5817500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1615964298185506e-05, + "loss": 1.1631, + "step": 5818000 + }, + { + "epoch": 3.49, + "learning_rate": 3.1613864332624946e-05, + "loss": 1.1127, + "step": 5818500 + }, + { + "epoch": 3.49, + "learning_rate": 3.161176436706438e-05, + "loss": 1.163, + "step": 5819000 + }, + { + "epoch": 3.49, + "learning_rate": 3.1609668601434933e-05, + "loss": 1.1589, + "step": 5819500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1607568635874374e-05, + "loss": 1.1583, + "step": 5820000 + }, + { + "epoch": 3.49, + "learning_rate": 3.160546867031381e-05, + "loss": 1.1588, + "step": 5820500 + }, + { + "epoch": 3.49, + "learning_rate": 3.160336870475324e-05, + "loss": 1.1696, + "step": 5821000 + }, + { + "epoch": 3.49, + "learning_rate": 3.160126873919268e-05, + "loss": 1.1645, + "step": 5821500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1599168773632114e-05, + "loss": 1.1631, + "step": 5822000 + }, + { + "epoch": 3.49, + "learning_rate": 3.159706880807155e-05, + "loss": 1.1635, + "step": 5822500 + }, + { + "epoch": 3.49, + "learning_rate": 3.159496884251098e-05, + "loss": 1.1264, + "step": 5823000 + }, + { + "epoch": 3.49, + "learning_rate": 3.159287307688154e-05, + "loss": 1.1745, + "step": 5823500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1590773111320975e-05, + "loss": 1.1581, + "step": 5824000 + }, + { + "epoch": 3.49, + "learning_rate": 3.158867314576041e-05, + "loss": 1.1759, + "step": 5824500 + }, + { + "epoch": 3.49, + "learning_rate": 3.158657318019985e-05, + "loss": 1.1641, + "step": 5825000 + }, + { + "epoch": 3.49, + "learning_rate": 3.15844774145704e-05, + "loss": 1.1687, + "step": 5825500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1582377449009836e-05, + "loss": 1.1481, + "step": 5826000 + }, + { + "epoch": 3.49, + "learning_rate": 3.1580277483449276e-05, + "loss": 1.1726, + "step": 5826500 + }, + { + "epoch": 3.49, + "learning_rate": 3.157818171781983e-05, + "loss": 1.1738, + "step": 5827000 + }, + { + "epoch": 3.49, + "learning_rate": 3.157608175225926e-05, + "loss": 1.1579, + "step": 5827500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1573981786698697e-05, + "loss": 1.1702, + "step": 5828000 + }, + { + "epoch": 3.49, + "learning_rate": 3.157188182113814e-05, + "loss": 1.1387, + "step": 5828500 + }, + { + "epoch": 3.49, + "learning_rate": 3.156978185557757e-05, + "loss": 1.1632, + "step": 5829000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1567681890017004e-05, + "loss": 1.1309, + "step": 5829500 + }, + { + "epoch": 3.5, + "learning_rate": 3.156559032431868e-05, + "loss": 1.178, + "step": 5830000 + }, + { + "epoch": 3.5, + "learning_rate": 3.156349035875811e-05, + "loss": 1.1572, + "step": 5830500 + }, + { + "epoch": 3.5, + "learning_rate": 3.156139039319755e-05, + "loss": 1.1894, + "step": 5831000 + }, + { + "epoch": 3.5, + "learning_rate": 3.155929462756811e-05, + "loss": 1.1866, + "step": 5831500 + }, + { + "epoch": 3.5, + "learning_rate": 3.155719466200754e-05, + "loss": 1.1443, + "step": 5832000 + }, + { + "epoch": 3.5, + "learning_rate": 3.155509469644698e-05, + "loss": 1.1382, + "step": 5832500 + }, + { + "epoch": 3.5, + "learning_rate": 3.155299473088641e-05, + "loss": 1.1823, + "step": 5833000 + }, + { + "epoch": 3.5, + "learning_rate": 3.155089896525697e-05, + "loss": 1.1531, + "step": 5833500 + }, + { + "epoch": 3.5, + "learning_rate": 3.1548798999696406e-05, + "loss": 1.1411, + "step": 5834000 + }, + { + "epoch": 3.5, + "learning_rate": 3.154669903413584e-05, + "loss": 1.1435, + "step": 5834500 + }, + { + "epoch": 3.5, + "learning_rate": 3.154459906857527e-05, + "loss": 1.148, + "step": 5835000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1542499103014706e-05, + "loss": 1.1628, + "step": 5835500 + }, + { + "epoch": 3.5, + "learning_rate": 3.1540399137454146e-05, + "loss": 1.1548, + "step": 5836000 + }, + { + "epoch": 3.5, + "learning_rate": 3.153829917189358e-05, + "loss": 1.1289, + "step": 5836500 + }, + { + "epoch": 3.5, + "learning_rate": 3.153619920633301e-05, + "loss": 1.1408, + "step": 5837000 + }, + { + "epoch": 3.5, + "learning_rate": 3.153409924077245e-05, + "loss": 1.1604, + "step": 5837500 + }, + { + "epoch": 3.5, + "learning_rate": 3.153199927521189e-05, + "loss": 1.1388, + "step": 5838000 + }, + { + "epoch": 3.5, + "learning_rate": 3.152989930965132e-05, + "loss": 1.1554, + "step": 5838500 + }, + { + "epoch": 3.5, + "learning_rate": 3.152779934409076e-05, + "loss": 1.1423, + "step": 5839000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1525703578461314e-05, + "loss": 1.1777, + "step": 5839500 + }, + { + "epoch": 3.5, + "learning_rate": 3.152360361290075e-05, + "loss": 1.148, + "step": 5840000 + }, + { + "epoch": 3.5, + "learning_rate": 3.152150364734019e-05, + "loss": 1.1437, + "step": 5840500 + }, + { + "epoch": 3.5, + "learning_rate": 3.151940368177962e-05, + "loss": 1.1623, + "step": 5841000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1517303716219055e-05, + "loss": 1.1405, + "step": 5841500 + }, + { + "epoch": 3.5, + "learning_rate": 3.151520795058961e-05, + "loss": 1.1685, + "step": 5842000 + }, + { + "epoch": 3.5, + "learning_rate": 3.151310798502905e-05, + "loss": 1.1948, + "step": 5842500 + }, + { + "epoch": 3.5, + "learning_rate": 3.151100801946848e-05, + "loss": 1.1379, + "step": 5843000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1508908053907915e-05, + "loss": 1.1651, + "step": 5843500 + }, + { + "epoch": 3.5, + "learning_rate": 3.1506808088347356e-05, + "loss": 1.1415, + "step": 5844000 + }, + { + "epoch": 3.5, + "learning_rate": 3.150471232271791e-05, + "loss": 1.1794, + "step": 5844500 + }, + { + "epoch": 3.5, + "learning_rate": 3.150261235715734e-05, + "loss": 1.1576, + "step": 5845000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1500512391596776e-05, + "loss": 1.1421, + "step": 5845500 + }, + { + "epoch": 3.5, + "learning_rate": 3.1498412426036216e-05, + "loss": 1.1453, + "step": 5846000 + }, + { + "epoch": 3.51, + "learning_rate": 3.149631246047565e-05, + "loss": 1.1576, + "step": 5846500 + }, + { + "epoch": 3.51, + "learning_rate": 3.149421249491508e-05, + "loss": 1.1578, + "step": 5847000 + }, + { + "epoch": 3.51, + "learning_rate": 3.149211252935452e-05, + "loss": 1.1643, + "step": 5847500 + }, + { + "epoch": 3.51, + "learning_rate": 3.149001256379395e-05, + "loss": 1.1605, + "step": 5848000 + }, + { + "epoch": 3.51, + "learning_rate": 3.148791679816451e-05, + "loss": 1.157, + "step": 5848500 + }, + { + "epoch": 3.51, + "learning_rate": 3.148581683260395e-05, + "loss": 1.1816, + "step": 5849000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1483721066974504e-05, + "loss": 1.1649, + "step": 5849500 + }, + { + "epoch": 3.51, + "learning_rate": 3.148162110141394e-05, + "loss": 1.208, + "step": 5850000 + }, + { + "epoch": 3.51, + "learning_rate": 3.147952113585337e-05, + "loss": 1.1497, + "step": 5850500 + }, + { + "epoch": 3.51, + "learning_rate": 3.1477425370223925e-05, + "loss": 1.1664, + "step": 5851000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1475325404663365e-05, + "loss": 1.141, + "step": 5851500 + }, + { + "epoch": 3.51, + "learning_rate": 3.14732254391028e-05, + "loss": 1.1654, + "step": 5852000 + }, + { + "epoch": 3.51, + "learning_rate": 3.147112547354223e-05, + "loss": 1.1721, + "step": 5852500 + }, + { + "epoch": 3.51, + "learning_rate": 3.146902550798167e-05, + "loss": 1.1481, + "step": 5853000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1466925542421106e-05, + "loss": 1.153, + "step": 5853500 + }, + { + "epoch": 3.51, + "learning_rate": 3.146482557686054e-05, + "loss": 1.185, + "step": 5854000 + }, + { + "epoch": 3.51, + "learning_rate": 3.14627298112311e-05, + "loss": 1.1701, + "step": 5854500 + }, + { + "epoch": 3.51, + "learning_rate": 3.146062984567053e-05, + "loss": 1.1651, + "step": 5855000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1458529880109966e-05, + "loss": 1.1614, + "step": 5855500 + }, + { + "epoch": 3.51, + "learning_rate": 3.145642991454941e-05, + "loss": 1.166, + "step": 5856000 + }, + { + "epoch": 3.51, + "learning_rate": 3.145432994898883e-05, + "loss": 1.1691, + "step": 5856500 + }, + { + "epoch": 3.51, + "learning_rate": 3.145222998342827e-05, + "loss": 1.1458, + "step": 5857000 + }, + { + "epoch": 3.51, + "learning_rate": 3.145013001786771e-05, + "loss": 1.1614, + "step": 5857500 + }, + { + "epoch": 3.51, + "learning_rate": 3.144803005230714e-05, + "loss": 1.1673, + "step": 5858000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1445930086746574e-05, + "loss": 1.1499, + "step": 5858500 + }, + { + "epoch": 3.51, + "learning_rate": 3.1443830121186014e-05, + "loss": 1.1274, + "step": 5859000 + }, + { + "epoch": 3.51, + "learning_rate": 3.144173015562545e-05, + "loss": 1.1846, + "step": 5859500 + }, + { + "epoch": 3.51, + "learning_rate": 3.143963019006488e-05, + "loss": 1.1797, + "step": 5860000 + }, + { + "epoch": 3.51, + "learning_rate": 3.143753862436656e-05, + "loss": 1.1241, + "step": 5860500 + }, + { + "epoch": 3.51, + "learning_rate": 3.1435438658805995e-05, + "loss": 1.1764, + "step": 5861000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1433342893176555e-05, + "loss": 1.1689, + "step": 5861500 + }, + { + "epoch": 3.51, + "learning_rate": 3.143124292761599e-05, + "loss": 1.1577, + "step": 5862000 + }, + { + "epoch": 3.51, + "learning_rate": 3.142914296205542e-05, + "loss": 1.1434, + "step": 5862500 + }, + { + "epoch": 3.52, + "learning_rate": 3.142704299649486e-05, + "loss": 1.1596, + "step": 5863000 + }, + { + "epoch": 3.52, + "learning_rate": 3.142494303093429e-05, + "loss": 1.1496, + "step": 5863500 + }, + { + "epoch": 3.52, + "learning_rate": 3.142284306537372e-05, + "loss": 1.1566, + "step": 5864000 + }, + { + "epoch": 3.52, + "learning_rate": 3.142074309981316e-05, + "loss": 1.1699, + "step": 5864500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1418643134252596e-05, + "loss": 1.1789, + "step": 5865000 + }, + { + "epoch": 3.52, + "learning_rate": 3.141654736862316e-05, + "loss": 1.1931, + "step": 5865500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1414447403062583e-05, + "loss": 1.1471, + "step": 5866000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1412347437502024e-05, + "loss": 1.1312, + "step": 5866500 + }, + { + "epoch": 3.52, + "learning_rate": 3.141024747194146e-05, + "loss": 1.1229, + "step": 5867000 + }, + { + "epoch": 3.52, + "learning_rate": 3.140814750638089e-05, + "loss": 1.1285, + "step": 5867500 + }, + { + "epoch": 3.52, + "learning_rate": 3.140604754082033e-05, + "loss": 1.1751, + "step": 5868000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1403951775190884e-05, + "loss": 1.1562, + "step": 5868500 + }, + { + "epoch": 3.52, + "learning_rate": 3.140185180963032e-05, + "loss": 1.1648, + "step": 5869000 + }, + { + "epoch": 3.52, + "learning_rate": 3.139975184406976e-05, + "loss": 1.1274, + "step": 5869500 + }, + { + "epoch": 3.52, + "learning_rate": 3.139765187850919e-05, + "loss": 1.1679, + "step": 5870000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1395551912948625e-05, + "loss": 1.1176, + "step": 5870500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1393451947388065e-05, + "loss": 1.1843, + "step": 5871000 + }, + { + "epoch": 3.52, + "learning_rate": 3.13913519818275e-05, + "loss": 1.1483, + "step": 5871500 + }, + { + "epoch": 3.52, + "learning_rate": 3.138925621619805e-05, + "loss": 1.1767, + "step": 5872000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1387156250637486e-05, + "loss": 1.1732, + "step": 5872500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1385056285076926e-05, + "loss": 1.1732, + "step": 5873000 + }, + { + "epoch": 3.52, + "learning_rate": 3.138295631951636e-05, + "loss": 1.16, + "step": 5873500 + }, + { + "epoch": 3.52, + "learning_rate": 3.138085635395579e-05, + "loss": 1.1241, + "step": 5874000 + }, + { + "epoch": 3.52, + "learning_rate": 3.137875638839523e-05, + "loss": 1.1589, + "step": 5874500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1376656422834667e-05, + "loss": 1.1451, + "step": 5875000 + }, + { + "epoch": 3.52, + "learning_rate": 3.13745564572741e-05, + "loss": 1.1634, + "step": 5875500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1372460691644654e-05, + "loss": 1.1576, + "step": 5876000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1370360726084094e-05, + "loss": 1.1629, + "step": 5876500 + }, + { + "epoch": 3.52, + "learning_rate": 3.136826496045465e-05, + "loss": 1.1765, + "step": 5877000 + }, + { + "epoch": 3.52, + "learning_rate": 3.136616499489408e-05, + "loss": 1.1559, + "step": 5877500 + }, + { + "epoch": 3.52, + "learning_rate": 3.136406502933352e-05, + "loss": 1.169, + "step": 5878000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1361965063772955e-05, + "loss": 1.1637, + "step": 5878500 + }, + { + "epoch": 3.52, + "learning_rate": 3.135986509821239e-05, + "loss": 1.1497, + "step": 5879000 + }, + { + "epoch": 3.52, + "learning_rate": 3.135776513265183e-05, + "loss": 1.1297, + "step": 5879500 + }, + { + "epoch": 3.53, + "learning_rate": 3.135566516709126e-05, + "loss": 1.1368, + "step": 5880000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1353565201530695e-05, + "loss": 1.1574, + "step": 5880500 + }, + { + "epoch": 3.53, + "learning_rate": 3.135146523597013e-05, + "loss": 1.1638, + "step": 5881000 + }, + { + "epoch": 3.53, + "learning_rate": 3.134936527040956e-05, + "loss": 1.1624, + "step": 5881500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1347265304848996e-05, + "loss": 1.1721, + "step": 5882000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1345165339288436e-05, + "loss": 1.1526, + "step": 5882500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1343069573658996e-05, + "loss": 1.1727, + "step": 5883000 + }, + { + "epoch": 3.53, + "learning_rate": 3.134096960809842e-05, + "loss": 1.1446, + "step": 5883500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1338869642537856e-05, + "loss": 1.1466, + "step": 5884000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1336769676977297e-05, + "loss": 1.1409, + "step": 5884500 + }, + { + "epoch": 3.53, + "learning_rate": 3.133467391134786e-05, + "loss": 1.1721, + "step": 5885000 + }, + { + "epoch": 3.53, + "learning_rate": 3.133257394578729e-05, + "loss": 1.1691, + "step": 5885500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1330473980226724e-05, + "loss": 1.1641, + "step": 5886000 + }, + { + "epoch": 3.53, + "learning_rate": 3.132837401466616e-05, + "loss": 1.1508, + "step": 5886500 + }, + { + "epoch": 3.53, + "learning_rate": 3.132627404910559e-05, + "loss": 1.1638, + "step": 5887000 + }, + { + "epoch": 3.53, + "learning_rate": 3.132417408354503e-05, + "loss": 1.16, + "step": 5887500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1322074117984464e-05, + "loss": 1.1597, + "step": 5888000 + }, + { + "epoch": 3.53, + "learning_rate": 3.131997835235502e-05, + "loss": 1.146, + "step": 5888500 + }, + { + "epoch": 3.53, + "learning_rate": 3.131787838679445e-05, + "loss": 1.1556, + "step": 5889000 + }, + { + "epoch": 3.53, + "learning_rate": 3.131577842123389e-05, + "loss": 1.1483, + "step": 5889500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1313678455673325e-05, + "loss": 1.1471, + "step": 5890000 + }, + { + "epoch": 3.53, + "learning_rate": 3.131157849011276e-05, + "loss": 1.1895, + "step": 5890500 + }, + { + "epoch": 3.53, + "learning_rate": 3.13094785245522e-05, + "loss": 1.1553, + "step": 5891000 + }, + { + "epoch": 3.53, + "learning_rate": 3.130738275892275e-05, + "loss": 1.1956, + "step": 5891500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1305282793362186e-05, + "loss": 1.1658, + "step": 5892000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1303182827801626e-05, + "loss": 1.1574, + "step": 5892500 + }, + { + "epoch": 3.53, + "learning_rate": 3.130108286224106e-05, + "loss": 1.1656, + "step": 5893000 + }, + { + "epoch": 3.53, + "learning_rate": 3.129898709661161e-05, + "loss": 1.148, + "step": 5893500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1296887131051047e-05, + "loss": 1.1602, + "step": 5894000 + }, + { + "epoch": 3.53, + "learning_rate": 3.129479136542161e-05, + "loss": 1.1267, + "step": 5894500 + }, + { + "epoch": 3.53, + "learning_rate": 3.129269139986105e-05, + "loss": 1.1904, + "step": 5895000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1290591434300474e-05, + "loss": 1.137, + "step": 5895500 + }, + { + "epoch": 3.53, + "learning_rate": 3.128849146873991e-05, + "loss": 1.158, + "step": 5896000 + }, + { + "epoch": 3.54, + "learning_rate": 3.128639150317935e-05, + "loss": 1.1577, + "step": 5896500 + }, + { + "epoch": 3.54, + "learning_rate": 3.128429153761878e-05, + "loss": 1.1484, + "step": 5897000 + }, + { + "epoch": 3.54, + "learning_rate": 3.1282191572058214e-05, + "loss": 1.1578, + "step": 5897500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1280091606497655e-05, + "loss": 1.1826, + "step": 5898000 + }, + { + "epoch": 3.54, + "learning_rate": 3.127799164093709e-05, + "loss": 1.1629, + "step": 5898500 + }, + { + "epoch": 3.54, + "learning_rate": 3.127589167537652e-05, + "loss": 1.1477, + "step": 5899000 + }, + { + "epoch": 3.54, + "learning_rate": 3.127379590974708e-05, + "loss": 1.1423, + "step": 5899500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1271695944186515e-05, + "loss": 1.2008, + "step": 5900000 + }, + { + "epoch": 3.54, + "eval_loss": 1.1256732940673828, + "eval_runtime": 1107.8778, + "eval_samples_per_second": 475.431, + "eval_steps_per_second": 79.239, + "step": 5900000 + }, + { + "epoch": 3.54, + "learning_rate": 3.126959597862595e-05, + "loss": 1.1658, + "step": 5900500 + }, + { + "epoch": 3.54, + "learning_rate": 3.126749601306539e-05, + "loss": 1.1671, + "step": 5901000 + }, + { + "epoch": 3.54, + "learning_rate": 3.126539604750482e-05, + "loss": 1.1394, + "step": 5901500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1263296081944256e-05, + "loss": 1.1411, + "step": 5902000 + }, + { + "epoch": 3.54, + "learning_rate": 3.126120031631481e-05, + "loss": 1.1527, + "step": 5902500 + }, + { + "epoch": 3.54, + "learning_rate": 3.125910035075425e-05, + "loss": 1.102, + "step": 5903000 + }, + { + "epoch": 3.54, + "learning_rate": 3.125700038519368e-05, + "loss": 1.1446, + "step": 5903500 + }, + { + "epoch": 3.54, + "learning_rate": 3.125490041963312e-05, + "loss": 1.1394, + "step": 5904000 + }, + { + "epoch": 3.54, + "learning_rate": 3.125280465400367e-05, + "loss": 1.1653, + "step": 5904500 + }, + { + "epoch": 3.54, + "learning_rate": 3.125070468844311e-05, + "loss": 1.1468, + "step": 5905000 + }, + { + "epoch": 3.54, + "learning_rate": 3.1248604722882544e-05, + "loss": 1.1894, + "step": 5905500 + }, + { + "epoch": 3.54, + "learning_rate": 3.124650475732198e-05, + "loss": 1.1312, + "step": 5906000 + }, + { + "epoch": 3.54, + "learning_rate": 3.124440479176142e-05, + "loss": 1.1552, + "step": 5906500 + }, + { + "epoch": 3.54, + "learning_rate": 3.124230482620085e-05, + "loss": 1.1743, + "step": 5907000 + }, + { + "epoch": 3.54, + "learning_rate": 3.1240204860640285e-05, + "loss": 1.1693, + "step": 5907500 + }, + { + "epoch": 3.54, + "learning_rate": 3.123810489507972e-05, + "loss": 1.1624, + "step": 5908000 + }, + { + "epoch": 3.54, + "learning_rate": 3.123600912945028e-05, + "loss": 1.1454, + "step": 5908500 + }, + { + "epoch": 3.54, + "learning_rate": 3.123390916388971e-05, + "loss": 1.1572, + "step": 5909000 + }, + { + "epoch": 3.54, + "learning_rate": 3.123180919832915e-05, + "loss": 1.1449, + "step": 5909500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1229713432699706e-05, + "loss": 1.1558, + "step": 5910000 + }, + { + "epoch": 3.54, + "learning_rate": 3.122761346713914e-05, + "loss": 1.1809, + "step": 5910500 + }, + { + "epoch": 3.54, + "learning_rate": 3.122551350157857e-05, + "loss": 1.1526, + "step": 5911000 + }, + { + "epoch": 3.54, + "learning_rate": 3.122341353601801e-05, + "loss": 1.1782, + "step": 5911500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1221317770388566e-05, + "loss": 1.171, + "step": 5912000 + }, + { + "epoch": 3.54, + "learning_rate": 3.1219217804828e-05, + "loss": 1.149, + "step": 5912500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1217117839267433e-05, + "loss": 1.1716, + "step": 5913000 + }, + { + "epoch": 3.55, + "learning_rate": 3.1215022073637994e-05, + "loss": 1.1784, + "step": 5913500 + }, + { + "epoch": 3.55, + "learning_rate": 3.121292210807743e-05, + "loss": 1.1595, + "step": 5914000 + }, + { + "epoch": 3.55, + "learning_rate": 3.121082214251686e-05, + "loss": 1.1569, + "step": 5914500 + }, + { + "epoch": 3.55, + "learning_rate": 3.12087221769563e-05, + "loss": 1.1631, + "step": 5915000 + }, + { + "epoch": 3.55, + "learning_rate": 3.1206622211395734e-05, + "loss": 1.1243, + "step": 5915500 + }, + { + "epoch": 3.55, + "learning_rate": 3.120452224583517e-05, + "loss": 1.153, + "step": 5916000 + }, + { + "epoch": 3.55, + "learning_rate": 3.120242228027461e-05, + "loss": 1.1717, + "step": 5916500 + }, + { + "epoch": 3.55, + "learning_rate": 3.120032231471404e-05, + "loss": 1.1821, + "step": 5917000 + }, + { + "epoch": 3.55, + "learning_rate": 3.119822234915347e-05, + "loss": 1.1428, + "step": 5917500 + }, + { + "epoch": 3.55, + "learning_rate": 3.119612238359291e-05, + "loss": 1.1571, + "step": 5918000 + }, + { + "epoch": 3.55, + "learning_rate": 3.119402661796347e-05, + "loss": 1.1713, + "step": 5918500 + }, + { + "epoch": 3.55, + "learning_rate": 3.11919266524029e-05, + "loss": 1.1632, + "step": 5919000 + }, + { + "epoch": 3.55, + "learning_rate": 3.1189826686842336e-05, + "loss": 1.1444, + "step": 5919500 + }, + { + "epoch": 3.55, + "learning_rate": 3.118772672128177e-05, + "loss": 1.1484, + "step": 5920000 + }, + { + "epoch": 3.55, + "learning_rate": 3.11856267557212e-05, + "loss": 1.1473, + "step": 5920500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1183526790160636e-05, + "loss": 1.1684, + "step": 5921000 + }, + { + "epoch": 3.55, + "learning_rate": 3.1181426824600076e-05, + "loss": 1.1445, + "step": 5921500 + }, + { + "epoch": 3.55, + "learning_rate": 3.117932685903951e-05, + "loss": 1.142, + "step": 5922000 + }, + { + "epoch": 3.55, + "learning_rate": 3.117723109341006e-05, + "loss": 1.1717, + "step": 5922500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1175135327780624e-05, + "loss": 1.1472, + "step": 5923000 + }, + { + "epoch": 3.55, + "learning_rate": 3.1173035362220064e-05, + "loss": 1.1487, + "step": 5923500 + }, + { + "epoch": 3.55, + "learning_rate": 3.117093959659062e-05, + "loss": 1.1424, + "step": 5924000 + }, + { + "epoch": 3.55, + "learning_rate": 3.116883963103005e-05, + "loss": 1.1648, + "step": 5924500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1166739665469484e-05, + "loss": 1.1606, + "step": 5925000 + }, + { + "epoch": 3.55, + "learning_rate": 3.1164639699908925e-05, + "loss": 1.1984, + "step": 5925500 + }, + { + "epoch": 3.55, + "learning_rate": 3.116253973434836e-05, + "loss": 1.1892, + "step": 5926000 + }, + { + "epoch": 3.55, + "learning_rate": 3.116043976878779e-05, + "loss": 1.1595, + "step": 5926500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1158339803227225e-05, + "loss": 1.1622, + "step": 5927000 + }, + { + "epoch": 3.55, + "learning_rate": 3.115623983766666e-05, + "loss": 1.1702, + "step": 5927500 + }, + { + "epoch": 3.55, + "learning_rate": 3.115413987210609e-05, + "loss": 1.1708, + "step": 5928000 + }, + { + "epoch": 3.55, + "learning_rate": 3.115203990654553e-05, + "loss": 1.1384, + "step": 5928500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1149939940984966e-05, + "loss": 1.1754, + "step": 5929000 + }, + { + "epoch": 3.55, + "learning_rate": 3.11478399754244e-05, + "loss": 1.1443, + "step": 5929500 + }, + { + "epoch": 3.56, + "learning_rate": 3.114574420979496e-05, + "loss": 1.174, + "step": 5930000 + }, + { + "epoch": 3.56, + "learning_rate": 3.114364424423439e-05, + "loss": 1.148, + "step": 5930500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1141544278673826e-05, + "loss": 1.1494, + "step": 5931000 + }, + { + "epoch": 3.56, + "learning_rate": 3.1139444313113267e-05, + "loss": 1.1627, + "step": 5931500 + }, + { + "epoch": 3.56, + "learning_rate": 3.113734854748382e-05, + "loss": 1.1867, + "step": 5932000 + }, + { + "epoch": 3.56, + "learning_rate": 3.113525278185438e-05, + "loss": 1.1328, + "step": 5932500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1133152816293814e-05, + "loss": 1.1611, + "step": 5933000 + }, + { + "epoch": 3.56, + "learning_rate": 3.113105285073325e-05, + "loss": 1.1465, + "step": 5933500 + }, + { + "epoch": 3.56, + "learning_rate": 3.112895288517269e-05, + "loss": 1.1771, + "step": 5934000 + }, + { + "epoch": 3.56, + "learning_rate": 3.112685711954324e-05, + "loss": 1.1597, + "step": 5934500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1124757153982675e-05, + "loss": 1.1522, + "step": 5935000 + }, + { + "epoch": 3.56, + "learning_rate": 3.112265718842211e-05, + "loss": 1.1517, + "step": 5935500 + }, + { + "epoch": 3.56, + "learning_rate": 3.112055722286155e-05, + "loss": 1.1615, + "step": 5936000 + }, + { + "epoch": 3.56, + "learning_rate": 3.1118457257300975e-05, + "loss": 1.1654, + "step": 5936500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1116357291740415e-05, + "loss": 1.1515, + "step": 5937000 + }, + { + "epoch": 3.56, + "learning_rate": 3.1114261526110976e-05, + "loss": 1.1499, + "step": 5937500 + }, + { + "epoch": 3.56, + "learning_rate": 3.111216156055041e-05, + "loss": 1.1658, + "step": 5938000 + }, + { + "epoch": 3.56, + "learning_rate": 3.111006159498984e-05, + "loss": 1.1458, + "step": 5938500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1107961629429276e-05, + "loss": 1.1981, + "step": 5939000 + }, + { + "epoch": 3.56, + "learning_rate": 3.110586166386871e-05, + "loss": 1.1786, + "step": 5939500 + }, + { + "epoch": 3.56, + "learning_rate": 3.110376169830814e-05, + "loss": 1.1494, + "step": 5940000 + }, + { + "epoch": 3.56, + "learning_rate": 3.110166173274758e-05, + "loss": 1.1672, + "step": 5940500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1099565967118144e-05, + "loss": 1.1746, + "step": 5941000 + }, + { + "epoch": 3.56, + "learning_rate": 3.109746600155757e-05, + "loss": 1.1339, + "step": 5941500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1095366035997004e-05, + "loss": 1.1534, + "step": 5942000 + }, + { + "epoch": 3.56, + "learning_rate": 3.1093266070436444e-05, + "loss": 1.1508, + "step": 5942500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1091170304807004e-05, + "loss": 1.1381, + "step": 5943000 + }, + { + "epoch": 3.56, + "learning_rate": 3.108907033924644e-05, + "loss": 1.1895, + "step": 5943500 + }, + { + "epoch": 3.56, + "learning_rate": 3.108697037368587e-05, + "loss": 1.1562, + "step": 5944000 + }, + { + "epoch": 3.56, + "learning_rate": 3.1084870408125305e-05, + "loss": 1.1579, + "step": 5944500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1082774642495865e-05, + "loss": 1.1596, + "step": 5945000 + }, + { + "epoch": 3.56, + "learning_rate": 3.10806746769353e-05, + "loss": 1.1483, + "step": 5945500 + }, + { + "epoch": 3.56, + "learning_rate": 3.107857471137473e-05, + "loss": 1.1724, + "step": 5946000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1076474745814165e-05, + "loss": 1.1667, + "step": 5946500 + }, + { + "epoch": 3.57, + "learning_rate": 3.10743747802536e-05, + "loss": 1.1498, + "step": 5947000 + }, + { + "epoch": 3.57, + "learning_rate": 3.107227481469304e-05, + "loss": 1.1598, + "step": 5947500 + }, + { + "epoch": 3.57, + "learning_rate": 3.107017484913247e-05, + "loss": 1.1706, + "step": 5948000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1068074883571906e-05, + "loss": 1.1771, + "step": 5948500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1065974918011346e-05, + "loss": 1.161, + "step": 5949000 + }, + { + "epoch": 3.57, + "learning_rate": 3.106387495245078e-05, + "loss": 1.1309, + "step": 5949500 + }, + { + "epoch": 3.57, + "learning_rate": 3.106177498689021e-05, + "loss": 1.1689, + "step": 5950000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1059675021329653e-05, + "loss": 1.1624, + "step": 5950500 + }, + { + "epoch": 3.57, + "learning_rate": 3.105757925570021e-05, + "loss": 1.1674, + "step": 5951000 + }, + { + "epoch": 3.57, + "learning_rate": 3.105547929013964e-05, + "loss": 1.1829, + "step": 5951500 + }, + { + "epoch": 3.57, + "learning_rate": 3.105337932457908e-05, + "loss": 1.1584, + "step": 5952000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1051279359018514e-05, + "loss": 1.1646, + "step": 5952500 + }, + { + "epoch": 3.57, + "learning_rate": 3.104917939345795e-05, + "loss": 1.1595, + "step": 5953000 + }, + { + "epoch": 3.57, + "learning_rate": 3.104707942789739e-05, + "loss": 1.1445, + "step": 5953500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1044979462336815e-05, + "loss": 1.1636, + "step": 5954000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1042883696707375e-05, + "loss": 1.1679, + "step": 5954500 + }, + { + "epoch": 3.57, + "learning_rate": 3.104078373114681e-05, + "loss": 1.1605, + "step": 5955000 + }, + { + "epoch": 3.57, + "learning_rate": 3.103868376558625e-05, + "loss": 1.1737, + "step": 5955500 + }, + { + "epoch": 3.57, + "learning_rate": 3.103658380002568e-05, + "loss": 1.1143, + "step": 5956000 + }, + { + "epoch": 3.57, + "learning_rate": 3.103448383446511e-05, + "loss": 1.1539, + "step": 5956500 + }, + { + "epoch": 3.57, + "learning_rate": 3.103238386890455e-05, + "loss": 1.1602, + "step": 5957000 + }, + { + "epoch": 3.57, + "learning_rate": 3.103028390334398e-05, + "loss": 1.1507, + "step": 5957500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1028183937783416e-05, + "loss": 1.1976, + "step": 5958000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1026088172153976e-05, + "loss": 1.1228, + "step": 5958500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1023992406524537e-05, + "loss": 1.1585, + "step": 5959000 + }, + { + "epoch": 3.57, + "learning_rate": 3.102189244096397e-05, + "loss": 1.1258, + "step": 5959500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1019792475403403e-05, + "loss": 1.1572, + "step": 5960000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1017692509842844e-05, + "loss": 1.1701, + "step": 5960500 + }, + { + "epoch": 3.57, + "learning_rate": 3.101559254428228e-05, + "loss": 1.1691, + "step": 5961000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1013492578721704e-05, + "loss": 1.1618, + "step": 5961500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1011396813092264e-05, + "loss": 1.1777, + "step": 5962000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1009296847531704e-05, + "loss": 1.1518, + "step": 5962500 + }, + { + "epoch": 3.58, + "learning_rate": 3.100719688197114e-05, + "loss": 1.166, + "step": 5963000 + }, + { + "epoch": 3.58, + "learning_rate": 3.1005096916410565e-05, + "loss": 1.1649, + "step": 5963500 + }, + { + "epoch": 3.58, + "learning_rate": 3.1002996950850005e-05, + "loss": 1.166, + "step": 5964000 + }, + { + "epoch": 3.58, + "learning_rate": 3.100089698528944e-05, + "loss": 1.1495, + "step": 5964500 + }, + { + "epoch": 3.58, + "learning_rate": 3.099879701972887e-05, + "loss": 1.1867, + "step": 5965000 + }, + { + "epoch": 3.58, + "learning_rate": 3.099670125409943e-05, + "loss": 1.1511, + "step": 5965500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0994601288538866e-05, + "loss": 1.1707, + "step": 5966000 + }, + { + "epoch": 3.58, + "learning_rate": 3.09925013229783e-05, + "loss": 1.1873, + "step": 5966500 + }, + { + "epoch": 3.58, + "learning_rate": 3.099040135741774e-05, + "loss": 1.1604, + "step": 5967000 + }, + { + "epoch": 3.58, + "learning_rate": 3.098830139185717e-05, + "loss": 1.1714, + "step": 5967500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0986201426296606e-05, + "loss": 1.1375, + "step": 5968000 + }, + { + "epoch": 3.58, + "learning_rate": 3.0984101460736046e-05, + "loss": 1.1387, + "step": 5968500 + }, + { + "epoch": 3.58, + "learning_rate": 3.09820056951066e-05, + "loss": 1.1694, + "step": 5969000 + }, + { + "epoch": 3.58, + "learning_rate": 3.0979905729546033e-05, + "loss": 1.1235, + "step": 5969500 + }, + { + "epoch": 3.58, + "learning_rate": 3.097780576398547e-05, + "loss": 1.1716, + "step": 5970000 + }, + { + "epoch": 3.58, + "learning_rate": 3.097570579842491e-05, + "loss": 1.153, + "step": 5970500 + }, + { + "epoch": 3.58, + "learning_rate": 3.097360583286434e-05, + "loss": 1.1445, + "step": 5971000 + }, + { + "epoch": 3.58, + "learning_rate": 3.0971505867303774e-05, + "loss": 1.1352, + "step": 5971500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0969405901743214e-05, + "loss": 1.1645, + "step": 5972000 + }, + { + "epoch": 3.58, + "learning_rate": 3.096730593618265e-05, + "loss": 1.1357, + "step": 5972500 + }, + { + "epoch": 3.58, + "learning_rate": 3.09652101705532e-05, + "loss": 1.1388, + "step": 5973000 + }, + { + "epoch": 3.58, + "learning_rate": 3.0963110204992635e-05, + "loss": 1.1792, + "step": 5973500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0961010239432075e-05, + "loss": 1.1273, + "step": 5974000 + }, + { + "epoch": 3.58, + "learning_rate": 3.095891027387151e-05, + "loss": 1.1706, + "step": 5974500 + }, + { + "epoch": 3.58, + "learning_rate": 3.095681450824206e-05, + "loss": 1.1705, + "step": 5975000 + }, + { + "epoch": 3.58, + "learning_rate": 3.0954718742612616e-05, + "loss": 1.1275, + "step": 5975500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0952618777052056e-05, + "loss": 1.1598, + "step": 5976000 + }, + { + "epoch": 3.58, + "learning_rate": 3.095051881149149e-05, + "loss": 1.1587, + "step": 5976500 + }, + { + "epoch": 3.58, + "learning_rate": 3.094841884593092e-05, + "loss": 1.1657, + "step": 5977000 + }, + { + "epoch": 3.58, + "learning_rate": 3.094632308030148e-05, + "loss": 1.1642, + "step": 5977500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0944223114740917e-05, + "loss": 1.1787, + "step": 5978000 + }, + { + "epoch": 3.58, + "learning_rate": 3.094212314918035e-05, + "loss": 1.1465, + "step": 5978500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0940023183619783e-05, + "loss": 1.1526, + "step": 5979000 + }, + { + "epoch": 3.58, + "learning_rate": 3.0937927417990344e-05, + "loss": 1.1358, + "step": 5979500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0935827452429784e-05, + "loss": 1.1675, + "step": 5980000 + }, + { + "epoch": 3.59, + "learning_rate": 3.093372748686921e-05, + "loss": 1.1751, + "step": 5980500 + }, + { + "epoch": 3.59, + "learning_rate": 3.093162752130865e-05, + "loss": 1.1717, + "step": 5981000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0929527555748084e-05, + "loss": 1.154, + "step": 5981500 + }, + { + "epoch": 3.59, + "learning_rate": 3.092742759018752e-05, + "loss": 1.1558, + "step": 5982000 + }, + { + "epoch": 3.59, + "learning_rate": 3.092532762462696e-05, + "loss": 1.1396, + "step": 5982500 + }, + { + "epoch": 3.59, + "learning_rate": 3.092322765906639e-05, + "loss": 1.1186, + "step": 5983000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0921131893436945e-05, + "loss": 1.164, + "step": 5983500 + }, + { + "epoch": 3.59, + "learning_rate": 3.091903192787638e-05, + "loss": 1.162, + "step": 5984000 + }, + { + "epoch": 3.59, + "learning_rate": 3.091693196231582e-05, + "loss": 1.1591, + "step": 5984500 + }, + { + "epoch": 3.59, + "learning_rate": 3.091483199675525e-05, + "loss": 1.1537, + "step": 5985000 + }, + { + "epoch": 3.59, + "learning_rate": 3.091274043105693e-05, + "loss": 1.1672, + "step": 5985500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0910640465496366e-05, + "loss": 1.1763, + "step": 5986000 + }, + { + "epoch": 3.59, + "learning_rate": 3.09085404999358e-05, + "loss": 1.1476, + "step": 5986500 + }, + { + "epoch": 3.59, + "learning_rate": 3.090644053437524e-05, + "loss": 1.1415, + "step": 5987000 + }, + { + "epoch": 3.59, + "learning_rate": 3.090434056881467e-05, + "loss": 1.1441, + "step": 5987500 + }, + { + "epoch": 3.59, + "learning_rate": 3.090224480318523e-05, + "loss": 1.1301, + "step": 5988000 + }, + { + "epoch": 3.59, + "learning_rate": 3.090014483762467e-05, + "loss": 1.1673, + "step": 5988500 + }, + { + "epoch": 3.59, + "learning_rate": 3.08980448720641e-05, + "loss": 1.1652, + "step": 5989000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0895949106434654e-05, + "loss": 1.1336, + "step": 5989500 + }, + { + "epoch": 3.59, + "learning_rate": 3.089384914087409e-05, + "loss": 1.1357, + "step": 5990000 + }, + { + "epoch": 3.59, + "learning_rate": 3.089174917531353e-05, + "loss": 1.172, + "step": 5990500 + }, + { + "epoch": 3.59, + "learning_rate": 3.088964920975296e-05, + "loss": 1.1875, + "step": 5991000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0887549244192395e-05, + "loss": 1.1785, + "step": 5991500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0885449278631835e-05, + "loss": 1.1766, + "step": 5992000 + }, + { + "epoch": 3.59, + "learning_rate": 3.088334931307126e-05, + "loss": 1.1447, + "step": 5992500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0881249347510695e-05, + "loss": 1.1534, + "step": 5993000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0879149381950135e-05, + "loss": 1.1705, + "step": 5993500 + }, + { + "epoch": 3.59, + "learning_rate": 3.087704941638957e-05, + "loss": 1.1489, + "step": 5994000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0874949450829e-05, + "loss": 1.1322, + "step": 5994500 + }, + { + "epoch": 3.59, + "learning_rate": 3.087284948526844e-05, + "loss": 1.1457, + "step": 5995000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0870753719638996e-05, + "loss": 1.156, + "step": 5995500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0868657954009557e-05, + "loss": 1.1486, + "step": 5996000 + }, + { + "epoch": 3.6, + "learning_rate": 3.086655798844899e-05, + "loss": 1.1583, + "step": 5996500 + }, + { + "epoch": 3.6, + "learning_rate": 3.0864458022888423e-05, + "loss": 1.1577, + "step": 5997000 + }, + { + "epoch": 3.6, + "learning_rate": 3.086235805732786e-05, + "loss": 1.1596, + "step": 5997500 + }, + { + "epoch": 3.6, + "learning_rate": 3.086025809176729e-05, + "loss": 1.1564, + "step": 5998000 + }, + { + "epoch": 3.6, + "learning_rate": 3.085815812620673e-05, + "loss": 1.1541, + "step": 5998500 + }, + { + "epoch": 3.6, + "learning_rate": 3.0856058160646164e-05, + "loss": 1.1695, + "step": 5999000 + }, + { + "epoch": 3.6, + "learning_rate": 3.08539581950856e-05, + "loss": 1.1244, + "step": 5999500 + }, + { + "epoch": 3.6, + "learning_rate": 3.085186242945615e-05, + "loss": 1.1495, + "step": 6000000 + }, + { + "epoch": 3.6, + "eval_loss": 1.1201045513153076, + "eval_runtime": 1104.3731, + "eval_samples_per_second": 476.94, + "eval_steps_per_second": 79.49, + "step": 6000000 + }, + { + "epoch": 3.6, + "learning_rate": 3.084976246389559e-05, + "loss": 1.1418, + "step": 6000500 + }, + { + "epoch": 3.6, + "learning_rate": 3.0847662498335025e-05, + "loss": 1.1379, + "step": 6001000 + }, + { + "epoch": 3.6, + "learning_rate": 3.084556253277446e-05, + "loss": 1.1534, + "step": 6001500 + }, + { + "epoch": 3.6, + "learning_rate": 3.08434625672139e-05, + "loss": 1.1605, + "step": 6002000 + }, + { + "epoch": 3.6, + "learning_rate": 3.084136260165333e-05, + "loss": 1.162, + "step": 6002500 + }, + { + "epoch": 3.6, + "learning_rate": 3.083926263609277e-05, + "loss": 1.1471, + "step": 6003000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0837166870463326e-05, + "loss": 1.1559, + "step": 6003500 + }, + { + "epoch": 3.6, + "learning_rate": 3.083506690490276e-05, + "loss": 1.1769, + "step": 6004000 + }, + { + "epoch": 3.6, + "learning_rate": 3.083296693934219e-05, + "loss": 1.1522, + "step": 6004500 + }, + { + "epoch": 3.6, + "learning_rate": 3.083086697378163e-05, + "loss": 1.1381, + "step": 6005000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0828767008221066e-05, + "loss": 1.1566, + "step": 6005500 + }, + { + "epoch": 3.6, + "learning_rate": 3.08266670426605e-05, + "loss": 1.1572, + "step": 6006000 + }, + { + "epoch": 3.6, + "learning_rate": 3.082456707709994e-05, + "loss": 1.1549, + "step": 6006500 + }, + { + "epoch": 3.6, + "learning_rate": 3.0822471311470494e-05, + "loss": 1.1552, + "step": 6007000 + }, + { + "epoch": 3.6, + "learning_rate": 3.082037134590993e-05, + "loss": 1.1433, + "step": 6007500 + }, + { + "epoch": 3.6, + "learning_rate": 3.081827138034936e-05, + "loss": 1.1434, + "step": 6008000 + }, + { + "epoch": 3.6, + "learning_rate": 3.08161714147888e-05, + "loss": 1.1637, + "step": 6008500 + }, + { + "epoch": 3.6, + "learning_rate": 3.0814075649159354e-05, + "loss": 1.1689, + "step": 6009000 + }, + { + "epoch": 3.6, + "learning_rate": 3.081197568359879e-05, + "loss": 1.1448, + "step": 6009500 + }, + { + "epoch": 3.6, + "learning_rate": 3.080987571803823e-05, + "loss": 1.1786, + "step": 6010000 + }, + { + "epoch": 3.6, + "learning_rate": 3.080777575247766e-05, + "loss": 1.1791, + "step": 6010500 + }, + { + "epoch": 3.6, + "learning_rate": 3.0805675786917095e-05, + "loss": 1.1565, + "step": 6011000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0803575821356535e-05, + "loss": 1.1375, + "step": 6011500 + }, + { + "epoch": 3.6, + "learning_rate": 3.080147585579596e-05, + "loss": 1.1463, + "step": 6012000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0799375890235395e-05, + "loss": 1.1686, + "step": 6012500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0797280124605956e-05, + "loss": 1.1566, + "step": 6013000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0795180159045396e-05, + "loss": 1.165, + "step": 6013500 + }, + { + "epoch": 3.61, + "learning_rate": 3.079308019348483e-05, + "loss": 1.1428, + "step": 6014000 + }, + { + "epoch": 3.61, + "learning_rate": 3.079098442785538e-05, + "loss": 1.1633, + "step": 6014500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0788884462294816e-05, + "loss": 1.1404, + "step": 6015000 + }, + { + "epoch": 3.61, + "learning_rate": 3.078678449673426e-05, + "loss": 1.1488, + "step": 6015500 + }, + { + "epoch": 3.61, + "learning_rate": 3.078468453117369e-05, + "loss": 1.1348, + "step": 6016000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0782584565613124e-05, + "loss": 1.1345, + "step": 6016500 + }, + { + "epoch": 3.61, + "learning_rate": 3.078048460005256e-05, + "loss": 1.1492, + "step": 6017000 + }, + { + "epoch": 3.61, + "learning_rate": 3.077838463449199e-05, + "loss": 1.1607, + "step": 6017500 + }, + { + "epoch": 3.61, + "learning_rate": 3.077628886886255e-05, + "loss": 1.1618, + "step": 6018000 + }, + { + "epoch": 3.61, + "learning_rate": 3.077418890330199e-05, + "loss": 1.1709, + "step": 6018500 + }, + { + "epoch": 3.61, + "learning_rate": 3.077208893774142e-05, + "loss": 1.1431, + "step": 6019000 + }, + { + "epoch": 3.61, + "learning_rate": 3.076998897218085e-05, + "loss": 1.1381, + "step": 6019500 + }, + { + "epoch": 3.61, + "learning_rate": 3.076788900662029e-05, + "loss": 1.129, + "step": 6020000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0765789041059725e-05, + "loss": 1.1241, + "step": 6020500 + }, + { + "epoch": 3.61, + "learning_rate": 3.076368907549916e-05, + "loss": 1.1424, + "step": 6021000 + }, + { + "epoch": 3.61, + "learning_rate": 3.07615891099386e-05, + "loss": 1.1486, + "step": 6021500 + }, + { + "epoch": 3.61, + "learning_rate": 3.075949334430915e-05, + "loss": 1.1554, + "step": 6022000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0757393378748586e-05, + "loss": 1.1303, + "step": 6022500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0755297613119146e-05, + "loss": 1.1941, + "step": 6023000 + }, + { + "epoch": 3.61, + "learning_rate": 3.075319764755858e-05, + "loss": 1.1647, + "step": 6023500 + }, + { + "epoch": 3.61, + "learning_rate": 3.075109768199801e-05, + "loss": 1.1657, + "step": 6024000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0748997716437446e-05, + "loss": 1.1495, + "step": 6024500 + }, + { + "epoch": 3.61, + "learning_rate": 3.074689775087689e-05, + "loss": 1.1555, + "step": 6025000 + }, + { + "epoch": 3.61, + "learning_rate": 3.074479778531632e-05, + "loss": 1.131, + "step": 6025500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0742697819755754e-05, + "loss": 1.1432, + "step": 6026000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0740597854195194e-05, + "loss": 1.1818, + "step": 6026500 + }, + { + "epoch": 3.61, + "learning_rate": 3.073850208856575e-05, + "loss": 1.1621, + "step": 6027000 + }, + { + "epoch": 3.61, + "learning_rate": 3.073640212300518e-05, + "loss": 1.1507, + "step": 6027500 + }, + { + "epoch": 3.61, + "learning_rate": 3.073430635737574e-05, + "loss": 1.1735, + "step": 6028000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0732206391815175e-05, + "loss": 1.1571, + "step": 6028500 + }, + { + "epoch": 3.61, + "learning_rate": 3.073010642625461e-05, + "loss": 1.1825, + "step": 6029000 + }, + { + "epoch": 3.61, + "learning_rate": 3.072800646069404e-05, + "loss": 1.1564, + "step": 6029500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0725906495133475e-05, + "loss": 1.1501, + "step": 6030000 + }, + { + "epoch": 3.62, + "learning_rate": 3.0723806529572915e-05, + "loss": 1.1777, + "step": 6030500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0721714963874596e-05, + "loss": 1.1715, + "step": 6031000 + }, + { + "epoch": 3.62, + "learning_rate": 3.071961499831403e-05, + "loss": 1.1574, + "step": 6031500 + }, + { + "epoch": 3.62, + "learning_rate": 3.071751503275346e-05, + "loss": 1.1526, + "step": 6032000 + }, + { + "epoch": 3.62, + "learning_rate": 3.07154150671929e-05, + "loss": 1.1695, + "step": 6032500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0713315101632336e-05, + "loss": 1.1318, + "step": 6033000 + }, + { + "epoch": 3.62, + "learning_rate": 3.071121513607176e-05, + "loss": 1.1538, + "step": 6033500 + }, + { + "epoch": 3.62, + "learning_rate": 3.07091151705112e-05, + "loss": 1.1761, + "step": 6034000 + }, + { + "epoch": 3.62, + "learning_rate": 3.070701520495064e-05, + "loss": 1.1666, + "step": 6034500 + }, + { + "epoch": 3.62, + "learning_rate": 3.070491523939007e-05, + "loss": 1.128, + "step": 6035000 + }, + { + "epoch": 3.62, + "learning_rate": 3.070281947376063e-05, + "loss": 1.1584, + "step": 6035500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0700719508200064e-05, + "loss": 1.1697, + "step": 6036000 + }, + { + "epoch": 3.62, + "learning_rate": 3.06986195426395e-05, + "loss": 1.1391, + "step": 6036500 + }, + { + "epoch": 3.62, + "learning_rate": 3.069651957707893e-05, + "loss": 1.1531, + "step": 6037000 + }, + { + "epoch": 3.62, + "learning_rate": 3.069442381144949e-05, + "loss": 1.1702, + "step": 6037500 + }, + { + "epoch": 3.62, + "learning_rate": 3.069232384588893e-05, + "loss": 1.1404, + "step": 6038000 + }, + { + "epoch": 3.62, + "learning_rate": 3.069022388032836e-05, + "loss": 1.158, + "step": 6038500 + }, + { + "epoch": 3.62, + "learning_rate": 3.06881239147678e-05, + "loss": 1.154, + "step": 6039000 + }, + { + "epoch": 3.62, + "learning_rate": 3.068602394920723e-05, + "loss": 1.1648, + "step": 6039500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0683923983646665e-05, + "loss": 1.141, + "step": 6040000 + }, + { + "epoch": 3.62, + "learning_rate": 3.0681824018086106e-05, + "loss": 1.1544, + "step": 6040500 + }, + { + "epoch": 3.62, + "learning_rate": 3.067972405252554e-05, + "loss": 1.1476, + "step": 6041000 + }, + { + "epoch": 3.62, + "learning_rate": 3.067762828689609e-05, + "loss": 1.1499, + "step": 6041500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0675528321335526e-05, + "loss": 1.1424, + "step": 6042000 + }, + { + "epoch": 3.62, + "learning_rate": 3.0673428355774966e-05, + "loss": 1.166, + "step": 6042500 + }, + { + "epoch": 3.62, + "learning_rate": 3.06713283902144e-05, + "loss": 1.1406, + "step": 6043000 + }, + { + "epoch": 3.62, + "learning_rate": 3.066923262458495e-05, + "loss": 1.1702, + "step": 6043500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0667136858955514e-05, + "loss": 1.1587, + "step": 6044000 + }, + { + "epoch": 3.62, + "learning_rate": 3.066503689339495e-05, + "loss": 1.1714, + "step": 6044500 + }, + { + "epoch": 3.62, + "learning_rate": 3.066293692783439e-05, + "loss": 1.1564, + "step": 6045000 + }, + { + "epoch": 3.62, + "learning_rate": 3.0660836962273814e-05, + "loss": 1.1633, + "step": 6045500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0658736996713254e-05, + "loss": 1.1154, + "step": 6046000 + }, + { + "epoch": 3.63, + "learning_rate": 3.0656641231083815e-05, + "loss": 1.1355, + "step": 6046500 + }, + { + "epoch": 3.63, + "learning_rate": 3.065454126552325e-05, + "loss": 1.1485, + "step": 6047000 + }, + { + "epoch": 3.63, + "learning_rate": 3.065244129996268e-05, + "loss": 1.1724, + "step": 6047500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0650341334402115e-05, + "loss": 1.171, + "step": 6048000 + }, + { + "epoch": 3.63, + "learning_rate": 3.064824136884155e-05, + "loss": 1.1207, + "step": 6048500 + }, + { + "epoch": 3.63, + "learning_rate": 3.064614560321211e-05, + "loss": 1.1697, + "step": 6049000 + }, + { + "epoch": 3.63, + "learning_rate": 3.064404563765154e-05, + "loss": 1.1359, + "step": 6049500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0641945672090976e-05, + "loss": 1.1411, + "step": 6050000 + }, + { + "epoch": 3.63, + "learning_rate": 3.063984570653041e-05, + "loss": 1.1821, + "step": 6050500 + }, + { + "epoch": 3.63, + "learning_rate": 3.063774574096984e-05, + "loss": 1.1201, + "step": 6051000 + }, + { + "epoch": 3.63, + "learning_rate": 3.063564577540928e-05, + "loss": 1.1454, + "step": 6051500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0633545809848716e-05, + "loss": 1.1424, + "step": 6052000 + }, + { + "epoch": 3.63, + "learning_rate": 3.063145004421927e-05, + "loss": 1.1445, + "step": 6052500 + }, + { + "epoch": 3.63, + "learning_rate": 3.062935007865871e-05, + "loss": 1.1573, + "step": 6053000 + }, + { + "epoch": 3.63, + "learning_rate": 3.0627250113098144e-05, + "loss": 1.156, + "step": 6053500 + }, + { + "epoch": 3.63, + "learning_rate": 3.062515014753758e-05, + "loss": 1.1538, + "step": 6054000 + }, + { + "epoch": 3.63, + "learning_rate": 3.062305018197702e-05, + "loss": 1.1468, + "step": 6054500 + }, + { + "epoch": 3.63, + "learning_rate": 3.062095021641645e-05, + "loss": 1.1716, + "step": 6055000 + }, + { + "epoch": 3.63, + "learning_rate": 3.0618850250855884e-05, + "loss": 1.1697, + "step": 6055500 + }, + { + "epoch": 3.63, + "learning_rate": 3.061675448522644e-05, + "loss": 1.1826, + "step": 6056000 + }, + { + "epoch": 3.63, + "learning_rate": 3.061465451966588e-05, + "loss": 1.1528, + "step": 6056500 + }, + { + "epoch": 3.63, + "learning_rate": 3.061255875403644e-05, + "loss": 1.1555, + "step": 6057000 + }, + { + "epoch": 3.63, + "learning_rate": 3.0610458788475865e-05, + "loss": 1.1688, + "step": 6057500 + }, + { + "epoch": 3.63, + "learning_rate": 3.06083588229153e-05, + "loss": 1.1609, + "step": 6058000 + }, + { + "epoch": 3.63, + "learning_rate": 3.060625885735474e-05, + "loss": 1.1878, + "step": 6058500 + }, + { + "epoch": 3.63, + "learning_rate": 3.060415889179417e-05, + "loss": 1.1578, + "step": 6059000 + }, + { + "epoch": 3.63, + "learning_rate": 3.0602058926233606e-05, + "loss": 1.1734, + "step": 6059500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0599958960673046e-05, + "loss": 1.1419, + "step": 6060000 + }, + { + "epoch": 3.63, + "learning_rate": 3.059785899511248e-05, + "loss": 1.18, + "step": 6060500 + }, + { + "epoch": 3.63, + "learning_rate": 3.059576322948303e-05, + "loss": 1.1561, + "step": 6061000 + }, + { + "epoch": 3.63, + "learning_rate": 3.059366326392247e-05, + "loss": 1.159, + "step": 6061500 + }, + { + "epoch": 3.63, + "learning_rate": 3.059156329836191e-05, + "loss": 1.1631, + "step": 6062000 + }, + { + "epoch": 3.63, + "learning_rate": 3.058946333280134e-05, + "loss": 1.1621, + "step": 6062500 + }, + { + "epoch": 3.64, + "learning_rate": 3.058736336724078e-05, + "loss": 1.1111, + "step": 6063000 + }, + { + "epoch": 3.64, + "learning_rate": 3.0585263401680214e-05, + "loss": 1.1738, + "step": 6063500 + }, + { + "epoch": 3.64, + "learning_rate": 3.058316343611965e-05, + "loss": 1.1416, + "step": 6064000 + }, + { + "epoch": 3.64, + "learning_rate": 3.058106347055909e-05, + "loss": 1.1578, + "step": 6064500 + }, + { + "epoch": 3.64, + "learning_rate": 3.057896770492964e-05, + "loss": 1.1813, + "step": 6065000 + }, + { + "epoch": 3.64, + "learning_rate": 3.0576867739369075e-05, + "loss": 1.1445, + "step": 6065500 + }, + { + "epoch": 3.64, + "learning_rate": 3.057476777380851e-05, + "loss": 1.1671, + "step": 6066000 + }, + { + "epoch": 3.64, + "learning_rate": 3.057266780824795e-05, + "loss": 1.1427, + "step": 6066500 + }, + { + "epoch": 3.64, + "learning_rate": 3.05705720426185e-05, + "loss": 1.1394, + "step": 6067000 + }, + { + "epoch": 3.64, + "learning_rate": 3.0568472077057935e-05, + "loss": 1.1664, + "step": 6067500 + }, + { + "epoch": 3.64, + "learning_rate": 3.056637211149737e-05, + "loss": 1.1623, + "step": 6068000 + }, + { + "epoch": 3.64, + "learning_rate": 3.056427214593681e-05, + "loss": 1.1842, + "step": 6068500 + }, + { + "epoch": 3.64, + "learning_rate": 3.056217638030736e-05, + "loss": 1.1929, + "step": 6069000 + }, + { + "epoch": 3.64, + "learning_rate": 3.0560076414746796e-05, + "loss": 1.1508, + "step": 6069500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0557976449186236e-05, + "loss": 1.1145, + "step": 6070000 + }, + { + "epoch": 3.64, + "learning_rate": 3.055588068355679e-05, + "loss": 1.1495, + "step": 6070500 + }, + { + "epoch": 3.64, + "learning_rate": 3.055378071799622e-05, + "loss": 1.1704, + "step": 6071000 + }, + { + "epoch": 3.64, + "learning_rate": 3.055168075243566e-05, + "loss": 1.152, + "step": 6071500 + }, + { + "epoch": 3.64, + "learning_rate": 3.05495807868751e-05, + "loss": 1.1681, + "step": 6072000 + }, + { + "epoch": 3.64, + "learning_rate": 3.054748082131453e-05, + "loss": 1.1432, + "step": 6072500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0545380855753964e-05, + "loss": 1.1594, + "step": 6073000 + }, + { + "epoch": 3.64, + "learning_rate": 3.0543280890193404e-05, + "loss": 1.1433, + "step": 6073500 + }, + { + "epoch": 3.64, + "learning_rate": 3.054118092463284e-05, + "loss": 1.1563, + "step": 6074000 + }, + { + "epoch": 3.64, + "learning_rate": 3.053908095907227e-05, + "loss": 1.1349, + "step": 6074500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0536980993511704e-05, + "loss": 1.1558, + "step": 6075000 + }, + { + "epoch": 3.64, + "learning_rate": 3.053488102795114e-05, + "loss": 1.1547, + "step": 6075500 + }, + { + "epoch": 3.64, + "learning_rate": 3.053278106239058e-05, + "loss": 1.1457, + "step": 6076000 + }, + { + "epoch": 3.64, + "learning_rate": 3.053068529676114e-05, + "loss": 1.1422, + "step": 6076500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0528585331200565e-05, + "loss": 1.1602, + "step": 6077000 + }, + { + "epoch": 3.64, + "learning_rate": 3.052648536564e-05, + "loss": 1.1473, + "step": 6077500 + }, + { + "epoch": 3.64, + "learning_rate": 3.052438540007944e-05, + "loss": 1.1515, + "step": 6078000 + }, + { + "epoch": 3.64, + "learning_rate": 3.052228543451887e-05, + "loss": 1.153, + "step": 6078500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0520185468958306e-05, + "loss": 1.136, + "step": 6079000 + }, + { + "epoch": 3.64, + "learning_rate": 3.051808970332886e-05, + "loss": 1.134, + "step": 6079500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0515989737768296e-05, + "loss": 1.1399, + "step": 6080000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0513893972138857e-05, + "loss": 1.1549, + "step": 6080500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0511794006578293e-05, + "loss": 1.138, + "step": 6081000 + }, + { + "epoch": 3.65, + "learning_rate": 3.050969404101773e-05, + "loss": 1.1546, + "step": 6081500 + }, + { + "epoch": 3.65, + "learning_rate": 3.050759407545716e-05, + "loss": 1.1754, + "step": 6082000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0505494109896594e-05, + "loss": 1.1669, + "step": 6082500 + }, + { + "epoch": 3.65, + "learning_rate": 3.050339414433603e-05, + "loss": 1.1386, + "step": 6083000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0501294178775467e-05, + "loss": 1.1635, + "step": 6083500 + }, + { + "epoch": 3.65, + "learning_rate": 3.04991942132149e-05, + "loss": 1.1371, + "step": 6084000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0497098447585458e-05, + "loss": 1.1633, + "step": 6084500 + }, + { + "epoch": 3.65, + "learning_rate": 3.049499848202489e-05, + "loss": 1.1573, + "step": 6085000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0492898516464328e-05, + "loss": 1.1577, + "step": 6085500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0490798550903765e-05, + "loss": 1.1513, + "step": 6086000 + }, + { + "epoch": 3.65, + "learning_rate": 3.04886985853432e-05, + "loss": 1.1378, + "step": 6086500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0486602819713752e-05, + "loss": 1.1809, + "step": 6087000 + }, + { + "epoch": 3.65, + "learning_rate": 3.048450285415319e-05, + "loss": 1.1462, + "step": 6087500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0482402888592626e-05, + "loss": 1.1566, + "step": 6088000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0480302923032063e-05, + "loss": 1.1596, + "step": 6088500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0478202957471496e-05, + "loss": 1.1632, + "step": 6089000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0476102991910933e-05, + "loss": 1.1371, + "step": 6089500 + }, + { + "epoch": 3.65, + "learning_rate": 3.047400302635037e-05, + "loss": 1.1612, + "step": 6090000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0471903060789803e-05, + "loss": 1.1379, + "step": 6090500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0469807295160357e-05, + "loss": 1.1444, + "step": 6091000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0467707329599794e-05, + "loss": 1.1628, + "step": 6091500 + }, + { + "epoch": 3.65, + "learning_rate": 3.046560736403923e-05, + "loss": 1.1588, + "step": 6092000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0463507398478664e-05, + "loss": 1.1528, + "step": 6092500 + }, + { + "epoch": 3.65, + "learning_rate": 3.04614074329181e-05, + "loss": 1.1547, + "step": 6093000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0459311667288654e-05, + "loss": 1.1631, + "step": 6093500 + }, + { + "epoch": 3.65, + "learning_rate": 3.045721170172809e-05, + "loss": 1.134, + "step": 6094000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0455111736167528e-05, + "loss": 1.1726, + "step": 6094500 + }, + { + "epoch": 3.65, + "learning_rate": 3.045301177060696e-05, + "loss": 1.1727, + "step": 6095000 + }, + { + "epoch": 3.65, + "learning_rate": 3.04509118050464e-05, + "loss": 1.1599, + "step": 6095500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0448811839485835e-05, + "loss": 1.1706, + "step": 6096000 + }, + { + "epoch": 3.66, + "learning_rate": 3.044671187392527e-05, + "loss": 1.1685, + "step": 6096500 + }, + { + "epoch": 3.66, + "learning_rate": 3.04446119083647e-05, + "loss": 1.1744, + "step": 6097000 + }, + { + "epoch": 3.66, + "learning_rate": 3.044251614273526e-05, + "loss": 1.151, + "step": 6097500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0440416177174696e-05, + "loss": 1.1316, + "step": 6098000 + }, + { + "epoch": 3.66, + "learning_rate": 3.043832041154525e-05, + "loss": 1.1459, + "step": 6098500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0436220445984686e-05, + "loss": 1.1428, + "step": 6099000 + }, + { + "epoch": 3.66, + "learning_rate": 3.043412048042412e-05, + "loss": 1.1525, + "step": 6099500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0432020514863557e-05, + "loss": 1.1382, + "step": 6100000 + }, + { + "epoch": 3.66, + "eval_loss": 1.120174527168274, + "eval_runtime": 1109.1773, + "eval_samples_per_second": 474.874, + "eval_steps_per_second": 79.146, + "step": 6100000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0429920549302994e-05, + "loss": 1.1469, + "step": 6100500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0427820583742427e-05, + "loss": 1.1104, + "step": 6101000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0425724818112984e-05, + "loss": 1.1603, + "step": 6101500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0423624852552417e-05, + "loss": 1.1347, + "step": 6102000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0421524886991854e-05, + "loss": 1.1423, + "step": 6102500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0419429121362408e-05, + "loss": 1.156, + "step": 6103000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0417329155801845e-05, + "loss": 1.1341, + "step": 6103500 + }, + { + "epoch": 3.66, + "learning_rate": 3.041522919024128e-05, + "loss": 1.1442, + "step": 6104000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0413129224680715e-05, + "loss": 1.1476, + "step": 6104500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0411029259120152e-05, + "loss": 1.1441, + "step": 6105000 + }, + { + "epoch": 3.66, + "learning_rate": 3.040892929355959e-05, + "loss": 1.1622, + "step": 6105500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0406829327999022e-05, + "loss": 1.1632, + "step": 6106000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0404729362438452e-05, + "loss": 1.1527, + "step": 6106500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0402633596809013e-05, + "loss": 1.1411, + "step": 6107000 + }, + { + "epoch": 3.66, + "learning_rate": 3.040053363124845e-05, + "loss": 1.1428, + "step": 6107500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0398433665687886e-05, + "loss": 1.172, + "step": 6108000 + }, + { + "epoch": 3.66, + "learning_rate": 3.039633370012732e-05, + "loss": 1.1856, + "step": 6108500 + }, + { + "epoch": 3.66, + "learning_rate": 3.039423373456675e-05, + "loss": 1.1281, + "step": 6109000 + }, + { + "epoch": 3.66, + "learning_rate": 3.039213796893731e-05, + "loss": 1.1458, + "step": 6109500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0390038003376747e-05, + "loss": 1.1556, + "step": 6110000 + }, + { + "epoch": 3.66, + "learning_rate": 3.038793803781618e-05, + "loss": 1.1614, + "step": 6110500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0385838072255617e-05, + "loss": 1.1637, + "step": 6111000 + }, + { + "epoch": 3.66, + "learning_rate": 3.038374230662617e-05, + "loss": 1.142, + "step": 6111500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0381642341065608e-05, + "loss": 1.1465, + "step": 6112000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0379542375505045e-05, + "loss": 1.1742, + "step": 6112500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0377442409944478e-05, + "loss": 1.1331, + "step": 6113000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0375342444383908e-05, + "loss": 1.1521, + "step": 6113500 + }, + { + "epoch": 3.67, + "learning_rate": 3.037324667875447e-05, + "loss": 1.1347, + "step": 6114000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0371146713193905e-05, + "loss": 1.1877, + "step": 6114500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0369046747633342e-05, + "loss": 1.1685, + "step": 6115000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0366946782072776e-05, + "loss": 1.1454, + "step": 6115500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0364846816512206e-05, + "loss": 1.1869, + "step": 6116000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0362755250813886e-05, + "loss": 1.1657, + "step": 6116500 + }, + { + "epoch": 3.67, + "learning_rate": 3.036065528525332e-05, + "loss": 1.1685, + "step": 6117000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0358555319692756e-05, + "loss": 1.1543, + "step": 6117500 + }, + { + "epoch": 3.67, + "learning_rate": 3.035645955406331e-05, + "loss": 1.1836, + "step": 6118000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0354359588502747e-05, + "loss": 1.1189, + "step": 6118500 + }, + { + "epoch": 3.67, + "learning_rate": 3.035225962294218e-05, + "loss": 1.1723, + "step": 6119000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0350159657381617e-05, + "loss": 1.1785, + "step": 6119500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0348059691821054e-05, + "loss": 1.1609, + "step": 6120000 + }, + { + "epoch": 3.67, + "learning_rate": 3.034595972626049e-05, + "loss": 1.1392, + "step": 6120500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0343859760699924e-05, + "loss": 1.1431, + "step": 6121000 + }, + { + "epoch": 3.67, + "learning_rate": 3.034175979513936e-05, + "loss": 1.1535, + "step": 6121500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0339659829578798e-05, + "loss": 1.1606, + "step": 6122000 + }, + { + "epoch": 3.67, + "learning_rate": 3.033755986401823e-05, + "loss": 1.1676, + "step": 6122500 + }, + { + "epoch": 3.67, + "learning_rate": 3.033545989845766e-05, + "loss": 1.1375, + "step": 6123000 + }, + { + "epoch": 3.67, + "learning_rate": 3.03333599328971e-05, + "loss": 1.1511, + "step": 6123500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0331259967336532e-05, + "loss": 1.1453, + "step": 6124000 + }, + { + "epoch": 3.67, + "learning_rate": 3.032916000177597e-05, + "loss": 1.1658, + "step": 6124500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0327060036215406e-05, + "loss": 1.1464, + "step": 6125000 + }, + { + "epoch": 3.67, + "learning_rate": 3.032496427058596e-05, + "loss": 1.1363, + "step": 6125500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0322864305025396e-05, + "loss": 1.1365, + "step": 6126000 + }, + { + "epoch": 3.67, + "learning_rate": 3.032076433946483e-05, + "loss": 1.1587, + "step": 6126500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0318664373904266e-05, + "loss": 1.1528, + "step": 6127000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0316564408343703e-05, + "loss": 1.1654, + "step": 6127500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0314464442783137e-05, + "loss": 1.1701, + "step": 6128000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0312364477222573e-05, + "loss": 1.1637, + "step": 6128500 + }, + { + "epoch": 3.67, + "learning_rate": 3.031026451166201e-05, + "loss": 1.1302, + "step": 6129000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0308168746032564e-05, + "loss": 1.171, + "step": 6129500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0306068780472e-05, + "loss": 1.1277, + "step": 6130000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0303968814911434e-05, + "loss": 1.1384, + "step": 6130500 + }, + { + "epoch": 3.68, + "learning_rate": 3.030186884935087e-05, + "loss": 1.1523, + "step": 6131000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0299768883790308e-05, + "loss": 1.1489, + "step": 6131500 + }, + { + "epoch": 3.68, + "learning_rate": 3.029766891822974e-05, + "loss": 1.1221, + "step": 6132000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0295568952669178e-05, + "loss": 1.179, + "step": 6132500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0293473187039732e-05, + "loss": 1.141, + "step": 6133000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0291377421410285e-05, + "loss": 1.1342, + "step": 6133500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0289277455849722e-05, + "loss": 1.1506, + "step": 6134000 + }, + { + "epoch": 3.68, + "learning_rate": 3.028717749028916e-05, + "loss": 1.15, + "step": 6134500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0285077524728592e-05, + "loss": 1.1467, + "step": 6135000 + }, + { + "epoch": 3.68, + "learning_rate": 3.028297755916803e-05, + "loss": 1.1683, + "step": 6135500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0280877593607466e-05, + "loss": 1.1494, + "step": 6136000 + }, + { + "epoch": 3.68, + "learning_rate": 3.02787776280469e-05, + "loss": 1.1451, + "step": 6136500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0276677662486336e-05, + "loss": 1.1338, + "step": 6137000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0274577696925773e-05, + "loss": 1.1574, + "step": 6137500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0272477731365207e-05, + "loss": 1.1702, + "step": 6138000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0270377765804637e-05, + "loss": 1.1666, + "step": 6138500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0268277800244074e-05, + "loss": 1.131, + "step": 6139000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0266182034614634e-05, + "loss": 1.142, + "step": 6139500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0264086268985188e-05, + "loss": 1.151, + "step": 6140000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0261986303424624e-05, + "loss": 1.1583, + "step": 6140500 + }, + { + "epoch": 3.68, + "learning_rate": 3.025988633786406e-05, + "loss": 1.155, + "step": 6141000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0257786372303495e-05, + "loss": 1.1406, + "step": 6141500 + }, + { + "epoch": 3.68, + "learning_rate": 3.025568640674293e-05, + "loss": 1.1571, + "step": 6142000 + }, + { + "epoch": 3.68, + "learning_rate": 3.025358644118237e-05, + "loss": 1.154, + "step": 6142500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0251486475621795e-05, + "loss": 1.1897, + "step": 6143000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0249386510061232e-05, + "loss": 1.1374, + "step": 6143500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0247290744431792e-05, + "loss": 1.1547, + "step": 6144000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0245194978802346e-05, + "loss": 1.1428, + "step": 6144500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0243095013241783e-05, + "loss": 1.1508, + "step": 6145000 + }, + { + "epoch": 3.68, + "learning_rate": 3.024099504768122e-05, + "loss": 1.1623, + "step": 6145500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0238895082120653e-05, + "loss": 1.1682, + "step": 6146000 + }, + { + "epoch": 3.69, + "learning_rate": 3.023679511656009e-05, + "loss": 1.1247, + "step": 6146500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0234699350930644e-05, + "loss": 1.1689, + "step": 6147000 + }, + { + "epoch": 3.69, + "learning_rate": 3.023259938537008e-05, + "loss": 1.1537, + "step": 6147500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0230499419809517e-05, + "loss": 1.1619, + "step": 6148000 + }, + { + "epoch": 3.69, + "learning_rate": 3.022839945424895e-05, + "loss": 1.1738, + "step": 6148500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0226299488688388e-05, + "loss": 1.1228, + "step": 6149000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0224199523127824e-05, + "loss": 1.1653, + "step": 6149500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0222103757498378e-05, + "loss": 1.1271, + "step": 6150000 + }, + { + "epoch": 3.69, + "learning_rate": 3.022000379193781e-05, + "loss": 1.1697, + "step": 6150500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0217903826377248e-05, + "loss": 1.156, + "step": 6151000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0215803860816685e-05, + "loss": 1.1462, + "step": 6151500 + }, + { + "epoch": 3.69, + "learning_rate": 3.021370389525612e-05, + "loss": 1.1442, + "step": 6152000 + }, + { + "epoch": 3.69, + "learning_rate": 3.021160392969555e-05, + "loss": 1.1822, + "step": 6152500 + }, + { + "epoch": 3.69, + "learning_rate": 3.020950816406611e-05, + "loss": 1.1674, + "step": 6153000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0207408198505546e-05, + "loss": 1.1494, + "step": 6153500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0205308232944983e-05, + "loss": 1.1846, + "step": 6154000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0203208267384416e-05, + "loss": 1.1832, + "step": 6154500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0201108301823846e-05, + "loss": 1.1234, + "step": 6155000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0199008336263283e-05, + "loss": 1.1712, + "step": 6155500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0196912570633843e-05, + "loss": 1.1637, + "step": 6156000 + }, + { + "epoch": 3.69, + "learning_rate": 3.019481260507328e-05, + "loss": 1.1471, + "step": 6156500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0192712639512714e-05, + "loss": 1.1457, + "step": 6157000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0190612673952144e-05, + "loss": 1.1688, + "step": 6157500 + }, + { + "epoch": 3.69, + "learning_rate": 3.018851270839158e-05, + "loss": 1.1388, + "step": 6158000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0186412742831014e-05, + "loss": 1.1892, + "step": 6158500 + }, + { + "epoch": 3.69, + "learning_rate": 3.018431277727045e-05, + "loss": 1.1355, + "step": 6159000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0182212811709888e-05, + "loss": 1.1735, + "step": 6159500 + }, + { + "epoch": 3.69, + "learning_rate": 3.018011704608044e-05, + "loss": 1.1391, + "step": 6160000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0178017080519878e-05, + "loss": 1.1477, + "step": 6160500 + }, + { + "epoch": 3.69, + "learning_rate": 3.017591711495931e-05, + "loss": 1.1407, + "step": 6161000 + }, + { + "epoch": 3.69, + "learning_rate": 3.017381714939875e-05, + "loss": 1.1555, + "step": 6161500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0171721383769302e-05, + "loss": 1.1495, + "step": 6162000 + }, + { + "epoch": 3.69, + "learning_rate": 3.016962141820874e-05, + "loss": 1.1324, + "step": 6162500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0167521452648176e-05, + "loss": 1.1659, + "step": 6163000 + }, + { + "epoch": 3.7, + "learning_rate": 3.016542148708761e-05, + "loss": 1.1679, + "step": 6163500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0163321521527046e-05, + "loss": 1.1372, + "step": 6164000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0161229955828723e-05, + "loss": 1.1401, + "step": 6164500 + }, + { + "epoch": 3.7, + "learning_rate": 3.015913419019928e-05, + "loss": 1.148, + "step": 6165000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0157034224638714e-05, + "loss": 1.1725, + "step": 6165500 + }, + { + "epoch": 3.7, + "learning_rate": 3.015493425907815e-05, + "loss": 1.1322, + "step": 6166000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0152834293517587e-05, + "loss": 1.1643, + "step": 6166500 + }, + { + "epoch": 3.7, + "learning_rate": 3.015073432795702e-05, + "loss": 1.1411, + "step": 6167000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0148634362396458e-05, + "loss": 1.1757, + "step": 6167500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0146534396835894e-05, + "loss": 1.1603, + "step": 6168000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0144434431275328e-05, + "loss": 1.1595, + "step": 6168500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0142334465714765e-05, + "loss": 1.1181, + "step": 6169000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0140238700085318e-05, + "loss": 1.1606, + "step": 6169500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0138138734524755e-05, + "loss": 1.1676, + "step": 6170000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0136038768964192e-05, + "loss": 1.1244, + "step": 6170500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0133938803403625e-05, + "loss": 1.1441, + "step": 6171000 + }, + { + "epoch": 3.7, + "learning_rate": 3.013184303777418e-05, + "loss": 1.1508, + "step": 6171500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0129743072213616e-05, + "loss": 1.1545, + "step": 6172000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0127643106653053e-05, + "loss": 1.139, + "step": 6172500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0125543141092486e-05, + "loss": 1.172, + "step": 6173000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0123443175531923e-05, + "loss": 1.1656, + "step": 6173500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0121343209971353e-05, + "loss": 1.1637, + "step": 6174000 + }, + { + "epoch": 3.7, + "learning_rate": 3.011924324441079e-05, + "loss": 1.1281, + "step": 6174500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0117143278850223e-05, + "loss": 1.1649, + "step": 6175000 + }, + { + "epoch": 3.7, + "learning_rate": 3.011504331328966e-05, + "loss": 1.1623, + "step": 6175500 + }, + { + "epoch": 3.7, + "learning_rate": 3.011294754766022e-05, + "loss": 1.1399, + "step": 6176000 + }, + { + "epoch": 3.7, + "learning_rate": 3.011084758209965e-05, + "loss": 1.1642, + "step": 6176500 + }, + { + "epoch": 3.7, + "learning_rate": 3.010875181647021e-05, + "loss": 1.1463, + "step": 6177000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0106651850909648e-05, + "loss": 1.1625, + "step": 6177500 + }, + { + "epoch": 3.7, + "learning_rate": 3.010455188534908e-05, + "loss": 1.1537, + "step": 6178000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0102451919788518e-05, + "loss": 1.1777, + "step": 6178500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0100351954227948e-05, + "loss": 1.1529, + "step": 6179000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0098251988667382e-05, + "loss": 1.1825, + "step": 6179500 + }, + { + "epoch": 3.71, + "learning_rate": 3.009615202310682e-05, + "loss": 1.1534, + "step": 6180000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0094052057546255e-05, + "loss": 1.161, + "step": 6180500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0091952091985692e-05, + "loss": 1.1582, + "step": 6181000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0089852126425126e-05, + "loss": 1.1355, + "step": 6181500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0087752160864563e-05, + "loss": 1.1742, + "step": 6182000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0085656395235116e-05, + "loss": 1.1637, + "step": 6182500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0083556429674553e-05, + "loss": 1.1849, + "step": 6183000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0081460664045107e-05, + "loss": 1.1681, + "step": 6183500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0079360698484543e-05, + "loss": 1.1403, + "step": 6184000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0077260732923977e-05, + "loss": 1.1678, + "step": 6184500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0075160767363414e-05, + "loss": 1.1464, + "step": 6185000 + }, + { + "epoch": 3.71, + "learning_rate": 3.007306080180285e-05, + "loss": 1.1458, + "step": 6185500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0070960836242284e-05, + "loss": 1.1497, + "step": 6186000 + }, + { + "epoch": 3.71, + "learning_rate": 3.006886087068172e-05, + "loss": 1.1336, + "step": 6186500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0066760905121158e-05, + "loss": 1.1482, + "step": 6187000 + }, + { + "epoch": 3.71, + "learning_rate": 3.006466093956059e-05, + "loss": 1.157, + "step": 6187500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0062560974000028e-05, + "loss": 1.1463, + "step": 6188000 + }, + { + "epoch": 3.71, + "learning_rate": 3.006046520837058e-05, + "loss": 1.1655, + "step": 6188500 + }, + { + "epoch": 3.71, + "learning_rate": 3.005836524281002e-05, + "loss": 1.1561, + "step": 6189000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0056265277249455e-05, + "loss": 1.1351, + "step": 6189500 + }, + { + "epoch": 3.71, + "learning_rate": 3.005416531168889e-05, + "loss": 1.1453, + "step": 6190000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0052065346128326e-05, + "loss": 1.1432, + "step": 6190500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0049965380567762e-05, + "loss": 1.1202, + "step": 6191000 + }, + { + "epoch": 3.71, + "learning_rate": 3.004786541500719e-05, + "loss": 1.1381, + "step": 6191500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0045765449446626e-05, + "loss": 1.1366, + "step": 6192000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0043673883748306e-05, + "loss": 1.1712, + "step": 6192500 + }, + { + "epoch": 3.71, + "learning_rate": 3.004157391818774e-05, + "loss": 1.1298, + "step": 6193000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0039473952627177e-05, + "loss": 1.1207, + "step": 6193500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0037373987066614e-05, + "loss": 1.1794, + "step": 6194000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0035274021506047e-05, + "loss": 1.1426, + "step": 6194500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0033174055945484e-05, + "loss": 1.133, + "step": 6195000 + }, + { + "epoch": 3.71, + "learning_rate": 3.003107409038492e-05, + "loss": 1.1499, + "step": 6195500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0028978324755474e-05, + "loss": 1.1531, + "step": 6196000 + }, + { + "epoch": 3.72, + "learning_rate": 3.002687835919491e-05, + "loss": 1.155, + "step": 6196500 + }, + { + "epoch": 3.72, + "learning_rate": 3.0024778393634345e-05, + "loss": 1.1642, + "step": 6197000 + }, + { + "epoch": 3.72, + "learning_rate": 3.002267842807378e-05, + "loss": 1.1266, + "step": 6197500 + }, + { + "epoch": 3.72, + "learning_rate": 3.002057846251322e-05, + "loss": 1.1724, + "step": 6198000 + }, + { + "epoch": 3.72, + "learning_rate": 3.0018478496952645e-05, + "loss": 1.1978, + "step": 6198500 + }, + { + "epoch": 3.72, + "learning_rate": 3.0016378531392082e-05, + "loss": 1.1399, + "step": 6199000 + }, + { + "epoch": 3.72, + "learning_rate": 3.001427856583152e-05, + "loss": 1.164, + "step": 6199500 + }, + { + "epoch": 3.72, + "learning_rate": 3.001218280020208e-05, + "loss": 1.1524, + "step": 6200000 + }, + { + "epoch": 3.72, + "eval_loss": 1.116134762763977, + "eval_runtime": 1099.3849, + "eval_samples_per_second": 479.104, + "eval_steps_per_second": 79.851, + "step": 6200000 + }, + { + "epoch": 3.72, + "learning_rate": 3.0010082834641516e-05, + "loss": 1.1378, + "step": 6200500 + }, + { + "epoch": 3.72, + "learning_rate": 3.0007982869080943e-05, + "loss": 1.1335, + "step": 6201000 + }, + { + "epoch": 3.72, + "learning_rate": 3.000588290352038e-05, + "loss": 1.142, + "step": 6201500 + }, + { + "epoch": 3.72, + "learning_rate": 3.000378713789094e-05, + "loss": 1.1316, + "step": 6202000 + }, + { + "epoch": 3.72, + "learning_rate": 3.0001691372261493e-05, + "loss": 1.1503, + "step": 6202500 + }, + { + "epoch": 3.72, + "learning_rate": 2.999959140670093e-05, + "loss": 1.1535, + "step": 6203000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9997491441140367e-05, + "loss": 1.1509, + "step": 6203500 + }, + { + "epoch": 3.72, + "learning_rate": 2.99953914755798e-05, + "loss": 1.1635, + "step": 6204000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9993291510019237e-05, + "loss": 1.1405, + "step": 6204500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9991191544458674e-05, + "loss": 1.1681, + "step": 6205000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9989095778829228e-05, + "loss": 1.1532, + "step": 6205500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9986995813268665e-05, + "loss": 1.1368, + "step": 6206000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9984895847708098e-05, + "loss": 1.139, + "step": 6206500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9982795882147535e-05, + "loss": 1.143, + "step": 6207000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9980695916586972e-05, + "loss": 1.1573, + "step": 6207500 + }, + { + "epoch": 3.72, + "learning_rate": 2.99785959510264e-05, + "loss": 1.134, + "step": 6208000 + }, + { + "epoch": 3.72, + "learning_rate": 2.997650018539696e-05, + "loss": 1.1822, + "step": 6208500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9974400219836396e-05, + "loss": 1.1359, + "step": 6209000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9972300254275832e-05, + "loss": 1.1359, + "step": 6209500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9970204488646386e-05, + "loss": 1.1415, + "step": 6210000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9968104523085823e-05, + "loss": 1.1546, + "step": 6210500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9966004557525256e-05, + "loss": 1.1757, + "step": 6211000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9963904591964693e-05, + "loss": 1.1412, + "step": 6211500 + }, + { + "epoch": 3.72, + "learning_rate": 2.996180462640413e-05, + "loss": 1.1388, + "step": 6212000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9959704660843564e-05, + "loss": 1.1408, + "step": 6212500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9957604695282994e-05, + "loss": 1.1606, + "step": 6213000 + }, + { + "epoch": 3.73, + "learning_rate": 2.995550472972243e-05, + "loss": 1.154, + "step": 6213500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9953404764161867e-05, + "loss": 1.1552, + "step": 6214000 + }, + { + "epoch": 3.73, + "learning_rate": 2.99513047986013e-05, + "loss": 1.1552, + "step": 6214500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9949204833040738e-05, + "loss": 1.125, + "step": 6215000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9947104867480174e-05, + "loss": 1.1474, + "step": 6215500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9945009101850728e-05, + "loss": 1.1463, + "step": 6216000 + }, + { + "epoch": 3.73, + "learning_rate": 2.994290913629016e-05, + "loss": 1.1347, + "step": 6216500 + }, + { + "epoch": 3.73, + "learning_rate": 2.99408091707296e-05, + "loss": 1.1755, + "step": 6217000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9938709205169035e-05, + "loss": 1.1816, + "step": 6217500 + }, + { + "epoch": 3.73, + "learning_rate": 2.993661343953959e-05, + "loss": 1.1187, + "step": 6218000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9934513473979026e-05, + "loss": 1.1528, + "step": 6218500 + }, + { + "epoch": 3.73, + "learning_rate": 2.993241350841846e-05, + "loss": 1.163, + "step": 6219000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9930313542857896e-05, + "loss": 1.1477, + "step": 6219500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9928213577297333e-05, + "loss": 1.1582, + "step": 6220000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9926113611736766e-05, + "loss": 1.1518, + "step": 6220500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9924017846107323e-05, + "loss": 1.171, + "step": 6221000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9921917880546757e-05, + "loss": 1.1644, + "step": 6221500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9919817914986193e-05, + "loss": 1.1546, + "step": 6222000 + }, + { + "epoch": 3.73, + "learning_rate": 2.991771794942563e-05, + "loss": 1.1343, + "step": 6222500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9915617983865064e-05, + "loss": 1.1176, + "step": 6223000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9913522218235617e-05, + "loss": 1.1206, + "step": 6223500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9911422252675054e-05, + "loss": 1.1348, + "step": 6224000 + }, + { + "epoch": 3.73, + "learning_rate": 2.990932228711449e-05, + "loss": 1.1622, + "step": 6224500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9907222321553925e-05, + "loss": 1.1402, + "step": 6225000 + }, + { + "epoch": 3.73, + "learning_rate": 2.990512235599336e-05, + "loss": 1.1905, + "step": 6225500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9903022390432798e-05, + "loss": 1.1585, + "step": 6226000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9900922424872235e-05, + "loss": 1.1717, + "step": 6226500 + }, + { + "epoch": 3.73, + "learning_rate": 2.989882245931167e-05, + "loss": 1.1645, + "step": 6227000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9896726693682222e-05, + "loss": 1.1655, + "step": 6227500 + }, + { + "epoch": 3.73, + "learning_rate": 2.989463092805278e-05, + "loss": 1.1356, + "step": 6228000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9892530962492213e-05, + "loss": 1.1626, + "step": 6228500 + }, + { + "epoch": 3.73, + "learning_rate": 2.989043099693165e-05, + "loss": 1.1559, + "step": 6229000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9888331031371086e-05, + "loss": 1.1745, + "step": 6229500 + }, + { + "epoch": 3.74, + "learning_rate": 2.988623106581052e-05, + "loss": 1.1542, + "step": 6230000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9884131100249957e-05, + "loss": 1.1397, + "step": 6230500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9882031134689393e-05, + "loss": 1.1583, + "step": 6231000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9879931169128827e-05, + "loss": 1.1385, + "step": 6231500 + }, + { + "epoch": 3.74, + "learning_rate": 2.987783540349938e-05, + "loss": 1.1636, + "step": 6232000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9875735437938817e-05, + "loss": 1.1114, + "step": 6232500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9873635472378254e-05, + "loss": 1.1735, + "step": 6233000 + }, + { + "epoch": 3.74, + "learning_rate": 2.987153550681769e-05, + "loss": 1.1443, + "step": 6233500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9869439741188245e-05, + "loss": 1.1686, + "step": 6234000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9867339775627678e-05, + "loss": 1.1631, + "step": 6234500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9865244009998235e-05, + "loss": 1.161, + "step": 6235000 + }, + { + "epoch": 3.74, + "learning_rate": 2.986314404443767e-05, + "loss": 1.1653, + "step": 6235500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9861044078877105e-05, + "loss": 1.1406, + "step": 6236000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9858944113316542e-05, + "loss": 1.1427, + "step": 6236500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9856844147755976e-05, + "loss": 1.1699, + "step": 6237000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9854744182195412e-05, + "loss": 1.1711, + "step": 6237500 + }, + { + "epoch": 3.74, + "learning_rate": 2.985264421663485e-05, + "loss": 1.1351, + "step": 6238000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9850544251074283e-05, + "loss": 1.1303, + "step": 6238500 + }, + { + "epoch": 3.74, + "learning_rate": 2.984844848544484e-05, + "loss": 1.1704, + "step": 6239000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9846348519884273e-05, + "loss": 1.1441, + "step": 6239500 + }, + { + "epoch": 3.74, + "learning_rate": 2.984424855432371e-05, + "loss": 1.1517, + "step": 6240000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9842152788694264e-05, + "loss": 1.1693, + "step": 6240500 + }, + { + "epoch": 3.74, + "learning_rate": 2.98400528231337e-05, + "loss": 1.1604, + "step": 6241000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9837952857573134e-05, + "loss": 1.1735, + "step": 6241500 + }, + { + "epoch": 3.74, + "learning_rate": 2.983585289201257e-05, + "loss": 1.1373, + "step": 6242000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9833757126383124e-05, + "loss": 1.1246, + "step": 6242500 + }, + { + "epoch": 3.74, + "learning_rate": 2.983165716082256e-05, + "loss": 1.1595, + "step": 6243000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9829557195261998e-05, + "loss": 1.1617, + "step": 6243500 + }, + { + "epoch": 3.74, + "learning_rate": 2.982745722970143e-05, + "loss": 1.1339, + "step": 6244000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9825357264140868e-05, + "loss": 1.1773, + "step": 6244500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9823257298580305e-05, + "loss": 1.1555, + "step": 6245000 + }, + { + "epoch": 3.74, + "learning_rate": 2.982116153295086e-05, + "loss": 1.1475, + "step": 6245500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9819061567390296e-05, + "loss": 1.1174, + "step": 6246000 + }, + { + "epoch": 3.75, + "learning_rate": 2.981696160182973e-05, + "loss": 1.1641, + "step": 6246500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9814861636269166e-05, + "loss": 1.1374, + "step": 6247000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9812761670708603e-05, + "loss": 1.1506, + "step": 6247500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9810661705148036e-05, + "loss": 1.143, + "step": 6248000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9808561739587473e-05, + "loss": 1.1523, + "step": 6248500 + }, + { + "epoch": 3.75, + "learning_rate": 2.980646177402691e-05, + "loss": 1.1386, + "step": 6249000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9804366008397463e-05, + "loss": 1.1433, + "step": 6249500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9802266042836897e-05, + "loss": 1.1422, + "step": 6250000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9800170277207454e-05, + "loss": 1.1313, + "step": 6250500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9798070311646887e-05, + "loss": 1.1299, + "step": 6251000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9795970346086324e-05, + "loss": 1.1819, + "step": 6251500 + }, + { + "epoch": 3.75, + "learning_rate": 2.979387038052576e-05, + "loss": 1.1488, + "step": 6252000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9791770414965194e-05, + "loss": 1.1652, + "step": 6252500 + }, + { + "epoch": 3.75, + "learning_rate": 2.978967464933575e-05, + "loss": 1.1318, + "step": 6253000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9787574683775185e-05, + "loss": 1.1702, + "step": 6253500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9785474718214622e-05, + "loss": 1.1657, + "step": 6254000 + }, + { + "epoch": 3.75, + "learning_rate": 2.978337475265406e-05, + "loss": 1.1684, + "step": 6254500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9781274787093492e-05, + "loss": 1.1595, + "step": 6255000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9779179021464046e-05, + "loss": 1.1631, + "step": 6255500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9777079055903482e-05, + "loss": 1.1437, + "step": 6256000 + }, + { + "epoch": 3.75, + "learning_rate": 2.977497909034292e-05, + "loss": 1.1439, + "step": 6256500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9772879124782353e-05, + "loss": 1.1488, + "step": 6257000 + }, + { + "epoch": 3.75, + "learning_rate": 2.977077915922179e-05, + "loss": 1.1564, + "step": 6257500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9768679193661226e-05, + "loss": 1.1542, + "step": 6258000 + }, + { + "epoch": 3.75, + "learning_rate": 2.976657922810066e-05, + "loss": 1.15, + "step": 6258500 + }, + { + "epoch": 3.75, + "learning_rate": 2.976447926254009e-05, + "loss": 1.1665, + "step": 6259000 + }, + { + "epoch": 3.75, + "learning_rate": 2.976238349691065e-05, + "loss": 1.1457, + "step": 6259500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9760287731281207e-05, + "loss": 1.1529, + "step": 6260000 + }, + { + "epoch": 3.75, + "learning_rate": 2.975818776572064e-05, + "loss": 1.1935, + "step": 6260500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9756087800160078e-05, + "loss": 1.1577, + "step": 6261000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9753987834599514e-05, + "loss": 1.1538, + "step": 6261500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9751887869038948e-05, + "loss": 1.1364, + "step": 6262000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9749787903478385e-05, + "loss": 1.166, + "step": 6262500 + }, + { + "epoch": 3.75, + "learning_rate": 2.974768793791782e-05, + "loss": 1.1569, + "step": 6263000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9745592172288375e-05, + "loss": 1.158, + "step": 6263500 + }, + { + "epoch": 3.76, + "learning_rate": 2.974349220672781e-05, + "loss": 1.15, + "step": 6264000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9741392241167245e-05, + "loss": 1.1489, + "step": 6264500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9739292275606682e-05, + "loss": 1.1415, + "step": 6265000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9737196509977236e-05, + "loss": 1.1564, + "step": 6265500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9735096544416673e-05, + "loss": 1.1131, + "step": 6266000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9732996578856106e-05, + "loss": 1.1563, + "step": 6266500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9730900813226663e-05, + "loss": 1.1357, + "step": 6267000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9728800847666097e-05, + "loss": 1.1619, + "step": 6267500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9726700882105533e-05, + "loss": 1.1224, + "step": 6268000 + }, + { + "epoch": 3.76, + "learning_rate": 2.972460091654497e-05, + "loss": 1.1513, + "step": 6268500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9722500950984404e-05, + "loss": 1.143, + "step": 6269000 + }, + { + "epoch": 3.76, + "learning_rate": 2.972040098542384e-05, + "loss": 1.152, + "step": 6269500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9718301019863277e-05, + "loss": 1.1715, + "step": 6270000 + }, + { + "epoch": 3.76, + "learning_rate": 2.971620105430271e-05, + "loss": 1.1505, + "step": 6270500 + }, + { + "epoch": 3.76, + "learning_rate": 2.971410108874214e-05, + "loss": 1.1466, + "step": 6271000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9712001123181578e-05, + "loss": 1.1332, + "step": 6271500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9709901157621015e-05, + "loss": 1.1262, + "step": 6272000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9707805391991575e-05, + "loss": 1.1396, + "step": 6272500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9705705426431002e-05, + "loss": 1.1239, + "step": 6273000 + }, + { + "epoch": 3.76, + "learning_rate": 2.970360546087044e-05, + "loss": 1.184, + "step": 6273500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9701505495309875e-05, + "loss": 1.1437, + "step": 6274000 + }, + { + "epoch": 3.76, + "learning_rate": 2.969940552974931e-05, + "loss": 1.1741, + "step": 6274500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9697305564188746e-05, + "loss": 1.1421, + "step": 6275000 + }, + { + "epoch": 3.76, + "learning_rate": 2.96952097985593e-05, + "loss": 1.1659, + "step": 6275500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9693109832998736e-05, + "loss": 1.1747, + "step": 6276000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9691009867438173e-05, + "loss": 1.1387, + "step": 6276500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9688909901877606e-05, + "loss": 1.1428, + "step": 6277000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9686809936317043e-05, + "loss": 1.1533, + "step": 6277500 + }, + { + "epoch": 3.76, + "learning_rate": 2.968470997075648e-05, + "loss": 1.1676, + "step": 6278000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9682610005195914e-05, + "loss": 1.1697, + "step": 6278500 + }, + { + "epoch": 3.76, + "learning_rate": 2.968051003963535e-05, + "loss": 1.1545, + "step": 6279000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9678414274005904e-05, + "loss": 1.14, + "step": 6279500 + }, + { + "epoch": 3.77, + "learning_rate": 2.967631430844534e-05, + "loss": 1.1575, + "step": 6280000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9674218542815894e-05, + "loss": 1.1682, + "step": 6280500 + }, + { + "epoch": 3.77, + "learning_rate": 2.967211857725533e-05, + "loss": 1.1556, + "step": 6281000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9670018611694765e-05, + "loss": 1.1523, + "step": 6281500 + }, + { + "epoch": 3.77, + "learning_rate": 2.96679186461342e-05, + "loss": 1.1714, + "step": 6282000 + }, + { + "epoch": 3.77, + "learning_rate": 2.966581868057364e-05, + "loss": 1.1294, + "step": 6282500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9663718715013072e-05, + "loss": 1.1608, + "step": 6283000 + }, + { + "epoch": 3.77, + "learning_rate": 2.966162294938363e-05, + "loss": 1.1272, + "step": 6283500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9659522983823062e-05, + "loss": 1.1666, + "step": 6284000 + }, + { + "epoch": 3.77, + "learning_rate": 2.96574230182625e-05, + "loss": 1.1339, + "step": 6284500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9655323052701936e-05, + "loss": 1.1549, + "step": 6285000 + }, + { + "epoch": 3.77, + "learning_rate": 2.965322308714137e-05, + "loss": 1.1629, + "step": 6285500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9651123121580806e-05, + "loss": 1.149, + "step": 6286000 + }, + { + "epoch": 3.77, + "learning_rate": 2.964902735595136e-05, + "loss": 1.1445, + "step": 6286500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9646927390390797e-05, + "loss": 1.1671, + "step": 6287000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9644827424830234e-05, + "loss": 1.1227, + "step": 6287500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9642727459269667e-05, + "loss": 1.1383, + "step": 6288000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9640627493709104e-05, + "loss": 1.1429, + "step": 6288500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9638531728079657e-05, + "loss": 1.1414, + "step": 6289000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9636431762519094e-05, + "loss": 1.176, + "step": 6289500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9634331796958528e-05, + "loss": 1.1837, + "step": 6290000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9632231831397965e-05, + "loss": 1.1506, + "step": 6290500 + }, + { + "epoch": 3.77, + "learning_rate": 2.96301318658374e-05, + "loss": 1.13, + "step": 6291000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9628036100207955e-05, + "loss": 1.1653, + "step": 6291500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9625936134647392e-05, + "loss": 1.163, + "step": 6292000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9623836169086825e-05, + "loss": 1.1654, + "step": 6292500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9621736203526262e-05, + "loss": 1.1521, + "step": 6293000 + }, + { + "epoch": 3.77, + "learning_rate": 2.96196362379657e-05, + "loss": 1.1532, + "step": 6293500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9617536272405133e-05, + "loss": 1.1584, + "step": 6294000 + }, + { + "epoch": 3.77, + "learning_rate": 2.961543630684457e-05, + "loss": 1.1674, + "step": 6294500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9613336341284006e-05, + "loss": 1.1768, + "step": 6295000 + }, + { + "epoch": 3.77, + "learning_rate": 2.961124057565456e-05, + "loss": 1.1619, + "step": 6295500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9609140610093997e-05, + "loss": 1.1394, + "step": 6296000 + }, + { + "epoch": 3.78, + "learning_rate": 2.960704064453343e-05, + "loss": 1.173, + "step": 6296500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9604940678972867e-05, + "loss": 1.1725, + "step": 6297000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9602840713412304e-05, + "loss": 1.1771, + "step": 6297500 + }, + { + "epoch": 3.78, + "learning_rate": 2.960074074785173e-05, + "loss": 1.1591, + "step": 6298000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9598644982222294e-05, + "loss": 1.1779, + "step": 6298500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9596545016661728e-05, + "loss": 1.1493, + "step": 6299000 + }, + { + "epoch": 3.78, + "learning_rate": 2.959444925103228e-05, + "loss": 1.1416, + "step": 6299500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9592349285471718e-05, + "loss": 1.1302, + "step": 6300000 + }, + { + "epoch": 3.78, + "eval_loss": 1.1142629384994507, + "eval_runtime": 1100.8622, + "eval_samples_per_second": 478.461, + "eval_steps_per_second": 79.744, + "step": 6300000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9590249319911155e-05, + "loss": 1.1701, + "step": 6300500 + }, + { + "epoch": 3.78, + "learning_rate": 2.958814935435059e-05, + "loss": 1.1607, + "step": 6301000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9586049388790025e-05, + "loss": 1.1362, + "step": 6301500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9583949423229462e-05, + "loss": 1.1545, + "step": 6302000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9581849457668892e-05, + "loss": 1.1501, + "step": 6302500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9579749492108326e-05, + "loss": 1.1484, + "step": 6303000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9577649526547762e-05, + "loss": 1.1433, + "step": 6303500 + }, + { + "epoch": 3.78, + "learning_rate": 2.95755495609872e-05, + "loss": 1.1189, + "step": 6304000 + }, + { + "epoch": 3.78, + "learning_rate": 2.957345379535776e-05, + "loss": 1.1372, + "step": 6304500 + }, + { + "epoch": 3.78, + "learning_rate": 2.957135382979719e-05, + "loss": 1.1482, + "step": 6305000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9569253864236623e-05, + "loss": 1.1552, + "step": 6305500 + }, + { + "epoch": 3.78, + "learning_rate": 2.956715389867606e-05, + "loss": 1.145, + "step": 6306000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9565053933115497e-05, + "loss": 1.1358, + "step": 6306500 + }, + { + "epoch": 3.78, + "learning_rate": 2.956295396755493e-05, + "loss": 1.1507, + "step": 6307000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9560854001994367e-05, + "loss": 1.1342, + "step": 6307500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9558754036433804e-05, + "loss": 1.1372, + "step": 6308000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9556658270804358e-05, + "loss": 1.133, + "step": 6308500 + }, + { + "epoch": 3.78, + "learning_rate": 2.955455830524379e-05, + "loss": 1.1505, + "step": 6309000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9552458339683228e-05, + "loss": 1.1621, + "step": 6309500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9550358374122665e-05, + "loss": 1.1193, + "step": 6310000 + }, + { + "epoch": 3.78, + "learning_rate": 2.954826260849322e-05, + "loss": 1.1537, + "step": 6310500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9546162642932655e-05, + "loss": 1.1231, + "step": 6311000 + }, + { + "epoch": 3.78, + "learning_rate": 2.954406267737209e-05, + "loss": 1.171, + "step": 6311500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9541966911742646e-05, + "loss": 1.1666, + "step": 6312000 + }, + { + "epoch": 3.78, + "learning_rate": 2.953986694618208e-05, + "loss": 1.1388, + "step": 6312500 + }, + { + "epoch": 3.78, + "learning_rate": 2.953777118055264e-05, + "loss": 1.1517, + "step": 6313000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9535671214992076e-05, + "loss": 1.1569, + "step": 6313500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9533571249431513e-05, + "loss": 1.1477, + "step": 6314000 + }, + { + "epoch": 3.79, + "learning_rate": 2.953147128387094e-05, + "loss": 1.1786, + "step": 6314500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9529371318310377e-05, + "loss": 1.1513, + "step": 6315000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9527271352749813e-05, + "loss": 1.1475, + "step": 6315500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9525171387189247e-05, + "loss": 1.154, + "step": 6316000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9523071421628684e-05, + "loss": 1.1686, + "step": 6316500 + }, + { + "epoch": 3.79, + "learning_rate": 2.952097145606812e-05, + "loss": 1.1663, + "step": 6317000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9518871490507554e-05, + "loss": 1.1444, + "step": 6317500 + }, + { + "epoch": 3.79, + "learning_rate": 2.951677152494699e-05, + "loss": 1.1384, + "step": 6318000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9514671559386428e-05, + "loss": 1.1615, + "step": 6318500 + }, + { + "epoch": 3.79, + "learning_rate": 2.951257579375698e-05, + "loss": 1.1554, + "step": 6319000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9510475828196418e-05, + "loss": 1.1294, + "step": 6319500 + }, + { + "epoch": 3.79, + "learning_rate": 2.950837586263585e-05, + "loss": 1.1821, + "step": 6320000 + }, + { + "epoch": 3.79, + "learning_rate": 2.950628009700641e-05, + "loss": 1.1441, + "step": 6320500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9504180131445842e-05, + "loss": 1.169, + "step": 6321000 + }, + { + "epoch": 3.79, + "learning_rate": 2.950208016588528e-05, + "loss": 1.1573, + "step": 6321500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9499980200324716e-05, + "loss": 1.115, + "step": 6322000 + }, + { + "epoch": 3.79, + "learning_rate": 2.949788023476415e-05, + "loss": 1.1358, + "step": 6322500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9495780269203586e-05, + "loss": 1.1601, + "step": 6323000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9493680303643023e-05, + "loss": 1.1571, + "step": 6323500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9491580338082456e-05, + "loss": 1.1412, + "step": 6324000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9489484572453013e-05, + "loss": 1.1825, + "step": 6324500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9487384606892447e-05, + "loss": 1.1538, + "step": 6325000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9485284641331884e-05, + "loss": 1.1491, + "step": 6325500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9483188875702437e-05, + "loss": 1.1397, + "step": 6326000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9481088910141874e-05, + "loss": 1.1431, + "step": 6326500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9478988944581308e-05, + "loss": 1.1394, + "step": 6327000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9476888979020744e-05, + "loss": 1.1483, + "step": 6327500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9474793213391298e-05, + "loss": 1.1308, + "step": 6328000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9472693247830735e-05, + "loss": 1.1452, + "step": 6328500 + }, + { + "epoch": 3.79, + "learning_rate": 2.947059328227017e-05, + "loss": 1.1507, + "step": 6329000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9468493316709605e-05, + "loss": 1.1404, + "step": 6329500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9466393351149042e-05, + "loss": 1.1502, + "step": 6330000 + }, + { + "epoch": 3.8, + "learning_rate": 2.946429338558848e-05, + "loss": 1.1437, + "step": 6330500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9462197619959032e-05, + "loss": 1.1592, + "step": 6331000 + }, + { + "epoch": 3.8, + "learning_rate": 2.946009765439847e-05, + "loss": 1.1501, + "step": 6331500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9457997688837903e-05, + "loss": 1.1403, + "step": 6332000 + }, + { + "epoch": 3.8, + "learning_rate": 2.945589772327734e-05, + "loss": 1.1749, + "step": 6332500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9453801957647893e-05, + "loss": 1.1462, + "step": 6333000 + }, + { + "epoch": 3.8, + "learning_rate": 2.945170199208733e-05, + "loss": 1.1396, + "step": 6333500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9449602026526763e-05, + "loss": 1.1589, + "step": 6334000 + }, + { + "epoch": 3.8, + "learning_rate": 2.94475020609662e-05, + "loss": 1.1418, + "step": 6334500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9445402095405637e-05, + "loss": 1.1617, + "step": 6335000 + }, + { + "epoch": 3.8, + "learning_rate": 2.944330212984507e-05, + "loss": 1.1423, + "step": 6335500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9441202164284507e-05, + "loss": 1.1528, + "step": 6336000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9439102198723938e-05, + "loss": 1.1143, + "step": 6336500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9437010633025615e-05, + "loss": 1.1624, + "step": 6337000 + }, + { + "epoch": 3.8, + "learning_rate": 2.943491066746505e-05, + "loss": 1.1583, + "step": 6337500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9432810701904488e-05, + "loss": 1.146, + "step": 6338000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9430710736343925e-05, + "loss": 1.1578, + "step": 6338500 + }, + { + "epoch": 3.8, + "learning_rate": 2.942861077078336e-05, + "loss": 1.1176, + "step": 6339000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9426510805222795e-05, + "loss": 1.1487, + "step": 6339500 + }, + { + "epoch": 3.8, + "learning_rate": 2.942441503959335e-05, + "loss": 1.127, + "step": 6340000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9422315074032786e-05, + "loss": 1.1658, + "step": 6340500 + }, + { + "epoch": 3.8, + "learning_rate": 2.942021510847222e-05, + "loss": 1.1271, + "step": 6341000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9418115142911656e-05, + "loss": 1.1245, + "step": 6341500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9416015177351093e-05, + "loss": 1.1522, + "step": 6342000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9413919411721647e-05, + "loss": 1.1556, + "step": 6342500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9411819446161083e-05, + "loss": 1.1405, + "step": 6343000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9409719480600517e-05, + "loss": 1.1715, + "step": 6343500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9407619515039954e-05, + "loss": 1.1773, + "step": 6344000 + }, + { + "epoch": 3.8, + "learning_rate": 2.940551954947939e-05, + "loss": 1.171, + "step": 6344500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9403419583918824e-05, + "loss": 1.1588, + "step": 6345000 + }, + { + "epoch": 3.8, + "learning_rate": 2.940132381828938e-05, + "loss": 1.166, + "step": 6345500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9399223852728814e-05, + "loss": 1.1576, + "step": 6346000 + }, + { + "epoch": 3.8, + "learning_rate": 2.939712388716825e-05, + "loss": 1.1573, + "step": 6346500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9395023921607688e-05, + "loss": 1.163, + "step": 6347000 + }, + { + "epoch": 3.81, + "learning_rate": 2.939292395604712e-05, + "loss": 1.1324, + "step": 6347500 + }, + { + "epoch": 3.81, + "learning_rate": 2.939082399048656e-05, + "loss": 1.1718, + "step": 6348000 + }, + { + "epoch": 3.81, + "learning_rate": 2.938872402492599e-05, + "loss": 1.1377, + "step": 6348500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9386624059365422e-05, + "loss": 1.1313, + "step": 6349000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9384532493667102e-05, + "loss": 1.139, + "step": 6349500 + }, + { + "epoch": 3.81, + "learning_rate": 2.938243252810654e-05, + "loss": 1.1574, + "step": 6350000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9380332562545973e-05, + "loss": 1.153, + "step": 6350500 + }, + { + "epoch": 3.81, + "learning_rate": 2.937823259698541e-05, + "loss": 1.1503, + "step": 6351000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9376132631424846e-05, + "loss": 1.1715, + "step": 6351500 + }, + { + "epoch": 3.81, + "learning_rate": 2.93740368657954e-05, + "loss": 1.1512, + "step": 6352000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9371936900234837e-05, + "loss": 1.1293, + "step": 6352500 + }, + { + "epoch": 3.81, + "learning_rate": 2.936983693467427e-05, + "loss": 1.1209, + "step": 6353000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9367736969113707e-05, + "loss": 1.1816, + "step": 6353500 + }, + { + "epoch": 3.81, + "learning_rate": 2.936564120348426e-05, + "loss": 1.1411, + "step": 6354000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9363541237923698e-05, + "loss": 1.1558, + "step": 6354500 + }, + { + "epoch": 3.81, + "learning_rate": 2.936144127236313e-05, + "loss": 1.1356, + "step": 6355000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9359341306802568e-05, + "loss": 1.156, + "step": 6355500 + }, + { + "epoch": 3.81, + "learning_rate": 2.935724554117312e-05, + "loss": 1.1202, + "step": 6356000 + }, + { + "epoch": 3.81, + "learning_rate": 2.935514557561256e-05, + "loss": 1.1772, + "step": 6356500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9353045610051995e-05, + "loss": 1.1753, + "step": 6357000 + }, + { + "epoch": 3.81, + "learning_rate": 2.935094564449143e-05, + "loss": 1.1371, + "step": 6357500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9348845678930865e-05, + "loss": 1.1471, + "step": 6358000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9346745713370302e-05, + "loss": 1.1319, + "step": 6358500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9344645747809736e-05, + "loss": 1.1418, + "step": 6359000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9342545782249173e-05, + "loss": 1.1395, + "step": 6359500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9340454216550846e-05, + "loss": 1.1293, + "step": 6360000 + }, + { + "epoch": 3.81, + "learning_rate": 2.933835425099028e-05, + "loss": 1.1455, + "step": 6360500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9336254285429717e-05, + "loss": 1.1404, + "step": 6361000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9334154319869153e-05, + "loss": 1.1588, + "step": 6361500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9332054354308587e-05, + "loss": 1.1373, + "step": 6362000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9329954388748024e-05, + "loss": 1.1468, + "step": 6362500 + }, + { + "epoch": 3.81, + "learning_rate": 2.932785442318746e-05, + "loss": 1.1653, + "step": 6363000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9325754457626897e-05, + "loss": 1.1602, + "step": 6363500 + }, + { + "epoch": 3.82, + "learning_rate": 2.932365449206633e-05, + "loss": 1.1418, + "step": 6364000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9321554526505768e-05, + "loss": 1.158, + "step": 6364500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9319454560945205e-05, + "loss": 1.121, + "step": 6365000 + }, + { + "epoch": 3.82, + "learning_rate": 2.931735459538463e-05, + "loss": 1.1797, + "step": 6365500 + }, + { + "epoch": 3.82, + "learning_rate": 2.931525882975519e-05, + "loss": 1.1629, + "step": 6366000 + }, + { + "epoch": 3.82, + "learning_rate": 2.931316306412575e-05, + "loss": 1.1188, + "step": 6366500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9311063098565182e-05, + "loss": 1.1251, + "step": 6367000 + }, + { + "epoch": 3.82, + "learning_rate": 2.930896313300462e-05, + "loss": 1.141, + "step": 6367500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9306863167444056e-05, + "loss": 1.1275, + "step": 6368000 + }, + { + "epoch": 3.82, + "learning_rate": 2.930476320188349e-05, + "loss": 1.1674, + "step": 6368500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9302663236322926e-05, + "loss": 1.1019, + "step": 6369000 + }, + { + "epoch": 3.82, + "learning_rate": 2.93005716706246e-05, + "loss": 1.1742, + "step": 6369500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9298471705064033e-05, + "loss": 1.1239, + "step": 6370000 + }, + { + "epoch": 3.82, + "learning_rate": 2.929637173950347e-05, + "loss": 1.1452, + "step": 6370500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9294271773942907e-05, + "loss": 1.1468, + "step": 6371000 + }, + { + "epoch": 3.82, + "learning_rate": 2.929217180838234e-05, + "loss": 1.1493, + "step": 6371500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9290071842821777e-05, + "loss": 1.1628, + "step": 6372000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9287971877261214e-05, + "loss": 1.1375, + "step": 6372500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9285871911700648e-05, + "loss": 1.1646, + "step": 6373000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9283771946140084e-05, + "loss": 1.1526, + "step": 6373500 + }, + { + "epoch": 3.82, + "learning_rate": 2.928167198057952e-05, + "loss": 1.1391, + "step": 6374000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9279576214950075e-05, + "loss": 1.1551, + "step": 6374500 + }, + { + "epoch": 3.82, + "learning_rate": 2.927747624938951e-05, + "loss": 1.158, + "step": 6375000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9275376283828945e-05, + "loss": 1.1802, + "step": 6375500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9273276318268382e-05, + "loss": 1.1535, + "step": 6376000 + }, + { + "epoch": 3.82, + "learning_rate": 2.927117635270782e-05, + "loss": 1.1703, + "step": 6376500 + }, + { + "epoch": 3.82, + "learning_rate": 2.926907638714725e-05, + "loss": 1.137, + "step": 6377000 + }, + { + "epoch": 3.82, + "learning_rate": 2.926698062151781e-05, + "loss": 1.1448, + "step": 6377500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9264880655957243e-05, + "loss": 1.1348, + "step": 6378000 + }, + { + "epoch": 3.82, + "learning_rate": 2.926278069039668e-05, + "loss": 1.1508, + "step": 6378500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9260680724836116e-05, + "loss": 1.1369, + "step": 6379000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9258580759275543e-05, + "loss": 1.1485, + "step": 6379500 + }, + { + "epoch": 3.83, + "learning_rate": 2.925648079371498e-05, + "loss": 1.1537, + "step": 6380000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9254380828154417e-05, + "loss": 1.1395, + "step": 6380500 + }, + { + "epoch": 3.83, + "learning_rate": 2.925228086259385e-05, + "loss": 1.1564, + "step": 6381000 + }, + { + "epoch": 3.83, + "learning_rate": 2.925018509696441e-05, + "loss": 1.1556, + "step": 6381500 + }, + { + "epoch": 3.83, + "learning_rate": 2.924808513140384e-05, + "loss": 1.1532, + "step": 6382000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9245985165843278e-05, + "loss": 1.1614, + "step": 6382500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9243893600144958e-05, + "loss": 1.145, + "step": 6383000 + }, + { + "epoch": 3.83, + "learning_rate": 2.924179363458439e-05, + "loss": 1.1421, + "step": 6383500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9239693669023828e-05, + "loss": 1.1744, + "step": 6384000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9237593703463265e-05, + "loss": 1.1265, + "step": 6384500 + }, + { + "epoch": 3.83, + "learning_rate": 2.92354937379027e-05, + "loss": 1.1366, + "step": 6385000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9233393772342135e-05, + "loss": 1.1851, + "step": 6385500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9231293806781572e-05, + "loss": 1.1504, + "step": 6386000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9229193841221006e-05, + "loss": 1.1536, + "step": 6386500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9227093875660436e-05, + "loss": 1.1324, + "step": 6387000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9224993910099873e-05, + "loss": 1.1473, + "step": 6387500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9222898144470433e-05, + "loss": 1.1204, + "step": 6388000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9220798178909866e-05, + "loss": 1.1727, + "step": 6388500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9218698213349297e-05, + "loss": 1.137, + "step": 6389000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9216598247788733e-05, + "loss": 1.1289, + "step": 6389500 + }, + { + "epoch": 3.83, + "learning_rate": 2.921449828222817e-05, + "loss": 1.1758, + "step": 6390000 + }, + { + "epoch": 3.83, + "learning_rate": 2.921240251659873e-05, + "loss": 1.1721, + "step": 6390500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9210302551038164e-05, + "loss": 1.1607, + "step": 6391000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9208202585477594e-05, + "loss": 1.1545, + "step": 6391500 + }, + { + "epoch": 3.83, + "learning_rate": 2.920610261991703e-05, + "loss": 1.1675, + "step": 6392000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9204002654356468e-05, + "loss": 1.1299, + "step": 6392500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9201906888727028e-05, + "loss": 1.1492, + "step": 6393000 + }, + { + "epoch": 3.83, + "learning_rate": 2.919980692316646e-05, + "loss": 1.1301, + "step": 6393500 + }, + { + "epoch": 3.83, + "learning_rate": 2.919770695760589e-05, + "loss": 1.1461, + "step": 6394000 + }, + { + "epoch": 3.83, + "learning_rate": 2.919560699204533e-05, + "loss": 1.1367, + "step": 6394500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9193507026484762e-05, + "loss": 1.1604, + "step": 6395000 + }, + { + "epoch": 3.83, + "learning_rate": 2.91914070609242e-05, + "loss": 1.1456, + "step": 6395500 + }, + { + "epoch": 3.83, + "learning_rate": 2.918931549522588e-05, + "loss": 1.1273, + "step": 6396000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9187215529665313e-05, + "loss": 1.1729, + "step": 6396500 + }, + { + "epoch": 3.84, + "learning_rate": 2.918511556410475e-05, + "loss": 1.1879, + "step": 6397000 + }, + { + "epoch": 3.84, + "learning_rate": 2.9183015598544186e-05, + "loss": 1.1485, + "step": 6397500 + }, + { + "epoch": 3.84, + "learning_rate": 2.918091563298362e-05, + "loss": 1.1499, + "step": 6398000 + }, + { + "epoch": 3.84, + "learning_rate": 2.917881566742305e-05, + "loss": 1.1123, + "step": 6398500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9176715701862487e-05, + "loss": 1.1355, + "step": 6399000 + }, + { + "epoch": 3.84, + "learning_rate": 2.9174615736301924e-05, + "loss": 1.1486, + "step": 6399500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9172515770741357e-05, + "loss": 1.1244, + "step": 6400000 + }, + { + "epoch": 3.84, + "eval_loss": 1.1107254028320312, + "eval_runtime": 1100.5378, + "eval_samples_per_second": 478.602, + "eval_steps_per_second": 79.767, + "step": 6400000 + }, + { + "epoch": 3.84, + "learning_rate": 2.9170420005111918e-05, + "loss": 1.1573, + "step": 6400500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9168320039551348e-05, + "loss": 1.1234, + "step": 6401000 + }, + { + "epoch": 3.84, + "learning_rate": 2.9166220073990784e-05, + "loss": 1.144, + "step": 6401500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9164124308361345e-05, + "loss": 1.1565, + "step": 6402000 + }, + { + "epoch": 3.84, + "learning_rate": 2.916202434280078e-05, + "loss": 1.152, + "step": 6402500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9159924377240215e-05, + "loss": 1.1448, + "step": 6403000 + }, + { + "epoch": 3.84, + "learning_rate": 2.9157824411679645e-05, + "loss": 1.1413, + "step": 6403500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9155724446119082e-05, + "loss": 1.1792, + "step": 6404000 + }, + { + "epoch": 3.84, + "learning_rate": 2.9153624480558515e-05, + "loss": 1.1571, + "step": 6404500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9151524514997952e-05, + "loss": 1.1721, + "step": 6405000 + }, + { + "epoch": 3.84, + "learning_rate": 2.914942454943739e-05, + "loss": 1.1198, + "step": 6405500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9147324583876823e-05, + "loss": 1.1264, + "step": 6406000 + }, + { + "epoch": 3.84, + "learning_rate": 2.914522461831626e-05, + "loss": 1.1476, + "step": 6406500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9143124652755696e-05, + "loss": 1.1388, + "step": 6407000 + }, + { + "epoch": 3.84, + "learning_rate": 2.914102888712625e-05, + "loss": 1.1306, + "step": 6407500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9138928921565687e-05, + "loss": 1.1588, + "step": 6408000 + }, + { + "epoch": 3.84, + "learning_rate": 2.913682895600512e-05, + "loss": 1.1406, + "step": 6408500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9134728990444557e-05, + "loss": 1.1738, + "step": 6409000 + }, + { + "epoch": 3.84, + "learning_rate": 2.9132629024883994e-05, + "loss": 1.1782, + "step": 6409500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9130529059323427e-05, + "loss": 1.119, + "step": 6410000 + }, + { + "epoch": 3.84, + "learning_rate": 2.9128429093762864e-05, + "loss": 1.1334, + "step": 6410500 + }, + { + "epoch": 3.84, + "learning_rate": 2.91263291282023e-05, + "loss": 1.1608, + "step": 6411000 + }, + { + "epoch": 3.84, + "learning_rate": 2.9124233362572855e-05, + "loss": 1.1497, + "step": 6411500 + }, + { + "epoch": 3.84, + "learning_rate": 2.912213339701229e-05, + "loss": 1.1336, + "step": 6412000 + }, + { + "epoch": 3.84, + "learning_rate": 2.9120033431451725e-05, + "loss": 1.1536, + "step": 6412500 + }, + { + "epoch": 3.84, + "learning_rate": 2.911793766582228e-05, + "loss": 1.1429, + "step": 6413000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9115837700261715e-05, + "loss": 1.1292, + "step": 6413500 + }, + { + "epoch": 3.85, + "learning_rate": 2.9113737734701152e-05, + "loss": 1.1498, + "step": 6414000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9111637769140586e-05, + "loss": 1.1494, + "step": 6414500 + }, + { + "epoch": 3.85, + "learning_rate": 2.9109537803580022e-05, + "loss": 1.1556, + "step": 6415000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9107442037950576e-05, + "loss": 1.1526, + "step": 6415500 + }, + { + "epoch": 3.85, + "learning_rate": 2.9105342072390013e-05, + "loss": 1.151, + "step": 6416000 + }, + { + "epoch": 3.85, + "learning_rate": 2.910324210682945e-05, + "loss": 1.135, + "step": 6416500 + }, + { + "epoch": 3.85, + "learning_rate": 2.9101142141268883e-05, + "loss": 1.1464, + "step": 6417000 + }, + { + "epoch": 3.85, + "learning_rate": 2.909904217570832e-05, + "loss": 1.1554, + "step": 6417500 + }, + { + "epoch": 3.85, + "learning_rate": 2.9096942210147757e-05, + "loss": 1.1383, + "step": 6418000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9094842244587187e-05, + "loss": 1.1781, + "step": 6418500 + }, + { + "epoch": 3.85, + "learning_rate": 2.909274227902662e-05, + "loss": 1.1394, + "step": 6419000 + }, + { + "epoch": 3.85, + "learning_rate": 2.909064651339718e-05, + "loss": 1.1346, + "step": 6419500 + }, + { + "epoch": 3.85, + "learning_rate": 2.9088546547836618e-05, + "loss": 1.153, + "step": 6420000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9086446582276054e-05, + "loss": 1.1331, + "step": 6420500 + }, + { + "epoch": 3.85, + "learning_rate": 2.908434661671548e-05, + "loss": 1.1259, + "step": 6421000 + }, + { + "epoch": 3.85, + "learning_rate": 2.908225085108604e-05, + "loss": 1.1542, + "step": 6421500 + }, + { + "epoch": 3.85, + "learning_rate": 2.908015088552548e-05, + "loss": 1.136, + "step": 6422000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9078050919964915e-05, + "loss": 1.1283, + "step": 6422500 + }, + { + "epoch": 3.85, + "learning_rate": 2.9075950954404352e-05, + "loss": 1.1524, + "step": 6423000 + }, + { + "epoch": 3.85, + "learning_rate": 2.907385098884378e-05, + "loss": 1.1465, + "step": 6423500 + }, + { + "epoch": 3.85, + "learning_rate": 2.9071751023283216e-05, + "loss": 1.1425, + "step": 6424000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9069651057722652e-05, + "loss": 1.1341, + "step": 6424500 + }, + { + "epoch": 3.85, + "learning_rate": 2.9067555292093213e-05, + "loss": 1.1486, + "step": 6425000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9065455326532643e-05, + "loss": 1.1451, + "step": 6425500 + }, + { + "epoch": 3.85, + "learning_rate": 2.9063355360972076e-05, + "loss": 1.1687, + "step": 6426000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9061255395411513e-05, + "loss": 1.1163, + "step": 6426500 + }, + { + "epoch": 3.85, + "learning_rate": 2.905915542985095e-05, + "loss": 1.1413, + "step": 6427000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9057055464290383e-05, + "loss": 1.1364, + "step": 6427500 + }, + { + "epoch": 3.85, + "learning_rate": 2.905495549872982e-05, + "loss": 1.1197, + "step": 6428000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9052859733100374e-05, + "loss": 1.1473, + "step": 6428500 + }, + { + "epoch": 3.85, + "learning_rate": 2.905075976753981e-05, + "loss": 1.1556, + "step": 6429000 + }, + { + "epoch": 3.85, + "learning_rate": 2.9048659801979248e-05, + "loss": 1.1462, + "step": 6429500 + }, + { + "epoch": 3.86, + "learning_rate": 2.904655983641868e-05, + "loss": 1.1499, + "step": 6430000 + }, + { + "epoch": 3.86, + "learning_rate": 2.9044464070789235e-05, + "loss": 1.1586, + "step": 6430500 + }, + { + "epoch": 3.86, + "learning_rate": 2.904236410522867e-05, + "loss": 1.147, + "step": 6431000 + }, + { + "epoch": 3.86, + "learning_rate": 2.904026413966811e-05, + "loss": 1.1532, + "step": 6431500 + }, + { + "epoch": 3.86, + "learning_rate": 2.9038164174107542e-05, + "loss": 1.1314, + "step": 6432000 + }, + { + "epoch": 3.86, + "learning_rate": 2.903606420854698e-05, + "loss": 1.1111, + "step": 6432500 + }, + { + "epoch": 3.86, + "learning_rate": 2.9033968442917532e-05, + "loss": 1.1443, + "step": 6433000 + }, + { + "epoch": 3.86, + "learning_rate": 2.903186847735697e-05, + "loss": 1.1481, + "step": 6433500 + }, + { + "epoch": 3.86, + "learning_rate": 2.9029768511796406e-05, + "loss": 1.1732, + "step": 6434000 + }, + { + "epoch": 3.86, + "learning_rate": 2.902766854623584e-05, + "loss": 1.1428, + "step": 6434500 + }, + { + "epoch": 3.86, + "learning_rate": 2.9025572780606393e-05, + "loss": 1.1729, + "step": 6435000 + }, + { + "epoch": 3.86, + "learning_rate": 2.902347281504583e-05, + "loss": 1.165, + "step": 6435500 + }, + { + "epoch": 3.86, + "learning_rate": 2.9021372849485267e-05, + "loss": 1.1532, + "step": 6436000 + }, + { + "epoch": 3.86, + "learning_rate": 2.9019272883924703e-05, + "loss": 1.137, + "step": 6436500 + }, + { + "epoch": 3.86, + "learning_rate": 2.9017177118295264e-05, + "loss": 1.1305, + "step": 6437000 + }, + { + "epoch": 3.86, + "learning_rate": 2.901507715273469e-05, + "loss": 1.178, + "step": 6437500 + }, + { + "epoch": 3.86, + "learning_rate": 2.9012977187174127e-05, + "loss": 1.1525, + "step": 6438000 + }, + { + "epoch": 3.86, + "learning_rate": 2.9010877221613564e-05, + "loss": 1.1507, + "step": 6438500 + }, + { + "epoch": 3.86, + "learning_rate": 2.9008777256052998e-05, + "loss": 1.143, + "step": 6439000 + }, + { + "epoch": 3.86, + "learning_rate": 2.9006677290492434e-05, + "loss": 1.1121, + "step": 6439500 + }, + { + "epoch": 3.86, + "learning_rate": 2.900457732493187e-05, + "loss": 1.1336, + "step": 6440000 + }, + { + "epoch": 3.86, + "learning_rate": 2.9002477359371305e-05, + "loss": 1.1587, + "step": 6440500 + }, + { + "epoch": 3.86, + "learning_rate": 2.9000385793672985e-05, + "loss": 1.1389, + "step": 6441000 + }, + { + "epoch": 3.86, + "learning_rate": 2.8998285828112422e-05, + "loss": 1.1539, + "step": 6441500 + }, + { + "epoch": 3.86, + "learning_rate": 2.8996185862551856e-05, + "loss": 1.1405, + "step": 6442000 + }, + { + "epoch": 3.86, + "learning_rate": 2.8994085896991286e-05, + "loss": 1.1906, + "step": 6442500 + }, + { + "epoch": 3.86, + "learning_rate": 2.8991985931430722e-05, + "loss": 1.1849, + "step": 6443000 + }, + { + "epoch": 3.86, + "learning_rate": 2.8989890165801283e-05, + "loss": 1.1908, + "step": 6443500 + }, + { + "epoch": 3.86, + "learning_rate": 2.898779020024072e-05, + "loss": 1.1568, + "step": 6444000 + }, + { + "epoch": 3.86, + "learning_rate": 2.8985690234680146e-05, + "loss": 1.1257, + "step": 6444500 + }, + { + "epoch": 3.86, + "learning_rate": 2.8983590269119583e-05, + "loss": 1.1728, + "step": 6445000 + }, + { + "epoch": 3.86, + "learning_rate": 2.8981494503490144e-05, + "loss": 1.1386, + "step": 6445500 + }, + { + "epoch": 3.86, + "learning_rate": 2.897939453792958e-05, + "loss": 1.1422, + "step": 6446000 + }, + { + "epoch": 3.86, + "learning_rate": 2.8977294572369014e-05, + "loss": 1.1376, + "step": 6446500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8975194606808444e-05, + "loss": 1.1537, + "step": 6447000 + }, + { + "epoch": 3.87, + "learning_rate": 2.897309464124788e-05, + "loss": 1.1788, + "step": 6447500 + }, + { + "epoch": 3.87, + "learning_rate": 2.897099887561844e-05, + "loss": 1.1385, + "step": 6448000 + }, + { + "epoch": 3.87, + "learning_rate": 2.8968903109988995e-05, + "loss": 1.1402, + "step": 6448500 + }, + { + "epoch": 3.87, + "learning_rate": 2.896680314442843e-05, + "loss": 1.1382, + "step": 6449000 + }, + { + "epoch": 3.87, + "learning_rate": 2.896470317886787e-05, + "loss": 1.1366, + "step": 6449500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8962603213307302e-05, + "loss": 1.1474, + "step": 6450000 + }, + { + "epoch": 3.87, + "learning_rate": 2.8960507447677855e-05, + "loss": 1.1267, + "step": 6450500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8958407482117292e-05, + "loss": 1.1365, + "step": 6451000 + }, + { + "epoch": 3.87, + "learning_rate": 2.895630751655673e-05, + "loss": 1.155, + "step": 6451500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8954207550996163e-05, + "loss": 1.1758, + "step": 6452000 + }, + { + "epoch": 3.87, + "learning_rate": 2.89521075854356e-05, + "loss": 1.1655, + "step": 6452500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8950007619875036e-05, + "loss": 1.1277, + "step": 6453000 + }, + { + "epoch": 3.87, + "learning_rate": 2.894790765431447e-05, + "loss": 1.139, + "step": 6453500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8945807688753907e-05, + "loss": 1.1529, + "step": 6454000 + }, + { + "epoch": 3.87, + "learning_rate": 2.8943707723193337e-05, + "loss": 1.1426, + "step": 6454500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8941616157495017e-05, + "loss": 1.157, + "step": 6455000 + }, + { + "epoch": 3.87, + "learning_rate": 2.893951619193445e-05, + "loss": 1.159, + "step": 6455500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8937416226373887e-05, + "loss": 1.1571, + "step": 6456000 + }, + { + "epoch": 3.87, + "learning_rate": 2.8935316260813324e-05, + "loss": 1.1744, + "step": 6456500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8933216295252758e-05, + "loss": 1.1638, + "step": 6457000 + }, + { + "epoch": 3.87, + "learning_rate": 2.8931116329692195e-05, + "loss": 1.1291, + "step": 6457500 + }, + { + "epoch": 3.87, + "learning_rate": 2.892901636413163e-05, + "loss": 1.1318, + "step": 6458000 + }, + { + "epoch": 3.87, + "learning_rate": 2.8926916398571065e-05, + "loss": 1.1379, + "step": 6458500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8924816433010495e-05, + "loss": 1.1387, + "step": 6459000 + }, + { + "epoch": 3.87, + "learning_rate": 2.8922720667381055e-05, + "loss": 1.1413, + "step": 6459500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8920620701820492e-05, + "loss": 1.127, + "step": 6460000 + }, + { + "epoch": 3.87, + "learning_rate": 2.8918520736259926e-05, + "loss": 1.167, + "step": 6460500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8916420770699362e-05, + "loss": 1.1246, + "step": 6461000 + }, + { + "epoch": 3.87, + "learning_rate": 2.8914320805138793e-05, + "loss": 1.1271, + "step": 6461500 + }, + { + "epoch": 3.87, + "learning_rate": 2.891222083957823e-05, + "loss": 1.1196, + "step": 6462000 + }, + { + "epoch": 3.87, + "learning_rate": 2.891012507394879e-05, + "loss": 1.1589, + "step": 6462500 + }, + { + "epoch": 3.87, + "learning_rate": 2.8908025108388223e-05, + "loss": 1.1359, + "step": 6463000 + }, + { + "epoch": 3.88, + "learning_rate": 2.890592514282766e-05, + "loss": 1.1473, + "step": 6463500 + }, + { + "epoch": 3.88, + "learning_rate": 2.890382517726709e-05, + "loss": 1.16, + "step": 6464000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8901725211706527e-05, + "loss": 1.1472, + "step": 6464500 + }, + { + "epoch": 3.88, + "learning_rate": 2.8899629446077087e-05, + "loss": 1.1498, + "step": 6465000 + }, + { + "epoch": 3.88, + "learning_rate": 2.889752948051652e-05, + "loss": 1.1576, + "step": 6465500 + }, + { + "epoch": 3.88, + "learning_rate": 2.889542951495595e-05, + "loss": 1.1406, + "step": 6466000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8893329549395388e-05, + "loss": 1.1619, + "step": 6466500 + }, + { + "epoch": 3.88, + "learning_rate": 2.889122958383482e-05, + "loss": 1.1479, + "step": 6467000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8889129618274258e-05, + "loss": 1.1235, + "step": 6467500 + }, + { + "epoch": 3.88, + "learning_rate": 2.8887029652713695e-05, + "loss": 1.1605, + "step": 6468000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8884929687153132e-05, + "loss": 1.1235, + "step": 6468500 + }, + { + "epoch": 3.88, + "learning_rate": 2.8882833921523685e-05, + "loss": 1.1486, + "step": 6469000 + }, + { + "epoch": 3.88, + "learning_rate": 2.888073395596312e-05, + "loss": 1.1016, + "step": 6469500 + }, + { + "epoch": 3.88, + "learning_rate": 2.887863819033368e-05, + "loss": 1.1495, + "step": 6470000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8876538224773116e-05, + "loss": 1.1418, + "step": 6470500 + }, + { + "epoch": 3.88, + "learning_rate": 2.8874438259212546e-05, + "loss": 1.1416, + "step": 6471000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8872338293651983e-05, + "loss": 1.1443, + "step": 6471500 + }, + { + "epoch": 3.88, + "learning_rate": 2.8870238328091416e-05, + "loss": 1.1518, + "step": 6472000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8868138362530853e-05, + "loss": 1.1398, + "step": 6472500 + }, + { + "epoch": 3.88, + "learning_rate": 2.886603839697029e-05, + "loss": 1.1661, + "step": 6473000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8863938431409723e-05, + "loss": 1.1479, + "step": 6473500 + }, + { + "epoch": 3.88, + "learning_rate": 2.8861842665780277e-05, + "loss": 1.1499, + "step": 6474000 + }, + { + "epoch": 3.88, + "learning_rate": 2.885974690015084e-05, + "loss": 1.1365, + "step": 6474500 + }, + { + "epoch": 3.88, + "learning_rate": 2.8857646934590274e-05, + "loss": 1.1503, + "step": 6475000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8855546969029704e-05, + "loss": 1.1549, + "step": 6475500 + }, + { + "epoch": 3.88, + "learning_rate": 2.885344700346914e-05, + "loss": 1.1305, + "step": 6476000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8851347037908575e-05, + "loss": 1.1436, + "step": 6476500 + }, + { + "epoch": 3.88, + "learning_rate": 2.8849251272279135e-05, + "loss": 1.1596, + "step": 6477000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8847151306718572e-05, + "loss": 1.1337, + "step": 6477500 + }, + { + "epoch": 3.88, + "learning_rate": 2.8845051341158002e-05, + "loss": 1.1407, + "step": 6478000 + }, + { + "epoch": 3.88, + "learning_rate": 2.884295137559744e-05, + "loss": 1.1245, + "step": 6478500 + }, + { + "epoch": 3.88, + "learning_rate": 2.8840851410036872e-05, + "loss": 1.146, + "step": 6479000 + }, + { + "epoch": 3.88, + "learning_rate": 2.8838755644407433e-05, + "loss": 1.1464, + "step": 6479500 + }, + { + "epoch": 3.89, + "learning_rate": 2.883665567884687e-05, + "loss": 1.1535, + "step": 6480000 + }, + { + "epoch": 3.89, + "learning_rate": 2.88345557132863e-05, + "loss": 1.1589, + "step": 6480500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8832455747725736e-05, + "loss": 1.1364, + "step": 6481000 + }, + { + "epoch": 3.89, + "learning_rate": 2.883035578216517e-05, + "loss": 1.1205, + "step": 6481500 + }, + { + "epoch": 3.89, + "learning_rate": 2.882826001653573e-05, + "loss": 1.1261, + "step": 6482000 + }, + { + "epoch": 3.89, + "learning_rate": 2.8826160050975167e-05, + "loss": 1.1695, + "step": 6482500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8824060085414597e-05, + "loss": 1.1242, + "step": 6483000 + }, + { + "epoch": 3.89, + "learning_rate": 2.882196011985403e-05, + "loss": 1.143, + "step": 6483500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8819860154293467e-05, + "loss": 1.1313, + "step": 6484000 + }, + { + "epoch": 3.89, + "learning_rate": 2.8817764388664028e-05, + "loss": 1.1538, + "step": 6484500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8815664423103465e-05, + "loss": 1.1352, + "step": 6485000 + }, + { + "epoch": 3.89, + "learning_rate": 2.8813564457542895e-05, + "loss": 1.1497, + "step": 6485500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8811464491982328e-05, + "loss": 1.1356, + "step": 6486000 + }, + { + "epoch": 3.89, + "learning_rate": 2.880936872635289e-05, + "loss": 1.1486, + "step": 6486500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8807268760792325e-05, + "loss": 1.118, + "step": 6487000 + }, + { + "epoch": 3.89, + "learning_rate": 2.8805168795231755e-05, + "loss": 1.1366, + "step": 6487500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8803068829671192e-05, + "loss": 1.1601, + "step": 6488000 + }, + { + "epoch": 3.89, + "learning_rate": 2.8800968864110626e-05, + "loss": 1.1001, + "step": 6488500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8798868898550062e-05, + "loss": 1.1464, + "step": 6489000 + }, + { + "epoch": 3.89, + "learning_rate": 2.8796773132920623e-05, + "loss": 1.1466, + "step": 6489500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8794673167360053e-05, + "loss": 1.1205, + "step": 6490000 + }, + { + "epoch": 3.89, + "learning_rate": 2.8792573201799486e-05, + "loss": 1.1337, + "step": 6490500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8790473236238923e-05, + "loss": 1.1529, + "step": 6491000 + }, + { + "epoch": 3.89, + "learning_rate": 2.878837327067836e-05, + "loss": 1.1405, + "step": 6491500 + }, + { + "epoch": 3.89, + "learning_rate": 2.878627750504892e-05, + "loss": 1.1446, + "step": 6492000 + }, + { + "epoch": 3.89, + "learning_rate": 2.878417753948835e-05, + "loss": 1.1416, + "step": 6492500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8782077573927784e-05, + "loss": 1.1228, + "step": 6493000 + }, + { + "epoch": 3.89, + "learning_rate": 2.877997760836722e-05, + "loss": 1.1209, + "step": 6493500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8777877642806658e-05, + "loss": 1.169, + "step": 6494000 + }, + { + "epoch": 3.89, + "learning_rate": 2.8775781877177218e-05, + "loss": 1.1356, + "step": 6494500 + }, + { + "epoch": 3.89, + "learning_rate": 2.877368611154777e-05, + "loss": 1.1532, + "step": 6495000 + }, + { + "epoch": 3.89, + "learning_rate": 2.877158614598721e-05, + "loss": 1.147, + "step": 6495500 + }, + { + "epoch": 3.89, + "learning_rate": 2.8769486180426642e-05, + "loss": 1.1345, + "step": 6496000 + }, + { + "epoch": 3.89, + "learning_rate": 2.876738621486608e-05, + "loss": 1.1453, + "step": 6496500 + }, + { + "epoch": 3.9, + "learning_rate": 2.876528624930551e-05, + "loss": 1.1289, + "step": 6497000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8763186283744942e-05, + "loss": 1.1727, + "step": 6497500 + }, + { + "epoch": 3.9, + "learning_rate": 2.876108631818438e-05, + "loss": 1.1603, + "step": 6498000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8758986352623816e-05, + "loss": 1.1164, + "step": 6498500 + }, + { + "epoch": 3.9, + "learning_rate": 2.875688638706325e-05, + "loss": 1.1503, + "step": 6499000 + }, + { + "epoch": 3.9, + "learning_rate": 2.8754790621433806e-05, + "loss": 1.1488, + "step": 6499500 + }, + { + "epoch": 3.9, + "learning_rate": 2.875269065587324e-05, + "loss": 1.1639, + "step": 6500000 + }, + { + "epoch": 3.9, + "eval_loss": 1.1070631742477417, + "eval_runtime": 1102.123, + "eval_samples_per_second": 477.914, + "eval_steps_per_second": 79.653, + "step": 6500000 + } + ], + "max_steps": 13343552, + "num_train_epochs": 8, + "total_flos": 3.442361330450166e+18, + "trial_name": null, + "trial_params": null +}