diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,25828 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.14583050847457626, + "global_step": 2151000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999775367231639e-05, + "loss": 0.994, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999549378531074e-05, + "loss": 0.735, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999323389830509e-05, + "loss": 0.6701, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999097401129944e-05, + "loss": 0.6115, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 4.998871412429379e-05, + "loss": 0.5883, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9986454237288136e-05, + "loss": 0.5659, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9984194350282485e-05, + "loss": 0.5431, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9981934463276833e-05, + "loss": 0.5271, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 4.997967457627119e-05, + "loss": 0.5072, + "step": 4500 + }, + { + "epoch": 0.0, + "learning_rate": 4.997741468926554e-05, + "loss": 0.4946, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9975154802259886e-05, + "loss": 0.4913, + "step": 5500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9972894915254235e-05, + "loss": 0.48, + "step": 6000 + }, + { + "epoch": 0.0, + "learning_rate": 4.997063502824859e-05, + "loss": 0.464, + "step": 6500 + }, + { + "epoch": 0.0, + "learning_rate": 4.996837966101695e-05, + "loss": 0.4509, + "step": 7000 + }, + { + "epoch": 0.0, + "learning_rate": 4.99661197740113e-05, + "loss": 0.4448, + "step": 7500 + }, + { + "epoch": 0.0, + "learning_rate": 4.996385988700565e-05, + "loss": 0.4416, + "step": 8000 + }, + { + "epoch": 0.0, + "learning_rate": 4.99616e-05, + "loss": 0.4297, + "step": 8500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9959340112994355e-05, + "loss": 0.4246, + "step": 9000 + }, + { + "epoch": 0.0, + "learning_rate": 4.995708474576272e-05, + "loss": 0.4243, + "step": 9500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9954824858757066e-05, + "loss": 0.4138, + "step": 10000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9952564971751415e-05, + "loss": 0.4055, + "step": 10500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9950305084745764e-05, + "loss": 0.4008, + "step": 11000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9948049717514126e-05, + "loss": 0.3897, + "step": 11500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9945789830508475e-05, + "loss": 0.393, + "step": 12000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9943529943502824e-05, + "loss": 0.3842, + "step": 12500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994127005649718e-05, + "loss": 0.3814, + "step": 13000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993901016949153e-05, + "loss": 0.3861, + "step": 13500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993675480225989e-05, + "loss": 0.3787, + "step": 14000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993449491525424e-05, + "loss": 0.3706, + "step": 14500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9932235028248595e-05, + "loss": 0.3697, + "step": 15000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9929975141242943e-05, + "loss": 0.3688, + "step": 15500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992771525423729e-05, + "loss": 0.3559, + "step": 16000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992545536723164e-05, + "loss": 0.3606, + "step": 16500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992319548022599e-05, + "loss": 0.3562, + "step": 17000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992093559322034e-05, + "loss": 0.3523, + "step": 17500 + }, + { + "epoch": 0.0, + "learning_rate": 4.991867570621469e-05, + "loss": 0.3419, + "step": 18000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9916415819209036e-05, + "loss": 0.3459, + "step": 18500 + }, + { + "epoch": 0.01, + "learning_rate": 4.991415593220339e-05, + "loss": 0.3401, + "step": 19000 + }, + { + "epoch": 0.01, + "learning_rate": 4.991189604519774e-05, + "loss": 0.3393, + "step": 19500 + }, + { + "epoch": 0.01, + "learning_rate": 4.99096406779661e-05, + "loss": 0.3352, + "step": 20000 + }, + { + "epoch": 0.0, + "learning_rate": 4.995369491525424e-05, + "loss": 0.3279, + "step": 20500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9952564971751415e-05, + "loss": 0.3253, + "step": 21000 + }, + { + "epoch": 0.0, + "learning_rate": 4.995143502824859e-05, + "loss": 0.3324, + "step": 21500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9950305084745764e-05, + "loss": 0.317, + "step": 22000 + }, + { + "epoch": 0.0, + "learning_rate": 4.994917514124294e-05, + "loss": 0.3204, + "step": 22500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994804519774012e-05, + "loss": 0.3125, + "step": 23000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9946915254237294e-05, + "loss": 0.3184, + "step": 23500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9945787570621475e-05, + "loss": 0.3212, + "step": 24000 + }, + { + "epoch": 0.0, + "learning_rate": 4.994465762711865e-05, + "loss": 0.3209, + "step": 24500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9943527683615824e-05, + "loss": 0.3075, + "step": 25000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9942397740113e-05, + "loss": 0.3162, + "step": 25500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994126779661017e-05, + "loss": 0.3114, + "step": 26000 + }, + { + "epoch": 0.0, + "learning_rate": 4.994013785310735e-05, + "loss": 0.3187, + "step": 26500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993900790960452e-05, + "loss": 0.3143, + "step": 27000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9937877966101696e-05, + "loss": 0.303, + "step": 27500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993675028248588e-05, + "loss": 0.3195, + "step": 28000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993562033898305e-05, + "loss": 0.3093, + "step": 28500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9934490395480226e-05, + "loss": 0.3025, + "step": 29000 + }, + { + "epoch": 0.0, + "learning_rate": 4.99333604519774e-05, + "loss": 0.3078, + "step": 29500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993223276836159e-05, + "loss": 0.3028, + "step": 30000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993110282485876e-05, + "loss": 0.3056, + "step": 30500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992997288135594e-05, + "loss": 0.3103, + "step": 31000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992884293785311e-05, + "loss": 0.2955, + "step": 31500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992771525423729e-05, + "loss": 0.2934, + "step": 32000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992658757062147e-05, + "loss": 0.2981, + "step": 32500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992545762711865e-05, + "loss": 0.3028, + "step": 33000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992432768361582e-05, + "loss": 0.2976, + "step": 33500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9923197740112997e-05, + "loss": 0.2958, + "step": 34000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992207005649718e-05, + "loss": 0.2944, + "step": 34500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992094011299435e-05, + "loss": 0.2936, + "step": 35000 + }, + { + "epoch": 0.0, + "learning_rate": 4.991981242937853e-05, + "loss": 0.2855, + "step": 35500 + }, + { + "epoch": 0.0, + "learning_rate": 4.991868248587571e-05, + "loss": 0.2859, + "step": 36000 + }, + { + "epoch": 0.0, + "learning_rate": 4.991755254237289e-05, + "loss": 0.2896, + "step": 36500 + }, + { + "epoch": 0.01, + "learning_rate": 4.991642259887006e-05, + "loss": 0.2939, + "step": 37000 + }, + { + "epoch": 0.01, + "learning_rate": 4.991529265536724e-05, + "loss": 0.2915, + "step": 37500 + }, + { + "epoch": 0.01, + "learning_rate": 4.991416271186441e-05, + "loss": 0.2811, + "step": 38000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9913032768361586e-05, + "loss": 0.2851, + "step": 38500 + }, + { + "epoch": 0.01, + "learning_rate": 4.991190282485876e-05, + "loss": 0.2877, + "step": 39000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9910772881355935e-05, + "loss": 0.2855, + "step": 39500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990964293785311e-05, + "loss": 0.2802, + "step": 40000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9908512994350284e-05, + "loss": 0.2855, + "step": 40500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9907385310734465e-05, + "loss": 0.2894, + "step": 41000 + }, + { + "epoch": 0.01, + "learning_rate": 4.990625536723164e-05, + "loss": 0.2821, + "step": 41500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9905125423728814e-05, + "loss": 0.2857, + "step": 42000 + }, + { + "epoch": 0.0, + "learning_rate": 4.99519988700565e-05, + "loss": 0.2725, + "step": 42500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9951433898305086e-05, + "loss": 0.2795, + "step": 43000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9950868926553677e-05, + "loss": 0.2712, + "step": 43500 + }, + { + "epoch": 0.0, + "learning_rate": 4.995030395480226e-05, + "loss": 0.2597, + "step": 44000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9949740112994354e-05, + "loss": 0.2736, + "step": 44500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994917514124294e-05, + "loss": 0.2641, + "step": 45000 + }, + { + "epoch": 0.0, + "learning_rate": 4.994861016949153e-05, + "loss": 0.2673, + "step": 45500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994804519774012e-05, + "loss": 0.2584, + "step": 46000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9947481355932207e-05, + "loss": 0.2668, + "step": 46500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994691638418079e-05, + "loss": 0.2677, + "step": 47000 + }, + { + "epoch": 0.0, + "learning_rate": 4.994635141242938e-05, + "loss": 0.2729, + "step": 47500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9945786440677965e-05, + "loss": 0.2587, + "step": 48000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9945221468926555e-05, + "loss": 0.2687, + "step": 48500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994465762711865e-05, + "loss": 0.2743, + "step": 49000 + }, + { + "epoch": 0.0, + "learning_rate": 4.994409265536723e-05, + "loss": 0.2572, + "step": 49500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9943527683615824e-05, + "loss": 0.2667, + "step": 50000 + }, + { + "epoch": 0.0, + "learning_rate": 4.994296271186441e-05, + "loss": 0.2725, + "step": 50500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9942397740113e-05, + "loss": 0.2682, + "step": 51000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9941833898305085e-05, + "loss": 0.2624, + "step": 51500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9941268926553676e-05, + "loss": 0.2639, + "step": 52000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9940703954802266e-05, + "loss": 0.2653, + "step": 52500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994013898305085e-05, + "loss": 0.2737, + "step": 53000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993957514124294e-05, + "loss": 0.2708, + "step": 53500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993901016949153e-05, + "loss": 0.2678, + "step": 54000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993844519774011e-05, + "loss": 0.2519, + "step": 54500 + }, + { + "epoch": 0.0, + "learning_rate": 4.99378802259887e-05, + "loss": 0.2629, + "step": 55000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9937316384180796e-05, + "loss": 0.2693, + "step": 55500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993675141242938e-05, + "loss": 0.2809, + "step": 56000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993618644067797e-05, + "loss": 0.2585, + "step": 56500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9935621468926555e-05, + "loss": 0.2671, + "step": 57000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9935056497175145e-05, + "loss": 0.2691, + "step": 57500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993449265536723e-05, + "loss": 0.2519, + "step": 58000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993392768361582e-05, + "loss": 0.2781, + "step": 58500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993336271186441e-05, + "loss": 0.2463, + "step": 59000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9932797740113e-05, + "loss": 0.2612, + "step": 59500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9932235028248595e-05, + "loss": 0.2638, + "step": 60000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993167005649718e-05, + "loss": 0.2717, + "step": 60500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993110508474577e-05, + "loss": 0.2552, + "step": 61000 + }, + { + "epoch": 0.0, + "learning_rate": 4.993054011299435e-05, + "loss": 0.26, + "step": 61500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9929975141242943e-05, + "loss": 0.2665, + "step": 62000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992941016949153e-05, + "loss": 0.2554, + "step": 62500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992884519774012e-05, + "loss": 0.2508, + "step": 63000 + }, + { + "epoch": 0.0, + "learning_rate": 4.99282802259887e-05, + "loss": 0.2651, + "step": 63500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992771525423729e-05, + "loss": 0.2399, + "step": 64000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992715028248588e-05, + "loss": 0.2515, + "step": 64500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992658644067797e-05, + "loss": 0.2586, + "step": 65000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9926021468926554e-05, + "loss": 0.2581, + "step": 65500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9925456497175144e-05, + "loss": 0.2639, + "step": 66000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9924891525423735e-05, + "loss": 0.2589, + "step": 66500 + }, + { + "epoch": 0.0, + "learning_rate": 4.992432768361582e-05, + "loss": 0.2568, + "step": 67000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9923762711864406e-05, + "loss": 0.2536, + "step": 67500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9923197740112997e-05, + "loss": 0.2539, + "step": 68000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992263276836158e-05, + "loss": 0.2574, + "step": 68500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9922068926553674e-05, + "loss": 0.2473, + "step": 69000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9921503954802265e-05, + "loss": 0.2535, + "step": 69500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9920938983050856e-05, + "loss": 0.2479, + "step": 70000 + }, + { + "epoch": 0.0, + "learning_rate": 4.992037401129944e-05, + "loss": 0.247, + "step": 70500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9919810169491527e-05, + "loss": 0.2443, + "step": 71000 + }, + { + "epoch": 0.0, + "learning_rate": 4.991924519774012e-05, + "loss": 0.2537, + "step": 71500 + }, + { + "epoch": 0.0, + "learning_rate": 4.99186802259887e-05, + "loss": 0.2443, + "step": 72000 + }, + { + "epoch": 0.0, + "learning_rate": 4.991811525423729e-05, + "loss": 0.2435, + "step": 72500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9917550282485875e-05, + "loss": 0.2554, + "step": 73000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9916985310734466e-05, + "loss": 0.2551, + "step": 73500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9916420338983056e-05, + "loss": 0.2447, + "step": 74000 + }, + { + "epoch": 0.01, + "learning_rate": 4.991585536723164e-05, + "loss": 0.2537, + "step": 74500 + }, + { + "epoch": 0.01, + "learning_rate": 4.991529152542373e-05, + "loss": 0.2516, + "step": 75000 + }, + { + "epoch": 0.01, + "learning_rate": 4.991472655367232e-05, + "loss": 0.2398, + "step": 75500 + }, + { + "epoch": 0.01, + "learning_rate": 4.99141615819209e-05, + "loss": 0.2441, + "step": 76000 + }, + { + "epoch": 0.01, + "learning_rate": 4.991359661016949e-05, + "loss": 0.2427, + "step": 76500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9913032768361586e-05, + "loss": 0.2505, + "step": 77000 + }, + { + "epoch": 0.01, + "learning_rate": 4.991246779661018e-05, + "loss": 0.2465, + "step": 77500 + }, + { + "epoch": 0.01, + "learning_rate": 4.991190282485876e-05, + "loss": 0.249, + "step": 78000 + }, + { + "epoch": 0.01, + "learning_rate": 4.991133785310735e-05, + "loss": 0.2444, + "step": 78500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9910772881355935e-05, + "loss": 0.2491, + "step": 79000 + }, + { + "epoch": 0.01, + "learning_rate": 4.991020903954802e-05, + "loss": 0.2389, + "step": 79500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990964406779661e-05, + "loss": 0.2489, + "step": 80000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9909079096045204e-05, + "loss": 0.2481, + "step": 80500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990851525423729e-05, + "loss": 0.251, + "step": 81000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9907950282485875e-05, + "loss": 0.2523, + "step": 81500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9907385310734465e-05, + "loss": 0.2507, + "step": 82000 + }, + { + "epoch": 0.01, + "learning_rate": 4.990682033898305e-05, + "loss": 0.2482, + "step": 82500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990625536723164e-05, + "loss": 0.2387, + "step": 83000 + }, + { + "epoch": 0.01, + "learning_rate": 4.990569039548022e-05, + "loss": 0.2445, + "step": 83500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9905125423728814e-05, + "loss": 0.2507, + "step": 84000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9904560451977405e-05, + "loss": 0.2553, + "step": 84500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990399548022599e-05, + "loss": 0.254, + "step": 85000 + }, + { + "epoch": 0.01, + "learning_rate": 4.990343163841808e-05, + "loss": 0.2534, + "step": 85500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990286666666667e-05, + "loss": 0.2489, + "step": 86000 + }, + { + "epoch": 0.01, + "learning_rate": 4.990230169491526e-05, + "loss": 0.2457, + "step": 86500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990173672316385e-05, + "loss": 0.2449, + "step": 87000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9901172881355935e-05, + "loss": 0.239, + "step": 87500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9900607909604525e-05, + "loss": 0.2482, + "step": 88000 + }, + { + "epoch": 0.01, + "learning_rate": 4.990004293785311e-05, + "loss": 0.2454, + "step": 88500 + }, + { + "epoch": 0.01, + "learning_rate": 4.98994779661017e-05, + "loss": 0.2665, + "step": 89000 + }, + { + "epoch": 0.01, + "learning_rate": 4.989891299435028e-05, + "loss": 0.2511, + "step": 89500 + }, + { + "epoch": 0.01, + "learning_rate": 4.989834915254237e-05, + "loss": 0.2604, + "step": 90000 + }, + { + "epoch": 0.01, + "learning_rate": 4.989778418079096e-05, + "loss": 0.2529, + "step": 90500 + }, + { + "epoch": 0.01, + "learning_rate": 4.989721920903955e-05, + "loss": 0.2519, + "step": 91000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9896654237288135e-05, + "loss": 0.2489, + "step": 91500 + }, + { + "epoch": 0.01, + "learning_rate": 4.989609039548023e-05, + "loss": 0.2569, + "step": 92000 + }, + { + "epoch": 0.01, + "learning_rate": 4.989552542372882e-05, + "loss": 0.2427, + "step": 92500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9894960451977404e-05, + "loss": 0.2485, + "step": 93000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9894395480225994e-05, + "loss": 0.2392, + "step": 93500 + }, + { + "epoch": 0.01, + "learning_rate": 4.989383163841808e-05, + "loss": 0.2446, + "step": 94000 + }, + { + "epoch": 0.01, + "learning_rate": 4.989326666666667e-05, + "loss": 0.2424, + "step": 94500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9892701694915256e-05, + "loss": 0.2472, + "step": 95000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9892136723163847e-05, + "loss": 0.2531, + "step": 95500 + }, + { + "epoch": 0.01, + "learning_rate": 4.989157175141243e-05, + "loss": 0.2609, + "step": 96000 + }, + { + "epoch": 0.01, + "learning_rate": 4.989100790960452e-05, + "loss": 0.2369, + "step": 96500 + }, + { + "epoch": 0.01, + "learning_rate": 4.989044293785311e-05, + "loss": 0.2425, + "step": 97000 + }, + { + "epoch": 0.01, + "learning_rate": 4.988987796610169e-05, + "loss": 0.2486, + "step": 97500 + }, + { + "epoch": 0.01, + "learning_rate": 4.988931299435028e-05, + "loss": 0.2407, + "step": 98000 + }, + { + "epoch": 0.01, + "learning_rate": 4.988874802259887e-05, + "loss": 0.2419, + "step": 98500 + }, + { + "epoch": 0.01, + "learning_rate": 4.988818418079097e-05, + "loss": 0.2393, + "step": 99000 + }, + { + "epoch": 0.01, + "learning_rate": 4.988761920903955e-05, + "loss": 0.246, + "step": 99500 + }, + { + "epoch": 0.01, + "learning_rate": 4.988705423728814e-05, + "loss": 0.2512, + "step": 100000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9886489265536725e-05, + "loss": 0.2512, + "step": 100500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9885924293785316e-05, + "loss": 0.2406, + "step": 101000 + }, + { + "epoch": 0.01, + "learning_rate": 4.98853604519774e-05, + "loss": 0.2385, + "step": 101500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9884795480225994e-05, + "loss": 0.2456, + "step": 102000 + }, + { + "epoch": 0.01, + "learning_rate": 4.988423050847458e-05, + "loss": 0.2394, + "step": 102500 + }, + { + "epoch": 0.01, + "learning_rate": 4.988366553672317e-05, + "loss": 0.24, + "step": 103000 + }, + { + "epoch": 0.01, + "learning_rate": 4.988310056497175e-05, + "loss": 0.2506, + "step": 103500 + }, + { + "epoch": 0.01, + "learning_rate": 4.988253559322034e-05, + "loss": 0.2467, + "step": 104000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9881970621468926e-05, + "loss": 0.2451, + "step": 104500 + }, + { + "epoch": 0.01, + "learning_rate": 4.988140564971752e-05, + "loss": 0.2393, + "step": 105000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9880841807909604e-05, + "loss": 0.2374, + "step": 105500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9880276836158195e-05, + "loss": 0.2488, + "step": 106000 + }, + { + "epoch": 0.01, + "learning_rate": 4.987971186440678e-05, + "loss": 0.2413, + "step": 106500 + }, + { + "epoch": 0.01, + "learning_rate": 4.987914689265537e-05, + "loss": 0.2442, + "step": 107000 + }, + { + "epoch": 0.01, + "learning_rate": 4.987858305084746e-05, + "loss": 0.2399, + "step": 107500 + }, + { + "epoch": 0.01, + "learning_rate": 4.987801807909605e-05, + "loss": 0.2342, + "step": 108000 + }, + { + "epoch": 0.01, + "learning_rate": 4.987745310734464e-05, + "loss": 0.2343, + "step": 108500 + }, + { + "epoch": 0.01, + "learning_rate": 4.987688813559323e-05, + "loss": 0.2306, + "step": 109000 + }, + { + "epoch": 0.01, + "learning_rate": 4.987632316384181e-05, + "loss": 0.2305, + "step": 109500 + }, + { + "epoch": 0.01, + "learning_rate": 4.98757593220339e-05, + "loss": 0.2358, + "step": 110000 + }, + { + "epoch": 0.01, + "learning_rate": 4.987519435028249e-05, + "loss": 0.2524, + "step": 110500 + }, + { + "epoch": 0.01, + "learning_rate": 4.987462937853107e-05, + "loss": 0.2306, + "step": 111000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9874064406779664e-05, + "loss": 0.2395, + "step": 111500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9873499435028255e-05, + "loss": 0.2422, + "step": 112000 + }, + { + "epoch": 0.01, + "learning_rate": 4.987293559322034e-05, + "loss": 0.2463, + "step": 112500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9872370621468926e-05, + "loss": 0.2345, + "step": 113000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9871805649717516e-05, + "loss": 0.2349, + "step": 113500 + }, + { + "epoch": 0.01, + "learning_rate": 4.98712406779661e-05, + "loss": 0.2365, + "step": 114000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9870676836158194e-05, + "loss": 0.24, + "step": 114500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9870111864406784e-05, + "loss": 0.24, + "step": 115000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9869546892655375e-05, + "loss": 0.2332, + "step": 115500 + }, + { + "epoch": 0.01, + "learning_rate": 4.986898305084746e-05, + "loss": 0.246, + "step": 116000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9868418079096046e-05, + "loss": 0.2423, + "step": 116500 + }, + { + "epoch": 0.01, + "learning_rate": 4.986785310734464e-05, + "loss": 0.2419, + "step": 117000 + }, + { + "epoch": 0.01, + "learning_rate": 4.986728813559322e-05, + "loss": 0.2437, + "step": 117500 + }, + { + "epoch": 0.01, + "learning_rate": 4.986672316384181e-05, + "loss": 0.2456, + "step": 118000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9866158192090395e-05, + "loss": 0.2321, + "step": 118500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9865593220338985e-05, + "loss": 0.2369, + "step": 119000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9865028248587576e-05, + "loss": 0.2355, + "step": 119500 + }, + { + "epoch": 0.01, + "learning_rate": 4.986446327683616e-05, + "loss": 0.2312, + "step": 120000 + }, + { + "epoch": 0.01, + "learning_rate": 4.986389943502825e-05, + "loss": 0.2285, + "step": 120500 + }, + { + "epoch": 0.01, + "learning_rate": 4.986333446327684e-05, + "loss": 0.2326, + "step": 121000 + }, + { + "epoch": 0.01, + "learning_rate": 4.986276949152542e-05, + "loss": 0.2343, + "step": 121500 + }, + { + "epoch": 0.01, + "learning_rate": 4.986220451977401e-05, + "loss": 0.2325, + "step": 122000 + }, + { + "epoch": 0.01, + "learning_rate": 4.98616395480226e-05, + "loss": 0.2329, + "step": 122500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9861075706214697e-05, + "loss": 0.2341, + "step": 123000 + }, + { + "epoch": 0.01, + "learning_rate": 4.986051073446328e-05, + "loss": 0.2362, + "step": 123500 + }, + { + "epoch": 0.01, + "learning_rate": 4.985994576271187e-05, + "loss": 0.2265, + "step": 124000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9859380790960455e-05, + "loss": 0.2445, + "step": 124500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9858815819209045e-05, + "loss": 0.2234, + "step": 125000 + }, + { + "epoch": 0.01, + "learning_rate": 4.985825197740113e-05, + "loss": 0.2263, + "step": 125500 + }, + { + "epoch": 0.01, + "learning_rate": 4.985768700564972e-05, + "loss": 0.2249, + "step": 126000 + }, + { + "epoch": 0.01, + "learning_rate": 4.985712203389831e-05, + "loss": 0.225, + "step": 126500 + }, + { + "epoch": 0.01, + "learning_rate": 4.98565570621469e-05, + "loss": 0.2192, + "step": 127000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9855993220338985e-05, + "loss": 0.2351, + "step": 127500 + }, + { + "epoch": 0.01, + "learning_rate": 4.985542824858757e-05, + "loss": 0.2289, + "step": 128000 + }, + { + "epoch": 0.01, + "learning_rate": 4.985486327683616e-05, + "loss": 0.2254, + "step": 128500 + }, + { + "epoch": 0.01, + "learning_rate": 4.985429830508474e-05, + "loss": 0.2199, + "step": 129000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9853733333333333e-05, + "loss": 0.2175, + "step": 129500 + }, + { + "epoch": 0.01, + "learning_rate": 4.985316949152543e-05, + "loss": 0.2288, + "step": 130000 + }, + { + "epoch": 0.01, + "learning_rate": 4.985260451977402e-05, + "loss": 0.2277, + "step": 130500 + }, + { + "epoch": 0.01, + "learning_rate": 4.98520395480226e-05, + "loss": 0.2283, + "step": 131000 + }, + { + "epoch": 0.01, + "learning_rate": 4.985147457627119e-05, + "loss": 0.224, + "step": 131500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9850909604519776e-05, + "loss": 0.2252, + "step": 132000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9850345762711863e-05, + "loss": 0.2283, + "step": 132500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9849780790960454e-05, + "loss": 0.2277, + "step": 133000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9849215819209045e-05, + "loss": 0.2375, + "step": 133500 + }, + { + "epoch": 0.01, + "learning_rate": 4.984865084745763e-05, + "loss": 0.2271, + "step": 134000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9848087005649716e-05, + "loss": 0.2195, + "step": 134500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9847522033898306e-05, + "loss": 0.2234, + "step": 135000 + }, + { + "epoch": 0.01, + "learning_rate": 4.984695706214689e-05, + "loss": 0.23, + "step": 135500 + }, + { + "epoch": 0.01, + "learning_rate": 4.984639209039548e-05, + "loss": 0.2272, + "step": 136000 + }, + { + "epoch": 0.01, + "learning_rate": 4.984582711864407e-05, + "loss": 0.2297, + "step": 136500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9845263276836165e-05, + "loss": 0.2216, + "step": 137000 + }, + { + "epoch": 0.01, + "learning_rate": 4.984469830508475e-05, + "loss": 0.2276, + "step": 137500 + }, + { + "epoch": 0.01, + "learning_rate": 4.984413333333334e-05, + "loss": 0.2197, + "step": 138000 + }, + { + "epoch": 0.01, + "learning_rate": 4.984356836158192e-05, + "loss": 0.2385, + "step": 138500 + }, + { + "epoch": 0.01, + "learning_rate": 4.984300451977401e-05, + "loss": 0.2194, + "step": 139000 + }, + { + "epoch": 0.01, + "learning_rate": 4.98424395480226e-05, + "loss": 0.2254, + "step": 139500 + }, + { + "epoch": 0.01, + "learning_rate": 4.984187457627119e-05, + "loss": 0.2197, + "step": 140000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9841309604519776e-05, + "loss": 0.2209, + "step": 140500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9840744632768366e-05, + "loss": 0.2319, + "step": 141000 + }, + { + "epoch": 0.01, + "learning_rate": 4.984018192090396e-05, + "loss": 0.2211, + "step": 141500 + }, + { + "epoch": 0.01, + "learning_rate": 4.983961694915255e-05, + "loss": 0.2284, + "step": 142000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983905197740113e-05, + "loss": 0.2227, + "step": 142500 + }, + { + "epoch": 0.01, + "learning_rate": 4.983848700564972e-05, + "loss": 0.2294, + "step": 143000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983792203389831e-05, + "loss": 0.2226, + "step": 143500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9837357062146896e-05, + "loss": 0.2215, + "step": 144000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983679209039549e-05, + "loss": 0.231, + "step": 144500 + }, + { + "epoch": 0.01, + "learning_rate": 4.983622711864407e-05, + "loss": 0.2286, + "step": 145000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983566327683616e-05, + "loss": 0.2171, + "step": 145500 + }, + { + "epoch": 0.01, + "learning_rate": 4.983509830508475e-05, + "loss": 0.2293, + "step": 146000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983453333333333e-05, + "loss": 0.2271, + "step": 146500 + }, + { + "epoch": 0.01, + "learning_rate": 4.983396836158192e-05, + "loss": 0.2218, + "step": 147000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983340338983051e-05, + "loss": 0.2184, + "step": 147500 + }, + { + "epoch": 0.01, + "learning_rate": 4.98328395480226e-05, + "loss": 0.2224, + "step": 148000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9832274576271184e-05, + "loss": 0.2212, + "step": 148500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9831709604519775e-05, + "loss": 0.2199, + "step": 149000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983114463276836e-05, + "loss": 0.2293, + "step": 149500 + }, + { + "epoch": 0.01, + "learning_rate": 4.983058079096045e-05, + "loss": 0.2179, + "step": 150000 + }, + { + "epoch": 0.01, + "learning_rate": 4.983001581920904e-05, + "loss": 0.2149, + "step": 150500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9829450847457634e-05, + "loss": 0.2239, + "step": 151000 + }, + { + "epoch": 0.01, + "learning_rate": 4.982888587570622e-05, + "loss": 0.2219, + "step": 151500 + }, + { + "epoch": 0.01, + "learning_rate": 4.982832090395481e-05, + "loss": 0.2198, + "step": 152000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9827757062146895e-05, + "loss": 0.2162, + "step": 152500 + }, + { + "epoch": 0.01, + "learning_rate": 4.982719209039548e-05, + "loss": 0.2279, + "step": 153000 + }, + { + "epoch": 0.01, + "learning_rate": 4.982662711864407e-05, + "loss": 0.2156, + "step": 153500 + }, + { + "epoch": 0.01, + "learning_rate": 4.982606214689266e-05, + "loss": 0.2188, + "step": 154000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9825497175141244e-05, + "loss": 0.2132, + "step": 154500 + }, + { + "epoch": 0.01, + "learning_rate": 4.982493333333333e-05, + "loss": 0.2253, + "step": 155000 + }, + { + "epoch": 0.01, + "learning_rate": 4.982436836158192e-05, + "loss": 0.2166, + "step": 155500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9823803389830506e-05, + "loss": 0.2255, + "step": 156000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9823238418079096e-05, + "loss": 0.2145, + "step": 156500 + }, + { + "epoch": 0.01, + "learning_rate": 4.982267457627119e-05, + "loss": 0.219, + "step": 157000 + }, + { + "epoch": 0.01, + "learning_rate": 4.982210960451978e-05, + "loss": 0.2231, + "step": 157500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9821544632768365e-05, + "loss": 0.2281, + "step": 158000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9820979661016955e-05, + "loss": 0.2163, + "step": 158500 + }, + { + "epoch": 0.01, + "learning_rate": 4.982041581920904e-05, + "loss": 0.2158, + "step": 159000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9819850847457626e-05, + "loss": 0.2215, + "step": 159500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981928587570622e-05, + "loss": 0.2193, + "step": 160000 + }, + { + "epoch": 0.01, + "learning_rate": 4.98187209039548e-05, + "loss": 0.2243, + "step": 160500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981815593220339e-05, + "loss": 0.2158, + "step": 161000 + }, + { + "epoch": 0.01, + "learning_rate": 4.981759209039548e-05, + "loss": 0.2072, + "step": 161500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981702711864407e-05, + "loss": 0.216, + "step": 162000 + }, + { + "epoch": 0.01, + "learning_rate": 4.981646214689266e-05, + "loss": 0.2247, + "step": 162500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9815897175141243e-05, + "loss": 0.2244, + "step": 163000 + }, + { + "epoch": 0.01, + "learning_rate": 4.981533333333334e-05, + "loss": 0.219, + "step": 163500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981476836158193e-05, + "loss": 0.2238, + "step": 164000 + }, + { + "epoch": 0.01, + "learning_rate": 4.981420338983051e-05, + "loss": 0.2241, + "step": 164500 + }, + { + "epoch": 0.01, + "learning_rate": 4.98136384180791e-05, + "loss": 0.212, + "step": 165000 + }, + { + "epoch": 0.01, + "learning_rate": 4.981307457627119e-05, + "loss": 0.2108, + "step": 165500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981250960451977e-05, + "loss": 0.2056, + "step": 166000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9811944632768364e-05, + "loss": 0.2144, + "step": 166500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981137966101695e-05, + "loss": 0.2165, + "step": 167000 + }, + { + "epoch": 0.01, + "learning_rate": 4.981081581920904e-05, + "loss": 0.2084, + "step": 167500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981025084745763e-05, + "loss": 0.222, + "step": 168000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9809685875706216e-05, + "loss": 0.2157, + "step": 168500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980912090395481e-05, + "loss": 0.2174, + "step": 169000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9808557062146894e-05, + "loss": 0.2121, + "step": 169500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9807992090395484e-05, + "loss": 0.2135, + "step": 170000 + }, + { + "epoch": 0.01, + "learning_rate": 4.980742711864407e-05, + "loss": 0.2234, + "step": 170500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980686214689266e-05, + "loss": 0.2147, + "step": 171000 + }, + { + "epoch": 0.01, + "learning_rate": 4.980629717514125e-05, + "loss": 0.2161, + "step": 171500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980573333333334e-05, + "loss": 0.2154, + "step": 172000 + }, + { + "epoch": 0.01, + "learning_rate": 4.980516836158192e-05, + "loss": 0.2136, + "step": 172500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980460338983051e-05, + "loss": 0.2222, + "step": 173000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9804038418079095e-05, + "loss": 0.1996, + "step": 173500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9803473446327685e-05, + "loss": 0.2093, + "step": 174000 + }, + { + "epoch": 0.01, + "learning_rate": 4.980290960451978e-05, + "loss": 0.2238, + "step": 174500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980234463276836e-05, + "loss": 0.2108, + "step": 175000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9801779661016954e-05, + "loss": 0.2035, + "step": 175500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980121468926554e-05, + "loss": 0.2161, + "step": 176000 + }, + { + "epoch": 0.01, + "learning_rate": 4.980065084745763e-05, + "loss": 0.2125, + "step": 176500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9800085875706215e-05, + "loss": 0.2132, + "step": 177000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9799520903954806e-05, + "loss": 0.218, + "step": 177500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9798955932203397e-05, + "loss": 0.2143, + "step": 178000 + }, + { + "epoch": 0.01, + "learning_rate": 4.979839096045198e-05, + "loss": 0.2107, + "step": 178500 + }, + { + "epoch": 0.01, + "learning_rate": 4.979782598870057e-05, + "loss": 0.2084, + "step": 179000 + }, + { + "epoch": 0.01, + "learning_rate": 4.979726214689266e-05, + "loss": 0.2151, + "step": 179500 + }, + { + "epoch": 0.01, + "learning_rate": 4.979669717514124e-05, + "loss": 0.2011, + "step": 180000 + }, + { + "epoch": 0.01, + "learning_rate": 4.979613220338983e-05, + "loss": 0.2127, + "step": 180500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9795567231638416e-05, + "loss": 0.2145, + "step": 181000 + }, + { + "epoch": 0.01, + "learning_rate": 4.979500225988701e-05, + "loss": 0.2178, + "step": 181500 + }, + { + "epoch": 0.01, + "learning_rate": 4.97944384180791e-05, + "loss": 0.202, + "step": 182000 + }, + { + "epoch": 0.01, + "learning_rate": 4.979387457627119e-05, + "loss": 0.2129, + "step": 182500 + }, + { + "epoch": 0.01, + "learning_rate": 4.979330960451978e-05, + "loss": 0.2158, + "step": 183000 + }, + { + "epoch": 0.01, + "learning_rate": 4.979274463276836e-05, + "loss": 0.2142, + "step": 183500 + }, + { + "epoch": 0.01, + "learning_rate": 4.979217966101695e-05, + "loss": 0.203, + "step": 184000 + }, + { + "epoch": 0.01, + "learning_rate": 4.979161468926554e-05, + "loss": 0.2119, + "step": 184500 + }, + { + "epoch": 0.01, + "learning_rate": 4.979104971751413e-05, + "loss": 0.209, + "step": 185000 + }, + { + "epoch": 0.01, + "learning_rate": 4.979048474576272e-05, + "loss": 0.2094, + "step": 185500 + }, + { + "epoch": 0.01, + "learning_rate": 4.97899197740113e-05, + "loss": 0.2008, + "step": 186000 + }, + { + "epoch": 0.01, + "learning_rate": 4.978935593220339e-05, + "loss": 0.2159, + "step": 186500 + }, + { + "epoch": 0.01, + "learning_rate": 4.978879096045198e-05, + "loss": 0.2096, + "step": 187000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9788225988700563e-05, + "loss": 0.2123, + "step": 187500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9787661016949154e-05, + "loss": 0.212, + "step": 188000 + }, + { + "epoch": 0.01, + "learning_rate": 4.978709604519774e-05, + "loss": 0.2066, + "step": 188500 + }, + { + "epoch": 0.01, + "learning_rate": 4.978653220338983e-05, + "loss": 0.2081, + "step": 189000 + }, + { + "epoch": 0.01, + "learning_rate": 4.978596723163842e-05, + "loss": 0.2061, + "step": 189500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9785402259887006e-05, + "loss": 0.2067, + "step": 190000 + }, + { + "epoch": 0.01, + "learning_rate": 4.97848372881356e-05, + "loss": 0.2133, + "step": 190500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9784273446327684e-05, + "loss": 0.2085, + "step": 191000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9783708474576275e-05, + "loss": 0.2088, + "step": 191500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9783143502824865e-05, + "loss": 0.2039, + "step": 192000 + }, + { + "epoch": 0.01, + "learning_rate": 4.978257853107345e-05, + "loss": 0.2116, + "step": 192500 + }, + { + "epoch": 0.01, + "learning_rate": 4.978201355932204e-05, + "loss": 0.2098, + "step": 193000 + }, + { + "epoch": 0.01, + "learning_rate": 4.978144858757062e-05, + "loss": 0.2066, + "step": 193500 + }, + { + "epoch": 0.01, + "learning_rate": 4.978088474576271e-05, + "loss": 0.1969, + "step": 194000 + }, + { + "epoch": 0.01, + "learning_rate": 4.97803197740113e-05, + "loss": 0.2102, + "step": 194500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9779754802259885e-05, + "loss": 0.2084, + "step": 195000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9779189830508476e-05, + "loss": 0.2135, + "step": 195500 + }, + { + "epoch": 0.01, + "learning_rate": 4.977862598870057e-05, + "loss": 0.211, + "step": 196000 + }, + { + "epoch": 0.01, + "learning_rate": 4.977806101694916e-05, + "loss": 0.2096, + "step": 196500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9777496045197744e-05, + "loss": 0.2061, + "step": 197000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9776931073446334e-05, + "loss": 0.2109, + "step": 197500 + }, + { + "epoch": 0.01, + "learning_rate": 4.977636610169492e-05, + "loss": 0.2003, + "step": 198000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9775802259887005e-05, + "loss": 0.2047, + "step": 198500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9775237288135596e-05, + "loss": 0.2035, + "step": 199000 + }, + { + "epoch": 0.01, + "learning_rate": 4.977467231638419e-05, + "loss": 0.2191, + "step": 199500 + }, + { + "epoch": 0.01, + "learning_rate": 4.977410734463277e-05, + "loss": 0.2075, + "step": 200000 + }, + { + "epoch": 0.01, + "learning_rate": 4.977354237288136e-05, + "loss": 0.2098, + "step": 200500 + }, + { + "epoch": 0.01, + "learning_rate": 4.977297853107345e-05, + "loss": 0.2058, + "step": 201000 + }, + { + "epoch": 0.01, + "learning_rate": 4.977241355932203e-05, + "loss": 0.203, + "step": 201500 + }, + { + "epoch": 0.01, + "learning_rate": 4.977184858757062e-05, + "loss": 0.2074, + "step": 202000 + }, + { + "epoch": 0.01, + "learning_rate": 4.977128361581921e-05, + "loss": 0.2019, + "step": 202500 + }, + { + "epoch": 0.01, + "learning_rate": 4.97707186440678e-05, + "loss": 0.2046, + "step": 203000 + }, + { + "epoch": 0.01, + "learning_rate": 4.977015480225989e-05, + "loss": 0.207, + "step": 203500 + }, + { + "epoch": 0.01, + "learning_rate": 4.976958983050848e-05, + "loss": 0.2085, + "step": 204000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9769024858757065e-05, + "loss": 0.2112, + "step": 204500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9768459887005656e-05, + "loss": 0.2111, + "step": 205000 + }, + { + "epoch": 0.01, + "learning_rate": 4.976789491525424e-05, + "loss": 0.2064, + "step": 205500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9767331073446334e-05, + "loss": 0.213, + "step": 206000 + }, + { + "epoch": 0.01, + "learning_rate": 4.976676610169492e-05, + "loss": 0.21, + "step": 206500 + }, + { + "epoch": 0.01, + "learning_rate": 4.976620112994351e-05, + "loss": 0.2064, + "step": 207000 + }, + { + "epoch": 0.01, + "learning_rate": 4.976563615819209e-05, + "loss": 0.2016, + "step": 207500 + }, + { + "epoch": 0.01, + "learning_rate": 4.976507231638418e-05, + "loss": 0.1975, + "step": 208000 + }, + { + "epoch": 0.01, + "learning_rate": 4.976450734463277e-05, + "loss": 0.1976, + "step": 208500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9763942372881354e-05, + "loss": 0.2073, + "step": 209000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9763377401129944e-05, + "loss": 0.2081, + "step": 209500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9762812429378535e-05, + "loss": 0.2157, + "step": 210000 + }, + { + "epoch": 0.01, + "learning_rate": 4.976224745762712e-05, + "loss": 0.2011, + "step": 210500 + }, + { + "epoch": 0.01, + "learning_rate": 4.976168248587571e-05, + "loss": 0.2008, + "step": 211000 + }, + { + "epoch": 0.01, + "learning_rate": 4.976111751412429e-05, + "loss": 0.1979, + "step": 211500 + }, + { + "epoch": 0.01, + "learning_rate": 4.976055367231639e-05, + "loss": 0.1947, + "step": 212000 + }, + { + "epoch": 0.01, + "learning_rate": 4.975998870056498e-05, + "loss": 0.1923, + "step": 212500 + }, + { + "epoch": 0.01, + "learning_rate": 4.975942372881356e-05, + "loss": 0.2057, + "step": 213000 + }, + { + "epoch": 0.01, + "learning_rate": 4.975885875706215e-05, + "loss": 0.2052, + "step": 213500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9758293785310736e-05, + "loss": 0.2111, + "step": 214000 + }, + { + "epoch": 0.01, + "learning_rate": 4.975772994350283e-05, + "loss": 0.1997, + "step": 214500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9757164971751413e-05, + "loss": 0.2013, + "step": 215000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9756600000000004e-05, + "loss": 0.2007, + "step": 215500 + }, + { + "epoch": 0.01, + "learning_rate": 4.975603502824859e-05, + "loss": 0.2034, + "step": 216000 + }, + { + "epoch": 0.01, + "learning_rate": 4.975547005649718e-05, + "loss": 0.2054, + "step": 216500 + }, + { + "epoch": 0.01, + "learning_rate": 4.975490508474577e-05, + "loss": 0.2007, + "step": 217000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9754341242937856e-05, + "loss": 0.1929, + "step": 217500 + }, + { + "epoch": 0.01, + "learning_rate": 4.975377627118644e-05, + "loss": 0.2041, + "step": 218000 + }, + { + "epoch": 0.01, + "learning_rate": 4.975321129943503e-05, + "loss": 0.1977, + "step": 218500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9752646327683614e-05, + "loss": 0.1938, + "step": 219000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9752081355932205e-05, + "loss": 0.2061, + "step": 219500 + }, + { + "epoch": 0.01, + "learning_rate": 4.97515175141243e-05, + "loss": 0.2037, + "step": 220000 + }, + { + "epoch": 0.01, + "learning_rate": 4.975095254237289e-05, + "loss": 0.2086, + "step": 220500 + }, + { + "epoch": 0.01, + "learning_rate": 4.975038757062147e-05, + "loss": 0.1962, + "step": 221000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9749822598870064e-05, + "loss": 0.2073, + "step": 221500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974925875706215e-05, + "loss": 0.1907, + "step": 222000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9748693785310735e-05, + "loss": 0.1939, + "step": 222500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9748128813559326e-05, + "loss": 0.198, + "step": 223000 + }, + { + "epoch": 0.02, + "learning_rate": 4.974756384180791e-05, + "loss": 0.2071, + "step": 223500 + }, + { + "epoch": 0.02, + "learning_rate": 4.97469988700565e-05, + "loss": 0.2023, + "step": 224000 + }, + { + "epoch": 0.02, + "learning_rate": 4.974643502824859e-05, + "loss": 0.1931, + "step": 224500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974587005649718e-05, + "loss": 0.1969, + "step": 225000 + }, + { + "epoch": 0.02, + "learning_rate": 4.974530508474576e-05, + "loss": 0.1919, + "step": 225500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974474011299435e-05, + "loss": 0.1974, + "step": 226000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9744176271186446e-05, + "loss": 0.1898, + "step": 226500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974361129943504e-05, + "loss": 0.2037, + "step": 227000 + }, + { + "epoch": 0.02, + "learning_rate": 4.974304632768362e-05, + "loss": 0.199, + "step": 227500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974248135593221e-05, + "loss": 0.1962, + "step": 228000 + }, + { + "epoch": 0.02, + "learning_rate": 4.97419175141243e-05, + "loss": 0.1923, + "step": 228500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974135254237288e-05, + "loss": 0.1849, + "step": 229000 + }, + { + "epoch": 0.02, + "learning_rate": 4.974078757062147e-05, + "loss": 0.1937, + "step": 229500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9740222598870056e-05, + "loss": 0.212, + "step": 230000 + }, + { + "epoch": 0.02, + "learning_rate": 4.973965762711865e-05, + "loss": 0.2, + "step": 230500 + }, + { + "epoch": 0.02, + "learning_rate": 4.973909265536724e-05, + "loss": 0.1996, + "step": 231000 + }, + { + "epoch": 0.02, + "learning_rate": 4.973852768361582e-05, + "loss": 0.1985, + "step": 231500 + }, + { + "epoch": 0.02, + "learning_rate": 4.973796271186441e-05, + "loss": 0.2012, + "step": 232000 + }, + { + "epoch": 0.02, + "learning_rate": 4.97373988700565e-05, + "loss": 0.1883, + "step": 232500 + }, + { + "epoch": 0.02, + "learning_rate": 4.973683389830508e-05, + "loss": 0.1851, + "step": 233000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9736268926553674e-05, + "loss": 0.1963, + "step": 233500 + }, + { + "epoch": 0.02, + "learning_rate": 4.973570395480226e-05, + "loss": 0.2054, + "step": 234000 + }, + { + "epoch": 0.02, + "learning_rate": 4.973513898305085e-05, + "loss": 0.1984, + "step": 234500 + }, + { + "epoch": 0.02, + "learning_rate": 4.973457514124294e-05, + "loss": 0.1985, + "step": 235000 + }, + { + "epoch": 0.02, + "learning_rate": 4.973401016949153e-05, + "loss": 0.1926, + "step": 235500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9733445197740116e-05, + "loss": 0.203, + "step": 236000 + }, + { + "epoch": 0.02, + "learning_rate": 4.973288022598871e-05, + "loss": 0.1988, + "step": 236500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9732316384180794e-05, + "loss": 0.1957, + "step": 237000 + }, + { + "epoch": 0.02, + "learning_rate": 4.973175141242938e-05, + "loss": 0.1924, + "step": 237500 + }, + { + "epoch": 0.02, + "learning_rate": 4.973118644067797e-05, + "loss": 0.1918, + "step": 238000 + }, + { + "epoch": 0.02, + "learning_rate": 4.973062146892656e-05, + "loss": 0.1969, + "step": 238500 + }, + { + "epoch": 0.02, + "learning_rate": 4.973005649717514e-05, + "loss": 0.2039, + "step": 239000 + }, + { + "epoch": 0.02, + "learning_rate": 4.972949265536723e-05, + "loss": 0.1937, + "step": 239500 + }, + { + "epoch": 0.02, + "learning_rate": 4.972892768361582e-05, + "loss": 0.1996, + "step": 240000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9728362711864404e-05, + "loss": 0.1993, + "step": 240500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9727797740112995e-05, + "loss": 0.1901, + "step": 241000 + }, + { + "epoch": 0.02, + "learning_rate": 4.972723389830509e-05, + "loss": 0.2075, + "step": 241500 + }, + { + "epoch": 0.02, + "learning_rate": 4.972666892655368e-05, + "loss": 0.1993, + "step": 242000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9726103954802263e-05, + "loss": 0.1892, + "step": 242500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9725538983050854e-05, + "loss": 0.2005, + "step": 243000 + }, + { + "epoch": 0.02, + "learning_rate": 4.972497401129944e-05, + "loss": 0.1944, + "step": 243500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9724410169491525e-05, + "loss": 0.201, + "step": 244000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9723845197740116e-05, + "loss": 0.204, + "step": 244500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9723280225988706e-05, + "loss": 0.2029, + "step": 245000 + }, + { + "epoch": 0.02, + "learning_rate": 4.972271525423729e-05, + "loss": 0.2045, + "step": 245500 + }, + { + "epoch": 0.02, + "learning_rate": 4.972215141242938e-05, + "loss": 0.1943, + "step": 246000 + }, + { + "epoch": 0.02, + "learning_rate": 4.972158644067797e-05, + "loss": 0.1921, + "step": 246500 + }, + { + "epoch": 0.02, + "learning_rate": 4.972102146892655e-05, + "loss": 0.1958, + "step": 247000 + }, + { + "epoch": 0.02, + "learning_rate": 4.972045649717514e-05, + "loss": 0.1893, + "step": 247500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9719891525423726e-05, + "loss": 0.198, + "step": 248000 + }, + { + "epoch": 0.02, + "learning_rate": 4.971932768361583e-05, + "loss": 0.1972, + "step": 248500 + }, + { + "epoch": 0.02, + "learning_rate": 4.971876271186441e-05, + "loss": 0.1959, + "step": 249000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9718197740113e-05, + "loss": 0.1942, + "step": 249500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9717632768361585e-05, + "loss": 0.2012, + "step": 250000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9717067796610175e-05, + "loss": 0.1999, + "step": 250500 + }, + { + "epoch": 0.02, + "learning_rate": 4.971650395480226e-05, + "loss": 0.201, + "step": 251000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9715938983050847e-05, + "loss": 0.191, + "step": 251500 + }, + { + "epoch": 0.02, + "learning_rate": 4.971537401129944e-05, + "loss": 0.1892, + "step": 252000 + }, + { + "epoch": 0.02, + "learning_rate": 4.971480903954803e-05, + "loss": 0.1975, + "step": 252500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9714245197740115e-05, + "loss": 0.1934, + "step": 253000 + }, + { + "epoch": 0.02, + "learning_rate": 4.97136802259887e-05, + "loss": 0.1846, + "step": 253500 + }, + { + "epoch": 0.02, + "learning_rate": 4.971311525423729e-05, + "loss": 0.1863, + "step": 254000 + }, + { + "epoch": 0.02, + "learning_rate": 4.971255028248587e-05, + "loss": 0.18, + "step": 254500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9711985310734464e-05, + "loss": 0.1864, + "step": 255000 + }, + { + "epoch": 0.02, + "learning_rate": 4.971142146892656e-05, + "loss": 0.1797, + "step": 255500 + }, + { + "epoch": 0.02, + "learning_rate": 4.971085649717515e-05, + "loss": 0.197, + "step": 256000 + }, + { + "epoch": 0.02, + "learning_rate": 4.971029152542373e-05, + "loss": 0.1857, + "step": 256500 + }, + { + "epoch": 0.02, + "learning_rate": 4.970972655367232e-05, + "loss": 0.1929, + "step": 257000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9709161581920906e-05, + "loss": 0.1903, + "step": 257500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9708597740112994e-05, + "loss": 0.1879, + "step": 258000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9708032768361584e-05, + "loss": 0.198, + "step": 258500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9707467796610175e-05, + "loss": 0.1957, + "step": 259000 + }, + { + "epoch": 0.02, + "learning_rate": 4.970690282485876e-05, + "loss": 0.186, + "step": 259500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9706338983050846e-05, + "loss": 0.1859, + "step": 260000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9705774011299436e-05, + "loss": 0.1833, + "step": 260500 + }, + { + "epoch": 0.02, + "learning_rate": 4.970520903954802e-05, + "loss": 0.1869, + "step": 261000 + }, + { + "epoch": 0.02, + "learning_rate": 4.970464406779661e-05, + "loss": 0.1895, + "step": 261500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9704079096045195e-05, + "loss": 0.1896, + "step": 262000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9703514124293785e-05, + "loss": 0.1864, + "step": 262500 + }, + { + "epoch": 0.02, + "learning_rate": 4.970295028248588e-05, + "loss": 0.1915, + "step": 263000 + }, + { + "epoch": 0.02, + "learning_rate": 4.970238531073447e-05, + "loss": 0.188, + "step": 263500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9701820338983054e-05, + "loss": 0.1895, + "step": 264000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9701255367231644e-05, + "loss": 0.1918, + "step": 264500 + }, + { + "epoch": 0.02, + "learning_rate": 4.970069039548023e-05, + "loss": 0.1955, + "step": 265000 + }, + { + "epoch": 0.02, + "learning_rate": 4.970012655367232e-05, + "loss": 0.1879, + "step": 265500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9699561581920906e-05, + "loss": 0.1913, + "step": 266000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9698996610169496e-05, + "loss": 0.1873, + "step": 266500 + }, + { + "epoch": 0.02, + "learning_rate": 4.969843163841808e-05, + "loss": 0.1939, + "step": 267000 + }, + { + "epoch": 0.02, + "learning_rate": 4.969786666666667e-05, + "loss": 0.1865, + "step": 267500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9697301694915254e-05, + "loss": 0.1934, + "step": 268000 + }, + { + "epoch": 0.02, + "learning_rate": 4.969673785310734e-05, + "loss": 0.1905, + "step": 268500 + }, + { + "epoch": 0.02, + "learning_rate": 4.969617288135593e-05, + "loss": 0.1829, + "step": 269000 + }, + { + "epoch": 0.02, + "learning_rate": 4.969560790960452e-05, + "loss": 0.1903, + "step": 269500 + }, + { + "epoch": 0.02, + "learning_rate": 4.969504293785311e-05, + "loss": 0.1909, + "step": 270000 + }, + { + "epoch": 0.02, + "learning_rate": 4.96944779661017e-05, + "loss": 0.1989, + "step": 270500 + }, + { + "epoch": 0.02, + "learning_rate": 4.969391412429379e-05, + "loss": 0.192, + "step": 271000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9693349152542375e-05, + "loss": 0.1898, + "step": 271500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9692784180790966e-05, + "loss": 0.189, + "step": 272000 + }, + { + "epoch": 0.02, + "learning_rate": 4.969221920903955e-05, + "loss": 0.1874, + "step": 272500 + }, + { + "epoch": 0.02, + "learning_rate": 4.969165536723164e-05, + "loss": 0.1868, + "step": 273000 + }, + { + "epoch": 0.02, + "learning_rate": 4.969109152542373e-05, + "loss": 0.1936, + "step": 273500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9690526553672314e-05, + "loss": 0.2004, + "step": 274000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9689961581920905e-05, + "loss": 0.1902, + "step": 274500 + }, + { + "epoch": 0.02, + "learning_rate": 4.968939661016949e-05, + "loss": 0.1762, + "step": 275000 + }, + { + "epoch": 0.02, + "learning_rate": 4.968883163841808e-05, + "loss": 0.1911, + "step": 275500 + }, + { + "epoch": 0.02, + "learning_rate": 4.968826666666666e-05, + "loss": 0.1937, + "step": 276000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9687701694915254e-05, + "loss": 0.1808, + "step": 276500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9687136723163844e-05, + "loss": 0.1935, + "step": 277000 + }, + { + "epoch": 0.02, + "learning_rate": 4.968657288135594e-05, + "loss": 0.1995, + "step": 277500 + }, + { + "epoch": 0.02, + "learning_rate": 4.968600790960452e-05, + "loss": 0.1892, + "step": 278000 + }, + { + "epoch": 0.02, + "learning_rate": 4.968544293785311e-05, + "loss": 0.1894, + "step": 278500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9684877966101696e-05, + "loss": 0.1892, + "step": 279000 + }, + { + "epoch": 0.02, + "learning_rate": 4.968431299435029e-05, + "loss": 0.1936, + "step": 279500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9683749152542374e-05, + "loss": 0.1881, + "step": 280000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9683184180790965e-05, + "loss": 0.1814, + "step": 280500 + }, + { + "epoch": 0.02, + "learning_rate": 4.968261920903955e-05, + "loss": 0.1935, + "step": 281000 + }, + { + "epoch": 0.02, + "learning_rate": 4.968205423728814e-05, + "loss": 0.1932, + "step": 281500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9681490395480226e-05, + "loss": 0.1786, + "step": 282000 + }, + { + "epoch": 0.02, + "learning_rate": 4.968092542372881e-05, + "loss": 0.1935, + "step": 282500 + }, + { + "epoch": 0.02, + "learning_rate": 4.96803604519774e-05, + "loss": 0.1805, + "step": 283000 + }, + { + "epoch": 0.02, + "learning_rate": 4.967979548022599e-05, + "loss": 0.1854, + "step": 283500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9679230508474575e-05, + "loss": 0.1924, + "step": 284000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9678665536723166e-05, + "loss": 0.1838, + "step": 284500 + }, + { + "epoch": 0.02, + "learning_rate": 4.967810169491526e-05, + "loss": 0.1806, + "step": 285000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9677536723163844e-05, + "loss": 0.1826, + "step": 285500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9676971751412434e-05, + "loss": 0.1945, + "step": 286000 + }, + { + "epoch": 0.02, + "learning_rate": 4.967640677966102e-05, + "loss": 0.1886, + "step": 286500 + }, + { + "epoch": 0.02, + "learning_rate": 4.967584180790961e-05, + "loss": 0.1838, + "step": 287000 + }, + { + "epoch": 0.02, + "learning_rate": 4.96752768361582e-05, + "loss": 0.1843, + "step": 287500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9674712994350286e-05, + "loss": 0.1885, + "step": 288000 + }, + { + "epoch": 0.02, + "learning_rate": 4.967414802259887e-05, + "loss": 0.1826, + "step": 288500 + }, + { + "epoch": 0.02, + "learning_rate": 4.967358305084746e-05, + "loss": 0.1889, + "step": 289000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9673018079096045e-05, + "loss": 0.1836, + "step": 289500 + }, + { + "epoch": 0.02, + "learning_rate": 4.967245423728814e-05, + "loss": 0.1816, + "step": 290000 + }, + { + "epoch": 0.02, + "learning_rate": 4.967188926553672e-05, + "loss": 0.1826, + "step": 290500 + }, + { + "epoch": 0.02, + "learning_rate": 4.967132429378531e-05, + "loss": 0.185, + "step": 291000 + }, + { + "epoch": 0.02, + "learning_rate": 4.96707593220339e-05, + "loss": 0.1829, + "step": 291500 + }, + { + "epoch": 0.02, + "learning_rate": 4.967019548022599e-05, + "loss": 0.1879, + "step": 292000 + }, + { + "epoch": 0.02, + "learning_rate": 4.966963050847458e-05, + "loss": 0.1877, + "step": 292500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9669065536723165e-05, + "loss": 0.1871, + "step": 293000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9668500564971756e-05, + "loss": 0.1782, + "step": 293500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9667935593220346e-05, + "loss": 0.1809, + "step": 294000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9667371751412433e-05, + "loss": 0.184, + "step": 294500 + }, + { + "epoch": 0.02, + "learning_rate": 4.966680677966102e-05, + "loss": 0.1762, + "step": 295000 + }, + { + "epoch": 0.02, + "learning_rate": 4.966624180790961e-05, + "loss": 0.1848, + "step": 295500 + }, + { + "epoch": 0.02, + "learning_rate": 4.966567683615819e-05, + "loss": 0.1961, + "step": 296000 + }, + { + "epoch": 0.02, + "learning_rate": 4.966511186440678e-05, + "loss": 0.1808, + "step": 296500 + }, + { + "epoch": 0.02, + "learning_rate": 4.966454802259887e-05, + "loss": 0.1753, + "step": 297000 + }, + { + "epoch": 0.02, + "learning_rate": 4.966398305084746e-05, + "loss": 0.1811, + "step": 297500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9663418079096044e-05, + "loss": 0.1784, + "step": 298000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9662853107344634e-05, + "loss": 0.1861, + "step": 298500 + }, + { + "epoch": 0.02, + "learning_rate": 4.966228926553673e-05, + "loss": 0.1775, + "step": 299000 + }, + { + "epoch": 0.02, + "learning_rate": 4.966172429378531e-05, + "loss": 0.1912, + "step": 299500 + }, + { + "epoch": 0.02, + "learning_rate": 4.96611593220339e-05, + "loss": 0.1923, + "step": 300000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9660594350282487e-05, + "loss": 0.1857, + "step": 300500 + }, + { + "epoch": 0.02, + "learning_rate": 4.966002937853108e-05, + "loss": 0.1795, + "step": 301000 + }, + { + "epoch": 0.02, + "learning_rate": 4.965946440677967e-05, + "loss": 0.1864, + "step": 301500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9658900564971755e-05, + "loss": 0.1891, + "step": 302000 + }, + { + "epoch": 0.02, + "learning_rate": 4.965833559322034e-05, + "loss": 0.1806, + "step": 302500 + }, + { + "epoch": 0.02, + "learning_rate": 4.965777062146893e-05, + "loss": 0.1745, + "step": 303000 + }, + { + "epoch": 0.02, + "learning_rate": 4.965720564971751e-05, + "loss": 0.1869, + "step": 303500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9656640677966104e-05, + "loss": 0.1863, + "step": 304000 + }, + { + "epoch": 0.02, + "learning_rate": 4.965607683615819e-05, + "loss": 0.1871, + "step": 304500 + }, + { + "epoch": 0.02, + "learning_rate": 4.965551186440678e-05, + "loss": 0.1811, + "step": 305000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9654946892655365e-05, + "loss": 0.1871, + "step": 305500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9654381920903956e-05, + "loss": 0.1828, + "step": 306000 + }, + { + "epoch": 0.02, + "learning_rate": 4.965381807909605e-05, + "loss": 0.1833, + "step": 306500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9653253107344634e-05, + "loss": 0.1709, + "step": 307000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9652688135593224e-05, + "loss": 0.1862, + "step": 307500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9652123163841815e-05, + "loss": 0.1799, + "step": 308000 + }, + { + "epoch": 0.02, + "learning_rate": 4.96515581920904e-05, + "loss": 0.1784, + "step": 308500 + }, + { + "epoch": 0.02, + "learning_rate": 4.965099322033899e-05, + "loss": 0.1896, + "step": 309000 + }, + { + "epoch": 0.02, + "learning_rate": 4.965042824858757e-05, + "loss": 0.1867, + "step": 309500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9649863276836164e-05, + "loss": 0.1747, + "step": 310000 + }, + { + "epoch": 0.02, + "learning_rate": 4.964929943502825e-05, + "loss": 0.1837, + "step": 310500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9648734463276835e-05, + "loss": 0.1832, + "step": 311000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9648169491525425e-05, + "loss": 0.1856, + "step": 311500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9647604519774016e-05, + "loss": 0.1819, + "step": 312000 + }, + { + "epoch": 0.02, + "learning_rate": 4.96470395480226e-05, + "loss": 0.1741, + "step": 312500 + }, + { + "epoch": 0.02, + "learning_rate": 4.964647570621469e-05, + "loss": 0.1853, + "step": 313000 + }, + { + "epoch": 0.02, + "learning_rate": 4.964591073446328e-05, + "loss": 0.1817, + "step": 313500 + }, + { + "epoch": 0.02, + "learning_rate": 4.964534689265537e-05, + "loss": 0.182, + "step": 314000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9644781920903955e-05, + "loss": 0.18, + "step": 314500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9644216949152546e-05, + "loss": 0.1769, + "step": 315000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9643651977401136e-05, + "loss": 0.1796, + "step": 315500 + }, + { + "epoch": 0.02, + "learning_rate": 4.964308700564972e-05, + "loss": 0.176, + "step": 316000 + }, + { + "epoch": 0.02, + "learning_rate": 4.964252203389831e-05, + "loss": 0.1815, + "step": 316500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9641957062146895e-05, + "loss": 0.1761, + "step": 317000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9641392090395485e-05, + "loss": 0.1798, + "step": 317500 + }, + { + "epoch": 0.02, + "learning_rate": 4.964082711864407e-05, + "loss": 0.1819, + "step": 318000 + }, + { + "epoch": 0.02, + "learning_rate": 4.964026214689266e-05, + "loss": 0.1744, + "step": 318500 + }, + { + "epoch": 0.02, + "learning_rate": 4.963969717514125e-05, + "loss": 0.1824, + "step": 319000 + }, + { + "epoch": 0.02, + "learning_rate": 4.963913333333334e-05, + "loss": 0.1898, + "step": 319500 + }, + { + "epoch": 0.02, + "learning_rate": 4.963856836158192e-05, + "loss": 0.18, + "step": 320000 + }, + { + "epoch": 0.02, + "learning_rate": 4.963800338983051e-05, + "loss": 0.1716, + "step": 320500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9637438418079095e-05, + "loss": 0.1864, + "step": 321000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9636873446327686e-05, + "loss": 0.1743, + "step": 321500 + }, + { + "epoch": 0.02, + "learning_rate": 4.963630960451977e-05, + "loss": 0.1876, + "step": 322000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9635744632768364e-05, + "loss": 0.1753, + "step": 322500 + }, + { + "epoch": 0.02, + "learning_rate": 4.963517966101695e-05, + "loss": 0.1828, + "step": 323000 + }, + { + "epoch": 0.02, + "learning_rate": 4.963461468926554e-05, + "loss": 0.1863, + "step": 323500 + }, + { + "epoch": 0.02, + "learning_rate": 4.963404971751412e-05, + "loss": 0.1797, + "step": 324000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9633485875706216e-05, + "loss": 0.1839, + "step": 324500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9632920903954807e-05, + "loss": 0.1852, + "step": 325000 + }, + { + "epoch": 0.02, + "learning_rate": 4.96323559322034e-05, + "loss": 0.1821, + "step": 325500 + }, + { + "epoch": 0.02, + "learning_rate": 4.963179096045198e-05, + "loss": 0.1861, + "step": 326000 + }, + { + "epoch": 0.02, + "learning_rate": 4.963122598870057e-05, + "loss": 0.1787, + "step": 326500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9630661016949155e-05, + "loss": 0.1687, + "step": 327000 + }, + { + "epoch": 0.02, + "learning_rate": 4.963009717514124e-05, + "loss": 0.1755, + "step": 327500 + }, + { + "epoch": 0.02, + "learning_rate": 4.962953220338983e-05, + "loss": 0.1779, + "step": 328000 + }, + { + "epoch": 0.02, + "learning_rate": 4.962896723163842e-05, + "loss": 0.1727, + "step": 328500 + }, + { + "epoch": 0.02, + "learning_rate": 4.962840225988701e-05, + "loss": 0.1863, + "step": 329000 + }, + { + "epoch": 0.02, + "learning_rate": 4.96278372881356e-05, + "loss": 0.183, + "step": 329500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9627273446327685e-05, + "loss": 0.1881, + "step": 330000 + }, + { + "epoch": 0.02, + "learning_rate": 4.962670847457627e-05, + "loss": 0.179, + "step": 330500 + }, + { + "epoch": 0.02, + "learning_rate": 4.962614350282486e-05, + "loss": 0.1911, + "step": 331000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9625578531073444e-05, + "loss": 0.1796, + "step": 331500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9625013559322034e-05, + "loss": 0.1782, + "step": 332000 + }, + { + "epoch": 0.02, + "learning_rate": 4.962444971751413e-05, + "loss": 0.1721, + "step": 332500 + }, + { + "epoch": 0.02, + "learning_rate": 4.962388474576272e-05, + "loss": 0.1731, + "step": 333000 + }, + { + "epoch": 0.02, + "learning_rate": 4.96233197740113e-05, + "loss": 0.1832, + "step": 333500 + }, + { + "epoch": 0.02, + "learning_rate": 4.962275480225989e-05, + "loss": 0.1782, + "step": 334000 + }, + { + "epoch": 0.02, + "learning_rate": 4.962218983050848e-05, + "loss": 0.1764, + "step": 334500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9621625988700564e-05, + "loss": 0.1806, + "step": 335000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9621061016949155e-05, + "loss": 0.1832, + "step": 335500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9620496045197745e-05, + "loss": 0.1894, + "step": 336000 + }, + { + "epoch": 0.02, + "learning_rate": 4.961993107344633e-05, + "loss": 0.1849, + "step": 336500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9619367231638416e-05, + "loss": 0.1889, + "step": 337000 + }, + { + "epoch": 0.02, + "learning_rate": 4.961880225988701e-05, + "loss": 0.1813, + "step": 337500 + }, + { + "epoch": 0.02, + "learning_rate": 4.961823728813559e-05, + "loss": 0.1798, + "step": 338000 + }, + { + "epoch": 0.02, + "learning_rate": 4.961767231638418e-05, + "loss": 0.1696, + "step": 338500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9617107344632765e-05, + "loss": 0.1722, + "step": 339000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9616543502824866e-05, + "loss": 0.1783, + "step": 339500 + }, + { + "epoch": 0.02, + "learning_rate": 4.961597853107345e-05, + "loss": 0.1755, + "step": 340000 + }, + { + "epoch": 0.02, + "learning_rate": 4.961541355932204e-05, + "loss": 0.171, + "step": 340500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9614848587570624e-05, + "loss": 0.1797, + "step": 341000 + }, + { + "epoch": 0.02, + "learning_rate": 4.961428474576271e-05, + "loss": 0.1726, + "step": 341500 + }, + { + "epoch": 0.02, + "learning_rate": 4.96137197740113e-05, + "loss": 0.1728, + "step": 342000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9613154802259886e-05, + "loss": 0.1756, + "step": 342500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9612589830508476e-05, + "loss": 0.1759, + "step": 343000 + }, + { + "epoch": 0.02, + "learning_rate": 4.961202485875707e-05, + "loss": 0.1885, + "step": 343500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9611461016949154e-05, + "loss": 0.1714, + "step": 344000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9610896045197745e-05, + "loss": 0.1807, + "step": 344500 + }, + { + "epoch": 0.02, + "learning_rate": 4.961033107344633e-05, + "loss": 0.1757, + "step": 345000 + }, + { + "epoch": 0.02, + "learning_rate": 4.960976610169492e-05, + "loss": 0.1731, + "step": 345500 + }, + { + "epoch": 0.02, + "learning_rate": 4.96092011299435e-05, + "loss": 0.1755, + "step": 346000 + }, + { + "epoch": 0.02, + "learning_rate": 4.960863615819209e-05, + "loss": 0.1764, + "step": 346500 + }, + { + "epoch": 0.02, + "learning_rate": 4.960807118644068e-05, + "loss": 0.178, + "step": 347000 + }, + { + "epoch": 0.02, + "learning_rate": 4.960750621468927e-05, + "loss": 0.1787, + "step": 347500 + }, + { + "epoch": 0.02, + "learning_rate": 4.960694237288136e-05, + "loss": 0.1693, + "step": 348000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9606377401129945e-05, + "loss": 0.1718, + "step": 348500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9605812429378536e-05, + "loss": 0.1739, + "step": 349000 + }, + { + "epoch": 0.02, + "learning_rate": 4.960524745762712e-05, + "loss": 0.1741, + "step": 349500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9604683615819214e-05, + "loss": 0.1699, + "step": 350000 + }, + { + "epoch": 0.02, + "learning_rate": 4.96041186440678e-05, + "loss": 0.1769, + "step": 350500 + }, + { + "epoch": 0.02, + "learning_rate": 4.960355367231639e-05, + "loss": 0.1732, + "step": 351000 + }, + { + "epoch": 0.02, + "learning_rate": 4.960298870056497e-05, + "loss": 0.1819, + "step": 351500 + }, + { + "epoch": 0.02, + "learning_rate": 4.960242372881356e-05, + "loss": 0.1791, + "step": 352000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9601858757062146e-05, + "loss": 0.1741, + "step": 352500 + }, + { + "epoch": 0.02, + "learning_rate": 4.960129491525424e-05, + "loss": 0.173, + "step": 353000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9600729943502824e-05, + "loss": 0.1654, + "step": 353500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9600164971751415e-05, + "loss": 0.1755, + "step": 354000 + }, + { + "epoch": 0.02, + "learning_rate": 4.95996e-05, + "loss": 0.1792, + "step": 354500 + }, + { + "epoch": 0.02, + "learning_rate": 4.959903615819209e-05, + "loss": 0.1741, + "step": 355000 + }, + { + "epoch": 0.02, + "learning_rate": 4.959847118644068e-05, + "loss": 0.1681, + "step": 355500 + }, + { + "epoch": 0.02, + "learning_rate": 4.959790621468927e-05, + "loss": 0.1808, + "step": 356000 + }, + { + "epoch": 0.02, + "learning_rate": 4.959734124293786e-05, + "loss": 0.1841, + "step": 356500 + }, + { + "epoch": 0.02, + "learning_rate": 4.959677627118644e-05, + "loss": 0.1722, + "step": 357000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9596212429378535e-05, + "loss": 0.1843, + "step": 357500 + }, + { + "epoch": 0.02, + "learning_rate": 4.959564745762712e-05, + "loss": 0.1689, + "step": 358000 + }, + { + "epoch": 0.02, + "learning_rate": 4.959508361581921e-05, + "loss": 0.1802, + "step": 358500 + }, + { + "epoch": 0.02, + "learning_rate": 4.95945186440678e-05, + "loss": 0.1759, + "step": 359000 + }, + { + "epoch": 0.02, + "learning_rate": 4.959395367231639e-05, + "loss": 0.1785, + "step": 359500 + }, + { + "epoch": 0.02, + "learning_rate": 4.959338870056497e-05, + "loss": 0.1786, + "step": 360000 + }, + { + "epoch": 0.02, + "learning_rate": 4.959282372881356e-05, + "loss": 0.1731, + "step": 360500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9592258757062146e-05, + "loss": 0.168, + "step": 361000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9591693785310736e-05, + "loss": 0.1799, + "step": 361500 + }, + { + "epoch": 0.02, + "learning_rate": 4.959112881355932e-05, + "loss": 0.1777, + "step": 362000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9590564971751414e-05, + "loss": 0.1781, + "step": 362500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9590000000000005e-05, + "loss": 0.1733, + "step": 363000 + }, + { + "epoch": 0.02, + "learning_rate": 4.958943502824859e-05, + "loss": 0.1875, + "step": 363500 + }, + { + "epoch": 0.02, + "learning_rate": 4.958887005649718e-05, + "loss": 0.18, + "step": 364000 + }, + { + "epoch": 0.02, + "learning_rate": 4.958830508474577e-05, + "loss": 0.1785, + "step": 364500 + }, + { + "epoch": 0.02, + "learning_rate": 4.958774124293786e-05, + "loss": 0.1777, + "step": 365000 + }, + { + "epoch": 0.02, + "learning_rate": 4.958717627118644e-05, + "loss": 0.1696, + "step": 365500 + }, + { + "epoch": 0.02, + "learning_rate": 4.958661129943503e-05, + "loss": 0.1793, + "step": 366000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9586046327683615e-05, + "loss": 0.1745, + "step": 366500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9585481355932206e-05, + "loss": 0.1746, + "step": 367000 + }, + { + "epoch": 0.02, + "learning_rate": 4.958491751412429e-05, + "loss": 0.1778, + "step": 367500 + }, + { + "epoch": 0.02, + "learning_rate": 4.958435254237288e-05, + "loss": 0.1714, + "step": 368000 + }, + { + "epoch": 0.02, + "learning_rate": 4.958378870056498e-05, + "loss": 0.1733, + "step": 368500 + }, + { + "epoch": 0.03, + "learning_rate": 4.958322372881356e-05, + "loss": 0.1699, + "step": 369000 + }, + { + "epoch": 0.03, + "learning_rate": 4.958265875706215e-05, + "loss": 0.1784, + "step": 369500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9582093785310736e-05, + "loss": 0.1752, + "step": 370000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9581528813559326e-05, + "loss": 0.1783, + "step": 370500 + }, + { + "epoch": 0.03, + "learning_rate": 4.958096384180791e-05, + "loss": 0.1651, + "step": 371000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95803988700565e-05, + "loss": 0.1801, + "step": 371500 + }, + { + "epoch": 0.03, + "learning_rate": 4.957983389830509e-05, + "loss": 0.1723, + "step": 372000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9579268926553675e-05, + "loss": 0.1758, + "step": 372500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9578703954802265e-05, + "loss": 0.1828, + "step": 373000 + }, + { + "epoch": 0.03, + "learning_rate": 4.957814011299435e-05, + "loss": 0.1853, + "step": 373500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9577575141242936e-05, + "loss": 0.1748, + "step": 374000 + }, + { + "epoch": 0.03, + "learning_rate": 4.957701016949153e-05, + "loss": 0.1732, + "step": 374500 + }, + { + "epoch": 0.03, + "learning_rate": 4.957644519774012e-05, + "loss": 0.1783, + "step": 375000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95758802259887e-05, + "loss": 0.1749, + "step": 375500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9575316384180795e-05, + "loss": 0.1691, + "step": 376000 + }, + { + "epoch": 0.03, + "learning_rate": 4.957475141242938e-05, + "loss": 0.1739, + "step": 376500 + }, + { + "epoch": 0.03, + "learning_rate": 4.957418644067797e-05, + "loss": 0.1823, + "step": 377000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9573621468926554e-05, + "loss": 0.1747, + "step": 377500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9573056497175144e-05, + "loss": 0.1728, + "step": 378000 + }, + { + "epoch": 0.03, + "learning_rate": 4.957249152542373e-05, + "loss": 0.1823, + "step": 378500 + }, + { + "epoch": 0.03, + "learning_rate": 4.957192655367232e-05, + "loss": 0.1726, + "step": 379000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95713615819209e-05, + "loss": 0.1722, + "step": 379500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9570797740112996e-05, + "loss": 0.1762, + "step": 380000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9570233898305084e-05, + "loss": 0.1759, + "step": 380500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9569668926553674e-05, + "loss": 0.1653, + "step": 381000 + }, + { + "epoch": 0.03, + "learning_rate": 4.956910395480226e-05, + "loss": 0.172, + "step": 381500 + }, + { + "epoch": 0.03, + "learning_rate": 4.956853898305085e-05, + "loss": 0.1662, + "step": 382000 + }, + { + "epoch": 0.03, + "learning_rate": 4.956797401129944e-05, + "loss": 0.1815, + "step": 382500 + }, + { + "epoch": 0.03, + "learning_rate": 4.956740903954802e-05, + "loss": 0.1768, + "step": 383000 + }, + { + "epoch": 0.03, + "learning_rate": 4.956684519774012e-05, + "loss": 0.1759, + "step": 383500 + }, + { + "epoch": 0.03, + "learning_rate": 4.95662802259887e-05, + "loss": 0.1712, + "step": 384000 + }, + { + "epoch": 0.03, + "learning_rate": 4.956571525423729e-05, + "loss": 0.1769, + "step": 384500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9565150282485875e-05, + "loss": 0.1754, + "step": 385000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9564585310734466e-05, + "loss": 0.1699, + "step": 385500 + }, + { + "epoch": 0.03, + "learning_rate": 4.956402146892656e-05, + "loss": 0.1597, + "step": 386000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9563456497175143e-05, + "loss": 0.1779, + "step": 386500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9562891525423734e-05, + "loss": 0.1696, + "step": 387000 + }, + { + "epoch": 0.03, + "learning_rate": 4.956232655367232e-05, + "loss": 0.1729, + "step": 387500 + }, + { + "epoch": 0.03, + "learning_rate": 4.956176158192091e-05, + "loss": 0.1629, + "step": 388000 + }, + { + "epoch": 0.03, + "learning_rate": 4.956119661016949e-05, + "loss": 0.1704, + "step": 388500 + }, + { + "epoch": 0.03, + "learning_rate": 4.956063163841808e-05, + "loss": 0.1795, + "step": 389000 + }, + { + "epoch": 0.03, + "learning_rate": 4.956006779661017e-05, + "loss": 0.1746, + "step": 389500 + }, + { + "epoch": 0.03, + "learning_rate": 4.955950282485876e-05, + "loss": 0.1706, + "step": 390000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9558937853107344e-05, + "loss": 0.1735, + "step": 390500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9558372881355935e-05, + "loss": 0.1687, + "step": 391000 + }, + { + "epoch": 0.03, + "learning_rate": 4.955780903954802e-05, + "loss": 0.1796, + "step": 391500 + }, + { + "epoch": 0.03, + "learning_rate": 4.955724406779661e-05, + "loss": 0.1778, + "step": 392000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9556679096045197e-05, + "loss": 0.1648, + "step": 392500 + }, + { + "epoch": 0.03, + "learning_rate": 4.955611412429379e-05, + "loss": 0.1708, + "step": 393000 + }, + { + "epoch": 0.03, + "learning_rate": 4.955554915254237e-05, + "loss": 0.1715, + "step": 393500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9554985310734465e-05, + "loss": 0.1677, + "step": 394000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9554420338983056e-05, + "loss": 0.1765, + "step": 394500 + }, + { + "epoch": 0.03, + "learning_rate": 4.955385536723164e-05, + "loss": 0.1785, + "step": 395000 + }, + { + "epoch": 0.03, + "learning_rate": 4.955329039548023e-05, + "loss": 0.1723, + "step": 395500 + }, + { + "epoch": 0.03, + "learning_rate": 4.955272542372882e-05, + "loss": 0.1656, + "step": 396000 + }, + { + "epoch": 0.03, + "learning_rate": 4.955216271186441e-05, + "loss": 0.1691, + "step": 396500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9551597740112995e-05, + "loss": 0.1799, + "step": 397000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9551032768361586e-05, + "loss": 0.173, + "step": 397500 + }, + { + "epoch": 0.03, + "learning_rate": 4.955046779661017e-05, + "loss": 0.1717, + "step": 398000 + }, + { + "epoch": 0.03, + "learning_rate": 4.954990282485876e-05, + "loss": 0.1696, + "step": 398500 + }, + { + "epoch": 0.03, + "learning_rate": 4.954933785310735e-05, + "loss": 0.169, + "step": 399000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9548772881355934e-05, + "loss": 0.1679, + "step": 399500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9548207909604525e-05, + "loss": 0.1721, + "step": 400000 + }, + { + "epoch": 0.03, + "learning_rate": 4.954764293785311e-05, + "loss": 0.1592, + "step": 400500 + }, + { + "epoch": 0.03, + "learning_rate": 4.95470779661017e-05, + "loss": 0.1607, + "step": 401000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9546514124293786e-05, + "loss": 0.1806, + "step": 401500 + }, + { + "epoch": 0.03, + "learning_rate": 4.954594915254238e-05, + "loss": 0.1692, + "step": 402000 + }, + { + "epoch": 0.03, + "learning_rate": 4.954538418079096e-05, + "loss": 0.1788, + "step": 402500 + }, + { + "epoch": 0.03, + "learning_rate": 4.954481920903955e-05, + "loss": 0.1739, + "step": 403000 + }, + { + "epoch": 0.03, + "learning_rate": 4.954425423728814e-05, + "loss": 0.1714, + "step": 403500 + }, + { + "epoch": 0.03, + "learning_rate": 4.954369039548023e-05, + "loss": 0.1613, + "step": 404000 + }, + { + "epoch": 0.03, + "learning_rate": 4.954312542372881e-05, + "loss": 0.1848, + "step": 404500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9542560451977404e-05, + "loss": 0.1716, + "step": 405000 + }, + { + "epoch": 0.03, + "learning_rate": 4.954199548022599e-05, + "loss": 0.1618, + "step": 405500 + }, + { + "epoch": 0.03, + "learning_rate": 4.954143050847458e-05, + "loss": 0.1799, + "step": 406000 + }, + { + "epoch": 0.03, + "learning_rate": 4.954086553672317e-05, + "loss": 0.1802, + "step": 406500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9540301694915256e-05, + "loss": 0.1717, + "step": 407000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9539736723163846e-05, + "loss": 0.175, + "step": 407500 + }, + { + "epoch": 0.03, + "learning_rate": 4.953917175141243e-05, + "loss": 0.1715, + "step": 408000 + }, + { + "epoch": 0.03, + "learning_rate": 4.953860677966102e-05, + "loss": 0.1702, + "step": 408500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9538041807909605e-05, + "loss": 0.1715, + "step": 409000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95374779661017e-05, + "loss": 0.1694, + "step": 409500 + }, + { + "epoch": 0.03, + "learning_rate": 4.953691299435029e-05, + "loss": 0.1682, + "step": 410000 + }, + { + "epoch": 0.03, + "learning_rate": 4.953634802259887e-05, + "loss": 0.1653, + "step": 410500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9535783050847464e-05, + "loss": 0.1702, + "step": 411000 + }, + { + "epoch": 0.03, + "learning_rate": 4.953521920903955e-05, + "loss": 0.1617, + "step": 411500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9534654237288135e-05, + "loss": 0.1677, + "step": 412000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9534089265536725e-05, + "loss": 0.1729, + "step": 412500 + }, + { + "epoch": 0.03, + "learning_rate": 4.953352542372882e-05, + "loss": 0.177, + "step": 413000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95329604519774e-05, + "loss": 0.1721, + "step": 413500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9532395480225993e-05, + "loss": 0.1692, + "step": 414000 + }, + { + "epoch": 0.03, + "learning_rate": 4.953183050847458e-05, + "loss": 0.1643, + "step": 414500 + }, + { + "epoch": 0.03, + "learning_rate": 4.953126553672317e-05, + "loss": 0.1751, + "step": 415000 + }, + { + "epoch": 0.03, + "learning_rate": 4.953070056497175e-05, + "loss": 0.1722, + "step": 415500 + }, + { + "epoch": 0.03, + "learning_rate": 4.953013559322034e-05, + "loss": 0.1631, + "step": 416000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9529570621468926e-05, + "loss": 0.1704, + "step": 416500 + }, + { + "epoch": 0.03, + "learning_rate": 4.952900564971752e-05, + "loss": 0.166, + "step": 417000 + }, + { + "epoch": 0.03, + "learning_rate": 4.952844180790961e-05, + "loss": 0.1673, + "step": 417500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9527876836158194e-05, + "loss": 0.1726, + "step": 418000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9527311864406785e-05, + "loss": 0.1604, + "step": 418500 + }, + { + "epoch": 0.03, + "learning_rate": 4.952674689265537e-05, + "loss": 0.1693, + "step": 419000 + }, + { + "epoch": 0.03, + "learning_rate": 4.952618192090396e-05, + "loss": 0.1667, + "step": 419500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9525618079096047e-05, + "loss": 0.163, + "step": 420000 + }, + { + "epoch": 0.03, + "learning_rate": 4.952505310734464e-05, + "loss": 0.1719, + "step": 420500 + }, + { + "epoch": 0.03, + "learning_rate": 4.952448813559322e-05, + "loss": 0.159, + "step": 421000 + }, + { + "epoch": 0.03, + "learning_rate": 4.952392316384181e-05, + "loss": 0.1639, + "step": 421500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9523358192090395e-05, + "loss": 0.1639, + "step": 422000 + }, + { + "epoch": 0.03, + "learning_rate": 4.952279435028249e-05, + "loss": 0.1702, + "step": 422500 + }, + { + "epoch": 0.03, + "learning_rate": 4.952222937853107e-05, + "loss": 0.1708, + "step": 423000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9521664406779664e-05, + "loss": 0.1827, + "step": 423500 + }, + { + "epoch": 0.03, + "learning_rate": 4.952109943502825e-05, + "loss": 0.1801, + "step": 424000 + }, + { + "epoch": 0.03, + "learning_rate": 4.952053446327684e-05, + "loss": 0.1659, + "step": 424500 + }, + { + "epoch": 0.03, + "learning_rate": 4.951997062146893e-05, + "loss": 0.1677, + "step": 425000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9519405649717516e-05, + "loss": 0.1669, + "step": 425500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9518840677966106e-05, + "loss": 0.1745, + "step": 426000 + }, + { + "epoch": 0.03, + "learning_rate": 4.951827570621469e-05, + "loss": 0.1786, + "step": 426500 + }, + { + "epoch": 0.03, + "learning_rate": 4.951771073446328e-05, + "loss": 0.1691, + "step": 427000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9517145762711865e-05, + "loss": 0.1605, + "step": 427500 + }, + { + "epoch": 0.03, + "learning_rate": 4.951658192090396e-05, + "loss": 0.172, + "step": 428000 + }, + { + "epoch": 0.03, + "learning_rate": 4.951601694915254e-05, + "loss": 0.1678, + "step": 428500 + }, + { + "epoch": 0.03, + "learning_rate": 4.951545197740113e-05, + "loss": 0.1711, + "step": 429000 + }, + { + "epoch": 0.03, + "learning_rate": 4.951488700564972e-05, + "loss": 0.1692, + "step": 429500 + }, + { + "epoch": 0.03, + "learning_rate": 4.951432203389831e-05, + "loss": 0.1617, + "step": 430000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95137581920904e-05, + "loss": 0.1692, + "step": 430500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9513193220338985e-05, + "loss": 0.166, + "step": 431000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9512628248587576e-05, + "loss": 0.1687, + "step": 431500 + }, + { + "epoch": 0.03, + "learning_rate": 4.951206327683616e-05, + "loss": 0.1655, + "step": 432000 + }, + { + "epoch": 0.03, + "learning_rate": 4.951149830508475e-05, + "loss": 0.1653, + "step": 432500 + }, + { + "epoch": 0.03, + "learning_rate": 4.951093446327684e-05, + "loss": 0.1754, + "step": 433000 + }, + { + "epoch": 0.03, + "learning_rate": 4.951036949152543e-05, + "loss": 0.1731, + "step": 433500 + }, + { + "epoch": 0.03, + "learning_rate": 4.950980451977401e-05, + "loss": 0.1664, + "step": 434000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95092395480226e-05, + "loss": 0.1665, + "step": 434500 + }, + { + "epoch": 0.03, + "learning_rate": 4.950867570621469e-05, + "loss": 0.171, + "step": 435000 + }, + { + "epoch": 0.03, + "learning_rate": 4.950811073446328e-05, + "loss": 0.1658, + "step": 435500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9507545762711864e-05, + "loss": 0.1634, + "step": 436000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9506980790960455e-05, + "loss": 0.1579, + "step": 436500 + }, + { + "epoch": 0.03, + "learning_rate": 4.950641581920904e-05, + "loss": 0.1661, + "step": 437000 + }, + { + "epoch": 0.03, + "learning_rate": 4.950585084745763e-05, + "loss": 0.1605, + "step": 437500 + }, + { + "epoch": 0.03, + "learning_rate": 4.950528587570621e-05, + "loss": 0.1647, + "step": 438000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95047209039548e-05, + "loss": 0.1699, + "step": 438500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9504155932203394e-05, + "loss": 0.1626, + "step": 439000 + }, + { + "epoch": 0.03, + "learning_rate": 4.950359096045198e-05, + "loss": 0.1703, + "step": 439500 + }, + { + "epoch": 0.03, + "learning_rate": 4.950302711864407e-05, + "loss": 0.1688, + "step": 440000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9502462146892655e-05, + "loss": 0.1621, + "step": 440500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9501897175141246e-05, + "loss": 0.1649, + "step": 441000 + }, + { + "epoch": 0.03, + "learning_rate": 4.950133220338983e-05, + "loss": 0.1657, + "step": 441500 + }, + { + "epoch": 0.03, + "learning_rate": 4.950076723163842e-05, + "loss": 0.1642, + "step": 442000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9500203389830514e-05, + "loss": 0.1698, + "step": 442500 + }, + { + "epoch": 0.03, + "learning_rate": 4.94996384180791e-05, + "loss": 0.1686, + "step": 443000 + }, + { + "epoch": 0.03, + "learning_rate": 4.949907344632769e-05, + "loss": 0.1714, + "step": 443500 + }, + { + "epoch": 0.03, + "learning_rate": 4.949850847457627e-05, + "loss": 0.166, + "step": 444000 + }, + { + "epoch": 0.03, + "learning_rate": 4.949794350282486e-05, + "loss": 0.1661, + "step": 444500 + }, + { + "epoch": 0.03, + "learning_rate": 4.949737966101695e-05, + "loss": 0.1619, + "step": 445000 + }, + { + "epoch": 0.03, + "learning_rate": 4.949681468926554e-05, + "loss": 0.1682, + "step": 445500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9496249717514125e-05, + "loss": 0.1612, + "step": 446000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9495684745762715e-05, + "loss": 0.1682, + "step": 446500 + }, + { + "epoch": 0.03, + "learning_rate": 4.94951209039548e-05, + "loss": 0.1706, + "step": 447000 + }, + { + "epoch": 0.03, + "learning_rate": 4.949455593220339e-05, + "loss": 0.1655, + "step": 447500 + }, + { + "epoch": 0.03, + "learning_rate": 4.949399096045198e-05, + "loss": 0.1705, + "step": 448000 + }, + { + "epoch": 0.03, + "learning_rate": 4.949342598870057e-05, + "loss": 0.1648, + "step": 448500 + }, + { + "epoch": 0.03, + "learning_rate": 4.949286214689266e-05, + "loss": 0.1621, + "step": 449000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9492297175141245e-05, + "loss": 0.1601, + "step": 449500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9491732203389836e-05, + "loss": 0.1632, + "step": 450000 + }, + { + "epoch": 0.03, + "learning_rate": 4.949116723163842e-05, + "loss": 0.1677, + "step": 450500 + }, + { + "epoch": 0.03, + "learning_rate": 4.949060225988701e-05, + "loss": 0.1691, + "step": 451000 + }, + { + "epoch": 0.03, + "learning_rate": 4.94900384180791e-05, + "loss": 0.1662, + "step": 451500 + }, + { + "epoch": 0.03, + "learning_rate": 4.948947344632768e-05, + "loss": 0.1736, + "step": 452000 + }, + { + "epoch": 0.03, + "learning_rate": 4.948890847457627e-05, + "loss": 0.1676, + "step": 452500 + }, + { + "epoch": 0.03, + "learning_rate": 4.948834350282486e-05, + "loss": 0.1662, + "step": 453000 + }, + { + "epoch": 0.03, + "learning_rate": 4.948777966101695e-05, + "loss": 0.1644, + "step": 453500 + }, + { + "epoch": 0.03, + "learning_rate": 4.948721468926554e-05, + "loss": 0.1595, + "step": 454000 + }, + { + "epoch": 0.03, + "learning_rate": 4.948664971751413e-05, + "loss": 0.1632, + "step": 454500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9486084745762715e-05, + "loss": 0.1674, + "step": 455000 + }, + { + "epoch": 0.03, + "learning_rate": 4.948552090395481e-05, + "loss": 0.1615, + "step": 455500 + }, + { + "epoch": 0.03, + "learning_rate": 4.948495593220339e-05, + "loss": 0.1728, + "step": 456000 + }, + { + "epoch": 0.03, + "learning_rate": 4.948439096045198e-05, + "loss": 0.1626, + "step": 456500 + }, + { + "epoch": 0.03, + "learning_rate": 4.948382598870057e-05, + "loss": 0.1677, + "step": 457000 + }, + { + "epoch": 0.03, + "learning_rate": 4.948326101694916e-05, + "loss": 0.1563, + "step": 457500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9482697175141245e-05, + "loss": 0.1708, + "step": 458000 + }, + { + "epoch": 0.03, + "learning_rate": 4.948213220338983e-05, + "loss": 0.1671, + "step": 458500 + }, + { + "epoch": 0.03, + "learning_rate": 4.948156723163842e-05, + "loss": 0.1583, + "step": 459000 + }, + { + "epoch": 0.03, + "learning_rate": 4.948100225988701e-05, + "loss": 0.164, + "step": 459500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9480437288135593e-05, + "loss": 0.164, + "step": 460000 + }, + { + "epoch": 0.03, + "learning_rate": 4.947987344632769e-05, + "loss": 0.167, + "step": 460500 + }, + { + "epoch": 0.03, + "learning_rate": 4.947930847457628e-05, + "loss": 0.1654, + "step": 461000 + }, + { + "epoch": 0.03, + "learning_rate": 4.947874350282486e-05, + "loss": 0.1601, + "step": 461500 + }, + { + "epoch": 0.03, + "learning_rate": 4.947817853107345e-05, + "loss": 0.1681, + "step": 462000 + }, + { + "epoch": 0.03, + "learning_rate": 4.947761468926554e-05, + "loss": 0.1662, + "step": 462500 + }, + { + "epoch": 0.03, + "learning_rate": 4.947704971751413e-05, + "loss": 0.1632, + "step": 463000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9476484745762714e-05, + "loss": 0.1603, + "step": 463500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9475919774011305e-05, + "loss": 0.162, + "step": 464000 + }, + { + "epoch": 0.03, + "learning_rate": 4.947535593220339e-05, + "loss": 0.1673, + "step": 464500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9474790960451976e-05, + "loss": 0.1698, + "step": 465000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9474225988700566e-05, + "loss": 0.1657, + "step": 465500 + }, + { + "epoch": 0.03, + "learning_rate": 4.947366101694916e-05, + "loss": 0.1569, + "step": 466000 + }, + { + "epoch": 0.03, + "learning_rate": 4.947309604519774e-05, + "loss": 0.1671, + "step": 466500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9472532203389834e-05, + "loss": 0.1626, + "step": 467000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9471967231638425e-05, + "loss": 0.1531, + "step": 467500 + }, + { + "epoch": 0.03, + "learning_rate": 4.947140225988701e-05, + "loss": 0.1668, + "step": 468000 + }, + { + "epoch": 0.03, + "learning_rate": 4.94708372881356e-05, + "loss": 0.1635, + "step": 468500 + }, + { + "epoch": 0.03, + "learning_rate": 4.947027231638418e-05, + "loss": 0.1773, + "step": 469000 + }, + { + "epoch": 0.03, + "learning_rate": 4.946970847457628e-05, + "loss": 0.1604, + "step": 469500 + }, + { + "epoch": 0.03, + "learning_rate": 4.946914350282486e-05, + "loss": 0.1688, + "step": 470000 + }, + { + "epoch": 0.03, + "learning_rate": 4.946857853107345e-05, + "loss": 0.1617, + "step": 470500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9468013559322035e-05, + "loss": 0.1655, + "step": 471000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9467448587570626e-05, + "loss": 0.1646, + "step": 471500 + }, + { + "epoch": 0.03, + "learning_rate": 4.946688474576271e-05, + "loss": 0.1627, + "step": 472000 + }, + { + "epoch": 0.03, + "learning_rate": 4.94663197740113e-05, + "loss": 0.1693, + "step": 472500 + }, + { + "epoch": 0.03, + "learning_rate": 4.946575480225989e-05, + "loss": 0.1541, + "step": 473000 + }, + { + "epoch": 0.03, + "learning_rate": 4.946518983050848e-05, + "loss": 0.167, + "step": 473500 + }, + { + "epoch": 0.03, + "learning_rate": 4.946462485875706e-05, + "loss": 0.171, + "step": 474000 + }, + { + "epoch": 0.03, + "learning_rate": 4.946405988700565e-05, + "loss": 0.1643, + "step": 474500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9463496045197747e-05, + "loss": 0.1659, + "step": 475000 + }, + { + "epoch": 0.03, + "learning_rate": 4.946293107344633e-05, + "loss": 0.1646, + "step": 475500 + }, + { + "epoch": 0.03, + "learning_rate": 4.946236610169492e-05, + "loss": 0.1541, + "step": 476000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9461801129943505e-05, + "loss": 0.1633, + "step": 476500 + }, + { + "epoch": 0.03, + "learning_rate": 4.94612372881356e-05, + "loss": 0.1669, + "step": 477000 + }, + { + "epoch": 0.03, + "learning_rate": 4.946067231638418e-05, + "loss": 0.1649, + "step": 477500 + }, + { + "epoch": 0.03, + "learning_rate": 4.946010734463277e-05, + "loss": 0.1617, + "step": 478000 + }, + { + "epoch": 0.03, + "learning_rate": 4.945954237288136e-05, + "loss": 0.1615, + "step": 478500 + }, + { + "epoch": 0.03, + "learning_rate": 4.945897740112995e-05, + "loss": 0.1594, + "step": 479000 + }, + { + "epoch": 0.03, + "learning_rate": 4.945841242937853e-05, + "loss": 0.1648, + "step": 479500 + }, + { + "epoch": 0.03, + "learning_rate": 4.945784745762712e-05, + "loss": 0.1653, + "step": 480000 + }, + { + "epoch": 0.03, + "learning_rate": 4.945728248587571e-05, + "loss": 0.1612, + "step": 480500 + }, + { + "epoch": 0.03, + "learning_rate": 4.94567186440678e-05, + "loss": 0.1495, + "step": 481000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9456153672316383e-05, + "loss": 0.1623, + "step": 481500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9455588700564974e-05, + "loss": 0.1642, + "step": 482000 + }, + { + "epoch": 0.03, + "learning_rate": 4.945502372881356e-05, + "loss": 0.1597, + "step": 482500 + }, + { + "epoch": 0.03, + "learning_rate": 4.945445875706215e-05, + "loss": 0.1617, + "step": 483000 + }, + { + "epoch": 0.03, + "learning_rate": 4.945389491525424e-05, + "loss": 0.1613, + "step": 483500 + }, + { + "epoch": 0.03, + "learning_rate": 4.945332994350283e-05, + "loss": 0.1624, + "step": 484000 + }, + { + "epoch": 0.03, + "learning_rate": 4.945276497175142e-05, + "loss": 0.1713, + "step": 484500 + }, + { + "epoch": 0.03, + "learning_rate": 4.945220000000001e-05, + "loss": 0.1572, + "step": 485000 + }, + { + "epoch": 0.03, + "learning_rate": 4.945163502824859e-05, + "loss": 0.1505, + "step": 485500 + }, + { + "epoch": 0.03, + "learning_rate": 4.945107005649718e-05, + "loss": 0.1636, + "step": 486000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9450505084745766e-05, + "loss": 0.1633, + "step": 486500 + }, + { + "epoch": 0.03, + "learning_rate": 4.944994124293785e-05, + "loss": 0.174, + "step": 487000 + }, + { + "epoch": 0.03, + "learning_rate": 4.944937740112995e-05, + "loss": 0.1682, + "step": 487500 + }, + { + "epoch": 0.03, + "learning_rate": 4.944881242937853e-05, + "loss": 0.1672, + "step": 488000 + }, + { + "epoch": 0.03, + "learning_rate": 4.944824745762712e-05, + "loss": 0.1599, + "step": 488500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9447682485875705e-05, + "loss": 0.1625, + "step": 489000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9447117514124296e-05, + "loss": 0.1698, + "step": 489500 + }, + { + "epoch": 0.03, + "learning_rate": 4.944655254237288e-05, + "loss": 0.1543, + "step": 490000 + }, + { + "epoch": 0.03, + "learning_rate": 4.944598757062147e-05, + "loss": 0.158, + "step": 490500 + }, + { + "epoch": 0.03, + "learning_rate": 4.944542259887006e-05, + "loss": 0.1617, + "step": 491000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9444857627118644e-05, + "loss": 0.1651, + "step": 491500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9444292655367235e-05, + "loss": 0.1639, + "step": 492000 + }, + { + "epoch": 0.03, + "learning_rate": 4.944372881355933e-05, + "loss": 0.1603, + "step": 492500 + }, + { + "epoch": 0.03, + "learning_rate": 4.944316384180791e-05, + "loss": 0.1585, + "step": 493000 + }, + { + "epoch": 0.03, + "learning_rate": 4.94425988700565e-05, + "loss": 0.1567, + "step": 493500 + }, + { + "epoch": 0.03, + "learning_rate": 4.944203389830509e-05, + "loss": 0.1616, + "step": 494000 + }, + { + "epoch": 0.03, + "learning_rate": 4.944146892655368e-05, + "loss": 0.1585, + "step": 494500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9440905084745765e-05, + "loss": 0.1613, + "step": 495000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9440340112994355e-05, + "loss": 0.1557, + "step": 495500 + }, + { + "epoch": 0.03, + "learning_rate": 4.943977514124294e-05, + "loss": 0.1672, + "step": 496000 + }, + { + "epoch": 0.03, + "learning_rate": 4.943921016949153e-05, + "loss": 0.1574, + "step": 496500 + }, + { + "epoch": 0.03, + "learning_rate": 4.943864632768362e-05, + "loss": 0.1702, + "step": 497000 + }, + { + "epoch": 0.03, + "learning_rate": 4.94380813559322e-05, + "loss": 0.1547, + "step": 497500 + }, + { + "epoch": 0.03, + "learning_rate": 4.943751638418079e-05, + "loss": 0.1611, + "step": 498000 + }, + { + "epoch": 0.03, + "learning_rate": 4.943695141242938e-05, + "loss": 0.1581, + "step": 498500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9436387570621476e-05, + "loss": 0.1739, + "step": 499000 + }, + { + "epoch": 0.03, + "learning_rate": 4.943582372881356e-05, + "loss": 0.1574, + "step": 499500 + }, + { + "epoch": 0.03, + "learning_rate": 4.943525875706215e-05, + "loss": 0.1657, + "step": 500000 + }, + { + "epoch": 0.03, + "learning_rate": 4.943469378531074e-05, + "loss": 0.1745, + "step": 500500 + }, + { + "epoch": 0.03, + "learning_rate": 4.943412881355932e-05, + "loss": 0.1596, + "step": 501000 + }, + { + "epoch": 0.03, + "learning_rate": 4.943356384180791e-05, + "loss": 0.1644, + "step": 501500 + }, + { + "epoch": 0.03, + "learning_rate": 4.94329988700565e-05, + "loss": 0.1593, + "step": 502000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9432433898305086e-05, + "loss": 0.1656, + "step": 502500 + }, + { + "epoch": 0.03, + "learning_rate": 4.943186892655368e-05, + "loss": 0.1553, + "step": 503000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9431305084745764e-05, + "loss": 0.1531, + "step": 503500 + }, + { + "epoch": 0.03, + "learning_rate": 4.943074011299435e-05, + "loss": 0.149, + "step": 504000 + }, + { + "epoch": 0.03, + "learning_rate": 4.943017514124294e-05, + "loss": 0.1622, + "step": 504500 + }, + { + "epoch": 0.03, + "learning_rate": 4.942961016949153e-05, + "loss": 0.1557, + "step": 505000 + }, + { + "epoch": 0.03, + "learning_rate": 4.942904519774011e-05, + "loss": 0.1638, + "step": 505500 + }, + { + "epoch": 0.03, + "learning_rate": 4.942848135593221e-05, + "loss": 0.1568, + "step": 506000 + }, + { + "epoch": 0.03, + "learning_rate": 4.94279163841808e-05, + "loss": 0.1594, + "step": 506500 + }, + { + "epoch": 0.03, + "learning_rate": 4.942735141242938e-05, + "loss": 0.1574, + "step": 507000 + }, + { + "epoch": 0.03, + "learning_rate": 4.942678644067797e-05, + "loss": 0.1674, + "step": 507500 + }, + { + "epoch": 0.03, + "learning_rate": 4.942622259887006e-05, + "loss": 0.1603, + "step": 508000 + }, + { + "epoch": 0.03, + "learning_rate": 4.942565762711865e-05, + "loss": 0.1625, + "step": 508500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9425092655367233e-05, + "loss": 0.1628, + "step": 509000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9424527683615824e-05, + "loss": 0.1571, + "step": 509500 + }, + { + "epoch": 0.03, + "learning_rate": 4.942396384180791e-05, + "loss": 0.153, + "step": 510000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9423398870056495e-05, + "loss": 0.1667, + "step": 510500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9422833898305086e-05, + "loss": 0.1657, + "step": 511000 + }, + { + "epoch": 0.03, + "learning_rate": 4.942226892655367e-05, + "loss": 0.1684, + "step": 511500 + }, + { + "epoch": 0.03, + "learning_rate": 4.942170395480226e-05, + "loss": 0.1611, + "step": 512000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9421140112994354e-05, + "loss": 0.1576, + "step": 512500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9420575141242945e-05, + "loss": 0.1559, + "step": 513000 + }, + { + "epoch": 0.03, + "learning_rate": 4.942001016949153e-05, + "loss": 0.1604, + "step": 513500 + }, + { + "epoch": 0.03, + "learning_rate": 4.941944519774012e-05, + "loss": 0.1548, + "step": 514000 + }, + { + "epoch": 0.03, + "learning_rate": 4.94188802259887e-05, + "loss": 0.1524, + "step": 514500 + }, + { + "epoch": 0.03, + "learning_rate": 4.941831638418079e-05, + "loss": 0.154, + "step": 515000 + }, + { + "epoch": 0.03, + "learning_rate": 4.941775141242938e-05, + "loss": 0.163, + "step": 515500 + }, + { + "epoch": 0.03, + "learning_rate": 4.941718644067797e-05, + "loss": 0.1603, + "step": 516000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9416621468926555e-05, + "loss": 0.1588, + "step": 516500 + }, + { + "epoch": 0.04, + "learning_rate": 4.941605762711864e-05, + "loss": 0.1573, + "step": 517000 + }, + { + "epoch": 0.04, + "learning_rate": 4.941549265536723e-05, + "loss": 0.1558, + "step": 517500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9414927683615817e-05, + "loss": 0.1656, + "step": 518000 + }, + { + "epoch": 0.04, + "learning_rate": 4.941436271186441e-05, + "loss": 0.162, + "step": 518500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9413797740113e-05, + "loss": 0.1683, + "step": 519000 + }, + { + "epoch": 0.04, + "learning_rate": 4.941323389830509e-05, + "loss": 0.1579, + "step": 519500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9412668926553676e-05, + "loss": 0.154, + "step": 520000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9412103954802266e-05, + "loss": 0.1579, + "step": 520500 + }, + { + "epoch": 0.04, + "learning_rate": 4.941153898305085e-05, + "loss": 0.1625, + "step": 521000 + }, + { + "epoch": 0.04, + "learning_rate": 4.941097514124294e-05, + "loss": 0.1664, + "step": 521500 + }, + { + "epoch": 0.04, + "learning_rate": 4.941041016949153e-05, + "loss": 0.1546, + "step": 522000 + }, + { + "epoch": 0.04, + "learning_rate": 4.940984519774012e-05, + "loss": 0.1582, + "step": 522500 + }, + { + "epoch": 0.04, + "learning_rate": 4.94092802259887e-05, + "loss": 0.1656, + "step": 523000 + }, + { + "epoch": 0.04, + "learning_rate": 4.940871525423729e-05, + "loss": 0.1557, + "step": 523500 + }, + { + "epoch": 0.04, + "learning_rate": 4.940815141242938e-05, + "loss": 0.172, + "step": 524000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9407586440677964e-05, + "loss": 0.1508, + "step": 524500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9407021468926554e-05, + "loss": 0.1622, + "step": 525000 + }, + { + "epoch": 0.04, + "learning_rate": 4.940645649717514e-05, + "loss": 0.1594, + "step": 525500 + }, + { + "epoch": 0.04, + "learning_rate": 4.940589265536724e-05, + "loss": 0.164, + "step": 526000 + }, + { + "epoch": 0.04, + "learning_rate": 4.940532768361582e-05, + "loss": 0.1537, + "step": 526500 + }, + { + "epoch": 0.04, + "learning_rate": 4.940476271186441e-05, + "loss": 0.158, + "step": 527000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9404197740113e-05, + "loss": 0.1566, + "step": 527500 + }, + { + "epoch": 0.04, + "learning_rate": 4.940363276836159e-05, + "loss": 0.1589, + "step": 528000 + }, + { + "epoch": 0.04, + "learning_rate": 4.940306779661017e-05, + "loss": 0.1625, + "step": 528500 + }, + { + "epoch": 0.04, + "learning_rate": 4.940250282485876e-05, + "loss": 0.1584, + "step": 529000 + }, + { + "epoch": 0.04, + "learning_rate": 4.940193898305085e-05, + "loss": 0.1623, + "step": 529500 + }, + { + "epoch": 0.04, + "learning_rate": 4.940137401129944e-05, + "loss": 0.162, + "step": 530000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9400809039548024e-05, + "loss": 0.1579, + "step": 530500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9400244067796614e-05, + "loss": 0.1541, + "step": 531000 + }, + { + "epoch": 0.04, + "learning_rate": 4.93996790960452e-05, + "loss": 0.1597, + "step": 531500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9399115254237285e-05, + "loss": 0.1554, + "step": 532000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9398550282485876e-05, + "loss": 0.1578, + "step": 532500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9397985310734466e-05, + "loss": 0.1592, + "step": 533000 + }, + { + "epoch": 0.04, + "learning_rate": 4.939742033898305e-05, + "loss": 0.1592, + "step": 533500 + }, + { + "epoch": 0.04, + "learning_rate": 4.939685536723164e-05, + "loss": 0.1575, + "step": 534000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9396291525423735e-05, + "loss": 0.16, + "step": 534500 + }, + { + "epoch": 0.04, + "learning_rate": 4.939572655367232e-05, + "loss": 0.1597, + "step": 535000 + }, + { + "epoch": 0.04, + "learning_rate": 4.939516158192091e-05, + "loss": 0.1547, + "step": 535500 + }, + { + "epoch": 0.04, + "learning_rate": 4.939459661016949e-05, + "loss": 0.1555, + "step": 536000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9394031638418083e-05, + "loss": 0.1617, + "step": 536500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9393466666666674e-05, + "loss": 0.1616, + "step": 537000 + }, + { + "epoch": 0.04, + "learning_rate": 4.939290282485876e-05, + "loss": 0.1647, + "step": 537500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9392337853107345e-05, + "loss": 0.1619, + "step": 538000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9391772881355936e-05, + "loss": 0.1575, + "step": 538500 + }, + { + "epoch": 0.04, + "learning_rate": 4.939120790960452e-05, + "loss": 0.1471, + "step": 539000 + }, + { + "epoch": 0.04, + "learning_rate": 4.939064406779661e-05, + "loss": 0.1576, + "step": 539500 + }, + { + "epoch": 0.04, + "learning_rate": 4.93900790960452e-05, + "loss": 0.1543, + "step": 540000 + }, + { + "epoch": 0.04, + "learning_rate": 4.938951412429379e-05, + "loss": 0.1531, + "step": 540500 + }, + { + "epoch": 0.04, + "learning_rate": 4.938894915254237e-05, + "loss": 0.1607, + "step": 541000 + }, + { + "epoch": 0.04, + "learning_rate": 4.938838418079096e-05, + "loss": 0.1549, + "step": 541500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9387820338983056e-05, + "loss": 0.1555, + "step": 542000 + }, + { + "epoch": 0.04, + "learning_rate": 4.938725536723164e-05, + "loss": 0.162, + "step": 542500 + }, + { + "epoch": 0.04, + "learning_rate": 4.938669039548023e-05, + "loss": 0.1614, + "step": 543000 + }, + { + "epoch": 0.04, + "learning_rate": 4.938612542372882e-05, + "loss": 0.1529, + "step": 543500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9385560451977405e-05, + "loss": 0.1526, + "step": 544000 + }, + { + "epoch": 0.04, + "learning_rate": 4.938499661016949e-05, + "loss": 0.1539, + "step": 544500 + }, + { + "epoch": 0.04, + "learning_rate": 4.938443163841808e-05, + "loss": 0.1586, + "step": 545000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9383866666666667e-05, + "loss": 0.1558, + "step": 545500 + }, + { + "epoch": 0.04, + "learning_rate": 4.938330169491526e-05, + "loss": 0.1611, + "step": 546000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9382737853107344e-05, + "loss": 0.1486, + "step": 546500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9382172881355935e-05, + "loss": 0.158, + "step": 547000 + }, + { + "epoch": 0.04, + "learning_rate": 4.938160790960452e-05, + "loss": 0.1645, + "step": 547500 + }, + { + "epoch": 0.04, + "learning_rate": 4.938104293785311e-05, + "loss": 0.1645, + "step": 548000 + }, + { + "epoch": 0.04, + "learning_rate": 4.938047796610169e-05, + "loss": 0.1463, + "step": 548500 + }, + { + "epoch": 0.04, + "learning_rate": 4.937991412429379e-05, + "loss": 0.161, + "step": 549000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937934915254238e-05, + "loss": 0.161, + "step": 549500 + }, + { + "epoch": 0.04, + "learning_rate": 4.937878418079096e-05, + "loss": 0.1587, + "step": 550000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937821920903955e-05, + "loss": 0.1542, + "step": 550500 + }, + { + "epoch": 0.04, + "learning_rate": 4.937765536723164e-05, + "loss": 0.1619, + "step": 551000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937709039548023e-05, + "loss": 0.167, + "step": 551500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9376525423728814e-05, + "loss": 0.1523, + "step": 552000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9375960451977404e-05, + "loss": 0.1516, + "step": 552500 + }, + { + "epoch": 0.04, + "learning_rate": 4.937539548022599e-05, + "loss": 0.1528, + "step": 553000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937483050847458e-05, + "loss": 0.164, + "step": 553500 + }, + { + "epoch": 0.04, + "learning_rate": 4.937426553672317e-05, + "loss": 0.1514, + "step": 554000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937370056497175e-05, + "loss": 0.1621, + "step": 554500 + }, + { + "epoch": 0.04, + "learning_rate": 4.937313672316384e-05, + "loss": 0.1608, + "step": 555000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937257175141243e-05, + "loss": 0.1558, + "step": 555500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9372006779661015e-05, + "loss": 0.1616, + "step": 556000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9371441807909605e-05, + "loss": 0.1566, + "step": 556500 + }, + { + "epoch": 0.04, + "learning_rate": 4.93708779661017e-05, + "loss": 0.1644, + "step": 557000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937031299435029e-05, + "loss": 0.1527, + "step": 557500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9369748022598874e-05, + "loss": 0.1585, + "step": 558000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9369183050847464e-05, + "loss": 0.1563, + "step": 558500 + }, + { + "epoch": 0.04, + "learning_rate": 4.936861807909605e-05, + "loss": 0.1479, + "step": 559000 + }, + { + "epoch": 0.04, + "learning_rate": 4.936805310734464e-05, + "loss": 0.1617, + "step": 559500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9367489265536726e-05, + "loss": 0.1505, + "step": 560000 + }, + { + "epoch": 0.04, + "learning_rate": 4.936692429378531e-05, + "loss": 0.1515, + "step": 560500 + }, + { + "epoch": 0.04, + "learning_rate": 4.93663593220339e-05, + "loss": 0.1536, + "step": 561000 + }, + { + "epoch": 0.04, + "learning_rate": 4.936579435028249e-05, + "loss": 0.157, + "step": 561500 + }, + { + "epoch": 0.04, + "learning_rate": 4.936523050847458e-05, + "loss": 0.1602, + "step": 562000 + }, + { + "epoch": 0.04, + "learning_rate": 4.936466553672316e-05, + "loss": 0.1618, + "step": 562500 + }, + { + "epoch": 0.04, + "learning_rate": 4.936410056497175e-05, + "loss": 0.155, + "step": 563000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9363535593220336e-05, + "loss": 0.157, + "step": 563500 + }, + { + "epoch": 0.04, + "learning_rate": 4.936297062146893e-05, + "loss": 0.1538, + "step": 564000 + }, + { + "epoch": 0.04, + "learning_rate": 4.936240677966102e-05, + "loss": 0.1547, + "step": 564500 + }, + { + "epoch": 0.04, + "learning_rate": 4.936184180790961e-05, + "loss": 0.1539, + "step": 565000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9361276836158195e-05, + "loss": 0.1602, + "step": 565500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9360711864406786e-05, + "loss": 0.1546, + "step": 566000 + }, + { + "epoch": 0.04, + "learning_rate": 4.936014802259887e-05, + "loss": 0.1462, + "step": 566500 + }, + { + "epoch": 0.04, + "learning_rate": 4.935958305084746e-05, + "loss": 0.1647, + "step": 567000 + }, + { + "epoch": 0.04, + "learning_rate": 4.935901807909605e-05, + "loss": 0.1535, + "step": 567500 + }, + { + "epoch": 0.04, + "learning_rate": 4.935845310734464e-05, + "loss": 0.1589, + "step": 568000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9357889265536725e-05, + "loss": 0.151, + "step": 568500 + }, + { + "epoch": 0.04, + "learning_rate": 4.935732429378531e-05, + "loss": 0.1495, + "step": 569000 + }, + { + "epoch": 0.04, + "learning_rate": 4.93567593220339e-05, + "loss": 0.161, + "step": 569500 + }, + { + "epoch": 0.04, + "learning_rate": 4.935619435028248e-05, + "loss": 0.1533, + "step": 570000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9355629378531074e-05, + "loss": 0.1528, + "step": 570500 + }, + { + "epoch": 0.04, + "learning_rate": 4.935506553672317e-05, + "loss": 0.1571, + "step": 571000 + }, + { + "epoch": 0.04, + "learning_rate": 4.935450056497176e-05, + "loss": 0.1557, + "step": 571500 + }, + { + "epoch": 0.04, + "learning_rate": 4.935393559322034e-05, + "loss": 0.1474, + "step": 572000 + }, + { + "epoch": 0.04, + "learning_rate": 4.935337062146893e-05, + "loss": 0.16, + "step": 572500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9352805649717517e-05, + "loss": 0.1583, + "step": 573000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9352241807909604e-05, + "loss": 0.1488, + "step": 573500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9351676836158194e-05, + "loss": 0.1503, + "step": 574000 + }, + { + "epoch": 0.04, + "learning_rate": 4.935111186440678e-05, + "loss": 0.1595, + "step": 574500 + }, + { + "epoch": 0.04, + "learning_rate": 4.935054689265537e-05, + "loss": 0.1634, + "step": 575000 + }, + { + "epoch": 0.04, + "learning_rate": 4.934998192090396e-05, + "loss": 0.1552, + "step": 575500 + }, + { + "epoch": 0.04, + "learning_rate": 4.934941694915254e-05, + "loss": 0.1529, + "step": 576000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9348851977401134e-05, + "loss": 0.158, + "step": 576500 + }, + { + "epoch": 0.04, + "learning_rate": 4.934828700564972e-05, + "loss": 0.1481, + "step": 577000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9347723163841805e-05, + "loss": 0.152, + "step": 577500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9347158192090395e-05, + "loss": 0.1527, + "step": 578000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9346593220338986e-05, + "loss": 0.1644, + "step": 578500 + }, + { + "epoch": 0.04, + "learning_rate": 4.934602824858757e-05, + "loss": 0.1587, + "step": 579000 + }, + { + "epoch": 0.04, + "learning_rate": 4.934546327683616e-05, + "loss": 0.1574, + "step": 579500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9344899435028254e-05, + "loss": 0.1557, + "step": 580000 + }, + { + "epoch": 0.04, + "learning_rate": 4.934433446327684e-05, + "loss": 0.1552, + "step": 580500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9343770621468925e-05, + "loss": 0.156, + "step": 581000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9343205649717516e-05, + "loss": 0.1598, + "step": 581500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9342640677966106e-05, + "loss": 0.1552, + "step": 582000 + }, + { + "epoch": 0.04, + "learning_rate": 4.934207570621469e-05, + "loss": 0.1506, + "step": 582500 + }, + { + "epoch": 0.04, + "learning_rate": 4.934151073446328e-05, + "loss": 0.1521, + "step": 583000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9340945762711865e-05, + "loss": 0.1585, + "step": 583500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9340380790960455e-05, + "loss": 0.1547, + "step": 584000 + }, + { + "epoch": 0.04, + "learning_rate": 4.933981581920904e-05, + "loss": 0.1482, + "step": 584500 + }, + { + "epoch": 0.04, + "learning_rate": 4.933925084745763e-05, + "loss": 0.1447, + "step": 585000 + }, + { + "epoch": 0.04, + "learning_rate": 4.933868700564972e-05, + "loss": 0.1521, + "step": 585500 + }, + { + "epoch": 0.04, + "learning_rate": 4.933812203389831e-05, + "loss": 0.1551, + "step": 586000 + }, + { + "epoch": 0.04, + "learning_rate": 4.933755706214689e-05, + "loss": 0.1575, + "step": 586500 + }, + { + "epoch": 0.04, + "learning_rate": 4.933699209039548e-05, + "loss": 0.1597, + "step": 587000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9336427118644066e-05, + "loss": 0.1512, + "step": 587500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9335862146892656e-05, + "loss": 0.1525, + "step": 588000 + }, + { + "epoch": 0.04, + "learning_rate": 4.933529830508475e-05, + "loss": 0.1548, + "step": 588500 + }, + { + "epoch": 0.04, + "learning_rate": 4.933473333333334e-05, + "loss": 0.1558, + "step": 589000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9334168361581924e-05, + "loss": 0.1551, + "step": 589500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9333603389830515e-05, + "loss": 0.1586, + "step": 590000 + }, + { + "epoch": 0.04, + "learning_rate": 4.93330395480226e-05, + "loss": 0.1577, + "step": 590500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9332474576271186e-05, + "loss": 0.154, + "step": 591000 + }, + { + "epoch": 0.04, + "learning_rate": 4.933190960451978e-05, + "loss": 0.1482, + "step": 591500 + }, + { + "epoch": 0.04, + "learning_rate": 4.933134463276836e-05, + "loss": 0.1573, + "step": 592000 + }, + { + "epoch": 0.04, + "learning_rate": 4.933077966101695e-05, + "loss": 0.1517, + "step": 592500 + }, + { + "epoch": 0.04, + "learning_rate": 4.933021468926554e-05, + "loss": 0.1532, + "step": 593000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9329649717514125e-05, + "loss": 0.1504, + "step": 593500 + }, + { + "epoch": 0.04, + "learning_rate": 4.932908587570621e-05, + "loss": 0.1567, + "step": 594000 + }, + { + "epoch": 0.04, + "learning_rate": 4.93285209039548e-05, + "loss": 0.1652, + "step": 594500 + }, + { + "epoch": 0.04, + "learning_rate": 4.932795593220339e-05, + "loss": 0.1467, + "step": 595000 + }, + { + "epoch": 0.04, + "learning_rate": 4.932739096045198e-05, + "loss": 0.1565, + "step": 595500 + }, + { + "epoch": 0.04, + "learning_rate": 4.932682711864407e-05, + "loss": 0.1455, + "step": 596000 + }, + { + "epoch": 0.04, + "learning_rate": 4.932626214689266e-05, + "loss": 0.1591, + "step": 596500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9325697175141246e-05, + "loss": 0.1506, + "step": 597000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9325132203389837e-05, + "loss": 0.1512, + "step": 597500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9324568361581924e-05, + "loss": 0.1514, + "step": 598000 + }, + { + "epoch": 0.04, + "learning_rate": 4.932400338983051e-05, + "loss": 0.1593, + "step": 598500 + }, + { + "epoch": 0.04, + "learning_rate": 4.93234384180791e-05, + "loss": 0.1493, + "step": 599000 + }, + { + "epoch": 0.04, + "learning_rate": 4.932287344632769e-05, + "loss": 0.162, + "step": 599500 + }, + { + "epoch": 0.04, + "learning_rate": 4.932230847457627e-05, + "loss": 0.1485, + "step": 600000 + }, + { + "epoch": 0.04, + "learning_rate": 4.932174463276836e-05, + "loss": 0.1498, + "step": 600500 + }, + { + "epoch": 0.04, + "learning_rate": 4.932117966101695e-05, + "loss": 0.1548, + "step": 601000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9320614689265534e-05, + "loss": 0.1445, + "step": 601500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9320049717514125e-05, + "loss": 0.1486, + "step": 602000 + }, + { + "epoch": 0.04, + "learning_rate": 4.931948474576271e-05, + "loss": 0.1485, + "step": 602500 + }, + { + "epoch": 0.04, + "learning_rate": 4.931892090395481e-05, + "loss": 0.1562, + "step": 603000 + }, + { + "epoch": 0.04, + "learning_rate": 4.931835593220339e-05, + "loss": 0.1577, + "step": 603500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9317790960451984e-05, + "loss": 0.1601, + "step": 604000 + }, + { + "epoch": 0.04, + "learning_rate": 4.931722598870057e-05, + "loss": 0.1503, + "step": 604500 + }, + { + "epoch": 0.04, + "learning_rate": 4.931666101694916e-05, + "loss": 0.1524, + "step": 605000 + }, + { + "epoch": 0.04, + "learning_rate": 4.931609604519774e-05, + "loss": 0.1607, + "step": 605500 + }, + { + "epoch": 0.04, + "learning_rate": 4.931553220338983e-05, + "loss": 0.1566, + "step": 606000 + }, + { + "epoch": 0.04, + "learning_rate": 4.931496723163842e-05, + "loss": 0.1483, + "step": 606500 + }, + { + "epoch": 0.04, + "learning_rate": 4.931440225988701e-05, + "loss": 0.1449, + "step": 607000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9313837288135594e-05, + "loss": 0.1491, + "step": 607500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9313272316384185e-05, + "loss": 0.151, + "step": 608000 + }, + { + "epoch": 0.04, + "learning_rate": 4.931270847457627e-05, + "loss": 0.1607, + "step": 608500 + }, + { + "epoch": 0.04, + "learning_rate": 4.931214350282486e-05, + "loss": 0.1579, + "step": 609000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9311578531073446e-05, + "loss": 0.1567, + "step": 609500 + }, + { + "epoch": 0.04, + "learning_rate": 4.931101355932204e-05, + "loss": 0.1532, + "step": 610000 + }, + { + "epoch": 0.04, + "learning_rate": 4.931044858757062e-05, + "loss": 0.1588, + "step": 610500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9309884745762715e-05, + "loss": 0.151, + "step": 611000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9309319774011305e-05, + "loss": 0.1529, + "step": 611500 + }, + { + "epoch": 0.04, + "learning_rate": 4.930875480225989e-05, + "loss": 0.1521, + "step": 612000 + }, + { + "epoch": 0.04, + "learning_rate": 4.930818983050848e-05, + "loss": 0.1463, + "step": 612500 + }, + { + "epoch": 0.04, + "learning_rate": 4.930762598870057e-05, + "loss": 0.1561, + "step": 613000 + }, + { + "epoch": 0.04, + "learning_rate": 4.930706101694916e-05, + "loss": 0.1464, + "step": 613500 + }, + { + "epoch": 0.04, + "learning_rate": 4.930649604519774e-05, + "loss": 0.1506, + "step": 614000 + }, + { + "epoch": 0.04, + "learning_rate": 4.930593107344633e-05, + "loss": 0.1513, + "step": 614500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9305366101694916e-05, + "loss": 0.1537, + "step": 615000 + }, + { + "epoch": 0.04, + "learning_rate": 4.930480225988701e-05, + "loss": 0.1422, + "step": 615500 + }, + { + "epoch": 0.04, + "learning_rate": 4.930423728813559e-05, + "loss": 0.1483, + "step": 616000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9303672316384184e-05, + "loss": 0.1576, + "step": 616500 + }, + { + "epoch": 0.04, + "learning_rate": 4.930310734463277e-05, + "loss": 0.1459, + "step": 617000 + }, + { + "epoch": 0.04, + "learning_rate": 4.930254237288136e-05, + "loss": 0.1497, + "step": 617500 + }, + { + "epoch": 0.04, + "learning_rate": 4.930197853107345e-05, + "loss": 0.1573, + "step": 618000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9301413559322036e-05, + "loss": 0.1554, + "step": 618500 + }, + { + "epoch": 0.04, + "learning_rate": 4.930084858757063e-05, + "loss": 0.1493, + "step": 619000 + }, + { + "epoch": 0.04, + "learning_rate": 4.930028361581921e-05, + "loss": 0.15, + "step": 619500 + }, + { + "epoch": 0.04, + "learning_rate": 4.92997197740113e-05, + "loss": 0.1565, + "step": 620000 + }, + { + "epoch": 0.04, + "learning_rate": 4.929915480225989e-05, + "loss": 0.1569, + "step": 620500 + }, + { + "epoch": 0.04, + "learning_rate": 4.929858983050848e-05, + "loss": 0.1466, + "step": 621000 + }, + { + "epoch": 0.04, + "learning_rate": 4.929802485875706e-05, + "loss": 0.1594, + "step": 621500 + }, + { + "epoch": 0.04, + "learning_rate": 4.929745988700565e-05, + "loss": 0.1458, + "step": 622000 + }, + { + "epoch": 0.04, + "learning_rate": 4.929689491525424e-05, + "loss": 0.153, + "step": 622500 + }, + { + "epoch": 0.04, + "learning_rate": 4.929633107344633e-05, + "loss": 0.1599, + "step": 623000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9295766101694915e-05, + "loss": 0.1555, + "step": 623500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9295201129943505e-05, + "loss": 0.155, + "step": 624000 + }, + { + "epoch": 0.04, + "learning_rate": 4.929463615819209e-05, + "loss": 0.1514, + "step": 624500 + }, + { + "epoch": 0.04, + "learning_rate": 4.929407231638418e-05, + "loss": 0.1496, + "step": 625000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9293507344632774e-05, + "loss": 0.15, + "step": 625500 + }, + { + "epoch": 0.04, + "learning_rate": 4.929294237288136e-05, + "loss": 0.1419, + "step": 626000 + }, + { + "epoch": 0.04, + "learning_rate": 4.929237740112995e-05, + "loss": 0.1461, + "step": 626500 + }, + { + "epoch": 0.04, + "learning_rate": 4.929181242937853e-05, + "loss": 0.154, + "step": 627000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9291248587570626e-05, + "loss": 0.1535, + "step": 627500 + }, + { + "epoch": 0.04, + "learning_rate": 4.929068361581921e-05, + "loss": 0.148, + "step": 628000 + }, + { + "epoch": 0.04, + "learning_rate": 4.92901186440678e-05, + "loss": 0.1543, + "step": 628500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9289553672316384e-05, + "loss": 0.1564, + "step": 629000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9288988700564975e-05, + "loss": 0.1548, + "step": 629500 + }, + { + "epoch": 0.04, + "learning_rate": 4.928842372881356e-05, + "loss": 0.1472, + "step": 630000 + }, + { + "epoch": 0.04, + "learning_rate": 4.928785988700565e-05, + "loss": 0.1559, + "step": 630500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9287294915254236e-05, + "loss": 0.1435, + "step": 631000 + }, + { + "epoch": 0.04, + "learning_rate": 4.928672994350283e-05, + "loss": 0.1475, + "step": 631500 + }, + { + "epoch": 0.04, + "learning_rate": 4.928616497175141e-05, + "loss": 0.1516, + "step": 632000 + }, + { + "epoch": 0.04, + "learning_rate": 4.92856e-05, + "loss": 0.155, + "step": 632500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9285036158192095e-05, + "loss": 0.1587, + "step": 633000 + }, + { + "epoch": 0.04, + "learning_rate": 4.928447118644068e-05, + "loss": 0.1472, + "step": 633500 + }, + { + "epoch": 0.04, + "learning_rate": 4.928390621468927e-05, + "loss": 0.1521, + "step": 634000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9283341242937853e-05, + "loss": 0.1465, + "step": 634500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9282776271186444e-05, + "loss": 0.1456, + "step": 635000 + }, + { + "epoch": 0.04, + "learning_rate": 4.928221242937853e-05, + "loss": 0.1538, + "step": 635500 + }, + { + "epoch": 0.04, + "learning_rate": 4.928164745762712e-05, + "loss": 0.1561, + "step": 636000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9281082485875706e-05, + "loss": 0.1534, + "step": 636500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9280517514124296e-05, + "loss": 0.1464, + "step": 637000 + }, + { + "epoch": 0.04, + "learning_rate": 4.927995367231639e-05, + "loss": 0.1528, + "step": 637500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9279388700564974e-05, + "loss": 0.1472, + "step": 638000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9278823728813565e-05, + "loss": 0.1521, + "step": 638500 + }, + { + "epoch": 0.04, + "learning_rate": 4.927825875706215e-05, + "loss": 0.1427, + "step": 639000 + }, + { + "epoch": 0.04, + "learning_rate": 4.927769378531074e-05, + "loss": 0.1438, + "step": 639500 + }, + { + "epoch": 0.04, + "learning_rate": 4.927712881355932e-05, + "loss": 0.1546, + "step": 640000 + }, + { + "epoch": 0.04, + "learning_rate": 4.927656384180791e-05, + "loss": 0.1465, + "step": 640500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9276e-05, + "loss": 0.1533, + "step": 641000 + }, + { + "epoch": 0.04, + "learning_rate": 4.927543502824859e-05, + "loss": 0.1511, + "step": 641500 + }, + { + "epoch": 0.04, + "learning_rate": 4.927487005649718e-05, + "loss": 0.1436, + "step": 642000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9274305084745765e-05, + "loss": 0.1456, + "step": 642500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9273740112994356e-05, + "loss": 0.1507, + "step": 643000 + }, + { + "epoch": 0.04, + "learning_rate": 4.927317627118644e-05, + "loss": 0.1461, + "step": 643500 + }, + { + "epoch": 0.04, + "learning_rate": 4.927261129943503e-05, + "loss": 0.1505, + "step": 644000 + }, + { + "epoch": 0.04, + "learning_rate": 4.927204632768362e-05, + "loss": 0.154, + "step": 644500 + }, + { + "epoch": 0.04, + "learning_rate": 4.92714813559322e-05, + "loss": 0.1505, + "step": 645000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9270917514124295e-05, + "loss": 0.1464, + "step": 645500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9270352542372886e-05, + "loss": 0.1538, + "step": 646000 + }, + { + "epoch": 0.04, + "learning_rate": 4.926978757062147e-05, + "loss": 0.1597, + "step": 646500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926922259887006e-05, + "loss": 0.1489, + "step": 647000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9268657627118644e-05, + "loss": 0.1525, + "step": 647500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926809378531074e-05, + "loss": 0.1475, + "step": 648000 + }, + { + "epoch": 0.04, + "learning_rate": 4.926752881355933e-05, + "loss": 0.1506, + "step": 648500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926696384180791e-05, + "loss": 0.1518, + "step": 649000 + }, + { + "epoch": 0.04, + "learning_rate": 4.92663988700565e-05, + "loss": 0.1501, + "step": 649500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926583389830509e-05, + "loss": 0.147, + "step": 650000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9265270056497174e-05, + "loss": 0.148, + "step": 650500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9264705084745765e-05, + "loss": 0.147, + "step": 651000 + }, + { + "epoch": 0.04, + "learning_rate": 4.926414011299435e-05, + "loss": 0.1428, + "step": 651500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926357514124294e-05, + "loss": 0.1503, + "step": 652000 + }, + { + "epoch": 0.04, + "learning_rate": 4.926301016949153e-05, + "loss": 0.1565, + "step": 652500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926244632768362e-05, + "loss": 0.154, + "step": 653000 + }, + { + "epoch": 0.04, + "learning_rate": 4.926188135593221e-05, + "loss": 0.1432, + "step": 653500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926131638418079e-05, + "loss": 0.1519, + "step": 654000 + }, + { + "epoch": 0.04, + "learning_rate": 4.926075141242938e-05, + "loss": 0.1436, + "step": 654500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9260186440677966e-05, + "loss": 0.1498, + "step": 655000 + }, + { + "epoch": 0.04, + "learning_rate": 4.925962259887006e-05, + "loss": 0.1477, + "step": 655500 + }, + { + "epoch": 0.04, + "learning_rate": 4.925905762711865e-05, + "loss": 0.1483, + "step": 656000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9258492655367234e-05, + "loss": 0.1517, + "step": 656500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9257927683615825e-05, + "loss": 0.1495, + "step": 657000 + }, + { + "epoch": 0.04, + "learning_rate": 4.925736271186441e-05, + "loss": 0.1525, + "step": 657500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9256798870056496e-05, + "loss": 0.147, + "step": 658000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9256233898305086e-05, + "loss": 0.1559, + "step": 658500 + }, + { + "epoch": 0.04, + "learning_rate": 4.925566892655367e-05, + "loss": 0.1461, + "step": 659000 + }, + { + "epoch": 0.04, + "learning_rate": 4.925510395480226e-05, + "loss": 0.1534, + "step": 659500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9254540112994355e-05, + "loss": 0.1507, + "step": 660000 + }, + { + "epoch": 0.04, + "learning_rate": 4.925397514124294e-05, + "loss": 0.1498, + "step": 660500 + }, + { + "epoch": 0.04, + "learning_rate": 4.925341016949153e-05, + "loss": 0.1518, + "step": 661000 + }, + { + "epoch": 0.04, + "learning_rate": 4.925284519774011e-05, + "loss": 0.146, + "step": 661500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9252280225988703e-05, + "loss": 0.1516, + "step": 662000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9251715254237294e-05, + "loss": 0.1404, + "step": 662500 + }, + { + "epoch": 0.04, + "learning_rate": 4.925115141242938e-05, + "loss": 0.1567, + "step": 663000 + }, + { + "epoch": 0.04, + "learning_rate": 4.925058644067797e-05, + "loss": 0.1502, + "step": 663500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9250021468926556e-05, + "loss": 0.1557, + "step": 664000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9249456497175146e-05, + "loss": 0.1425, + "step": 664500 + }, + { + "epoch": 0.05, + "learning_rate": 4.924889152542373e-05, + "loss": 0.1472, + "step": 665000 + }, + { + "epoch": 0.05, + "learning_rate": 4.924832768361582e-05, + "loss": 0.1584, + "step": 665500 + }, + { + "epoch": 0.05, + "learning_rate": 4.924776271186441e-05, + "loss": 0.1456, + "step": 666000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9247197740113e-05, + "loss": 0.155, + "step": 666500 + }, + { + "epoch": 0.05, + "learning_rate": 4.924663276836158e-05, + "loss": 0.15, + "step": 667000 + }, + { + "epoch": 0.05, + "learning_rate": 4.924606779661017e-05, + "loss": 0.1406, + "step": 667500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9245502824858757e-05, + "loss": 0.1539, + "step": 668000 + }, + { + "epoch": 0.05, + "learning_rate": 4.924493898305085e-05, + "loss": 0.1458, + "step": 668500 + }, + { + "epoch": 0.05, + "learning_rate": 4.924437401129944e-05, + "loss": 0.1418, + "step": 669000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9243809039548025e-05, + "loss": 0.1525, + "step": 669500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9243244067796615e-05, + "loss": 0.1472, + "step": 670000 + }, + { + "epoch": 0.05, + "learning_rate": 4.92426790960452e-05, + "loss": 0.1474, + "step": 670500 + }, + { + "epoch": 0.05, + "learning_rate": 4.924211525423729e-05, + "loss": 0.1391, + "step": 671000 + }, + { + "epoch": 0.05, + "learning_rate": 4.924155028248588e-05, + "loss": 0.1521, + "step": 671500 + }, + { + "epoch": 0.05, + "learning_rate": 4.924098531073447e-05, + "loss": 0.146, + "step": 672000 + }, + { + "epoch": 0.05, + "learning_rate": 4.924042033898305e-05, + "loss": 0.1516, + "step": 672500 + }, + { + "epoch": 0.05, + "learning_rate": 4.923985536723164e-05, + "loss": 0.147, + "step": 673000 + }, + { + "epoch": 0.05, + "learning_rate": 4.923929039548023e-05, + "loss": 0.1467, + "step": 673500 + }, + { + "epoch": 0.05, + "learning_rate": 4.923872655367232e-05, + "loss": 0.1493, + "step": 674000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9238161581920904e-05, + "loss": 0.1516, + "step": 674500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9237596610169494e-05, + "loss": 0.143, + "step": 675000 + }, + { + "epoch": 0.05, + "learning_rate": 4.923703163841808e-05, + "loss": 0.1418, + "step": 675500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9236468926553675e-05, + "loss": 0.1481, + "step": 676000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9235903954802266e-05, + "loss": 0.1477, + "step": 676500 + }, + { + "epoch": 0.05, + "learning_rate": 4.923533898305085e-05, + "loss": 0.1478, + "step": 677000 + }, + { + "epoch": 0.05, + "learning_rate": 4.923477401129944e-05, + "loss": 0.1477, + "step": 677500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9234209039548024e-05, + "loss": 0.1566, + "step": 678000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9233644067796615e-05, + "loss": 0.1478, + "step": 678500 + }, + { + "epoch": 0.05, + "learning_rate": 4.92330790960452e-05, + "loss": 0.159, + "step": 679000 + }, + { + "epoch": 0.05, + "learning_rate": 4.923251412429379e-05, + "loss": 0.1468, + "step": 679500 + }, + { + "epoch": 0.05, + "learning_rate": 4.923194915254237e-05, + "loss": 0.1526, + "step": 680000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9231384180790964e-05, + "loss": 0.1459, + "step": 680500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9230819209039554e-05, + "loss": 0.1389, + "step": 681000 + }, + { + "epoch": 0.05, + "learning_rate": 4.923025536723164e-05, + "loss": 0.15, + "step": 681500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9229690395480225e-05, + "loss": 0.155, + "step": 682000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9229125423728816e-05, + "loss": 0.1493, + "step": 682500 + }, + { + "epoch": 0.05, + "learning_rate": 4.92285604519774e-05, + "loss": 0.1431, + "step": 683000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9227996610169493e-05, + "loss": 0.1441, + "step": 683500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9227431638418084e-05, + "loss": 0.1496, + "step": 684000 + }, + { + "epoch": 0.05, + "learning_rate": 4.922686666666667e-05, + "loss": 0.1509, + "step": 684500 + }, + { + "epoch": 0.05, + "learning_rate": 4.922630169491526e-05, + "loss": 0.1484, + "step": 685000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9225737853107346e-05, + "loss": 0.1609, + "step": 685500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9225172881355936e-05, + "loss": 0.1489, + "step": 686000 + }, + { + "epoch": 0.05, + "learning_rate": 4.922460790960452e-05, + "loss": 0.1491, + "step": 686500 + }, + { + "epoch": 0.05, + "learning_rate": 4.922404293785311e-05, + "loss": 0.1511, + "step": 687000 + }, + { + "epoch": 0.05, + "learning_rate": 4.92234779661017e-05, + "loss": 0.1501, + "step": 687500 + }, + { + "epoch": 0.05, + "learning_rate": 4.922291412429379e-05, + "loss": 0.1449, + "step": 688000 + }, + { + "epoch": 0.05, + "learning_rate": 4.922234915254237e-05, + "loss": 0.1494, + "step": 688500 + }, + { + "epoch": 0.05, + "learning_rate": 4.922178418079096e-05, + "loss": 0.1471, + "step": 689000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9221219209039547e-05, + "loss": 0.1558, + "step": 689500 + }, + { + "epoch": 0.05, + "learning_rate": 4.922065536723164e-05, + "loss": 0.1499, + "step": 690000 + }, + { + "epoch": 0.05, + "learning_rate": 4.922009039548023e-05, + "loss": 0.1492, + "step": 690500 + }, + { + "epoch": 0.05, + "learning_rate": 4.921952542372882e-05, + "loss": 0.1531, + "step": 691000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9218960451977406e-05, + "loss": 0.1528, + "step": 691500 + }, + { + "epoch": 0.05, + "learning_rate": 4.921839661016949e-05, + "loss": 0.1435, + "step": 692000 + }, + { + "epoch": 0.05, + "learning_rate": 4.921783163841808e-05, + "loss": 0.1509, + "step": 692500 + }, + { + "epoch": 0.05, + "learning_rate": 4.921726666666667e-05, + "loss": 0.1451, + "step": 693000 + }, + { + "epoch": 0.05, + "learning_rate": 4.921670169491526e-05, + "loss": 0.1421, + "step": 693500 + }, + { + "epoch": 0.05, + "learning_rate": 4.921613672316384e-05, + "loss": 0.147, + "step": 694000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9215572881355936e-05, + "loss": 0.1451, + "step": 694500 + }, + { + "epoch": 0.05, + "learning_rate": 4.921500790960452e-05, + "loss": 0.1412, + "step": 695000 + }, + { + "epoch": 0.05, + "learning_rate": 4.921444293785311e-05, + "loss": 0.1416, + "step": 695500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9213879096045204e-05, + "loss": 0.1409, + "step": 696000 + }, + { + "epoch": 0.05, + "learning_rate": 4.921331412429379e-05, + "loss": 0.147, + "step": 696500 + }, + { + "epoch": 0.05, + "learning_rate": 4.921274915254238e-05, + "loss": 0.1478, + "step": 697000 + }, + { + "epoch": 0.05, + "learning_rate": 4.921218418079096e-05, + "loss": 0.1435, + "step": 697500 + }, + { + "epoch": 0.05, + "learning_rate": 4.921161920903955e-05, + "loss": 0.1401, + "step": 698000 + }, + { + "epoch": 0.05, + "learning_rate": 4.921105423728814e-05, + "loss": 0.1562, + "step": 698500 + }, + { + "epoch": 0.05, + "learning_rate": 4.921048926553673e-05, + "loss": 0.1517, + "step": 699000 + }, + { + "epoch": 0.05, + "learning_rate": 4.920992429378532e-05, + "loss": 0.1445, + "step": 699500 + }, + { + "epoch": 0.05, + "learning_rate": 4.92093593220339e-05, + "loss": 0.1498, + "step": 700000 + }, + { + "epoch": 0.05, + "learning_rate": 4.920879548022599e-05, + "loss": 0.1529, + "step": 700500 + }, + { + "epoch": 0.05, + "learning_rate": 4.920823050847458e-05, + "loss": 0.1515, + "step": 701000 + }, + { + "epoch": 0.05, + "learning_rate": 4.920766553672317e-05, + "loss": 0.1465, + "step": 701500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9207100564971754e-05, + "loss": 0.1482, + "step": 702000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9206535593220344e-05, + "loss": 0.1481, + "step": 702500 + }, + { + "epoch": 0.05, + "learning_rate": 4.920597175141243e-05, + "loss": 0.1514, + "step": 703000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9205406779661015e-05, + "loss": 0.1562, + "step": 703500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9204841807909606e-05, + "loss": 0.1556, + "step": 704000 + }, + { + "epoch": 0.05, + "learning_rate": 4.920427683615819e-05, + "loss": 0.1459, + "step": 704500 + }, + { + "epoch": 0.05, + "learning_rate": 4.920371186440678e-05, + "loss": 0.1548, + "step": 705000 + }, + { + "epoch": 0.05, + "learning_rate": 4.920314689265537e-05, + "loss": 0.1464, + "step": 705500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9202581920903955e-05, + "loss": 0.1483, + "step": 706000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9202016949152545e-05, + "loss": 0.1408, + "step": 706500 + }, + { + "epoch": 0.05, + "learning_rate": 4.920145310734464e-05, + "loss": 0.1431, + "step": 707000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9200889265536726e-05, + "loss": 0.1569, + "step": 707500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9200325423728814e-05, + "loss": 0.1403, + "step": 708000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9199760451977404e-05, + "loss": 0.1479, + "step": 708500 + }, + { + "epoch": 0.05, + "learning_rate": 4.919919548022599e-05, + "loss": 0.1452, + "step": 709000 + }, + { + "epoch": 0.05, + "learning_rate": 4.919863050847458e-05, + "loss": 0.1433, + "step": 709500 + }, + { + "epoch": 0.05, + "learning_rate": 4.919806553672316e-05, + "loss": 0.1418, + "step": 710000 + }, + { + "epoch": 0.05, + "learning_rate": 4.919750056497175e-05, + "loss": 0.1433, + "step": 710500 + }, + { + "epoch": 0.05, + "learning_rate": 4.919693559322034e-05, + "loss": 0.1421, + "step": 711000 + }, + { + "epoch": 0.05, + "learning_rate": 4.919637062146893e-05, + "loss": 0.149, + "step": 711500 + }, + { + "epoch": 0.05, + "learning_rate": 4.919580564971752e-05, + "loss": 0.1414, + "step": 712000 + }, + { + "epoch": 0.05, + "learning_rate": 4.91952406779661e-05, + "loss": 0.1469, + "step": 712500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9194676836158196e-05, + "loss": 0.1459, + "step": 713000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9194111864406786e-05, + "loss": 0.1505, + "step": 713500 + }, + { + "epoch": 0.05, + "learning_rate": 4.919354689265537e-05, + "loss": 0.149, + "step": 714000 + }, + { + "epoch": 0.05, + "learning_rate": 4.919298192090396e-05, + "loss": 0.1494, + "step": 714500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9192416949152544e-05, + "loss": 0.1428, + "step": 715000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9191851977401135e-05, + "loss": 0.1529, + "step": 715500 + }, + { + "epoch": 0.05, + "learning_rate": 4.919128813559322e-05, + "loss": 0.1469, + "step": 716000 + }, + { + "epoch": 0.05, + "learning_rate": 4.919072316384181e-05, + "loss": 0.1492, + "step": 716500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9190158192090397e-05, + "loss": 0.1419, + "step": 717000 + }, + { + "epoch": 0.05, + "learning_rate": 4.918959322033899e-05, + "loss": 0.1431, + "step": 717500 + }, + { + "epoch": 0.05, + "learning_rate": 4.918902824858757e-05, + "loss": 0.1471, + "step": 718000 + }, + { + "epoch": 0.05, + "learning_rate": 4.918846327683616e-05, + "loss": 0.1539, + "step": 718500 + }, + { + "epoch": 0.05, + "learning_rate": 4.918789943502825e-05, + "loss": 0.1392, + "step": 719000 + }, + { + "epoch": 0.05, + "learning_rate": 4.918733446327684e-05, + "loss": 0.1431, + "step": 719500 + }, + { + "epoch": 0.05, + "learning_rate": 4.918676949152542e-05, + "loss": 0.1399, + "step": 720000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9186204519774014e-05, + "loss": 0.1367, + "step": 720500 + }, + { + "epoch": 0.05, + "learning_rate": 4.918564067796611e-05, + "loss": 0.1457, + "step": 721000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9185076836158195e-05, + "loss": 0.1482, + "step": 721500 + }, + { + "epoch": 0.05, + "learning_rate": 4.918451186440678e-05, + "loss": 0.1438, + "step": 722000 + }, + { + "epoch": 0.05, + "learning_rate": 4.918394689265537e-05, + "loss": 0.1522, + "step": 722500 + }, + { + "epoch": 0.05, + "learning_rate": 4.918338192090396e-05, + "loss": 0.1434, + "step": 723000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9182816949152544e-05, + "loss": 0.1497, + "step": 723500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9182251977401134e-05, + "loss": 0.1424, + "step": 724000 + }, + { + "epoch": 0.05, + "learning_rate": 4.918168700564972e-05, + "loss": 0.1494, + "step": 724500 + }, + { + "epoch": 0.05, + "learning_rate": 4.918112203389831e-05, + "loss": 0.1474, + "step": 725000 + }, + { + "epoch": 0.05, + "learning_rate": 4.918055706214689e-05, + "loss": 0.1517, + "step": 725500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9179993220338986e-05, + "loss": 0.1396, + "step": 726000 + }, + { + "epoch": 0.05, + "learning_rate": 4.917942824858757e-05, + "loss": 0.1388, + "step": 726500 + }, + { + "epoch": 0.05, + "learning_rate": 4.917886327683616e-05, + "loss": 0.1435, + "step": 727000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9178298305084745e-05, + "loss": 0.1486, + "step": 727500 + }, + { + "epoch": 0.05, + "learning_rate": 4.917773446327684e-05, + "loss": 0.1382, + "step": 728000 + }, + { + "epoch": 0.05, + "learning_rate": 4.917716949152543e-05, + "loss": 0.1455, + "step": 728500 + }, + { + "epoch": 0.05, + "learning_rate": 4.917660451977401e-05, + "loss": 0.1525, + "step": 729000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9176039548022604e-05, + "loss": 0.1478, + "step": 729500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9175474576271194e-05, + "loss": 0.1463, + "step": 730000 + }, + { + "epoch": 0.05, + "learning_rate": 4.917491073446328e-05, + "loss": 0.1441, + "step": 730500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9174345762711865e-05, + "loss": 0.1426, + "step": 731000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9173780790960456e-05, + "loss": 0.1503, + "step": 731500 + }, + { + "epoch": 0.05, + "learning_rate": 4.917321581920904e-05, + "loss": 0.1498, + "step": 732000 + }, + { + "epoch": 0.05, + "learning_rate": 4.917265084745763e-05, + "loss": 0.1431, + "step": 732500 + }, + { + "epoch": 0.05, + "learning_rate": 4.917208587570622e-05, + "loss": 0.1484, + "step": 733000 + }, + { + "epoch": 0.05, + "learning_rate": 4.917152203389831e-05, + "loss": 0.1428, + "step": 733500 + }, + { + "epoch": 0.05, + "learning_rate": 4.917095706214689e-05, + "loss": 0.1377, + "step": 734000 + }, + { + "epoch": 0.05, + "learning_rate": 4.917039209039548e-05, + "loss": 0.1551, + "step": 734500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9169827118644066e-05, + "loss": 0.1527, + "step": 735000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916926327683616e-05, + "loss": 0.1387, + "step": 735500 + }, + { + "epoch": 0.05, + "learning_rate": 4.916869830508475e-05, + "loss": 0.1453, + "step": 736000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916813333333334e-05, + "loss": 0.1531, + "step": 736500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9167568361581925e-05, + "loss": 0.1513, + "step": 737000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9167003389830516e-05, + "loss": 0.1402, + "step": 737500 + }, + { + "epoch": 0.05, + "learning_rate": 4.91664384180791e-05, + "loss": 0.1429, + "step": 738000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916587457627119e-05, + "loss": 0.147, + "step": 738500 + }, + { + "epoch": 0.05, + "learning_rate": 4.916530960451978e-05, + "loss": 0.1582, + "step": 739000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916474463276836e-05, + "loss": 0.1506, + "step": 739500 + }, + { + "epoch": 0.05, + "learning_rate": 4.916417966101695e-05, + "loss": 0.1527, + "step": 740000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916361468926554e-05, + "loss": 0.1427, + "step": 740500 + }, + { + "epoch": 0.05, + "learning_rate": 4.916305084745763e-05, + "loss": 0.1395, + "step": 741000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916248587570621e-05, + "loss": 0.1388, + "step": 741500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9161920903954804e-05, + "loss": 0.1384, + "step": 742000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916135593220339e-05, + "loss": 0.1437, + "step": 742500 + }, + { + "epoch": 0.05, + "learning_rate": 4.916079209039548e-05, + "loss": 0.148, + "step": 743000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916022711864407e-05, + "loss": 0.1512, + "step": 743500 + }, + { + "epoch": 0.05, + "learning_rate": 4.915966214689266e-05, + "loss": 0.1387, + "step": 744000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9159097175141247e-05, + "loss": 0.1457, + "step": 744500 + }, + { + "epoch": 0.05, + "learning_rate": 4.915853220338984e-05, + "loss": 0.1455, + "step": 745000 + }, + { + "epoch": 0.05, + "learning_rate": 4.915796723163842e-05, + "loss": 0.1475, + "step": 745500 + }, + { + "epoch": 0.05, + "learning_rate": 4.915740225988701e-05, + "loss": 0.1462, + "step": 746000 + }, + { + "epoch": 0.05, + "learning_rate": 4.91568384180791e-05, + "loss": 0.146, + "step": 746500 + }, + { + "epoch": 0.05, + "learning_rate": 4.915627344632769e-05, + "loss": 0.1395, + "step": 747000 + }, + { + "epoch": 0.05, + "learning_rate": 4.915570847457627e-05, + "loss": 0.1465, + "step": 747500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9155143502824864e-05, + "loss": 0.1504, + "step": 748000 + }, + { + "epoch": 0.05, + "learning_rate": 4.915457853107345e-05, + "loss": 0.14, + "step": 748500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9154014689265535e-05, + "loss": 0.1459, + "step": 749000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9153449717514125e-05, + "loss": 0.1395, + "step": 749500 + }, + { + "epoch": 0.05, + "learning_rate": 4.915288474576271e-05, + "loss": 0.1454, + "step": 750000 + }, + { + "epoch": 0.05, + "learning_rate": 4.91523197740113e-05, + "loss": 0.1431, + "step": 750500 + }, + { + "epoch": 0.05, + "learning_rate": 4.915175480225989e-05, + "loss": 0.1363, + "step": 751000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9151189830508474e-05, + "loss": 0.1404, + "step": 751500 + }, + { + "epoch": 0.05, + "learning_rate": 4.915062598870057e-05, + "loss": 0.1454, + "step": 752000 + }, + { + "epoch": 0.05, + "learning_rate": 4.915006101694916e-05, + "loss": 0.1373, + "step": 752500 + }, + { + "epoch": 0.05, + "learning_rate": 4.914949604519774e-05, + "loss": 0.1403, + "step": 753000 + }, + { + "epoch": 0.05, + "learning_rate": 4.914893107344633e-05, + "loss": 0.1456, + "step": 753500 + }, + { + "epoch": 0.05, + "learning_rate": 4.914836723163842e-05, + "loss": 0.1432, + "step": 754000 + }, + { + "epoch": 0.05, + "learning_rate": 4.914780225988701e-05, + "loss": 0.1436, + "step": 754500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9147237288135595e-05, + "loss": 0.1473, + "step": 755000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9146672316384185e-05, + "loss": 0.1392, + "step": 755500 + }, + { + "epoch": 0.05, + "learning_rate": 4.914610847457627e-05, + "loss": 0.1446, + "step": 756000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9145543502824856e-05, + "loss": 0.1386, + "step": 756500 + }, + { + "epoch": 0.05, + "learning_rate": 4.914497853107345e-05, + "loss": 0.1407, + "step": 757000 + }, + { + "epoch": 0.05, + "learning_rate": 4.914441355932204e-05, + "loss": 0.1439, + "step": 757500 + }, + { + "epoch": 0.05, + "learning_rate": 4.914384858757062e-05, + "loss": 0.1422, + "step": 758000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9143284745762715e-05, + "loss": 0.1387, + "step": 758500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9142719774011306e-05, + "loss": 0.1392, + "step": 759000 + }, + { + "epoch": 0.05, + "learning_rate": 4.914215480225989e-05, + "loss": 0.1527, + "step": 759500 + }, + { + "epoch": 0.05, + "learning_rate": 4.914158983050848e-05, + "loss": 0.1469, + "step": 760000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9141024858757064e-05, + "loss": 0.1434, + "step": 760500 + }, + { + "epoch": 0.05, + "learning_rate": 4.914046101694916e-05, + "loss": 0.1431, + "step": 761000 + }, + { + "epoch": 0.05, + "learning_rate": 4.913989604519774e-05, + "loss": 0.1474, + "step": 761500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913933107344633e-05, + "loss": 0.1473, + "step": 762000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9138766101694916e-05, + "loss": 0.1443, + "step": 762500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9138202259887e-05, + "loss": 0.1436, + "step": 763000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9137637288135594e-05, + "loss": 0.1459, + "step": 763500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913707231638418e-05, + "loss": 0.1369, + "step": 764000 + }, + { + "epoch": 0.05, + "learning_rate": 4.913650734463277e-05, + "loss": 0.1432, + "step": 764500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913594237288136e-05, + "loss": 0.1436, + "step": 765000 + }, + { + "epoch": 0.05, + "learning_rate": 4.913537853107345e-05, + "loss": 0.1412, + "step": 765500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913481355932204e-05, + "loss": 0.1474, + "step": 766000 + }, + { + "epoch": 0.05, + "learning_rate": 4.913424858757063e-05, + "loss": 0.1421, + "step": 766500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913368361581921e-05, + "loss": 0.1425, + "step": 767000 + }, + { + "epoch": 0.05, + "learning_rate": 4.91331197740113e-05, + "loss": 0.1455, + "step": 767500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913255480225989e-05, + "loss": 0.1468, + "step": 768000 + }, + { + "epoch": 0.05, + "learning_rate": 4.913198983050848e-05, + "loss": 0.1438, + "step": 768500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913142485875706e-05, + "loss": 0.1499, + "step": 769000 + }, + { + "epoch": 0.05, + "learning_rate": 4.913086101694915e-05, + "loss": 0.14, + "step": 769500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913029604519774e-05, + "loss": 0.1436, + "step": 770000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9129731073446325e-05, + "loss": 0.1445, + "step": 770500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9129166101694915e-05, + "loss": 0.1482, + "step": 771000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9128601129943506e-05, + "loss": 0.147, + "step": 771500 + }, + { + "epoch": 0.05, + "learning_rate": 4.91280372881356e-05, + "loss": 0.1396, + "step": 772000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9127472316384184e-05, + "loss": 0.1427, + "step": 772500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9126907344632774e-05, + "loss": 0.146, + "step": 773000 + }, + { + "epoch": 0.05, + "learning_rate": 4.912634237288136e-05, + "loss": 0.145, + "step": 773500 + }, + { + "epoch": 0.05, + "learning_rate": 4.912577740112995e-05, + "loss": 0.1436, + "step": 774000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9125213559322036e-05, + "loss": 0.139, + "step": 774500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9124648587570627e-05, + "loss": 0.1505, + "step": 775000 + }, + { + "epoch": 0.05, + "learning_rate": 4.912408361581921e-05, + "loss": 0.1339, + "step": 775500 + }, + { + "epoch": 0.05, + "learning_rate": 4.91235186440678e-05, + "loss": 0.1407, + "step": 776000 + }, + { + "epoch": 0.05, + "learning_rate": 4.912295480225989e-05, + "loss": 0.1416, + "step": 776500 + }, + { + "epoch": 0.05, + "learning_rate": 4.912238983050847e-05, + "loss": 0.1419, + "step": 777000 + }, + { + "epoch": 0.05, + "learning_rate": 4.912182485875706e-05, + "loss": 0.1428, + "step": 777500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9121259887005646e-05, + "loss": 0.139, + "step": 778000 + }, + { + "epoch": 0.05, + "learning_rate": 4.912069491525424e-05, + "loss": 0.145, + "step": 778500 + }, + { + "epoch": 0.05, + "learning_rate": 4.912013107344633e-05, + "loss": 0.1445, + "step": 779000 + }, + { + "epoch": 0.05, + "learning_rate": 4.911956610169492e-05, + "loss": 0.1364, + "step": 779500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9119001129943505e-05, + "loss": 0.132, + "step": 780000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9118436158192096e-05, + "loss": 0.1489, + "step": 780500 + }, + { + "epoch": 0.05, + "learning_rate": 4.911787231638418e-05, + "loss": 0.141, + "step": 781000 + }, + { + "epoch": 0.05, + "learning_rate": 4.911730734463277e-05, + "loss": 0.1504, + "step": 781500 + }, + { + "epoch": 0.05, + "learning_rate": 4.911674237288136e-05, + "loss": 0.1356, + "step": 782000 + }, + { + "epoch": 0.05, + "learning_rate": 4.911617740112995e-05, + "loss": 0.1504, + "step": 782500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9115613559322035e-05, + "loss": 0.1386, + "step": 783000 + }, + { + "epoch": 0.05, + "learning_rate": 4.911504858757062e-05, + "loss": 0.1549, + "step": 783500 + }, + { + "epoch": 0.05, + "learning_rate": 4.911448361581921e-05, + "loss": 0.1382, + "step": 784000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9113918644067793e-05, + "loss": 0.138, + "step": 784500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9113353672316384e-05, + "loss": 0.1345, + "step": 785000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9112788700564975e-05, + "loss": 0.1407, + "step": 785500 + }, + { + "epoch": 0.05, + "learning_rate": 4.911222485875707e-05, + "loss": 0.1426, + "step": 786000 + }, + { + "epoch": 0.05, + "learning_rate": 4.911165988700565e-05, + "loss": 0.1386, + "step": 786500 + }, + { + "epoch": 0.05, + "learning_rate": 4.911109491525424e-05, + "loss": 0.1397, + "step": 787000 + }, + { + "epoch": 0.05, + "learning_rate": 4.911052994350283e-05, + "loss": 0.1361, + "step": 787500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9109966101694914e-05, + "loss": 0.1429, + "step": 788000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9109401129943505e-05, + "loss": 0.1404, + "step": 788500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9108836158192095e-05, + "loss": 0.1393, + "step": 789000 + }, + { + "epoch": 0.05, + "learning_rate": 4.910827118644068e-05, + "loss": 0.1437, + "step": 789500 + }, + { + "epoch": 0.05, + "learning_rate": 4.910770621468927e-05, + "loss": 0.1336, + "step": 790000 + }, + { + "epoch": 0.05, + "learning_rate": 4.910714237288136e-05, + "loss": 0.1371, + "step": 790500 + }, + { + "epoch": 0.05, + "learning_rate": 4.910657740112995e-05, + "loss": 0.1441, + "step": 791000 + }, + { + "epoch": 0.05, + "learning_rate": 4.910601242937853e-05, + "loss": 0.1432, + "step": 791500 + }, + { + "epoch": 0.05, + "learning_rate": 4.910544745762712e-05, + "loss": 0.139, + "step": 792000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9104883615819216e-05, + "loss": 0.1469, + "step": 792500 + }, + { + "epoch": 0.05, + "learning_rate": 4.91043186440678e-05, + "loss": 0.1414, + "step": 793000 + }, + { + "epoch": 0.05, + "learning_rate": 4.910375367231639e-05, + "loss": 0.1479, + "step": 793500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9103188700564974e-05, + "loss": 0.1462, + "step": 794000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9102623728813564e-05, + "loss": 0.1486, + "step": 794500 + }, + { + "epoch": 0.05, + "learning_rate": 4.910205988700565e-05, + "loss": 0.1412, + "step": 795000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9101494915254235e-05, + "loss": 0.1504, + "step": 795500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9100929943502826e-05, + "loss": 0.138, + "step": 796000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9100364971751417e-05, + "loss": 0.1359, + "step": 796500 + }, + { + "epoch": 0.05, + "learning_rate": 4.90998e-05, + "loss": 0.1532, + "step": 797000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9099236158192094e-05, + "loss": 0.1388, + "step": 797500 + }, + { + "epoch": 0.05, + "learning_rate": 4.909867118644068e-05, + "loss": 0.1422, + "step": 798000 + }, + { + "epoch": 0.05, + "learning_rate": 4.909810621468927e-05, + "loss": 0.1439, + "step": 798500 + }, + { + "epoch": 0.05, + "learning_rate": 4.909754124293785e-05, + "loss": 0.1464, + "step": 799000 + }, + { + "epoch": 0.05, + "learning_rate": 4.909697627118644e-05, + "loss": 0.1468, + "step": 799500 + }, + { + "epoch": 0.05, + "learning_rate": 4.909641242937854e-05, + "loss": 0.1419, + "step": 800000 + }, + { + "epoch": 0.05, + "learning_rate": 4.909584745762712e-05, + "loss": 0.1366, + "step": 800500 + }, + { + "epoch": 0.05, + "learning_rate": 4.909528248587571e-05, + "loss": 0.1289, + "step": 801000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9094717514124295e-05, + "loss": 0.1397, + "step": 801500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9094152542372886e-05, + "loss": 0.1358, + "step": 802000 + }, + { + "epoch": 0.05, + "learning_rate": 4.909358757062147e-05, + "loss": 0.1423, + "step": 802500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9093023728813564e-05, + "loss": 0.1319, + "step": 803000 + }, + { + "epoch": 0.05, + "learning_rate": 4.909245875706215e-05, + "loss": 0.1432, + "step": 803500 + }, + { + "epoch": 0.05, + "learning_rate": 4.909189378531074e-05, + "loss": 0.1382, + "step": 804000 + }, + { + "epoch": 0.05, + "learning_rate": 4.909132881355932e-05, + "loss": 0.1386, + "step": 804500 + }, + { + "epoch": 0.05, + "learning_rate": 4.909076384180791e-05, + "loss": 0.1444, + "step": 805000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9090198870056496e-05, + "loss": 0.1408, + "step": 805500 + }, + { + "epoch": 0.05, + "learning_rate": 4.908963502824859e-05, + "loss": 0.1415, + "step": 806000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9089070056497174e-05, + "loss": 0.1383, + "step": 806500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9088505084745765e-05, + "loss": 0.1477, + "step": 807000 + }, + { + "epoch": 0.05, + "learning_rate": 4.908794011299435e-05, + "loss": 0.1397, + "step": 807500 + }, + { + "epoch": 0.05, + "learning_rate": 4.908737627118644e-05, + "loss": 0.1488, + "step": 808000 + }, + { + "epoch": 0.05, + "learning_rate": 4.908681129943503e-05, + "loss": 0.1354, + "step": 808500 + }, + { + "epoch": 0.05, + "learning_rate": 4.908624632768362e-05, + "loss": 0.1426, + "step": 809000 + }, + { + "epoch": 0.05, + "learning_rate": 4.908568135593221e-05, + "loss": 0.1387, + "step": 809500 + }, + { + "epoch": 0.05, + "learning_rate": 4.90851163841808e-05, + "loss": 0.1446, + "step": 810000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9084552542372885e-05, + "loss": 0.1483, + "step": 810500 + }, + { + "epoch": 0.05, + "learning_rate": 4.908398757062147e-05, + "loss": 0.1386, + "step": 811000 + }, + { + "epoch": 0.06, + "learning_rate": 4.908342259887006e-05, + "loss": 0.1396, + "step": 811500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9082857627118643e-05, + "loss": 0.1432, + "step": 812000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9082292655367234e-05, + "loss": 0.1377, + "step": 812500 + }, + { + "epoch": 0.06, + "learning_rate": 4.908172881355932e-05, + "loss": 0.1397, + "step": 813000 + }, + { + "epoch": 0.06, + "learning_rate": 4.908116384180791e-05, + "loss": 0.1377, + "step": 813500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9080598870056496e-05, + "loss": 0.1439, + "step": 814000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9080033898305086e-05, + "loss": 0.1399, + "step": 814500 + }, + { + "epoch": 0.06, + "learning_rate": 4.907946892655368e-05, + "loss": 0.139, + "step": 815000 + }, + { + "epoch": 0.06, + "learning_rate": 4.907890395480226e-05, + "loss": 0.1415, + "step": 815500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9078340112994355e-05, + "loss": 0.1418, + "step": 816000 + }, + { + "epoch": 0.06, + "learning_rate": 4.907777514124294e-05, + "loss": 0.1441, + "step": 816500 + }, + { + "epoch": 0.06, + "learning_rate": 4.907721016949153e-05, + "loss": 0.1475, + "step": 817000 + }, + { + "epoch": 0.06, + "learning_rate": 4.907664519774012e-05, + "loss": 0.139, + "step": 817500 + }, + { + "epoch": 0.06, + "learning_rate": 4.907608135593221e-05, + "loss": 0.1456, + "step": 818000 + }, + { + "epoch": 0.06, + "learning_rate": 4.907551638418079e-05, + "loss": 0.1398, + "step": 818500 + }, + { + "epoch": 0.06, + "learning_rate": 4.907495141242938e-05, + "loss": 0.1372, + "step": 819000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9074386440677965e-05, + "loss": 0.1484, + "step": 819500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9073821468926555e-05, + "loss": 0.145, + "step": 820000 + }, + { + "epoch": 0.06, + "learning_rate": 4.907325762711865e-05, + "loss": 0.1453, + "step": 820500 + }, + { + "epoch": 0.06, + "learning_rate": 4.907269265536723e-05, + "loss": 0.1416, + "step": 821000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9072127683615824e-05, + "loss": 0.1437, + "step": 821500 + }, + { + "epoch": 0.06, + "learning_rate": 4.907156271186441e-05, + "loss": 0.1418, + "step": 822000 + }, + { + "epoch": 0.06, + "learning_rate": 4.90709988700565e-05, + "loss": 0.141, + "step": 822500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9070433898305085e-05, + "loss": 0.143, + "step": 823000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9069868926553676e-05, + "loss": 0.1391, + "step": 823500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9069303954802267e-05, + "loss": 0.1366, + "step": 824000 + }, + { + "epoch": 0.06, + "learning_rate": 4.906873898305085e-05, + "loss": 0.1362, + "step": 824500 + }, + { + "epoch": 0.06, + "learning_rate": 4.906817514124294e-05, + "loss": 0.1403, + "step": 825000 + }, + { + "epoch": 0.06, + "learning_rate": 4.906761016949153e-05, + "loss": 0.1423, + "step": 825500 + }, + { + "epoch": 0.06, + "learning_rate": 4.906704519774011e-05, + "loss": 0.145, + "step": 826000 + }, + { + "epoch": 0.06, + "learning_rate": 4.90664802259887e-05, + "loss": 0.1436, + "step": 826500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9065916384180797e-05, + "loss": 0.1402, + "step": 827000 + }, + { + "epoch": 0.06, + "learning_rate": 4.906535141242938e-05, + "loss": 0.1424, + "step": 827500 + }, + { + "epoch": 0.06, + "learning_rate": 4.906478644067797e-05, + "loss": 0.1419, + "step": 828000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9064221468926555e-05, + "loss": 0.1368, + "step": 828500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9063656497175145e-05, + "loss": 0.1414, + "step": 829000 + }, + { + "epoch": 0.06, + "learning_rate": 4.906309265536723e-05, + "loss": 0.1412, + "step": 829500 + }, + { + "epoch": 0.06, + "learning_rate": 4.906252768361582e-05, + "loss": 0.133, + "step": 830000 + }, + { + "epoch": 0.06, + "learning_rate": 4.906196271186441e-05, + "loss": 0.138, + "step": 830500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9061397740113e-05, + "loss": 0.1467, + "step": 831000 + }, + { + "epoch": 0.06, + "learning_rate": 4.906083276836159e-05, + "loss": 0.1407, + "step": 831500 + }, + { + "epoch": 0.06, + "learning_rate": 4.906026779661017e-05, + "loss": 0.1361, + "step": 832000 + }, + { + "epoch": 0.06, + "learning_rate": 4.905970395480226e-05, + "loss": 0.1477, + "step": 832500 + }, + { + "epoch": 0.06, + "learning_rate": 4.905913898305085e-05, + "loss": 0.1485, + "step": 833000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9058574011299433e-05, + "loss": 0.1452, + "step": 833500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9058009039548024e-05, + "loss": 0.1355, + "step": 834000 + }, + { + "epoch": 0.06, + "learning_rate": 4.905744519774012e-05, + "loss": 0.1306, + "step": 834500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9056881355932205e-05, + "loss": 0.1333, + "step": 835000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9056316384180796e-05, + "loss": 0.1328, + "step": 835500 + }, + { + "epoch": 0.06, + "learning_rate": 4.905575141242938e-05, + "loss": 0.1399, + "step": 836000 + }, + { + "epoch": 0.06, + "learning_rate": 4.905518644067797e-05, + "loss": 0.1458, + "step": 836500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9054621468926554e-05, + "loss": 0.1423, + "step": 837000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9054056497175145e-05, + "loss": 0.1467, + "step": 837500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9053491525423735e-05, + "loss": 0.142, + "step": 838000 + }, + { + "epoch": 0.06, + "learning_rate": 4.905292655367232e-05, + "loss": 0.1475, + "step": 838500 + }, + { + "epoch": 0.06, + "learning_rate": 4.905236158192091e-05, + "loss": 0.1455, + "step": 839000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9051797740113e-05, + "loss": 0.1431, + "step": 839500 + }, + { + "epoch": 0.06, + "learning_rate": 4.905123276836158e-05, + "loss": 0.1424, + "step": 840000 + }, + { + "epoch": 0.06, + "learning_rate": 4.905066779661017e-05, + "loss": 0.1366, + "step": 840500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9050102824858755e-05, + "loss": 0.1483, + "step": 841000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9049537853107346e-05, + "loss": 0.1353, + "step": 841500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9048972881355936e-05, + "loss": 0.1491, + "step": 842000 + }, + { + "epoch": 0.06, + "learning_rate": 4.904840903954802e-05, + "loss": 0.1344, + "step": 842500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9047844067796614e-05, + "loss": 0.1374, + "step": 843000 + }, + { + "epoch": 0.06, + "learning_rate": 4.90472790960452e-05, + "loss": 0.1376, + "step": 843500 + }, + { + "epoch": 0.06, + "learning_rate": 4.904671412429379e-05, + "loss": 0.1373, + "step": 844000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9046150282485876e-05, + "loss": 0.1433, + "step": 844500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9045585310734466e-05, + "loss": 0.1334, + "step": 845000 + }, + { + "epoch": 0.06, + "learning_rate": 4.904502033898306e-05, + "loss": 0.1409, + "step": 845500 + }, + { + "epoch": 0.06, + "learning_rate": 4.904445536723164e-05, + "loss": 0.1342, + "step": 846000 + }, + { + "epoch": 0.06, + "learning_rate": 4.904389039548023e-05, + "loss": 0.1449, + "step": 846500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9043325423728815e-05, + "loss": 0.1454, + "step": 847000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9042760451977405e-05, + "loss": 0.1392, + "step": 847500 + }, + { + "epoch": 0.06, + "learning_rate": 4.904219548022599e-05, + "loss": 0.147, + "step": 848000 + }, + { + "epoch": 0.06, + "learning_rate": 4.904163163841808e-05, + "loss": 0.1407, + "step": 848500 + }, + { + "epoch": 0.06, + "learning_rate": 4.904106666666667e-05, + "loss": 0.1412, + "step": 849000 + }, + { + "epoch": 0.06, + "learning_rate": 4.904050169491526e-05, + "loss": 0.1369, + "step": 849500 + }, + { + "epoch": 0.06, + "learning_rate": 4.903993672316384e-05, + "loss": 0.1373, + "step": 850000 + }, + { + "epoch": 0.06, + "learning_rate": 4.903937175141243e-05, + "loss": 0.1391, + "step": 850500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9038806779661016e-05, + "loss": 0.1384, + "step": 851000 + }, + { + "epoch": 0.06, + "learning_rate": 4.903824293785311e-05, + "loss": 0.1385, + "step": 851500 + }, + { + "epoch": 0.06, + "learning_rate": 4.90376779661017e-05, + "loss": 0.1392, + "step": 852000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9037112994350284e-05, + "loss": 0.1388, + "step": 852500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9036548022598875e-05, + "loss": 0.135, + "step": 853000 + }, + { + "epoch": 0.06, + "learning_rate": 4.903598305084746e-05, + "loss": 0.1352, + "step": 853500 + }, + { + "epoch": 0.06, + "learning_rate": 4.903541807909605e-05, + "loss": 0.1372, + "step": 854000 + }, + { + "epoch": 0.06, + "learning_rate": 4.903485310734463e-05, + "loss": 0.1452, + "step": 854500 + }, + { + "epoch": 0.06, + "learning_rate": 4.903428926553673e-05, + "loss": 0.1389, + "step": 855000 + }, + { + "epoch": 0.06, + "learning_rate": 4.903372429378532e-05, + "loss": 0.1388, + "step": 855500 + }, + { + "epoch": 0.06, + "learning_rate": 4.90331593220339e-05, + "loss": 0.1422, + "step": 856000 + }, + { + "epoch": 0.06, + "learning_rate": 4.903259435028249e-05, + "loss": 0.1442, + "step": 856500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9032029378531076e-05, + "loss": 0.1444, + "step": 857000 + }, + { + "epoch": 0.06, + "learning_rate": 4.903146553672316e-05, + "loss": 0.1331, + "step": 857500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9030900564971753e-05, + "loss": 0.1399, + "step": 858000 + }, + { + "epoch": 0.06, + "learning_rate": 4.903033559322034e-05, + "loss": 0.1387, + "step": 858500 + }, + { + "epoch": 0.06, + "learning_rate": 4.902977062146893e-05, + "loss": 0.1356, + "step": 859000 + }, + { + "epoch": 0.06, + "learning_rate": 4.902920677966102e-05, + "loss": 0.1459, + "step": 859500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9028641807909606e-05, + "loss": 0.1318, + "step": 860000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9028076836158196e-05, + "loss": 0.1324, + "step": 860500 + }, + { + "epoch": 0.06, + "learning_rate": 4.902751186440678e-05, + "loss": 0.137, + "step": 861000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9026948022598874e-05, + "loss": 0.1473, + "step": 861500 + }, + { + "epoch": 0.06, + "learning_rate": 4.902638305084746e-05, + "loss": 0.1343, + "step": 862000 + }, + { + "epoch": 0.06, + "learning_rate": 4.902581807909605e-05, + "loss": 0.1415, + "step": 862500 + }, + { + "epoch": 0.06, + "learning_rate": 4.902525310734464e-05, + "loss": 0.1366, + "step": 863000 + }, + { + "epoch": 0.06, + "learning_rate": 4.902468813559322e-05, + "loss": 0.1353, + "step": 863500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9024123163841813e-05, + "loss": 0.1386, + "step": 864000 + }, + { + "epoch": 0.06, + "learning_rate": 4.90235581920904e-05, + "loss": 0.1427, + "step": 864500 + }, + { + "epoch": 0.06, + "learning_rate": 4.902299322033899e-05, + "loss": 0.1439, + "step": 865000 + }, + { + "epoch": 0.06, + "learning_rate": 4.902243050847458e-05, + "loss": 0.1339, + "step": 865500 + }, + { + "epoch": 0.06, + "learning_rate": 4.902186553672317e-05, + "loss": 0.1407, + "step": 866000 + }, + { + "epoch": 0.06, + "learning_rate": 4.902130056497175e-05, + "loss": 0.1479, + "step": 866500 + }, + { + "epoch": 0.06, + "learning_rate": 4.902073559322034e-05, + "loss": 0.1383, + "step": 867000 + }, + { + "epoch": 0.06, + "learning_rate": 4.902017062146893e-05, + "loss": 0.1407, + "step": 867500 + }, + { + "epoch": 0.06, + "learning_rate": 4.901960564971752e-05, + "loss": 0.1356, + "step": 868000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9019041807909605e-05, + "loss": 0.129, + "step": 868500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9018476836158196e-05, + "loss": 0.1392, + "step": 869000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9017911864406786e-05, + "loss": 0.1466, + "step": 869500 + }, + { + "epoch": 0.06, + "learning_rate": 4.901734689265537e-05, + "loss": 0.1441, + "step": 870000 + }, + { + "epoch": 0.06, + "learning_rate": 4.901678192090396e-05, + "loss": 0.1337, + "step": 870500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9016216949152544e-05, + "loss": 0.135, + "step": 871000 + }, + { + "epoch": 0.06, + "learning_rate": 4.901565310734463e-05, + "loss": 0.1343, + "step": 871500 + }, + { + "epoch": 0.06, + "learning_rate": 4.901508813559322e-05, + "loss": 0.1426, + "step": 872000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9014523163841806e-05, + "loss": 0.1393, + "step": 872500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9013958192090396e-05, + "loss": 0.1429, + "step": 873000 + }, + { + "epoch": 0.06, + "learning_rate": 4.901339322033899e-05, + "loss": 0.1387, + "step": 873500 + }, + { + "epoch": 0.06, + "learning_rate": 4.901282937853108e-05, + "loss": 0.1387, + "step": 874000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9012264406779665e-05, + "loss": 0.1369, + "step": 874500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9011699435028255e-05, + "loss": 0.1377, + "step": 875000 + }, + { + "epoch": 0.06, + "learning_rate": 4.901113446327684e-05, + "loss": 0.136, + "step": 875500 + }, + { + "epoch": 0.06, + "learning_rate": 4.901056949152543e-05, + "loss": 0.1336, + "step": 876000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9010004519774014e-05, + "loss": 0.1279, + "step": 876500 + }, + { + "epoch": 0.06, + "learning_rate": 4.900944067796611e-05, + "loss": 0.1402, + "step": 877000 + }, + { + "epoch": 0.06, + "learning_rate": 4.900887570621469e-05, + "loss": 0.1368, + "step": 877500 + }, + { + "epoch": 0.06, + "learning_rate": 4.900831073446328e-05, + "loss": 0.1426, + "step": 878000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9007745762711866e-05, + "loss": 0.1376, + "step": 878500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9007180790960456e-05, + "loss": 0.1366, + "step": 879000 + }, + { + "epoch": 0.06, + "learning_rate": 4.900661581920904e-05, + "loss": 0.1372, + "step": 879500 + }, + { + "epoch": 0.06, + "learning_rate": 4.900605084745763e-05, + "loss": 0.1378, + "step": 880000 + }, + { + "epoch": 0.06, + "learning_rate": 4.900548587570622e-05, + "loss": 0.1389, + "step": 880500 + }, + { + "epoch": 0.06, + "learning_rate": 4.900492203389831e-05, + "loss": 0.1381, + "step": 881000 + }, + { + "epoch": 0.06, + "learning_rate": 4.90043581920904e-05, + "loss": 0.1298, + "step": 881500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9003793220338986e-05, + "loss": 0.1443, + "step": 882000 + }, + { + "epoch": 0.06, + "learning_rate": 4.900322824858758e-05, + "loss": 0.1387, + "step": 882500 + }, + { + "epoch": 0.06, + "learning_rate": 4.900266327683616e-05, + "loss": 0.1416, + "step": 883000 + }, + { + "epoch": 0.06, + "learning_rate": 4.900209830508475e-05, + "loss": 0.1431, + "step": 883500 + }, + { + "epoch": 0.06, + "learning_rate": 4.900153446327684e-05, + "loss": 0.1385, + "step": 884000 + }, + { + "epoch": 0.06, + "learning_rate": 4.900096949152543e-05, + "loss": 0.135, + "step": 884500 + }, + { + "epoch": 0.06, + "learning_rate": 4.900040451977401e-05, + "loss": 0.134, + "step": 885000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8999839548022603e-05, + "loss": 0.1344, + "step": 885500 + }, + { + "epoch": 0.06, + "learning_rate": 4.899927570621469e-05, + "loss": 0.1495, + "step": 886000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8998710734463274e-05, + "loss": 0.1401, + "step": 886500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8998145762711865e-05, + "loss": 0.1391, + "step": 887000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8997580790960456e-05, + "loss": 0.1369, + "step": 887500 + }, + { + "epoch": 0.06, + "learning_rate": 4.899701581920904e-05, + "loss": 0.1359, + "step": 888000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8996451977401133e-05, + "loss": 0.1355, + "step": 888500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8995887005649724e-05, + "loss": 0.1366, + "step": 889000 + }, + { + "epoch": 0.06, + "learning_rate": 4.899532203389831e-05, + "loss": 0.1382, + "step": 889500 + }, + { + "epoch": 0.06, + "learning_rate": 4.89947570621469e-05, + "loss": 0.1452, + "step": 890000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8994193220338986e-05, + "loss": 0.1449, + "step": 890500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8993628248587576e-05, + "loss": 0.1363, + "step": 891000 + }, + { + "epoch": 0.06, + "learning_rate": 4.899306327683616e-05, + "loss": 0.1364, + "step": 891500 + }, + { + "epoch": 0.06, + "learning_rate": 4.899249830508475e-05, + "loss": 0.1391, + "step": 892000 + }, + { + "epoch": 0.06, + "learning_rate": 4.899193446327684e-05, + "loss": 0.1424, + "step": 892500 + }, + { + "epoch": 0.06, + "learning_rate": 4.899136949152542e-05, + "loss": 0.1355, + "step": 893000 + }, + { + "epoch": 0.06, + "learning_rate": 4.899080451977401e-05, + "loss": 0.1411, + "step": 893500 + }, + { + "epoch": 0.06, + "learning_rate": 4.89902395480226e-05, + "loss": 0.1376, + "step": 894000 + }, + { + "epoch": 0.06, + "learning_rate": 4.89896757062147e-05, + "loss": 0.138, + "step": 894500 + }, + { + "epoch": 0.06, + "learning_rate": 4.898911073446328e-05, + "loss": 0.1354, + "step": 895000 + }, + { + "epoch": 0.06, + "learning_rate": 4.898854576271187e-05, + "loss": 0.141, + "step": 895500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8987980790960455e-05, + "loss": 0.1366, + "step": 896000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8987415819209046e-05, + "loss": 0.1311, + "step": 896500 + }, + { + "epoch": 0.06, + "learning_rate": 4.898685197740113e-05, + "loss": 0.136, + "step": 897000 + }, + { + "epoch": 0.06, + "learning_rate": 4.898628700564972e-05, + "loss": 0.1373, + "step": 897500 + }, + { + "epoch": 0.06, + "learning_rate": 4.898572203389831e-05, + "loss": 0.1375, + "step": 898000 + }, + { + "epoch": 0.06, + "learning_rate": 4.89851570621469e-05, + "loss": 0.1412, + "step": 898500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8984593220338985e-05, + "loss": 0.1386, + "step": 899000 + }, + { + "epoch": 0.06, + "learning_rate": 4.898402824858757e-05, + "loss": 0.1388, + "step": 899500 + }, + { + "epoch": 0.06, + "learning_rate": 4.898346327683616e-05, + "loss": 0.1386, + "step": 900000 + }, + { + "epoch": 0.06, + "learning_rate": 4.898289830508474e-05, + "loss": 0.1382, + "step": 900500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8982334463276844e-05, + "loss": 0.1365, + "step": 901000 + }, + { + "epoch": 0.06, + "learning_rate": 4.898176949152543e-05, + "loss": 0.137, + "step": 901500 + }, + { + "epoch": 0.06, + "learning_rate": 4.898120451977402e-05, + "loss": 0.1385, + "step": 902000 + }, + { + "epoch": 0.06, + "learning_rate": 4.89806395480226e-05, + "loss": 0.1387, + "step": 902500 + }, + { + "epoch": 0.06, + "learning_rate": 4.898007457627119e-05, + "loss": 0.1481, + "step": 903000 + }, + { + "epoch": 0.06, + "learning_rate": 4.897951073446328e-05, + "loss": 0.1258, + "step": 903500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8978945762711864e-05, + "loss": 0.1343, + "step": 904000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8978380790960454e-05, + "loss": 0.1374, + "step": 904500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8977815819209045e-05, + "loss": 0.1375, + "step": 905000 + }, + { + "epoch": 0.06, + "learning_rate": 4.897725084745763e-05, + "loss": 0.1372, + "step": 905500 + }, + { + "epoch": 0.06, + "learning_rate": 4.897668587570622e-05, + "loss": 0.1402, + "step": 906000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8976122033898306e-05, + "loss": 0.1402, + "step": 906500 + }, + { + "epoch": 0.06, + "learning_rate": 4.897555706214689e-05, + "loss": 0.132, + "step": 907000 + }, + { + "epoch": 0.06, + "learning_rate": 4.897499209039548e-05, + "loss": 0.1431, + "step": 907500 + }, + { + "epoch": 0.06, + "learning_rate": 4.897442711864407e-05, + "loss": 0.1365, + "step": 908000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8973863276836165e-05, + "loss": 0.1333, + "step": 908500 + }, + { + "epoch": 0.06, + "learning_rate": 4.897329830508475e-05, + "loss": 0.143, + "step": 909000 + }, + { + "epoch": 0.06, + "learning_rate": 4.897273333333334e-05, + "loss": 0.1447, + "step": 909500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8972168361581924e-05, + "loss": 0.1287, + "step": 910000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8971603389830514e-05, + "loss": 0.1333, + "step": 910500 + }, + { + "epoch": 0.06, + "learning_rate": 4.89710395480226e-05, + "loss": 0.1391, + "step": 911000 + }, + { + "epoch": 0.06, + "learning_rate": 4.897047457627119e-05, + "loss": 0.1353, + "step": 911500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8969909604519776e-05, + "loss": 0.1402, + "step": 912000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8969344632768366e-05, + "loss": 0.1315, + "step": 912500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8968780790960453e-05, + "loss": 0.1368, + "step": 913000 + }, + { + "epoch": 0.06, + "learning_rate": 4.896821581920904e-05, + "loss": 0.1373, + "step": 913500 + }, + { + "epoch": 0.06, + "learning_rate": 4.896765084745763e-05, + "loss": 0.1407, + "step": 914000 + }, + { + "epoch": 0.06, + "learning_rate": 4.896708587570621e-05, + "loss": 0.1325, + "step": 914500 + }, + { + "epoch": 0.06, + "learning_rate": 4.896652203389831e-05, + "loss": 0.1297, + "step": 915000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8965957062146896e-05, + "loss": 0.1433, + "step": 915500 + }, + { + "epoch": 0.06, + "learning_rate": 4.896539209039549e-05, + "loss": 0.132, + "step": 916000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8964828248587574e-05, + "loss": 0.1359, + "step": 916500 + }, + { + "epoch": 0.06, + "learning_rate": 4.896426327683616e-05, + "loss": 0.1391, + "step": 917000 + }, + { + "epoch": 0.06, + "learning_rate": 4.896369830508475e-05, + "loss": 0.135, + "step": 917500 + }, + { + "epoch": 0.06, + "learning_rate": 4.896313333333333e-05, + "loss": 0.1386, + "step": 918000 + }, + { + "epoch": 0.06, + "learning_rate": 4.896256836158192e-05, + "loss": 0.1382, + "step": 918500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8962003389830513e-05, + "loss": 0.1387, + "step": 919000 + }, + { + "epoch": 0.06, + "learning_rate": 4.89614384180791e-05, + "loss": 0.14, + "step": 919500 + }, + { + "epoch": 0.06, + "learning_rate": 4.896087344632769e-05, + "loss": 0.145, + "step": 920000 + }, + { + "epoch": 0.06, + "learning_rate": 4.896030847457627e-05, + "loss": 0.1375, + "step": 920500 + }, + { + "epoch": 0.06, + "learning_rate": 4.895974350282486e-05, + "loss": 0.1351, + "step": 921000 + }, + { + "epoch": 0.06, + "learning_rate": 4.895917966101695e-05, + "loss": 0.1353, + "step": 921500 + }, + { + "epoch": 0.06, + "learning_rate": 4.895861468926554e-05, + "loss": 0.1312, + "step": 922000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8958049717514124e-05, + "loss": 0.1318, + "step": 922500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8957484745762714e-05, + "loss": 0.1333, + "step": 923000 + }, + { + "epoch": 0.06, + "learning_rate": 4.89569197740113e-05, + "loss": 0.1396, + "step": 923500 + }, + { + "epoch": 0.06, + "learning_rate": 4.895635593220339e-05, + "loss": 0.1382, + "step": 924000 + }, + { + "epoch": 0.06, + "learning_rate": 4.895579096045198e-05, + "loss": 0.1331, + "step": 924500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8955225988700567e-05, + "loss": 0.1367, + "step": 925000 + }, + { + "epoch": 0.06, + "learning_rate": 4.895466101694916e-05, + "loss": 0.1413, + "step": 925500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8954097175141244e-05, + "loss": 0.1361, + "step": 926000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8953532203389835e-05, + "loss": 0.1293, + "step": 926500 + }, + { + "epoch": 0.06, + "learning_rate": 4.895296723163842e-05, + "loss": 0.1336, + "step": 927000 + }, + { + "epoch": 0.06, + "learning_rate": 4.895240225988701e-05, + "loss": 0.1374, + "step": 927500 + }, + { + "epoch": 0.06, + "learning_rate": 4.895183728813559e-05, + "loss": 0.1337, + "step": 928000 + }, + { + "epoch": 0.06, + "learning_rate": 4.895127344632768e-05, + "loss": 0.14, + "step": 928500 + }, + { + "epoch": 0.06, + "learning_rate": 4.895070847457627e-05, + "loss": 0.136, + "step": 929000 + }, + { + "epoch": 0.06, + "learning_rate": 4.895014350282486e-05, + "loss": 0.1421, + "step": 929500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8949578531073445e-05, + "loss": 0.1228, + "step": 930000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8949013559322036e-05, + "loss": 0.1371, + "step": 930500 + }, + { + "epoch": 0.06, + "learning_rate": 4.894844858757062e-05, + "loss": 0.1366, + "step": 931000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8947884745762714e-05, + "loss": 0.1432, + "step": 931500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8947319774011304e-05, + "loss": 0.1366, + "step": 932000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8946754802259895e-05, + "loss": 0.1393, + "step": 932500 + }, + { + "epoch": 0.06, + "learning_rate": 4.894618983050848e-05, + "loss": 0.1373, + "step": 933000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8945625988700566e-05, + "loss": 0.1393, + "step": 933500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8945061016949156e-05, + "loss": 0.1374, + "step": 934000 + }, + { + "epoch": 0.06, + "learning_rate": 4.894449604519774e-05, + "loss": 0.1351, + "step": 934500 + }, + { + "epoch": 0.06, + "learning_rate": 4.894393107344633e-05, + "loss": 0.1412, + "step": 935000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8943366101694915e-05, + "loss": 0.1306, + "step": 935500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8942801129943505e-05, + "loss": 0.1322, + "step": 936000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8942236158192096e-05, + "loss": 0.1371, + "step": 936500 + }, + { + "epoch": 0.06, + "learning_rate": 4.894167118644068e-05, + "loss": 0.1364, + "step": 937000 + }, + { + "epoch": 0.06, + "learning_rate": 4.894110847457628e-05, + "loss": 0.1367, + "step": 937500 + }, + { + "epoch": 0.06, + "learning_rate": 4.894054350282486e-05, + "loss": 0.1377, + "step": 938000 + }, + { + "epoch": 0.06, + "learning_rate": 4.893997853107345e-05, + "loss": 0.1304, + "step": 938500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8939413559322035e-05, + "loss": 0.1357, + "step": 939000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8938848587570626e-05, + "loss": 0.1293, + "step": 939500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8938283615819216e-05, + "loss": 0.138, + "step": 940000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8937719774011303e-05, + "loss": 0.1341, + "step": 940500 + }, + { + "epoch": 0.06, + "learning_rate": 4.893715480225989e-05, + "loss": 0.1354, + "step": 941000 + }, + { + "epoch": 0.06, + "learning_rate": 4.893658983050848e-05, + "loss": 0.1338, + "step": 941500 + }, + { + "epoch": 0.06, + "learning_rate": 4.893602485875706e-05, + "loss": 0.1375, + "step": 942000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8935461016949156e-05, + "loss": 0.1331, + "step": 942500 + }, + { + "epoch": 0.06, + "learning_rate": 4.893489604519774e-05, + "loss": 0.1384, + "step": 943000 + }, + { + "epoch": 0.06, + "learning_rate": 4.893433107344633e-05, + "loss": 0.1413, + "step": 943500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8933766101694914e-05, + "loss": 0.1377, + "step": 944000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8933201129943504e-05, + "loss": 0.1315, + "step": 944500 + }, + { + "epoch": 0.06, + "learning_rate": 4.89326372881356e-05, + "loss": 0.1267, + "step": 945000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8932073446327686e-05, + "loss": 0.1385, + "step": 945500 + }, + { + "epoch": 0.06, + "learning_rate": 4.893150847457627e-05, + "loss": 0.1393, + "step": 946000 + }, + { + "epoch": 0.06, + "learning_rate": 4.893094350282486e-05, + "loss": 0.1288, + "step": 946500 + }, + { + "epoch": 0.06, + "learning_rate": 4.893037853107345e-05, + "loss": 0.133, + "step": 947000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8929813559322034e-05, + "loss": 0.1401, + "step": 947500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8929248587570625e-05, + "loss": 0.1423, + "step": 948000 + }, + { + "epoch": 0.06, + "learning_rate": 4.892868361581921e-05, + "loss": 0.1328, + "step": 948500 + }, + { + "epoch": 0.06, + "learning_rate": 4.89281197740113e-05, + "loss": 0.1333, + "step": 949000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8927554802259887e-05, + "loss": 0.1373, + "step": 949500 + }, + { + "epoch": 0.06, + "learning_rate": 4.892698983050848e-05, + "loss": 0.1349, + "step": 950000 + }, + { + "epoch": 0.06, + "learning_rate": 4.892642485875706e-05, + "loss": 0.1326, + "step": 950500 + }, + { + "epoch": 0.06, + "learning_rate": 4.892585988700565e-05, + "loss": 0.1379, + "step": 951000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8925294915254235e-05, + "loss": 0.1315, + "step": 951500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8924729943502826e-05, + "loss": 0.1369, + "step": 952000 + }, + { + "epoch": 0.06, + "learning_rate": 4.892416497175141e-05, + "loss": 0.1381, + "step": 952500 + }, + { + "epoch": 0.06, + "learning_rate": 4.89236e-05, + "loss": 0.1285, + "step": 953000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8923036158192094e-05, + "loss": 0.1308, + "step": 953500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8922471186440685e-05, + "loss": 0.1307, + "step": 954000 + }, + { + "epoch": 0.06, + "learning_rate": 4.892190621468927e-05, + "loss": 0.1349, + "step": 954500 + }, + { + "epoch": 0.06, + "learning_rate": 4.892134124293786e-05, + "loss": 0.14, + "step": 955000 + }, + { + "epoch": 0.06, + "learning_rate": 4.892077627118644e-05, + "loss": 0.1368, + "step": 955500 + }, + { + "epoch": 0.06, + "learning_rate": 4.892021242937853e-05, + "loss": 0.1325, + "step": 956000 + }, + { + "epoch": 0.06, + "learning_rate": 4.891964745762712e-05, + "loss": 0.1246, + "step": 956500 + }, + { + "epoch": 0.06, + "learning_rate": 4.891908248587571e-05, + "loss": 0.1425, + "step": 957000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8918517514124295e-05, + "loss": 0.1385, + "step": 957500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8917952542372886e-05, + "loss": 0.1419, + "step": 958000 + }, + { + "epoch": 0.06, + "learning_rate": 4.891738870056497e-05, + "loss": 0.1354, + "step": 958500 + }, + { + "epoch": 0.07, + "learning_rate": 4.891682372881356e-05, + "loss": 0.1437, + "step": 959000 + }, + { + "epoch": 0.07, + "learning_rate": 4.891625875706215e-05, + "loss": 0.1353, + "step": 959500 + }, + { + "epoch": 0.07, + "learning_rate": 4.891569378531073e-05, + "loss": 0.1349, + "step": 960000 + }, + { + "epoch": 0.07, + "learning_rate": 4.891512994350283e-05, + "loss": 0.1366, + "step": 960500 + }, + { + "epoch": 0.07, + "learning_rate": 4.891456610169492e-05, + "loss": 0.1316, + "step": 961000 + }, + { + "epoch": 0.07, + "learning_rate": 4.89140011299435e-05, + "loss": 0.1339, + "step": 961500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8913436158192094e-05, + "loss": 0.1292, + "step": 962000 + }, + { + "epoch": 0.07, + "learning_rate": 4.891287118644068e-05, + "loss": 0.1409, + "step": 962500 + }, + { + "epoch": 0.07, + "learning_rate": 4.891230621468927e-05, + "loss": 0.1328, + "step": 963000 + }, + { + "epoch": 0.07, + "learning_rate": 4.891174124293785e-05, + "loss": 0.1379, + "step": 963500 + }, + { + "epoch": 0.07, + "learning_rate": 4.891117627118644e-05, + "loss": 0.141, + "step": 964000 + }, + { + "epoch": 0.07, + "learning_rate": 4.891061129943503e-05, + "loss": 0.1316, + "step": 964500 + }, + { + "epoch": 0.07, + "learning_rate": 4.891004632768362e-05, + "loss": 0.1358, + "step": 965000 + }, + { + "epoch": 0.07, + "learning_rate": 4.890948135593221e-05, + "loss": 0.133, + "step": 965500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8908917514124295e-05, + "loss": 0.1376, + "step": 966000 + }, + { + "epoch": 0.07, + "learning_rate": 4.890835254237288e-05, + "loss": 0.1369, + "step": 966500 + }, + { + "epoch": 0.07, + "learning_rate": 4.890778757062147e-05, + "loss": 0.1339, + "step": 967000 + }, + { + "epoch": 0.07, + "learning_rate": 4.890722259887005e-05, + "loss": 0.1313, + "step": 967500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8906658757062153e-05, + "loss": 0.1418, + "step": 968000 + }, + { + "epoch": 0.07, + "learning_rate": 4.890609378531074e-05, + "loss": 0.1356, + "step": 968500 + }, + { + "epoch": 0.07, + "learning_rate": 4.890552881355933e-05, + "loss": 0.1368, + "step": 969000 + }, + { + "epoch": 0.07, + "learning_rate": 4.890496384180791e-05, + "loss": 0.1414, + "step": 969500 + }, + { + "epoch": 0.07, + "learning_rate": 4.89044e-05, + "loss": 0.1378, + "step": 970000 + }, + { + "epoch": 0.07, + "learning_rate": 4.890383502824859e-05, + "loss": 0.1273, + "step": 970500 + }, + { + "epoch": 0.07, + "learning_rate": 4.890327005649718e-05, + "loss": 0.1345, + "step": 971000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8902705084745764e-05, + "loss": 0.1304, + "step": 971500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8902140112994354e-05, + "loss": 0.1383, + "step": 972000 + }, + { + "epoch": 0.07, + "learning_rate": 4.890157627118644e-05, + "loss": 0.1337, + "step": 972500 + }, + { + "epoch": 0.07, + "learning_rate": 4.890101129943503e-05, + "loss": 0.1366, + "step": 973000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8900446327683616e-05, + "loss": 0.1391, + "step": 973500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8899881355932207e-05, + "loss": 0.1333, + "step": 974000 + }, + { + "epoch": 0.07, + "learning_rate": 4.88993175141243e-05, + "loss": 0.1317, + "step": 974500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8898752542372884e-05, + "loss": 0.1332, + "step": 975000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8898187570621475e-05, + "loss": 0.133, + "step": 975500 + }, + { + "epoch": 0.07, + "learning_rate": 4.889762259887006e-05, + "loss": 0.1232, + "step": 976000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8897058757062146e-05, + "loss": 0.1448, + "step": 976500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8896493785310737e-05, + "loss": 0.132, + "step": 977000 + }, + { + "epoch": 0.07, + "learning_rate": 4.889592881355932e-05, + "loss": 0.1328, + "step": 977500 + }, + { + "epoch": 0.07, + "learning_rate": 4.889536384180791e-05, + "loss": 0.1363, + "step": 978000 + }, + { + "epoch": 0.07, + "learning_rate": 4.88947988700565e-05, + "loss": 0.1284, + "step": 978500 + }, + { + "epoch": 0.07, + "learning_rate": 4.889423502824859e-05, + "loss": 0.1341, + "step": 979000 + }, + { + "epoch": 0.07, + "learning_rate": 4.889367005649718e-05, + "loss": 0.1343, + "step": 979500 + }, + { + "epoch": 0.07, + "learning_rate": 4.889310508474576e-05, + "loss": 0.1365, + "step": 980000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8892540112994354e-05, + "loss": 0.1263, + "step": 980500 + }, + { + "epoch": 0.07, + "learning_rate": 4.889197514124294e-05, + "loss": 0.1351, + "step": 981000 + }, + { + "epoch": 0.07, + "learning_rate": 4.889141129943503e-05, + "loss": 0.1347, + "step": 981500 + }, + { + "epoch": 0.07, + "learning_rate": 4.889084632768362e-05, + "loss": 0.1376, + "step": 982000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8890281355932206e-05, + "loss": 0.1349, + "step": 982500 + }, + { + "epoch": 0.07, + "learning_rate": 4.888971751412429e-05, + "loss": 0.1295, + "step": 983000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8889152542372884e-05, + "loss": 0.1441, + "step": 983500 + }, + { + "epoch": 0.07, + "learning_rate": 4.888858757062147e-05, + "loss": 0.1326, + "step": 984000 + }, + { + "epoch": 0.07, + "learning_rate": 4.888802259887006e-05, + "loss": 0.1383, + "step": 984500 + }, + { + "epoch": 0.07, + "learning_rate": 4.888745762711865e-05, + "loss": 0.1275, + "step": 985000 + }, + { + "epoch": 0.07, + "learning_rate": 4.888689265536723e-05, + "loss": 0.1329, + "step": 985500 + }, + { + "epoch": 0.07, + "learning_rate": 4.888632768361582e-05, + "loss": 0.1386, + "step": 986000 + }, + { + "epoch": 0.07, + "learning_rate": 4.888576271186441e-05, + "loss": 0.1404, + "step": 986500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8885197740113e-05, + "loss": 0.1298, + "step": 987000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8884633898305085e-05, + "loss": 0.1364, + "step": 987500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8884068926553675e-05, + "loss": 0.1311, + "step": 988000 + }, + { + "epoch": 0.07, + "learning_rate": 4.888350395480226e-05, + "loss": 0.1376, + "step": 988500 + }, + { + "epoch": 0.07, + "learning_rate": 4.888293898305085e-05, + "loss": 0.1248, + "step": 989000 + }, + { + "epoch": 0.07, + "learning_rate": 4.888237401129943e-05, + "loss": 0.1327, + "step": 989500 + }, + { + "epoch": 0.07, + "learning_rate": 4.888181016949153e-05, + "loss": 0.1323, + "step": 990000 + }, + { + "epoch": 0.07, + "learning_rate": 4.888124519774012e-05, + "loss": 0.1362, + "step": 990500 + }, + { + "epoch": 0.07, + "learning_rate": 4.88806802259887e-05, + "loss": 0.1393, + "step": 991000 + }, + { + "epoch": 0.07, + "learning_rate": 4.888011525423729e-05, + "loss": 0.129, + "step": 991500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8879550282485876e-05, + "loss": 0.1338, + "step": 992000 + }, + { + "epoch": 0.07, + "learning_rate": 4.887898531073447e-05, + "loss": 0.1271, + "step": 992500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8878421468926554e-05, + "loss": 0.1371, + "step": 993000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8877856497175144e-05, + "loss": 0.1297, + "step": 993500 + }, + { + "epoch": 0.07, + "learning_rate": 4.887729152542373e-05, + "loss": 0.1311, + "step": 994000 + }, + { + "epoch": 0.07, + "learning_rate": 4.887672655367232e-05, + "loss": 0.1259, + "step": 994500 + }, + { + "epoch": 0.07, + "learning_rate": 4.88761615819209e-05, + "loss": 0.1369, + "step": 995000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8875597740113e-05, + "loss": 0.1407, + "step": 995500 + }, + { + "epoch": 0.07, + "learning_rate": 4.887503276836158e-05, + "loss": 0.1365, + "step": 996000 + }, + { + "epoch": 0.07, + "learning_rate": 4.887446779661017e-05, + "loss": 0.1368, + "step": 996500 + }, + { + "epoch": 0.07, + "learning_rate": 4.887390282485876e-05, + "loss": 0.1273, + "step": 997000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8873337853107345e-05, + "loss": 0.1317, + "step": 997500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8872772881355936e-05, + "loss": 0.1252, + "step": 998000 + }, + { + "epoch": 0.07, + "learning_rate": 4.887220903954802e-05, + "loss": 0.1274, + "step": 998500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8871644067796614e-05, + "loss": 0.1267, + "step": 999000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8871079096045204e-05, + "loss": 0.1359, + "step": 999500 + }, + { + "epoch": 0.07, + "learning_rate": 4.887051412429379e-05, + "loss": 0.1305, + "step": 1000000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8869950282485875e-05, + "loss": 0.1324, + "step": 1000500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8869385310734466e-05, + "loss": 0.1318, + "step": 1001000 + }, + { + "epoch": 0.07, + "learning_rate": 4.886882033898305e-05, + "loss": 0.1351, + "step": 1001500 + }, + { + "epoch": 0.07, + "learning_rate": 4.886825536723164e-05, + "loss": 0.1306, + "step": 1002000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8867691525423734e-05, + "loss": 0.1317, + "step": 1002500 + }, + { + "epoch": 0.07, + "learning_rate": 4.886712655367232e-05, + "loss": 0.1301, + "step": 1003000 + }, + { + "epoch": 0.07, + "learning_rate": 4.886656158192091e-05, + "loss": 0.1323, + "step": 1003500 + }, + { + "epoch": 0.07, + "learning_rate": 4.886599661016949e-05, + "loss": 0.1273, + "step": 1004000 + }, + { + "epoch": 0.07, + "learning_rate": 4.886543163841808e-05, + "loss": 0.1412, + "step": 1004500 + }, + { + "epoch": 0.07, + "learning_rate": 4.886486779661017e-05, + "loss": 0.1353, + "step": 1005000 + }, + { + "epoch": 0.07, + "learning_rate": 4.886430282485876e-05, + "loss": 0.1344, + "step": 1005500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8863737853107345e-05, + "loss": 0.1315, + "step": 1006000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8863172881355935e-05, + "loss": 0.1339, + "step": 1006500 + }, + { + "epoch": 0.07, + "learning_rate": 4.886260903954802e-05, + "loss": 0.1287, + "step": 1007000 + }, + { + "epoch": 0.07, + "learning_rate": 4.886204406779661e-05, + "loss": 0.1435, + "step": 1007500 + }, + { + "epoch": 0.07, + "learning_rate": 4.88614790960452e-05, + "loss": 0.1376, + "step": 1008000 + }, + { + "epoch": 0.07, + "learning_rate": 4.886091412429379e-05, + "loss": 0.1362, + "step": 1008500 + }, + { + "epoch": 0.07, + "learning_rate": 4.886034915254237e-05, + "loss": 0.129, + "step": 1009000 + }, + { + "epoch": 0.07, + "learning_rate": 4.885978418079096e-05, + "loss": 0.1389, + "step": 1009500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8859220338983056e-05, + "loss": 0.1349, + "step": 1010000 + }, + { + "epoch": 0.07, + "learning_rate": 4.885865536723164e-05, + "loss": 0.1347, + "step": 1010500 + }, + { + "epoch": 0.07, + "learning_rate": 4.885809039548023e-05, + "loss": 0.1416, + "step": 1011000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8857525423728814e-05, + "loss": 0.1295, + "step": 1011500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8856960451977405e-05, + "loss": 0.1306, + "step": 1012000 + }, + { + "epoch": 0.07, + "learning_rate": 4.885639661016949e-05, + "loss": 0.1337, + "step": 1012500 + }, + { + "epoch": 0.07, + "learning_rate": 4.885583163841808e-05, + "loss": 0.1302, + "step": 1013000 + }, + { + "epoch": 0.07, + "learning_rate": 4.885526666666667e-05, + "loss": 0.133, + "step": 1013500 + }, + { + "epoch": 0.07, + "learning_rate": 4.885470169491526e-05, + "loss": 0.1291, + "step": 1014000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8854137853107344e-05, + "loss": 0.1348, + "step": 1014500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8853572881355935e-05, + "loss": 0.1357, + "step": 1015000 + }, + { + "epoch": 0.07, + "learning_rate": 4.885300790960452e-05, + "loss": 0.1343, + "step": 1015500 + }, + { + "epoch": 0.07, + "learning_rate": 4.885244293785311e-05, + "loss": 0.1345, + "step": 1016000 + }, + { + "epoch": 0.07, + "learning_rate": 4.885187796610169e-05, + "loss": 0.1318, + "step": 1016500 + }, + { + "epoch": 0.07, + "learning_rate": 4.885131299435028e-05, + "loss": 0.1336, + "step": 1017000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8850748022598874e-05, + "loss": 0.1345, + "step": 1017500 + }, + { + "epoch": 0.07, + "learning_rate": 4.885018418079096e-05, + "loss": 0.1305, + "step": 1018000 + }, + { + "epoch": 0.07, + "learning_rate": 4.884961920903955e-05, + "loss": 0.1364, + "step": 1018500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8849054237288136e-05, + "loss": 0.1383, + "step": 1019000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8848489265536726e-05, + "loss": 0.1359, + "step": 1019500 + }, + { + "epoch": 0.07, + "learning_rate": 4.884792429378531e-05, + "loss": 0.1391, + "step": 1020000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8847360451977404e-05, + "loss": 0.1342, + "step": 1020500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8846795480225994e-05, + "loss": 0.1327, + "step": 1021000 + }, + { + "epoch": 0.07, + "learning_rate": 4.884623050847458e-05, + "loss": 0.1309, + "step": 1021500 + }, + { + "epoch": 0.07, + "learning_rate": 4.884566553672317e-05, + "loss": 0.1271, + "step": 1022000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8845101694915256e-05, + "loss": 0.1319, + "step": 1022500 + }, + { + "epoch": 0.07, + "learning_rate": 4.884453672316384e-05, + "loss": 0.1306, + "step": 1023000 + }, + { + "epoch": 0.07, + "learning_rate": 4.884397175141243e-05, + "loss": 0.1357, + "step": 1023500 + }, + { + "epoch": 0.07, + "learning_rate": 4.884340677966102e-05, + "loss": 0.1292, + "step": 1024000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8842841807909605e-05, + "loss": 0.1455, + "step": 1024500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8842276836158195e-05, + "loss": 0.1345, + "step": 1025000 + }, + { + "epoch": 0.07, + "learning_rate": 4.884171186440678e-05, + "loss": 0.135, + "step": 1025500 + }, + { + "epoch": 0.07, + "learning_rate": 4.884114802259887e-05, + "loss": 0.1388, + "step": 1026000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8840583050847464e-05, + "loss": 0.1322, + "step": 1026500 + }, + { + "epoch": 0.07, + "learning_rate": 4.884001807909605e-05, + "loss": 0.1311, + "step": 1027000 + }, + { + "epoch": 0.07, + "learning_rate": 4.883945310734464e-05, + "loss": 0.1395, + "step": 1027500 + }, + { + "epoch": 0.07, + "learning_rate": 4.883888813559322e-05, + "loss": 0.1296, + "step": 1028000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8838324293785316e-05, + "loss": 0.138, + "step": 1028500 + }, + { + "epoch": 0.07, + "learning_rate": 4.88377593220339e-05, + "loss": 0.1333, + "step": 1029000 + }, + { + "epoch": 0.07, + "learning_rate": 4.883719435028249e-05, + "loss": 0.1348, + "step": 1029500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8836629378531074e-05, + "loss": 0.1283, + "step": 1030000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8836064406779665e-05, + "loss": 0.1372, + "step": 1030500 + }, + { + "epoch": 0.07, + "learning_rate": 4.883550056497175e-05, + "loss": 0.1304, + "step": 1031000 + }, + { + "epoch": 0.07, + "learning_rate": 4.883493559322034e-05, + "loss": 0.1369, + "step": 1031500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8834370621468926e-05, + "loss": 0.1281, + "step": 1032000 + }, + { + "epoch": 0.07, + "learning_rate": 4.883380564971752e-05, + "loss": 0.1296, + "step": 1032500 + }, + { + "epoch": 0.07, + "learning_rate": 4.88332406779661e-05, + "loss": 0.1353, + "step": 1033000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8832676836158195e-05, + "loss": 0.1503, + "step": 1033500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8832111864406785e-05, + "loss": 0.1324, + "step": 1034000 + }, + { + "epoch": 0.07, + "learning_rate": 4.883154689265537e-05, + "loss": 0.1332, + "step": 1034500 + }, + { + "epoch": 0.07, + "learning_rate": 4.883098192090396e-05, + "loss": 0.1296, + "step": 1035000 + }, + { + "epoch": 0.07, + "learning_rate": 4.883041807909605e-05, + "loss": 0.1331, + "step": 1035500 + }, + { + "epoch": 0.07, + "learning_rate": 4.882985310734464e-05, + "loss": 0.1338, + "step": 1036000 + }, + { + "epoch": 0.07, + "learning_rate": 4.882928813559322e-05, + "loss": 0.1319, + "step": 1036500 + }, + { + "epoch": 0.07, + "learning_rate": 4.882872316384181e-05, + "loss": 0.1392, + "step": 1037000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8828158192090396e-05, + "loss": 0.1377, + "step": 1037500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8827593220338986e-05, + "loss": 0.1314, + "step": 1038000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8827029378531073e-05, + "loss": 0.1378, + "step": 1038500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8826464406779664e-05, + "loss": 0.1271, + "step": 1039000 + }, + { + "epoch": 0.07, + "learning_rate": 4.882589943502825e-05, + "loss": 0.1364, + "step": 1039500 + }, + { + "epoch": 0.07, + "learning_rate": 4.882533446327684e-05, + "loss": 0.1375, + "step": 1040000 + }, + { + "epoch": 0.07, + "learning_rate": 4.882476949152542e-05, + "loss": 0.1295, + "step": 1040500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8824205649717516e-05, + "loss": 0.1309, + "step": 1041000 + }, + { + "epoch": 0.07, + "learning_rate": 4.882364067796611e-05, + "loss": 0.1439, + "step": 1041500 + }, + { + "epoch": 0.07, + "learning_rate": 4.882307570621469e-05, + "loss": 0.1297, + "step": 1042000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8822511864406785e-05, + "loss": 0.1307, + "step": 1042500 + }, + { + "epoch": 0.07, + "learning_rate": 4.882194689265537e-05, + "loss": 0.1346, + "step": 1043000 + }, + { + "epoch": 0.07, + "learning_rate": 4.882138192090396e-05, + "loss": 0.1324, + "step": 1043500 + }, + { + "epoch": 0.07, + "learning_rate": 4.882081694915254e-05, + "loss": 0.1269, + "step": 1044000 + }, + { + "epoch": 0.07, + "learning_rate": 4.882025197740113e-05, + "loss": 0.1357, + "step": 1044500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8819687005649724e-05, + "loss": 0.1282, + "step": 1045000 + }, + { + "epoch": 0.07, + "learning_rate": 4.881912203389831e-05, + "loss": 0.1307, + "step": 1045500 + }, + { + "epoch": 0.07, + "learning_rate": 4.88185570621469e-05, + "loss": 0.1354, + "step": 1046000 + }, + { + "epoch": 0.07, + "learning_rate": 4.881799209039548e-05, + "loss": 0.1308, + "step": 1046500 + }, + { + "epoch": 0.07, + "learning_rate": 4.881742711864407e-05, + "loss": 0.124, + "step": 1047000 + }, + { + "epoch": 0.07, + "learning_rate": 4.881686440677966e-05, + "loss": 0.1251, + "step": 1047500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8816299435028254e-05, + "loss": 0.1261, + "step": 1048000 + }, + { + "epoch": 0.07, + "learning_rate": 4.881573446327684e-05, + "loss": 0.1329, + "step": 1048500 + }, + { + "epoch": 0.07, + "learning_rate": 4.881516949152543e-05, + "loss": 0.132, + "step": 1049000 + }, + { + "epoch": 0.07, + "learning_rate": 4.881460451977401e-05, + "loss": 0.1322, + "step": 1049500 + }, + { + "epoch": 0.07, + "learning_rate": 4.88140395480226e-05, + "loss": 0.1378, + "step": 1050000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8813474576271186e-05, + "loss": 0.1319, + "step": 1050500 + }, + { + "epoch": 0.07, + "learning_rate": 4.881290960451978e-05, + "loss": 0.1337, + "step": 1051000 + }, + { + "epoch": 0.07, + "learning_rate": 4.881234463276836e-05, + "loss": 0.138, + "step": 1051500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8811780790960455e-05, + "loss": 0.1401, + "step": 1052000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8811215819209045e-05, + "loss": 0.1326, + "step": 1052500 + }, + { + "epoch": 0.07, + "learning_rate": 4.881065084745763e-05, + "loss": 0.1257, + "step": 1053000 + }, + { + "epoch": 0.07, + "learning_rate": 4.881008587570622e-05, + "loss": 0.1339, + "step": 1053500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8809520903954804e-05, + "loss": 0.1323, + "step": 1054000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8808955932203394e-05, + "loss": 0.1296, + "step": 1054500 + }, + { + "epoch": 0.07, + "learning_rate": 4.880839209039548e-05, + "loss": 0.1294, + "step": 1055000 + }, + { + "epoch": 0.07, + "learning_rate": 4.880782711864407e-05, + "loss": 0.1279, + "step": 1055500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8807262146892656e-05, + "loss": 0.1281, + "step": 1056000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8806697175141246e-05, + "loss": 0.1307, + "step": 1056500 + }, + { + "epoch": 0.07, + "learning_rate": 4.880613220338983e-05, + "loss": 0.134, + "step": 1057000 + }, + { + "epoch": 0.07, + "learning_rate": 4.880556723163842e-05, + "loss": 0.13, + "step": 1057500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8805003389830515e-05, + "loss": 0.1291, + "step": 1058000 + }, + { + "epoch": 0.07, + "learning_rate": 4.88044384180791e-05, + "loss": 0.1343, + "step": 1058500 + }, + { + "epoch": 0.07, + "learning_rate": 4.880387344632769e-05, + "loss": 0.141, + "step": 1059000 + }, + { + "epoch": 0.07, + "learning_rate": 4.880330847457627e-05, + "loss": 0.1308, + "step": 1059500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8802743502824863e-05, + "loss": 0.1343, + "step": 1060000 + }, + { + "epoch": 0.07, + "learning_rate": 4.880217966101695e-05, + "loss": 0.1242, + "step": 1060500 + }, + { + "epoch": 0.07, + "learning_rate": 4.880161468926554e-05, + "loss": 0.141, + "step": 1061000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8801049717514125e-05, + "loss": 0.134, + "step": 1061500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8800484745762716e-05, + "loss": 0.1373, + "step": 1062000 + }, + { + "epoch": 0.07, + "learning_rate": 4.87999209039548e-05, + "loss": 0.1283, + "step": 1062500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8799355932203393e-05, + "loss": 0.1302, + "step": 1063000 + }, + { + "epoch": 0.07, + "learning_rate": 4.879879096045198e-05, + "loss": 0.1322, + "step": 1063500 + }, + { + "epoch": 0.07, + "learning_rate": 4.879822598870057e-05, + "loss": 0.1299, + "step": 1064000 + }, + { + "epoch": 0.07, + "learning_rate": 4.879766214689266e-05, + "loss": 0.1283, + "step": 1064500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8797097175141246e-05, + "loss": 0.1386, + "step": 1065000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8796532203389836e-05, + "loss": 0.1363, + "step": 1065500 + }, + { + "epoch": 0.07, + "learning_rate": 4.879596723163842e-05, + "loss": 0.1325, + "step": 1066000 + }, + { + "epoch": 0.07, + "learning_rate": 4.879540225988701e-05, + "loss": 0.1413, + "step": 1066500 + }, + { + "epoch": 0.07, + "learning_rate": 4.87948384180791e-05, + "loss": 0.1319, + "step": 1067000 + }, + { + "epoch": 0.07, + "learning_rate": 4.879427344632769e-05, + "loss": 0.1231, + "step": 1067500 + }, + { + "epoch": 0.07, + "learning_rate": 4.879370847457627e-05, + "loss": 0.132, + "step": 1068000 + }, + { + "epoch": 0.07, + "learning_rate": 4.879314350282486e-05, + "loss": 0.1357, + "step": 1068500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8792578531073447e-05, + "loss": 0.128, + "step": 1069000 + }, + { + "epoch": 0.07, + "learning_rate": 4.879201468926554e-05, + "loss": 0.1279, + "step": 1069500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8791449717514124e-05, + "loss": 0.1292, + "step": 1070000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8790884745762715e-05, + "loss": 0.1377, + "step": 1070500 + }, + { + "epoch": 0.07, + "learning_rate": 4.87903197740113e-05, + "loss": 0.1283, + "step": 1071000 + }, + { + "epoch": 0.07, + "learning_rate": 4.878975480225989e-05, + "loss": 0.1341, + "step": 1071500 + }, + { + "epoch": 0.07, + "learning_rate": 4.878918983050847e-05, + "loss": 0.1305, + "step": 1072000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8788624858757064e-05, + "loss": 0.1351, + "step": 1072500 + }, + { + "epoch": 0.07, + "learning_rate": 4.878805988700565e-05, + "loss": 0.1249, + "step": 1073000 + }, + { + "epoch": 0.07, + "learning_rate": 4.878749604519774e-05, + "loss": 0.1289, + "step": 1073500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8786932203389835e-05, + "loss": 0.132, + "step": 1074000 + }, + { + "epoch": 0.07, + "learning_rate": 4.878636723163842e-05, + "loss": 0.1351, + "step": 1074500 + }, + { + "epoch": 0.07, + "learning_rate": 4.878580225988701e-05, + "loss": 0.1279, + "step": 1075000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8785237288135594e-05, + "loss": 0.1318, + "step": 1075500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8784672316384184e-05, + "loss": 0.1347, + "step": 1076000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8784107344632775e-05, + "loss": 0.1299, + "step": 1076500 + }, + { + "epoch": 0.07, + "learning_rate": 4.878354350282486e-05, + "loss": 0.1273, + "step": 1077000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8782978531073446e-05, + "loss": 0.1285, + "step": 1077500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8782413559322036e-05, + "loss": 0.1315, + "step": 1078000 + }, + { + "epoch": 0.07, + "learning_rate": 4.878184858757062e-05, + "loss": 0.1359, + "step": 1078500 + }, + { + "epoch": 0.07, + "learning_rate": 4.878128361581921e-05, + "loss": 0.1269, + "step": 1079000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8780719774011305e-05, + "loss": 0.1339, + "step": 1079500 + }, + { + "epoch": 0.07, + "learning_rate": 4.878015480225989e-05, + "loss": 0.1269, + "step": 1080000 + }, + { + "epoch": 0.07, + "learning_rate": 4.877958983050848e-05, + "loss": 0.1257, + "step": 1080500 + }, + { + "epoch": 0.07, + "learning_rate": 4.877902485875707e-05, + "loss": 0.1214, + "step": 1081000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8778459887005654e-05, + "loss": 0.1332, + "step": 1081500 + }, + { + "epoch": 0.07, + "learning_rate": 4.877789604519774e-05, + "loss": 0.1296, + "step": 1082000 + }, + { + "epoch": 0.07, + "learning_rate": 4.877733107344633e-05, + "loss": 0.1367, + "step": 1082500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8776766101694915e-05, + "loss": 0.1337, + "step": 1083000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8776201129943506e-05, + "loss": 0.1281, + "step": 1083500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8775636158192096e-05, + "loss": 0.1325, + "step": 1084000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8775072316384184e-05, + "loss": 0.136, + "step": 1084500 + }, + { + "epoch": 0.07, + "learning_rate": 4.877450734463277e-05, + "loss": 0.1342, + "step": 1085000 + }, + { + "epoch": 0.07, + "learning_rate": 4.877394237288136e-05, + "loss": 0.1302, + "step": 1085500 + }, + { + "epoch": 0.07, + "learning_rate": 4.877337740112994e-05, + "loss": 0.1326, + "step": 1086000 + }, + { + "epoch": 0.07, + "learning_rate": 4.877281242937853e-05, + "loss": 0.1352, + "step": 1086500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8772248587570626e-05, + "loss": 0.1295, + "step": 1087000 + }, + { + "epoch": 0.07, + "learning_rate": 4.877168361581922e-05, + "loss": 0.1285, + "step": 1087500 + }, + { + "epoch": 0.07, + "learning_rate": 4.87711186440678e-05, + "loss": 0.1352, + "step": 1088000 + }, + { + "epoch": 0.07, + "learning_rate": 4.877055367231639e-05, + "loss": 0.1317, + "step": 1088500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8769988700564975e-05, + "loss": 0.1283, + "step": 1089000 + }, + { + "epoch": 0.07, + "learning_rate": 4.876942485875706e-05, + "loss": 0.1278, + "step": 1089500 + }, + { + "epoch": 0.07, + "learning_rate": 4.876885988700565e-05, + "loss": 0.1306, + "step": 1090000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8768294915254243e-05, + "loss": 0.1301, + "step": 1090500 + }, + { + "epoch": 0.07, + "learning_rate": 4.876772994350283e-05, + "loss": 0.1334, + "step": 1091000 + }, + { + "epoch": 0.07, + "learning_rate": 4.876716497175142e-05, + "loss": 0.1302, + "step": 1091500 + }, + { + "epoch": 0.07, + "learning_rate": 4.87666e-05, + "loss": 0.1272, + "step": 1092000 + }, + { + "epoch": 0.07, + "learning_rate": 4.876603615819209e-05, + "loss": 0.135, + "step": 1092500 + }, + { + "epoch": 0.07, + "learning_rate": 4.876547118644068e-05, + "loss": 0.1291, + "step": 1093000 + }, + { + "epoch": 0.07, + "learning_rate": 4.876490621468926e-05, + "loss": 0.1308, + "step": 1093500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8764341242937854e-05, + "loss": 0.122, + "step": 1094000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8763776271186444e-05, + "loss": 0.1375, + "step": 1094500 + }, + { + "epoch": 0.07, + "learning_rate": 4.876321129943503e-05, + "loss": 0.1313, + "step": 1095000 + }, + { + "epoch": 0.07, + "learning_rate": 4.876264745762712e-05, + "loss": 0.1245, + "step": 1095500 + }, + { + "epoch": 0.07, + "learning_rate": 4.876208248587571e-05, + "loss": 0.1374, + "step": 1096000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8761517514124297e-05, + "loss": 0.1266, + "step": 1096500 + }, + { + "epoch": 0.07, + "learning_rate": 4.876095254237289e-05, + "loss": 0.1316, + "step": 1097000 + }, + { + "epoch": 0.07, + "learning_rate": 4.876038757062147e-05, + "loss": 0.1307, + "step": 1097500 + }, + { + "epoch": 0.07, + "learning_rate": 4.875982259887006e-05, + "loss": 0.1234, + "step": 1098000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8759257627118645e-05, + "loss": 0.1318, + "step": 1098500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8758692655367236e-05, + "loss": 0.1271, + "step": 1099000 + }, + { + "epoch": 0.07, + "learning_rate": 4.875812881355932e-05, + "loss": 0.1351, + "step": 1099500 + }, + { + "epoch": 0.07, + "learning_rate": 4.875756497175141e-05, + "loss": 0.1328, + "step": 1100000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8757e-05, + "loss": 0.1288, + "step": 1100500 + }, + { + "epoch": 0.07, + "learning_rate": 4.875643502824859e-05, + "loss": 0.1222, + "step": 1101000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8755870056497175e-05, + "loss": 0.1337, + "step": 1101500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8755305084745766e-05, + "loss": 0.133, + "step": 1102000 + }, + { + "epoch": 0.07, + "learning_rate": 4.875474124293786e-05, + "loss": 0.1233, + "step": 1102500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8754176271186444e-05, + "loss": 0.1247, + "step": 1103000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8753611299435034e-05, + "loss": 0.1348, + "step": 1103500 + }, + { + "epoch": 0.07, + "learning_rate": 4.875304632768362e-05, + "loss": 0.1373, + "step": 1104000 + }, + { + "epoch": 0.07, + "learning_rate": 4.875248248587571e-05, + "loss": 0.1337, + "step": 1104500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8751917514124296e-05, + "loss": 0.1316, + "step": 1105000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8751352542372886e-05, + "loss": 0.1273, + "step": 1105500 + }, + { + "epoch": 0.07, + "learning_rate": 4.875078757062147e-05, + "loss": 0.1295, + "step": 1106000 + }, + { + "epoch": 0.08, + "learning_rate": 4.875022259887006e-05, + "loss": 0.1344, + "step": 1106500 + }, + { + "epoch": 0.08, + "learning_rate": 4.874965875706215e-05, + "loss": 0.1289, + "step": 1107000 + }, + { + "epoch": 0.08, + "learning_rate": 4.874909378531073e-05, + "loss": 0.135, + "step": 1107500 + }, + { + "epoch": 0.08, + "learning_rate": 4.874852881355932e-05, + "loss": 0.1257, + "step": 1108000 + }, + { + "epoch": 0.08, + "learning_rate": 4.874796384180791e-05, + "loss": 0.1265, + "step": 1108500 + }, + { + "epoch": 0.08, + "learning_rate": 4.874740000000001e-05, + "loss": 0.1226, + "step": 1109000 + }, + { + "epoch": 0.08, + "learning_rate": 4.874683502824859e-05, + "loss": 0.1336, + "step": 1109500 + }, + { + "epoch": 0.08, + "learning_rate": 4.874627005649718e-05, + "loss": 0.1274, + "step": 1110000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8745705084745765e-05, + "loss": 0.128, + "step": 1110500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8745140112994356e-05, + "loss": 0.1338, + "step": 1111000 + }, + { + "epoch": 0.08, + "learning_rate": 4.874457627118644e-05, + "loss": 0.1278, + "step": 1111500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8744011299435034e-05, + "loss": 0.1304, + "step": 1112000 + }, + { + "epoch": 0.08, + "learning_rate": 4.874344632768362e-05, + "loss": 0.127, + "step": 1112500 + }, + { + "epoch": 0.08, + "learning_rate": 4.874288135593221e-05, + "loss": 0.1292, + "step": 1113000 + }, + { + "epoch": 0.08, + "learning_rate": 4.874231638418079e-05, + "loss": 0.1283, + "step": 1113500 + }, + { + "epoch": 0.08, + "learning_rate": 4.874175141242938e-05, + "loss": 0.1305, + "step": 1114000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8741186440677966e-05, + "loss": 0.1329, + "step": 1114500 + }, + { + "epoch": 0.08, + "learning_rate": 4.874062259887006e-05, + "loss": 0.1288, + "step": 1115000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8740057627118644e-05, + "loss": 0.1265, + "step": 1115500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8739492655367234e-05, + "loss": 0.1265, + "step": 1116000 + }, + { + "epoch": 0.08, + "learning_rate": 4.873892768361582e-05, + "loss": 0.1302, + "step": 1116500 + }, + { + "epoch": 0.08, + "learning_rate": 4.873836271186441e-05, + "loss": 0.1305, + "step": 1117000 + }, + { + "epoch": 0.08, + "learning_rate": 4.873779774011299e-05, + "loss": 0.1292, + "step": 1117500 + }, + { + "epoch": 0.08, + "learning_rate": 4.873723389830509e-05, + "loss": 0.14, + "step": 1118000 + }, + { + "epoch": 0.08, + "learning_rate": 4.873666892655368e-05, + "loss": 0.1345, + "step": 1118500 + }, + { + "epoch": 0.08, + "learning_rate": 4.873610395480227e-05, + "loss": 0.1322, + "step": 1119000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8735540112994355e-05, + "loss": 0.1265, + "step": 1119500 + }, + { + "epoch": 0.08, + "learning_rate": 4.873497514124294e-05, + "loss": 0.1288, + "step": 1120000 + }, + { + "epoch": 0.08, + "learning_rate": 4.873441016949153e-05, + "loss": 0.1208, + "step": 1120500 + }, + { + "epoch": 0.08, + "learning_rate": 4.873384519774011e-05, + "loss": 0.1272, + "step": 1121000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8733280225988704e-05, + "loss": 0.1243, + "step": 1121500 + }, + { + "epoch": 0.08, + "learning_rate": 4.873271525423729e-05, + "loss": 0.1319, + "step": 1122000 + }, + { + "epoch": 0.08, + "learning_rate": 4.873215028248588e-05, + "loss": 0.1362, + "step": 1122500 + }, + { + "epoch": 0.08, + "learning_rate": 4.873158531073447e-05, + "loss": 0.1346, + "step": 1123000 + }, + { + "epoch": 0.08, + "learning_rate": 4.873102033898305e-05, + "loss": 0.1297, + "step": 1123500 + }, + { + "epoch": 0.08, + "learning_rate": 4.873045649717514e-05, + "loss": 0.1345, + "step": 1124000 + }, + { + "epoch": 0.08, + "learning_rate": 4.872989152542373e-05, + "loss": 0.1288, + "step": 1124500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8729326553672314e-05, + "loss": 0.1249, + "step": 1125000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8728761581920905e-05, + "loss": 0.1259, + "step": 1125500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8728196610169495e-05, + "loss": 0.1334, + "step": 1126000 + }, + { + "epoch": 0.08, + "learning_rate": 4.872763163841808e-05, + "loss": 0.1357, + "step": 1126500 + }, + { + "epoch": 0.08, + "learning_rate": 4.872706666666667e-05, + "loss": 0.1282, + "step": 1127000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8726502824858764e-05, + "loss": 0.1397, + "step": 1127500 + }, + { + "epoch": 0.08, + "learning_rate": 4.872593785310735e-05, + "loss": 0.1244, + "step": 1128000 + }, + { + "epoch": 0.08, + "learning_rate": 4.872537288135594e-05, + "loss": 0.1324, + "step": 1128500 + }, + { + "epoch": 0.08, + "learning_rate": 4.872480790960452e-05, + "loss": 0.1343, + "step": 1129000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8724244067796616e-05, + "loss": 0.1223, + "step": 1129500 + }, + { + "epoch": 0.08, + "learning_rate": 4.87236790960452e-05, + "loss": 0.1335, + "step": 1130000 + }, + { + "epoch": 0.08, + "learning_rate": 4.872311412429379e-05, + "loss": 0.1265, + "step": 1130500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8722549152542374e-05, + "loss": 0.1305, + "step": 1131000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8721984180790965e-05, + "loss": 0.1272, + "step": 1131500 + }, + { + "epoch": 0.08, + "learning_rate": 4.872142033898305e-05, + "loss": 0.1343, + "step": 1132000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8720855367231636e-05, + "loss": 0.125, + "step": 1132500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8720290395480226e-05, + "loss": 0.1344, + "step": 1133000 + }, + { + "epoch": 0.08, + "learning_rate": 4.871972542372882e-05, + "loss": 0.1294, + "step": 1133500 + }, + { + "epoch": 0.08, + "learning_rate": 4.87191604519774e-05, + "loss": 0.1277, + "step": 1134000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8718596610169495e-05, + "loss": 0.1335, + "step": 1134500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8718031638418085e-05, + "loss": 0.1323, + "step": 1135000 + }, + { + "epoch": 0.08, + "learning_rate": 4.871746666666667e-05, + "loss": 0.1327, + "step": 1135500 + }, + { + "epoch": 0.08, + "learning_rate": 4.871690169491526e-05, + "loss": 0.1231, + "step": 1136000 + }, + { + "epoch": 0.08, + "learning_rate": 4.871633785310735e-05, + "loss": 0.1329, + "step": 1136500 + }, + { + "epoch": 0.08, + "learning_rate": 4.871577288135594e-05, + "loss": 0.136, + "step": 1137000 + }, + { + "epoch": 0.08, + "learning_rate": 4.871520790960452e-05, + "loss": 0.1275, + "step": 1137500 + }, + { + "epoch": 0.08, + "learning_rate": 4.871464293785311e-05, + "loss": 0.126, + "step": 1138000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8714077966101696e-05, + "loss": 0.134, + "step": 1138500 + }, + { + "epoch": 0.08, + "learning_rate": 4.871351412429378e-05, + "loss": 0.1352, + "step": 1139000 + }, + { + "epoch": 0.08, + "learning_rate": 4.871294915254237e-05, + "loss": 0.126, + "step": 1139500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8712384180790964e-05, + "loss": 0.1271, + "step": 1140000 + }, + { + "epoch": 0.08, + "learning_rate": 4.871181920903955e-05, + "loss": 0.1316, + "step": 1140500 + }, + { + "epoch": 0.08, + "learning_rate": 4.871125423728814e-05, + "loss": 0.1309, + "step": 1141000 + }, + { + "epoch": 0.08, + "learning_rate": 4.871068926553672e-05, + "loss": 0.1293, + "step": 1141500 + }, + { + "epoch": 0.08, + "learning_rate": 4.871012429378531e-05, + "loss": 0.127, + "step": 1142000 + }, + { + "epoch": 0.08, + "learning_rate": 4.870956045197741e-05, + "loss": 0.133, + "step": 1142500 + }, + { + "epoch": 0.08, + "learning_rate": 4.870899548022599e-05, + "loss": 0.1303, + "step": 1143000 + }, + { + "epoch": 0.08, + "learning_rate": 4.870843050847458e-05, + "loss": 0.1344, + "step": 1143500 + }, + { + "epoch": 0.08, + "learning_rate": 4.870786553672317e-05, + "loss": 0.1373, + "step": 1144000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8707300564971755e-05, + "loss": 0.13, + "step": 1144500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8706735593220346e-05, + "loss": 0.1327, + "step": 1145000 + }, + { + "epoch": 0.08, + "learning_rate": 4.870617175141243e-05, + "loss": 0.1336, + "step": 1145500 + }, + { + "epoch": 0.08, + "learning_rate": 4.870560677966102e-05, + "loss": 0.1321, + "step": 1146000 + }, + { + "epoch": 0.08, + "learning_rate": 4.870504180790961e-05, + "loss": 0.121, + "step": 1146500 + }, + { + "epoch": 0.08, + "learning_rate": 4.87044768361582e-05, + "loss": 0.1282, + "step": 1147000 + }, + { + "epoch": 0.08, + "learning_rate": 4.870391186440678e-05, + "loss": 0.13, + "step": 1147500 + }, + { + "epoch": 0.08, + "learning_rate": 4.870334802259887e-05, + "loss": 0.1286, + "step": 1148000 + }, + { + "epoch": 0.08, + "learning_rate": 4.870278305084746e-05, + "loss": 0.1317, + "step": 1148500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8702218079096044e-05, + "loss": 0.1284, + "step": 1149000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8701653107344634e-05, + "loss": 0.1257, + "step": 1149500 + }, + { + "epoch": 0.08, + "learning_rate": 4.870108813559322e-05, + "loss": 0.128, + "step": 1150000 + }, + { + "epoch": 0.08, + "learning_rate": 4.870052429378532e-05, + "loss": 0.1172, + "step": 1150500 + }, + { + "epoch": 0.08, + "learning_rate": 4.86999593220339e-05, + "loss": 0.1276, + "step": 1151000 + }, + { + "epoch": 0.08, + "learning_rate": 4.869939435028249e-05, + "loss": 0.1315, + "step": 1151500 + }, + { + "epoch": 0.08, + "learning_rate": 4.869882937853108e-05, + "loss": 0.1259, + "step": 1152000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8698265536723164e-05, + "loss": 0.1291, + "step": 1152500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8697700564971755e-05, + "loss": 0.1273, + "step": 1153000 + }, + { + "epoch": 0.08, + "learning_rate": 4.869713559322034e-05, + "loss": 0.1262, + "step": 1153500 + }, + { + "epoch": 0.08, + "learning_rate": 4.869657062146893e-05, + "loss": 0.1306, + "step": 1154000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8696006779661016e-05, + "loss": 0.1232, + "step": 1154500 + }, + { + "epoch": 0.08, + "learning_rate": 4.869544180790961e-05, + "loss": 0.1294, + "step": 1155000 + }, + { + "epoch": 0.08, + "learning_rate": 4.869487683615819e-05, + "loss": 0.1259, + "step": 1155500 + }, + { + "epoch": 0.08, + "learning_rate": 4.869431186440678e-05, + "loss": 0.1291, + "step": 1156000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8693746892655365e-05, + "loss": 0.1259, + "step": 1156500 + }, + { + "epoch": 0.08, + "learning_rate": 4.869318305084746e-05, + "loss": 0.1355, + "step": 1157000 + }, + { + "epoch": 0.08, + "learning_rate": 4.869261807909605e-05, + "loss": 0.1304, + "step": 1157500 + }, + { + "epoch": 0.08, + "learning_rate": 4.869205310734464e-05, + "loss": 0.134, + "step": 1158000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8691488135593224e-05, + "loss": 0.1269, + "step": 1158500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8690923163841815e-05, + "loss": 0.1308, + "step": 1159000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86903581920904e-05, + "loss": 0.1263, + "step": 1159500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8689794350282486e-05, + "loss": 0.1248, + "step": 1160000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8689229378531076e-05, + "loss": 0.1391, + "step": 1160500 + }, + { + "epoch": 0.08, + "learning_rate": 4.868866440677967e-05, + "loss": 0.1282, + "step": 1161000 + }, + { + "epoch": 0.08, + "learning_rate": 4.868809943502825e-05, + "loss": 0.1269, + "step": 1161500 + }, + { + "epoch": 0.08, + "learning_rate": 4.868753559322034e-05, + "loss": 0.1292, + "step": 1162000 + }, + { + "epoch": 0.08, + "learning_rate": 4.868697062146893e-05, + "loss": 0.1269, + "step": 1162500 + }, + { + "epoch": 0.08, + "learning_rate": 4.868640564971751e-05, + "loss": 0.1333, + "step": 1163000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86858406779661e-05, + "loss": 0.1332, + "step": 1163500 + }, + { + "epoch": 0.08, + "learning_rate": 4.86852768361582e-05, + "loss": 0.1222, + "step": 1164000 + }, + { + "epoch": 0.08, + "learning_rate": 4.868471186440679e-05, + "loss": 0.1307, + "step": 1164500 + }, + { + "epoch": 0.08, + "learning_rate": 4.868414689265537e-05, + "loss": 0.1289, + "step": 1165000 + }, + { + "epoch": 0.08, + "learning_rate": 4.868358192090396e-05, + "loss": 0.1288, + "step": 1165500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8683016949152546e-05, + "loss": 0.1281, + "step": 1166000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8682451977401136e-05, + "loss": 0.1345, + "step": 1166500 + }, + { + "epoch": 0.08, + "learning_rate": 4.868188813559322e-05, + "loss": 0.1329, + "step": 1167000 + }, + { + "epoch": 0.08, + "learning_rate": 4.868132316384181e-05, + "loss": 0.1288, + "step": 1167500 + }, + { + "epoch": 0.08, + "learning_rate": 4.86807581920904e-05, + "loss": 0.124, + "step": 1168000 + }, + { + "epoch": 0.08, + "learning_rate": 4.868019322033899e-05, + "loss": 0.1254, + "step": 1168500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8679629378531075e-05, + "loss": 0.128, + "step": 1169000 + }, + { + "epoch": 0.08, + "learning_rate": 4.867906440677966e-05, + "loss": 0.1367, + "step": 1169500 + }, + { + "epoch": 0.08, + "learning_rate": 4.867849943502825e-05, + "loss": 0.1275, + "step": 1170000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8677934463276834e-05, + "loss": 0.1317, + "step": 1170500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8677369491525424e-05, + "loss": 0.1275, + "step": 1171000 + }, + { + "epoch": 0.08, + "learning_rate": 4.867680564971752e-05, + "loss": 0.1275, + "step": 1171500 + }, + { + "epoch": 0.08, + "learning_rate": 4.867624067796611e-05, + "loss": 0.1287, + "step": 1172000 + }, + { + "epoch": 0.08, + "learning_rate": 4.867567570621469e-05, + "loss": 0.1265, + "step": 1172500 + }, + { + "epoch": 0.08, + "learning_rate": 4.867511073446328e-05, + "loss": 0.1319, + "step": 1173000 + }, + { + "epoch": 0.08, + "learning_rate": 4.867454689265537e-05, + "loss": 0.1251, + "step": 1173500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8673981920903954e-05, + "loss": 0.1292, + "step": 1174000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8673416949152545e-05, + "loss": 0.1299, + "step": 1174500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8672851977401135e-05, + "loss": 0.1328, + "step": 1175000 + }, + { + "epoch": 0.08, + "learning_rate": 4.867228700564972e-05, + "loss": 0.1293, + "step": 1175500 + }, + { + "epoch": 0.08, + "learning_rate": 4.867172203389831e-05, + "loss": 0.1287, + "step": 1176000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86711581920904e-05, + "loss": 0.1238, + "step": 1176500 + }, + { + "epoch": 0.08, + "learning_rate": 4.867059322033898e-05, + "loss": 0.1211, + "step": 1177000 + }, + { + "epoch": 0.08, + "learning_rate": 4.867002824858757e-05, + "loss": 0.1271, + "step": 1177500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8669463276836155e-05, + "loss": 0.1255, + "step": 1178000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8668898305084746e-05, + "loss": 0.1258, + "step": 1178500 + }, + { + "epoch": 0.08, + "learning_rate": 4.866833446327684e-05, + "loss": 0.134, + "step": 1179000 + }, + { + "epoch": 0.08, + "learning_rate": 4.866776949152543e-05, + "loss": 0.1213, + "step": 1179500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8667204519774014e-05, + "loss": 0.1346, + "step": 1180000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8666639548022605e-05, + "loss": 0.1301, + "step": 1180500 + }, + { + "epoch": 0.08, + "learning_rate": 4.866607570621469e-05, + "loss": 0.1303, + "step": 1181000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8665510734463276e-05, + "loss": 0.1304, + "step": 1181500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8664945762711866e-05, + "loss": 0.1353, + "step": 1182000 + }, + { + "epoch": 0.08, + "learning_rate": 4.866438079096046e-05, + "loss": 0.1367, + "step": 1182500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8663816949152544e-05, + "loss": 0.123, + "step": 1183000 + }, + { + "epoch": 0.08, + "learning_rate": 4.866325197740113e-05, + "loss": 0.1258, + "step": 1183500 + }, + { + "epoch": 0.08, + "learning_rate": 4.866268700564972e-05, + "loss": 0.1306, + "step": 1184000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86621220338983e-05, + "loss": 0.1284, + "step": 1184500 + }, + { + "epoch": 0.08, + "learning_rate": 4.866155706214689e-05, + "loss": 0.1282, + "step": 1185000 + }, + { + "epoch": 0.08, + "learning_rate": 4.866099322033899e-05, + "loss": 0.1218, + "step": 1185500 + }, + { + "epoch": 0.08, + "learning_rate": 4.866042824858758e-05, + "loss": 0.1175, + "step": 1186000 + }, + { + "epoch": 0.08, + "learning_rate": 4.865986327683616e-05, + "loss": 0.1312, + "step": 1186500 + }, + { + "epoch": 0.08, + "learning_rate": 4.865929830508475e-05, + "loss": 0.1278, + "step": 1187000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8658733333333336e-05, + "loss": 0.1312, + "step": 1187500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8658168361581926e-05, + "loss": 0.1224, + "step": 1188000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8657604519774013e-05, + "loss": 0.1213, + "step": 1188500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8657039548022604e-05, + "loss": 0.1274, + "step": 1189000 + }, + { + "epoch": 0.08, + "learning_rate": 4.865647457627119e-05, + "loss": 0.1227, + "step": 1189500 + }, + { + "epoch": 0.08, + "learning_rate": 4.865590960451978e-05, + "loss": 0.1212, + "step": 1190000 + }, + { + "epoch": 0.08, + "learning_rate": 4.865534463276836e-05, + "loss": 0.1314, + "step": 1190500 + }, + { + "epoch": 0.08, + "learning_rate": 4.865478079096045e-05, + "loss": 0.1296, + "step": 1191000 + }, + { + "epoch": 0.08, + "learning_rate": 4.865421581920904e-05, + "loss": 0.1301, + "step": 1191500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8653650847457624e-05, + "loss": 0.132, + "step": 1192000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8653085875706214e-05, + "loss": 0.1274, + "step": 1192500 + }, + { + "epoch": 0.08, + "learning_rate": 4.865252203389831e-05, + "loss": 0.1192, + "step": 1193000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86519570621469e-05, + "loss": 0.1242, + "step": 1193500 + }, + { + "epoch": 0.08, + "learning_rate": 4.865139209039548e-05, + "loss": 0.1258, + "step": 1194000 + }, + { + "epoch": 0.08, + "learning_rate": 4.865082711864407e-05, + "loss": 0.1297, + "step": 1194500 + }, + { + "epoch": 0.08, + "learning_rate": 4.865026214689266e-05, + "loss": 0.1279, + "step": 1195000 + }, + { + "epoch": 0.08, + "learning_rate": 4.864969717514125e-05, + "loss": 0.131, + "step": 1195500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8649133333333335e-05, + "loss": 0.1304, + "step": 1196000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8648568361581925e-05, + "loss": 0.1299, + "step": 1196500 + }, + { + "epoch": 0.08, + "learning_rate": 4.864800338983051e-05, + "loss": 0.1284, + "step": 1197000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86474384180791e-05, + "loss": 0.127, + "step": 1197500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8646873446327684e-05, + "loss": 0.1289, + "step": 1198000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8646308474576274e-05, + "loss": 0.1209, + "step": 1198500 + }, + { + "epoch": 0.08, + "learning_rate": 4.864574350282486e-05, + "loss": 0.1319, + "step": 1199000 + }, + { + "epoch": 0.08, + "learning_rate": 4.864517966101695e-05, + "loss": 0.1249, + "step": 1199500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8644614689265536e-05, + "loss": 0.1175, + "step": 1200000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8644049717514126e-05, + "loss": 0.1355, + "step": 1200500 + }, + { + "epoch": 0.08, + "learning_rate": 4.864348474576271e-05, + "loss": 0.1267, + "step": 1201000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86429197740113e-05, + "loss": 0.1251, + "step": 1201500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8642354802259885e-05, + "loss": 0.1305, + "step": 1202000 + }, + { + "epoch": 0.08, + "learning_rate": 4.864179096045198e-05, + "loss": 0.1246, + "step": 1202500 + }, + { + "epoch": 0.08, + "learning_rate": 4.864122598870057e-05, + "loss": 0.1309, + "step": 1203000 + }, + { + "epoch": 0.08, + "learning_rate": 4.864066101694916e-05, + "loss": 0.124, + "step": 1203500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8640096045197744e-05, + "loss": 0.1297, + "step": 1204000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8639531073446334e-05, + "loss": 0.1289, + "step": 1204500 + }, + { + "epoch": 0.08, + "learning_rate": 4.863896723163842e-05, + "loss": 0.1336, + "step": 1205000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8638402259887005e-05, + "loss": 0.1335, + "step": 1205500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8637837288135596e-05, + "loss": 0.1294, + "step": 1206000 + }, + { + "epoch": 0.08, + "learning_rate": 4.863727231638418e-05, + "loss": 0.13, + "step": 1206500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8636708474576274e-05, + "loss": 0.1206, + "step": 1207000 + }, + { + "epoch": 0.08, + "learning_rate": 4.863614350282486e-05, + "loss": 0.1235, + "step": 1207500 + }, + { + "epoch": 0.08, + "learning_rate": 4.863557853107345e-05, + "loss": 0.1276, + "step": 1208000 + }, + { + "epoch": 0.08, + "learning_rate": 4.863501355932203e-05, + "loss": 0.1218, + "step": 1208500 + }, + { + "epoch": 0.08, + "learning_rate": 4.863444858757062e-05, + "loss": 0.1219, + "step": 1209000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8633883615819206e-05, + "loss": 0.1319, + "step": 1209500 + }, + { + "epoch": 0.08, + "learning_rate": 4.86333186440678e-05, + "loss": 0.1284, + "step": 1210000 + }, + { + "epoch": 0.08, + "learning_rate": 4.863275480225989e-05, + "loss": 0.1312, + "step": 1210500 + }, + { + "epoch": 0.08, + "learning_rate": 4.863218983050848e-05, + "loss": 0.1258, + "step": 1211000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8631624858757065e-05, + "loss": 0.1315, + "step": 1211500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8631059887005656e-05, + "loss": 0.129, + "step": 1212000 + }, + { + "epoch": 0.08, + "learning_rate": 4.863049491525424e-05, + "loss": 0.1323, + "step": 1212500 + }, + { + "epoch": 0.08, + "learning_rate": 4.862992994350283e-05, + "loss": 0.1369, + "step": 1213000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8629364971751414e-05, + "loss": 0.1342, + "step": 1213500 + }, + { + "epoch": 0.08, + "learning_rate": 4.862880112994351e-05, + "loss": 0.1314, + "step": 1214000 + }, + { + "epoch": 0.08, + "learning_rate": 4.862823615819209e-05, + "loss": 0.1206, + "step": 1214500 + }, + { + "epoch": 0.08, + "learning_rate": 4.862767118644068e-05, + "loss": 0.1223, + "step": 1215000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8627106214689266e-05, + "loss": 0.1208, + "step": 1215500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8626541242937857e-05, + "loss": 0.1306, + "step": 1216000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8625977401129944e-05, + "loss": 0.1225, + "step": 1216500 + }, + { + "epoch": 0.08, + "learning_rate": 4.862541242937853e-05, + "loss": 0.1336, + "step": 1217000 + }, + { + "epoch": 0.08, + "learning_rate": 4.862484745762712e-05, + "loss": 0.1273, + "step": 1217500 + }, + { + "epoch": 0.08, + "learning_rate": 4.862428248587571e-05, + "loss": 0.1272, + "step": 1218000 + }, + { + "epoch": 0.08, + "learning_rate": 4.862371751412429e-05, + "loss": 0.1246, + "step": 1218500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8623153672316387e-05, + "loss": 0.1192, + "step": 1219000 + }, + { + "epoch": 0.08, + "learning_rate": 4.862258870056498e-05, + "loss": 0.1279, + "step": 1219500 + }, + { + "epoch": 0.08, + "learning_rate": 4.862202372881356e-05, + "loss": 0.1216, + "step": 1220000 + }, + { + "epoch": 0.08, + "learning_rate": 4.862145875706215e-05, + "loss": 0.1276, + "step": 1220500 + }, + { + "epoch": 0.08, + "learning_rate": 4.862089491525424e-05, + "loss": 0.1289, + "step": 1221000 + }, + { + "epoch": 0.08, + "learning_rate": 4.862032994350283e-05, + "loss": 0.1312, + "step": 1221500 + }, + { + "epoch": 0.08, + "learning_rate": 4.861976497175141e-05, + "loss": 0.1294, + "step": 1222000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8619200000000004e-05, + "loss": 0.1288, + "step": 1222500 + }, + { + "epoch": 0.08, + "learning_rate": 4.861863502824859e-05, + "loss": 0.1325, + "step": 1223000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8618071186440675e-05, + "loss": 0.1316, + "step": 1223500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8617506214689265e-05, + "loss": 0.1318, + "step": 1224000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8616941242937856e-05, + "loss": 0.127, + "step": 1224500 + }, + { + "epoch": 0.08, + "learning_rate": 4.861637627118644e-05, + "loss": 0.1319, + "step": 1225000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8615812429378534e-05, + "loss": 0.1273, + "step": 1225500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8615247457627124e-05, + "loss": 0.1273, + "step": 1226000 + }, + { + "epoch": 0.08, + "learning_rate": 4.861468248587571e-05, + "loss": 0.13, + "step": 1226500 + }, + { + "epoch": 0.08, + "learning_rate": 4.86141175141243e-05, + "loss": 0.123, + "step": 1227000 + }, + { + "epoch": 0.08, + "learning_rate": 4.861355254237288e-05, + "loss": 0.1255, + "step": 1227500 + }, + { + "epoch": 0.08, + "learning_rate": 4.861298757062147e-05, + "loss": 0.1254, + "step": 1228000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8612422598870064e-05, + "loss": 0.1255, + "step": 1228500 + }, + { + "epoch": 0.08, + "learning_rate": 4.861185762711865e-05, + "loss": 0.1202, + "step": 1229000 + }, + { + "epoch": 0.08, + "learning_rate": 4.861129265536724e-05, + "loss": 0.1299, + "step": 1229500 + }, + { + "epoch": 0.08, + "learning_rate": 4.861072994350282e-05, + "loss": 0.1282, + "step": 1230000 + }, + { + "epoch": 0.08, + "learning_rate": 4.861016497175141e-05, + "loss": 0.1218, + "step": 1230500 + }, + { + "epoch": 0.08, + "learning_rate": 4.86096e-05, + "loss": 0.1211, + "step": 1231000 + }, + { + "epoch": 0.08, + "learning_rate": 4.860903502824859e-05, + "loss": 0.1318, + "step": 1231500 + }, + { + "epoch": 0.08, + "learning_rate": 4.860847005649718e-05, + "loss": 0.1258, + "step": 1232000 + }, + { + "epoch": 0.08, + "learning_rate": 4.860790621468927e-05, + "loss": 0.1235, + "step": 1232500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8607341242937855e-05, + "loss": 0.1254, + "step": 1233000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8606776271186446e-05, + "loss": 0.121, + "step": 1233500 + }, + { + "epoch": 0.08, + "learning_rate": 4.860621129943503e-05, + "loss": 0.1307, + "step": 1234000 + }, + { + "epoch": 0.08, + "learning_rate": 4.860564632768362e-05, + "loss": 0.1261, + "step": 1234500 + }, + { + "epoch": 0.08, + "learning_rate": 4.860508248587571e-05, + "loss": 0.1367, + "step": 1235000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86045175141243e-05, + "loss": 0.1242, + "step": 1235500 + }, + { + "epoch": 0.08, + "learning_rate": 4.860395254237288e-05, + "loss": 0.1255, + "step": 1236000 + }, + { + "epoch": 0.08, + "learning_rate": 4.860338757062147e-05, + "loss": 0.1348, + "step": 1236500 + }, + { + "epoch": 0.08, + "learning_rate": 4.860282372881356e-05, + "loss": 0.1246, + "step": 1237000 + }, + { + "epoch": 0.08, + "learning_rate": 4.860225875706215e-05, + "loss": 0.1233, + "step": 1237500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8601693785310734e-05, + "loss": 0.1256, + "step": 1238000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8601128813559324e-05, + "loss": 0.1291, + "step": 1238500 + }, + { + "epoch": 0.08, + "learning_rate": 4.860056384180791e-05, + "loss": 0.1263, + "step": 1239000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86e-05, + "loss": 0.128, + "step": 1239500 + }, + { + "epoch": 0.08, + "learning_rate": 4.859943502824859e-05, + "loss": 0.1309, + "step": 1240000 + }, + { + "epoch": 0.08, + "learning_rate": 4.859887005649718e-05, + "loss": 0.1199, + "step": 1240500 + }, + { + "epoch": 0.08, + "learning_rate": 4.859830508474577e-05, + "loss": 0.1249, + "step": 1241000 + }, + { + "epoch": 0.08, + "learning_rate": 4.859774011299435e-05, + "loss": 0.1334, + "step": 1241500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8597176271186445e-05, + "loss": 0.1321, + "step": 1242000 + }, + { + "epoch": 0.08, + "learning_rate": 4.859661242937853e-05, + "loss": 0.1289, + "step": 1242500 + }, + { + "epoch": 0.08, + "learning_rate": 4.859604745762712e-05, + "loss": 0.1169, + "step": 1243000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8595482485875707e-05, + "loss": 0.1195, + "step": 1243500 + }, + { + "epoch": 0.08, + "learning_rate": 4.85949175141243e-05, + "loss": 0.1187, + "step": 1244000 + }, + { + "epoch": 0.08, + "learning_rate": 4.859435254237288e-05, + "loss": 0.1221, + "step": 1244500 + }, + { + "epoch": 0.08, + "learning_rate": 4.859378757062147e-05, + "loss": 0.1208, + "step": 1245000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8593222598870055e-05, + "loss": 0.1197, + "step": 1245500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8592657627118646e-05, + "loss": 0.127, + "step": 1246000 + }, + { + "epoch": 0.08, + "learning_rate": 4.859209265536723e-05, + "loss": 0.1285, + "step": 1246500 + }, + { + "epoch": 0.08, + "learning_rate": 4.859152768361582e-05, + "loss": 0.134, + "step": 1247000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8590963841807914e-05, + "loss": 0.1305, + "step": 1247500 + }, + { + "epoch": 0.08, + "learning_rate": 4.85903988700565e-05, + "loss": 0.1261, + "step": 1248000 + }, + { + "epoch": 0.08, + "learning_rate": 4.858983389830509e-05, + "loss": 0.1251, + "step": 1248500 + }, + { + "epoch": 0.08, + "learning_rate": 4.858926892655368e-05, + "loss": 0.1252, + "step": 1249000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8588705084745766e-05, + "loss": 0.1318, + "step": 1249500 + }, + { + "epoch": 0.08, + "learning_rate": 4.858814011299435e-05, + "loss": 0.1294, + "step": 1250000 + }, + { + "epoch": 0.08, + "learning_rate": 4.858757514124294e-05, + "loss": 0.1232, + "step": 1250500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8587010169491525e-05, + "loss": 0.127, + "step": 1251000 + }, + { + "epoch": 0.08, + "learning_rate": 4.858644632768362e-05, + "loss": 0.1294, + "step": 1251500 + }, + { + "epoch": 0.08, + "learning_rate": 4.85858813559322e-05, + "loss": 0.1309, + "step": 1252000 + }, + { + "epoch": 0.08, + "learning_rate": 4.858531638418079e-05, + "loss": 0.1234, + "step": 1252500 + }, + { + "epoch": 0.08, + "learning_rate": 4.858475141242938e-05, + "loss": 0.1229, + "step": 1253000 + }, + { + "epoch": 0.08, + "learning_rate": 4.858418644067797e-05, + "loss": 0.1304, + "step": 1253500 + }, + { + "epoch": 0.09, + "learning_rate": 4.858362259887006e-05, + "loss": 0.1318, + "step": 1254000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8583057627118645e-05, + "loss": 0.1266, + "step": 1254500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8582492655367236e-05, + "loss": 0.1234, + "step": 1255000 + }, + { + "epoch": 0.09, + "learning_rate": 4.858192768361582e-05, + "loss": 0.1327, + "step": 1255500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8581363841807914e-05, + "loss": 0.127, + "step": 1256000 + }, + { + "epoch": 0.09, + "learning_rate": 4.85807988700565e-05, + "loss": 0.1273, + "step": 1256500 + }, + { + "epoch": 0.09, + "learning_rate": 4.858023389830509e-05, + "loss": 0.1347, + "step": 1257000 + }, + { + "epoch": 0.09, + "learning_rate": 4.857966892655367e-05, + "loss": 0.128, + "step": 1257500 + }, + { + "epoch": 0.09, + "learning_rate": 4.857910395480226e-05, + "loss": 0.1219, + "step": 1258000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8578538983050846e-05, + "loss": 0.1291, + "step": 1258500 + }, + { + "epoch": 0.09, + "learning_rate": 4.857797514124294e-05, + "loss": 0.1231, + "step": 1259000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8577410169491524e-05, + "loss": 0.1236, + "step": 1259500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8576845197740115e-05, + "loss": 0.1244, + "step": 1260000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8576280225988705e-05, + "loss": 0.1255, + "step": 1260500 + }, + { + "epoch": 0.09, + "learning_rate": 4.857571638418079e-05, + "loss": 0.1264, + "step": 1261000 + }, + { + "epoch": 0.09, + "learning_rate": 4.857515141242938e-05, + "loss": 0.1254, + "step": 1261500 + }, + { + "epoch": 0.09, + "learning_rate": 4.857458644067797e-05, + "loss": 0.1295, + "step": 1262000 + }, + { + "epoch": 0.09, + "learning_rate": 4.857402146892656e-05, + "loss": 0.1344, + "step": 1262500 + }, + { + "epoch": 0.09, + "learning_rate": 4.857345649717515e-05, + "loss": 0.1204, + "step": 1263000 + }, + { + "epoch": 0.09, + "learning_rate": 4.857289152542373e-05, + "loss": 0.1203, + "step": 1263500 + }, + { + "epoch": 0.09, + "learning_rate": 4.857232655367232e-05, + "loss": 0.1294, + "step": 1264000 + }, + { + "epoch": 0.09, + "learning_rate": 4.857176271186441e-05, + "loss": 0.1349, + "step": 1264500 + }, + { + "epoch": 0.09, + "learning_rate": 4.857119774011299e-05, + "loss": 0.1285, + "step": 1265000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8570632768361584e-05, + "loss": 0.1129, + "step": 1265500 + }, + { + "epoch": 0.09, + "learning_rate": 4.857006779661017e-05, + "loss": 0.1241, + "step": 1266000 + }, + { + "epoch": 0.09, + "learning_rate": 4.856950282485876e-05, + "loss": 0.1233, + "step": 1266500 + }, + { + "epoch": 0.09, + "learning_rate": 4.856893898305085e-05, + "loss": 0.1293, + "step": 1267000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8568374011299436e-05, + "loss": 0.1248, + "step": 1267500 + }, + { + "epoch": 0.09, + "learning_rate": 4.856780903954803e-05, + "loss": 0.1185, + "step": 1268000 + }, + { + "epoch": 0.09, + "learning_rate": 4.856724406779661e-05, + "loss": 0.1273, + "step": 1268500 + }, + { + "epoch": 0.09, + "learning_rate": 4.85666790960452e-05, + "loss": 0.1326, + "step": 1269000 + }, + { + "epoch": 0.09, + "learning_rate": 4.856611525423729e-05, + "loss": 0.1214, + "step": 1269500 + }, + { + "epoch": 0.09, + "learning_rate": 4.856555028248588e-05, + "loss": 0.1146, + "step": 1270000 + }, + { + "epoch": 0.09, + "learning_rate": 4.856498531073447e-05, + "loss": 0.1321, + "step": 1270500 + }, + { + "epoch": 0.09, + "learning_rate": 4.856442033898305e-05, + "loss": 0.129, + "step": 1271000 + }, + { + "epoch": 0.09, + "learning_rate": 4.856385649717514e-05, + "loss": 0.1314, + "step": 1271500 + }, + { + "epoch": 0.09, + "learning_rate": 4.856329152542373e-05, + "loss": 0.1298, + "step": 1272000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8562726553672315e-05, + "loss": 0.1183, + "step": 1272500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8562161581920905e-05, + "loss": 0.1231, + "step": 1273000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8561596610169496e-05, + "loss": 0.1269, + "step": 1273500 + }, + { + "epoch": 0.09, + "learning_rate": 4.856103163841808e-05, + "loss": 0.1349, + "step": 1274000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8560467796610174e-05, + "loss": 0.1243, + "step": 1274500 + }, + { + "epoch": 0.09, + "learning_rate": 4.855990282485876e-05, + "loss": 0.133, + "step": 1275000 + }, + { + "epoch": 0.09, + "learning_rate": 4.855933785310735e-05, + "loss": 0.1235, + "step": 1275500 + }, + { + "epoch": 0.09, + "learning_rate": 4.855877288135593e-05, + "loss": 0.1316, + "step": 1276000 + }, + { + "epoch": 0.09, + "learning_rate": 4.855820790960452e-05, + "loss": 0.1249, + "step": 1276500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8557644067796616e-05, + "loss": 0.1276, + "step": 1277000 + }, + { + "epoch": 0.09, + "learning_rate": 4.85570790960452e-05, + "loss": 0.128, + "step": 1277500 + }, + { + "epoch": 0.09, + "learning_rate": 4.855651412429379e-05, + "loss": 0.1286, + "step": 1278000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8555949152542375e-05, + "loss": 0.127, + "step": 1278500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8555384180790965e-05, + "loss": 0.1265, + "step": 1279000 + }, + { + "epoch": 0.09, + "learning_rate": 4.855481920903955e-05, + "loss": 0.1275, + "step": 1279500 + }, + { + "epoch": 0.09, + "learning_rate": 4.855425423728814e-05, + "loss": 0.1272, + "step": 1280000 + }, + { + "epoch": 0.09, + "learning_rate": 4.855368926553673e-05, + "loss": 0.1216, + "step": 1280500 + }, + { + "epoch": 0.09, + "learning_rate": 4.855312655367232e-05, + "loss": 0.1198, + "step": 1281000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8552561581920905e-05, + "loss": 0.1246, + "step": 1281500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8551996610169495e-05, + "loss": 0.1263, + "step": 1282000 + }, + { + "epoch": 0.09, + "learning_rate": 4.855143163841808e-05, + "loss": 0.1325, + "step": 1282500 + }, + { + "epoch": 0.09, + "learning_rate": 4.855086666666667e-05, + "loss": 0.1243, + "step": 1283000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8550301694915253e-05, + "loss": 0.1314, + "step": 1283500 + }, + { + "epoch": 0.09, + "learning_rate": 4.854973785310735e-05, + "loss": 0.1333, + "step": 1284000 + }, + { + "epoch": 0.09, + "learning_rate": 4.854917288135594e-05, + "loss": 0.1288, + "step": 1284500 + }, + { + "epoch": 0.09, + "learning_rate": 4.854860790960452e-05, + "loss": 0.1252, + "step": 1285000 + }, + { + "epoch": 0.09, + "learning_rate": 4.854804293785311e-05, + "loss": 0.1291, + "step": 1285500 + }, + { + "epoch": 0.09, + "learning_rate": 4.85474790960452e-05, + "loss": 0.1243, + "step": 1286000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8546915254237294e-05, + "loss": 0.1215, + "step": 1286500 + }, + { + "epoch": 0.09, + "learning_rate": 4.854635028248588e-05, + "loss": 0.1258, + "step": 1287000 + }, + { + "epoch": 0.09, + "learning_rate": 4.854578531073447e-05, + "loss": 0.1287, + "step": 1287500 + }, + { + "epoch": 0.09, + "learning_rate": 4.854522033898305e-05, + "loss": 0.1243, + "step": 1288000 + }, + { + "epoch": 0.09, + "learning_rate": 4.854465536723164e-05, + "loss": 0.1301, + "step": 1288500 + }, + { + "epoch": 0.09, + "learning_rate": 4.854409039548023e-05, + "loss": 0.1217, + "step": 1289000 + }, + { + "epoch": 0.09, + "learning_rate": 4.854352542372882e-05, + "loss": 0.1274, + "step": 1289500 + }, + { + "epoch": 0.09, + "learning_rate": 4.854296045197741e-05, + "loss": 0.1278, + "step": 1290000 + }, + { + "epoch": 0.09, + "learning_rate": 4.854239548022599e-05, + "loss": 0.1259, + "step": 1290500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8541831638418085e-05, + "loss": 0.1295, + "step": 1291000 + }, + { + "epoch": 0.09, + "learning_rate": 4.854126666666667e-05, + "loss": 0.1241, + "step": 1291500 + }, + { + "epoch": 0.09, + "learning_rate": 4.854070169491526e-05, + "loss": 0.124, + "step": 1292000 + }, + { + "epoch": 0.09, + "learning_rate": 4.854013672316384e-05, + "loss": 0.1284, + "step": 1292500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8539571751412434e-05, + "loss": 0.1279, + "step": 1293000 + }, + { + "epoch": 0.09, + "learning_rate": 4.853900790960452e-05, + "loss": 0.1284, + "step": 1293500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8538442937853105e-05, + "loss": 0.1236, + "step": 1294000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8537877966101695e-05, + "loss": 0.1289, + "step": 1294500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8537312994350286e-05, + "loss": 0.13, + "step": 1295000 + }, + { + "epoch": 0.09, + "learning_rate": 4.853674915254238e-05, + "loss": 0.1239, + "step": 1295500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8536184180790964e-05, + "loss": 0.1283, + "step": 1296000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8535619209039554e-05, + "loss": 0.128, + "step": 1296500 + }, + { + "epoch": 0.09, + "learning_rate": 4.853505423728814e-05, + "loss": 0.1247, + "step": 1297000 + }, + { + "epoch": 0.09, + "learning_rate": 4.853448926553673e-05, + "loss": 0.1181, + "step": 1297500 + }, + { + "epoch": 0.09, + "learning_rate": 4.853392429378531e-05, + "loss": 0.1221, + "step": 1298000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8533360451977407e-05, + "loss": 0.1277, + "step": 1298500 + }, + { + "epoch": 0.09, + "learning_rate": 4.853279548022599e-05, + "loss": 0.1167, + "step": 1299000 + }, + { + "epoch": 0.09, + "learning_rate": 4.853223050847458e-05, + "loss": 0.1272, + "step": 1299500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8531665536723165e-05, + "loss": 0.1334, + "step": 1300000 + }, + { + "epoch": 0.09, + "learning_rate": 4.853110169491525e-05, + "loss": 0.1243, + "step": 1300500 + }, + { + "epoch": 0.09, + "learning_rate": 4.853053672316384e-05, + "loss": 0.1196, + "step": 1301000 + }, + { + "epoch": 0.09, + "learning_rate": 4.852997175141243e-05, + "loss": 0.1318, + "step": 1301500 + }, + { + "epoch": 0.09, + "learning_rate": 4.852940677966102e-05, + "loss": 0.1272, + "step": 1302000 + }, + { + "epoch": 0.09, + "learning_rate": 4.852884180790961e-05, + "loss": 0.1301, + "step": 1302500 + }, + { + "epoch": 0.09, + "learning_rate": 4.852827683615819e-05, + "loss": 0.1274, + "step": 1303000 + }, + { + "epoch": 0.09, + "learning_rate": 4.852771186440678e-05, + "loss": 0.1316, + "step": 1303500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8527148022598876e-05, + "loss": 0.119, + "step": 1304000 + }, + { + "epoch": 0.09, + "learning_rate": 4.852658305084746e-05, + "loss": 0.1291, + "step": 1304500 + }, + { + "epoch": 0.09, + "learning_rate": 4.852601807909605e-05, + "loss": 0.1254, + "step": 1305000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8525453107344634e-05, + "loss": 0.1253, + "step": 1305500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8524888135593225e-05, + "loss": 0.126, + "step": 1306000 + }, + { + "epoch": 0.09, + "learning_rate": 4.852432429378531e-05, + "loss": 0.1234, + "step": 1306500 + }, + { + "epoch": 0.09, + "learning_rate": 4.85237593220339e-05, + "loss": 0.1244, + "step": 1307000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8523194350282486e-05, + "loss": 0.1243, + "step": 1307500 + }, + { + "epoch": 0.09, + "learning_rate": 4.852262937853108e-05, + "loss": 0.1161, + "step": 1308000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8522065536723164e-05, + "loss": 0.1236, + "step": 1308500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8521500564971755e-05, + "loss": 0.1259, + "step": 1309000 + }, + { + "epoch": 0.09, + "learning_rate": 4.852093559322034e-05, + "loss": 0.1271, + "step": 1309500 + }, + { + "epoch": 0.09, + "learning_rate": 4.852037062146893e-05, + "loss": 0.1287, + "step": 1310000 + }, + { + "epoch": 0.09, + "learning_rate": 4.851980564971751e-05, + "loss": 0.1265, + "step": 1310500 + }, + { + "epoch": 0.09, + "learning_rate": 4.851924180790961e-05, + "loss": 0.1267, + "step": 1311000 + }, + { + "epoch": 0.09, + "learning_rate": 4.85186768361582e-05, + "loss": 0.1253, + "step": 1311500 + }, + { + "epoch": 0.09, + "learning_rate": 4.851811186440678e-05, + "loss": 0.131, + "step": 1312000 + }, + { + "epoch": 0.09, + "learning_rate": 4.851754689265537e-05, + "loss": 0.1184, + "step": 1312500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8516981920903956e-05, + "loss": 0.1184, + "step": 1313000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8516416949152546e-05, + "loss": 0.1234, + "step": 1313500 + }, + { + "epoch": 0.09, + "learning_rate": 4.851585310734463e-05, + "loss": 0.124, + "step": 1314000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8515288135593224e-05, + "loss": 0.1296, + "step": 1314500 + }, + { + "epoch": 0.09, + "learning_rate": 4.851472316384181e-05, + "loss": 0.1198, + "step": 1315000 + }, + { + "epoch": 0.09, + "learning_rate": 4.85141581920904e-05, + "loss": 0.1326, + "step": 1315500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8513594350282486e-05, + "loss": 0.1292, + "step": 1316000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8513029378531076e-05, + "loss": 0.1205, + "step": 1316500 + }, + { + "epoch": 0.09, + "learning_rate": 4.851246440677966e-05, + "loss": 0.1227, + "step": 1317000 + }, + { + "epoch": 0.09, + "learning_rate": 4.851189943502825e-05, + "loss": 0.1296, + "step": 1317500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8511334463276834e-05, + "loss": 0.127, + "step": 1318000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8510769491525425e-05, + "loss": 0.12, + "step": 1318500 + }, + { + "epoch": 0.09, + "learning_rate": 4.851020564971752e-05, + "loss": 0.1208, + "step": 1319000 + }, + { + "epoch": 0.09, + "learning_rate": 4.850964067796611e-05, + "loss": 0.1226, + "step": 1319500 + }, + { + "epoch": 0.09, + "learning_rate": 4.850907570621469e-05, + "loss": 0.124, + "step": 1320000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8508510734463284e-05, + "loss": 0.1223, + "step": 1320500 + }, + { + "epoch": 0.09, + "learning_rate": 4.850794576271187e-05, + "loss": 0.122, + "step": 1321000 + }, + { + "epoch": 0.09, + "learning_rate": 4.850738079096046e-05, + "loss": 0.119, + "step": 1321500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8506816949152545e-05, + "loss": 0.1213, + "step": 1322000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8506251977401136e-05, + "loss": 0.1158, + "step": 1322500 + }, + { + "epoch": 0.09, + "learning_rate": 4.850568700564972e-05, + "loss": 0.1265, + "step": 1323000 + }, + { + "epoch": 0.09, + "learning_rate": 4.850512203389831e-05, + "loss": 0.1295, + "step": 1323500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8504557062146894e-05, + "loss": 0.1283, + "step": 1324000 + }, + { + "epoch": 0.09, + "learning_rate": 4.850399322033898e-05, + "loss": 0.1175, + "step": 1324500 + }, + { + "epoch": 0.09, + "learning_rate": 4.850342824858757e-05, + "loss": 0.1203, + "step": 1325000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8502863276836156e-05, + "loss": 0.1221, + "step": 1325500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8502298305084746e-05, + "loss": 0.1238, + "step": 1326000 + }, + { + "epoch": 0.09, + "learning_rate": 4.850173446327684e-05, + "loss": 0.1279, + "step": 1326500 + }, + { + "epoch": 0.09, + "learning_rate": 4.850116949152543e-05, + "loss": 0.1263, + "step": 1327000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8500604519774015e-05, + "loss": 0.12, + "step": 1327500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8500039548022605e-05, + "loss": 0.1262, + "step": 1328000 + }, + { + "epoch": 0.09, + "learning_rate": 4.849947457627119e-05, + "loss": 0.124, + "step": 1328500 + }, + { + "epoch": 0.09, + "learning_rate": 4.849890960451978e-05, + "loss": 0.1284, + "step": 1329000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8498344632768364e-05, + "loss": 0.1266, + "step": 1329500 + }, + { + "epoch": 0.09, + "learning_rate": 4.849778079096046e-05, + "loss": 0.1222, + "step": 1330000 + }, + { + "epoch": 0.09, + "learning_rate": 4.849721581920904e-05, + "loss": 0.1234, + "step": 1330500 + }, + { + "epoch": 0.09, + "learning_rate": 4.849665084745763e-05, + "loss": 0.113, + "step": 1331000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8496085875706216e-05, + "loss": 0.127, + "step": 1331500 + }, + { + "epoch": 0.09, + "learning_rate": 4.84955220338983e-05, + "loss": 0.1319, + "step": 1332000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8494957062146893e-05, + "loss": 0.1246, + "step": 1332500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8494392090395484e-05, + "loss": 0.1252, + "step": 1333000 + }, + { + "epoch": 0.09, + "learning_rate": 4.849382711864407e-05, + "loss": 0.1278, + "step": 1333500 + }, + { + "epoch": 0.09, + "learning_rate": 4.849326214689266e-05, + "loss": 0.1194, + "step": 1334000 + }, + { + "epoch": 0.09, + "learning_rate": 4.849269717514124e-05, + "loss": 0.1287, + "step": 1334500 + }, + { + "epoch": 0.09, + "learning_rate": 4.849213220338983e-05, + "loss": 0.1274, + "step": 1335000 + }, + { + "epoch": 0.09, + "learning_rate": 4.849156836158193e-05, + "loss": 0.1263, + "step": 1335500 + }, + { + "epoch": 0.09, + "learning_rate": 4.849100338983051e-05, + "loss": 0.1171, + "step": 1336000 + }, + { + "epoch": 0.09, + "learning_rate": 4.84904384180791e-05, + "loss": 0.1225, + "step": 1336500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8489873446327685e-05, + "loss": 0.1237, + "step": 1337000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8489308474576276e-05, + "loss": 0.1243, + "step": 1337500 + }, + { + "epoch": 0.09, + "learning_rate": 4.848874350282486e-05, + "loss": 0.1289, + "step": 1338000 + }, + { + "epoch": 0.09, + "learning_rate": 4.848817853107345e-05, + "loss": 0.1266, + "step": 1338500 + }, + { + "epoch": 0.09, + "learning_rate": 4.848761468926554e-05, + "loss": 0.1193, + "step": 1339000 + }, + { + "epoch": 0.09, + "learning_rate": 4.848704971751413e-05, + "loss": 0.1204, + "step": 1339500 + }, + { + "epoch": 0.09, + "learning_rate": 4.848648474576272e-05, + "loss": 0.1194, + "step": 1340000 + }, + { + "epoch": 0.09, + "learning_rate": 4.84859197740113e-05, + "loss": 0.1279, + "step": 1340500 + }, + { + "epoch": 0.09, + "learning_rate": 4.848535593220339e-05, + "loss": 0.1257, + "step": 1341000 + }, + { + "epoch": 0.09, + "learning_rate": 4.848479096045198e-05, + "loss": 0.127, + "step": 1341500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8484225988700564e-05, + "loss": 0.1203, + "step": 1342000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8483661016949154e-05, + "loss": 0.1198, + "step": 1342500 + }, + { + "epoch": 0.09, + "learning_rate": 4.848309717514125e-05, + "loss": 0.1254, + "step": 1343000 + }, + { + "epoch": 0.09, + "learning_rate": 4.848253220338983e-05, + "loss": 0.1203, + "step": 1343500 + }, + { + "epoch": 0.09, + "learning_rate": 4.848196723163842e-05, + "loss": 0.1298, + "step": 1344000 + }, + { + "epoch": 0.09, + "learning_rate": 4.848140225988701e-05, + "loss": 0.1247, + "step": 1344500 + }, + { + "epoch": 0.09, + "learning_rate": 4.84808372881356e-05, + "loss": 0.1286, + "step": 1345000 + }, + { + "epoch": 0.09, + "learning_rate": 4.848027231638419e-05, + "loss": 0.1192, + "step": 1345500 + }, + { + "epoch": 0.09, + "learning_rate": 4.847970734463277e-05, + "loss": 0.1314, + "step": 1346000 + }, + { + "epoch": 0.09, + "learning_rate": 4.847914237288136e-05, + "loss": 0.1342, + "step": 1346500 + }, + { + "epoch": 0.09, + "learning_rate": 4.847857853107345e-05, + "loss": 0.1272, + "step": 1347000 + }, + { + "epoch": 0.09, + "learning_rate": 4.847801355932204e-05, + "loss": 0.1242, + "step": 1347500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8477448587570624e-05, + "loss": 0.1251, + "step": 1348000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8476883615819214e-05, + "loss": 0.1228, + "step": 1348500 + }, + { + "epoch": 0.09, + "learning_rate": 4.84763197740113e-05, + "loss": 0.1222, + "step": 1349000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8475754802259885e-05, + "loss": 0.1232, + "step": 1349500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8475189830508476e-05, + "loss": 0.1293, + "step": 1350000 + }, + { + "epoch": 0.09, + "learning_rate": 4.847462485875706e-05, + "loss": 0.1213, + "step": 1350500 + }, + { + "epoch": 0.09, + "learning_rate": 4.847405988700565e-05, + "loss": 0.125, + "step": 1351000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8473496045197744e-05, + "loss": 0.1275, + "step": 1351500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8472931073446335e-05, + "loss": 0.125, + "step": 1352000 + }, + { + "epoch": 0.09, + "learning_rate": 4.847236610169492e-05, + "loss": 0.123, + "step": 1352500 + }, + { + "epoch": 0.09, + "learning_rate": 4.847180112994351e-05, + "loss": 0.1152, + "step": 1353000 + }, + { + "epoch": 0.09, + "learning_rate": 4.847123615819209e-05, + "loss": 0.1175, + "step": 1353500 + }, + { + "epoch": 0.09, + "learning_rate": 4.847067231638419e-05, + "loss": 0.1229, + "step": 1354000 + }, + { + "epoch": 0.09, + "learning_rate": 4.847010734463277e-05, + "loss": 0.1189, + "step": 1354500 + }, + { + "epoch": 0.09, + "learning_rate": 4.846954237288136e-05, + "loss": 0.1279, + "step": 1355000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8468977401129945e-05, + "loss": 0.1234, + "step": 1355500 + }, + { + "epoch": 0.09, + "learning_rate": 4.846841355932203e-05, + "loss": 0.1272, + "step": 1356000 + }, + { + "epoch": 0.09, + "learning_rate": 4.846784858757062e-05, + "loss": 0.1299, + "step": 1356500 + }, + { + "epoch": 0.09, + "learning_rate": 4.846728361581921e-05, + "loss": 0.1299, + "step": 1357000 + }, + { + "epoch": 0.09, + "learning_rate": 4.84667186440678e-05, + "loss": 0.1232, + "step": 1357500 + }, + { + "epoch": 0.09, + "learning_rate": 4.846615367231639e-05, + "loss": 0.1318, + "step": 1358000 + }, + { + "epoch": 0.09, + "learning_rate": 4.846558983050848e-05, + "loss": 0.1255, + "step": 1358500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8465024858757066e-05, + "loss": 0.1216, + "step": 1359000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8464459887005656e-05, + "loss": 0.1305, + "step": 1359500 + }, + { + "epoch": 0.09, + "learning_rate": 4.846389491525424e-05, + "loss": 0.123, + "step": 1360000 + }, + { + "epoch": 0.09, + "learning_rate": 4.846332994350283e-05, + "loss": 0.1359, + "step": 1360500 + }, + { + "epoch": 0.09, + "learning_rate": 4.846276610169492e-05, + "loss": 0.1234, + "step": 1361000 + }, + { + "epoch": 0.09, + "learning_rate": 4.846220112994351e-05, + "loss": 0.1285, + "step": 1361500 + }, + { + "epoch": 0.09, + "learning_rate": 4.846163615819209e-05, + "loss": 0.1258, + "step": 1362000 + }, + { + "epoch": 0.09, + "learning_rate": 4.846107118644068e-05, + "loss": 0.1264, + "step": 1362500 + }, + { + "epoch": 0.09, + "learning_rate": 4.846050621468927e-05, + "loss": 0.1172, + "step": 1363000 + }, + { + "epoch": 0.09, + "learning_rate": 4.845994124293786e-05, + "loss": 0.1275, + "step": 1363500 + }, + { + "epoch": 0.09, + "learning_rate": 4.845937627118644e-05, + "loss": 0.1284, + "step": 1364000 + }, + { + "epoch": 0.09, + "learning_rate": 4.845881129943503e-05, + "loss": 0.1212, + "step": 1364500 + }, + { + "epoch": 0.09, + "learning_rate": 4.845824745762712e-05, + "loss": 0.1174, + "step": 1365000 + }, + { + "epoch": 0.09, + "learning_rate": 4.845768248587571e-05, + "loss": 0.1143, + "step": 1365500 + }, + { + "epoch": 0.09, + "learning_rate": 4.845711751412429e-05, + "loss": 0.1217, + "step": 1366000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8456552542372884e-05, + "loss": 0.1248, + "step": 1366500 + }, + { + "epoch": 0.09, + "learning_rate": 4.845598870056498e-05, + "loss": 0.1208, + "step": 1367000 + }, + { + "epoch": 0.09, + "learning_rate": 4.845542372881356e-05, + "loss": 0.1188, + "step": 1367500 + }, + { + "epoch": 0.09, + "learning_rate": 4.845485875706215e-05, + "loss": 0.126, + "step": 1368000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8454293785310736e-05, + "loss": 0.1232, + "step": 1368500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8453728813559327e-05, + "loss": 0.1269, + "step": 1369000 + }, + { + "epoch": 0.09, + "learning_rate": 4.845316384180791e-05, + "loss": 0.1342, + "step": 1369500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8452600000000004e-05, + "loss": 0.1206, + "step": 1370000 + }, + { + "epoch": 0.09, + "learning_rate": 4.845203502824859e-05, + "loss": 0.1227, + "step": 1370500 + }, + { + "epoch": 0.09, + "learning_rate": 4.845147005649718e-05, + "loss": 0.1214, + "step": 1371000 + }, + { + "epoch": 0.09, + "learning_rate": 4.845090508474576e-05, + "loss": 0.1252, + "step": 1371500 + }, + { + "epoch": 0.09, + "learning_rate": 4.845034011299435e-05, + "loss": 0.129, + "step": 1372000 + }, + { + "epoch": 0.09, + "learning_rate": 4.844977627118644e-05, + "loss": 0.1244, + "step": 1372500 + }, + { + "epoch": 0.09, + "learning_rate": 4.844921129943503e-05, + "loss": 0.1189, + "step": 1373000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8448646327683615e-05, + "loss": 0.1212, + "step": 1373500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8448081355932205e-05, + "loss": 0.12, + "step": 1374000 + }, + { + "epoch": 0.09, + "learning_rate": 4.844751638418079e-05, + "loss": 0.1173, + "step": 1374500 + }, + { + "epoch": 0.09, + "learning_rate": 4.844695254237288e-05, + "loss": 0.1221, + "step": 1375000 + }, + { + "epoch": 0.09, + "learning_rate": 4.844638870056498e-05, + "loss": 0.1294, + "step": 1375500 + }, + { + "epoch": 0.09, + "learning_rate": 4.844582372881356e-05, + "loss": 0.1246, + "step": 1376000 + }, + { + "epoch": 0.09, + "learning_rate": 4.844525875706215e-05, + "loss": 0.1244, + "step": 1376500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8444693785310735e-05, + "loss": 0.127, + "step": 1377000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8444128813559326e-05, + "loss": 0.1218, + "step": 1377500 + }, + { + "epoch": 0.09, + "learning_rate": 4.844356384180791e-05, + "loss": 0.126, + "step": 1378000 + }, + { + "epoch": 0.09, + "learning_rate": 4.84429988700565e-05, + "loss": 0.1209, + "step": 1378500 + }, + { + "epoch": 0.09, + "learning_rate": 4.844243389830509e-05, + "loss": 0.1216, + "step": 1379000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8441868926553675e-05, + "loss": 0.122, + "step": 1379500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8441303954802265e-05, + "loss": 0.1271, + "step": 1380000 + }, + { + "epoch": 0.09, + "learning_rate": 4.844073898305085e-05, + "loss": 0.1251, + "step": 1380500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8440175141242936e-05, + "loss": 0.1201, + "step": 1381000 + }, + { + "epoch": 0.09, + "learning_rate": 4.843961016949153e-05, + "loss": 0.1222, + "step": 1381500 + }, + { + "epoch": 0.09, + "learning_rate": 4.843904519774011e-05, + "loss": 0.1238, + "step": 1382000 + }, + { + "epoch": 0.09, + "learning_rate": 4.84384802259887e-05, + "loss": 0.1263, + "step": 1382500 + }, + { + "epoch": 0.09, + "learning_rate": 4.843791525423729e-05, + "loss": 0.1183, + "step": 1383000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8437351412429386e-05, + "loss": 0.1204, + "step": 1383500 + }, + { + "epoch": 0.09, + "learning_rate": 4.843678644067797e-05, + "loss": 0.1263, + "step": 1384000 + }, + { + "epoch": 0.09, + "learning_rate": 4.843622146892656e-05, + "loss": 0.1201, + "step": 1384500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8435656497175144e-05, + "loss": 0.1202, + "step": 1385000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8435091525423734e-05, + "loss": 0.1255, + "step": 1385500 + }, + { + "epoch": 0.09, + "learning_rate": 4.843452655367232e-05, + "loss": 0.1241, + "step": 1386000 + }, + { + "epoch": 0.09, + "learning_rate": 4.843396271186441e-05, + "loss": 0.1224, + "step": 1386500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8433397740112996e-05, + "loss": 0.1144, + "step": 1387000 + }, + { + "epoch": 0.09, + "learning_rate": 4.843283276836159e-05, + "loss": 0.1267, + "step": 1387500 + }, + { + "epoch": 0.09, + "learning_rate": 4.843226779661017e-05, + "loss": 0.1258, + "step": 1388000 + }, + { + "epoch": 0.09, + "learning_rate": 4.843170282485876e-05, + "loss": 0.1203, + "step": 1388500 + }, + { + "epoch": 0.09, + "learning_rate": 4.843113898305085e-05, + "loss": 0.1229, + "step": 1389000 + }, + { + "epoch": 0.09, + "learning_rate": 4.843057401129944e-05, + "loss": 0.1312, + "step": 1389500 + }, + { + "epoch": 0.09, + "learning_rate": 4.843000903954802e-05, + "loss": 0.1235, + "step": 1390000 + }, + { + "epoch": 0.09, + "learning_rate": 4.842944406779661e-05, + "loss": 0.1256, + "step": 1390500 + }, + { + "epoch": 0.09, + "learning_rate": 4.84288790960452e-05, + "loss": 0.1228, + "step": 1391000 + }, + { + "epoch": 0.09, + "learning_rate": 4.842831525423729e-05, + "loss": 0.1149, + "step": 1391500 + }, + { + "epoch": 0.09, + "learning_rate": 4.842775028248588e-05, + "loss": 0.1238, + "step": 1392000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8427185310734465e-05, + "loss": 0.1345, + "step": 1392500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8426620338983056e-05, + "loss": 0.1253, + "step": 1393000 + }, + { + "epoch": 0.09, + "learning_rate": 4.842605536723164e-05, + "loss": 0.1214, + "step": 1393500 + }, + { + "epoch": 0.09, + "learning_rate": 4.842549039548023e-05, + "loss": 0.1224, + "step": 1394000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8424925423728814e-05, + "loss": 0.1226, + "step": 1394500 + }, + { + "epoch": 0.09, + "learning_rate": 4.842436158192091e-05, + "loss": 0.1199, + "step": 1395000 + }, + { + "epoch": 0.09, + "learning_rate": 4.842379661016949e-05, + "loss": 0.1198, + "step": 1395500 + }, + { + "epoch": 0.09, + "learning_rate": 4.842323163841808e-05, + "loss": 0.1203, + "step": 1396000 + }, + { + "epoch": 0.09, + "learning_rate": 4.842266666666667e-05, + "loss": 0.1189, + "step": 1396500 + }, + { + "epoch": 0.09, + "learning_rate": 4.842210169491526e-05, + "loss": 0.124, + "step": 1397000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8421537853107344e-05, + "loss": 0.1272, + "step": 1397500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8420972881355935e-05, + "loss": 0.1271, + "step": 1398000 + }, + { + "epoch": 0.09, + "learning_rate": 4.842040790960452e-05, + "loss": 0.1254, + "step": 1398500 + }, + { + "epoch": 0.09, + "learning_rate": 4.841984293785311e-05, + "loss": 0.1176, + "step": 1399000 + }, + { + "epoch": 0.09, + "learning_rate": 4.84192790960452e-05, + "loss": 0.1153, + "step": 1399500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8418714124293794e-05, + "loss": 0.1177, + "step": 1400000 + }, + { + "epoch": 0.09, + "learning_rate": 4.841814915254238e-05, + "loss": 0.1219, + "step": 1400500 + }, + { + "epoch": 0.09, + "learning_rate": 4.841758418079097e-05, + "loss": 0.1267, + "step": 1401000 + }, + { + "epoch": 0.1, + "learning_rate": 4.841701920903955e-05, + "loss": 0.1206, + "step": 1401500 + }, + { + "epoch": 0.1, + "learning_rate": 4.841645423728814e-05, + "loss": 0.1215, + "step": 1402000 + }, + { + "epoch": 0.1, + "learning_rate": 4.841589039548023e-05, + "loss": 0.1204, + "step": 1402500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8415325423728813e-05, + "loss": 0.1204, + "step": 1403000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8414760451977404e-05, + "loss": 0.1216, + "step": 1403500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8414195480225995e-05, + "loss": 0.1221, + "step": 1404000 + }, + { + "epoch": 0.1, + "learning_rate": 4.841363163841808e-05, + "loss": 0.1197, + "step": 1404500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8413066666666666e-05, + "loss": 0.1247, + "step": 1405000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8412501694915256e-05, + "loss": 0.1244, + "step": 1405500 + }, + { + "epoch": 0.1, + "learning_rate": 4.841193672316384e-05, + "loss": 0.1185, + "step": 1406000 + }, + { + "epoch": 0.1, + "learning_rate": 4.841137175141243e-05, + "loss": 0.1266, + "step": 1406500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8410807909604525e-05, + "loss": 0.1212, + "step": 1407000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8410242937853115e-05, + "loss": 0.1194, + "step": 1407500 + }, + { + "epoch": 0.1, + "learning_rate": 4.84096779661017e-05, + "loss": 0.1165, + "step": 1408000 + }, + { + "epoch": 0.1, + "learning_rate": 4.840911299435029e-05, + "loss": 0.1252, + "step": 1408500 + }, + { + "epoch": 0.1, + "learning_rate": 4.840854915254238e-05, + "loss": 0.1266, + "step": 1409000 + }, + { + "epoch": 0.1, + "learning_rate": 4.840798418079096e-05, + "loss": 0.127, + "step": 1409500 + }, + { + "epoch": 0.1, + "learning_rate": 4.840741920903955e-05, + "loss": 0.124, + "step": 1410000 + }, + { + "epoch": 0.1, + "learning_rate": 4.840685423728814e-05, + "loss": 0.1207, + "step": 1410500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8406289265536725e-05, + "loss": 0.1282, + "step": 1411000 + }, + { + "epoch": 0.1, + "learning_rate": 4.840572542372881e-05, + "loss": 0.1229, + "step": 1411500 + }, + { + "epoch": 0.1, + "learning_rate": 4.84051604519774e-05, + "loss": 0.1311, + "step": 1412000 + }, + { + "epoch": 0.1, + "learning_rate": 4.840459548022599e-05, + "loss": 0.1174, + "step": 1412500 + }, + { + "epoch": 0.1, + "learning_rate": 4.840403050847458e-05, + "loss": 0.1202, + "step": 1413000 + }, + { + "epoch": 0.1, + "learning_rate": 4.840346666666667e-05, + "loss": 0.1295, + "step": 1413500 + }, + { + "epoch": 0.1, + "learning_rate": 4.840290169491526e-05, + "loss": 0.128, + "step": 1414000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8402336723163846e-05, + "loss": 0.1228, + "step": 1414500 + }, + { + "epoch": 0.1, + "learning_rate": 4.840177175141244e-05, + "loss": 0.1177, + "step": 1415000 + }, + { + "epoch": 0.1, + "learning_rate": 4.840120677966102e-05, + "loss": 0.1257, + "step": 1415500 + }, + { + "epoch": 0.1, + "learning_rate": 4.840064293785311e-05, + "loss": 0.12, + "step": 1416000 + }, + { + "epoch": 0.1, + "learning_rate": 4.84000779661017e-05, + "loss": 0.1296, + "step": 1416500 + }, + { + "epoch": 0.1, + "learning_rate": 4.839951299435028e-05, + "loss": 0.1142, + "step": 1417000 + }, + { + "epoch": 0.1, + "learning_rate": 4.839894802259887e-05, + "loss": 0.1222, + "step": 1417500 + }, + { + "epoch": 0.1, + "learning_rate": 4.839838305084746e-05, + "loss": 0.1265, + "step": 1418000 + }, + { + "epoch": 0.1, + "learning_rate": 4.839781920903955e-05, + "loss": 0.1183, + "step": 1418500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8397254237288134e-05, + "loss": 0.1298, + "step": 1419000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8396689265536725e-05, + "loss": 0.1334, + "step": 1419500 + }, + { + "epoch": 0.1, + "learning_rate": 4.839612429378531e-05, + "loss": 0.1139, + "step": 1420000 + }, + { + "epoch": 0.1, + "learning_rate": 4.83955593220339e-05, + "loss": 0.1181, + "step": 1420500 + }, + { + "epoch": 0.1, + "learning_rate": 4.839499435028249e-05, + "loss": 0.1294, + "step": 1421000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8394430508474584e-05, + "loss": 0.129, + "step": 1421500 + }, + { + "epoch": 0.1, + "learning_rate": 4.839386553672317e-05, + "loss": 0.1271, + "step": 1422000 + }, + { + "epoch": 0.1, + "learning_rate": 4.839330056497176e-05, + "loss": 0.123, + "step": 1422500 + }, + { + "epoch": 0.1, + "learning_rate": 4.839273559322034e-05, + "loss": 0.1195, + "step": 1423000 + }, + { + "epoch": 0.1, + "learning_rate": 4.839217175141243e-05, + "loss": 0.1188, + "step": 1423500 + }, + { + "epoch": 0.1, + "learning_rate": 4.839160677966102e-05, + "loss": 0.1203, + "step": 1424000 + }, + { + "epoch": 0.1, + "learning_rate": 4.839104180790961e-05, + "loss": 0.1279, + "step": 1424500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8390476836158194e-05, + "loss": 0.1183, + "step": 1425000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8389911864406785e-05, + "loss": 0.1181, + "step": 1425500 + }, + { + "epoch": 0.1, + "learning_rate": 4.838934802259887e-05, + "loss": 0.1233, + "step": 1426000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8388783050847456e-05, + "loss": 0.1238, + "step": 1426500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8388218079096046e-05, + "loss": 0.1249, + "step": 1427000 + }, + { + "epoch": 0.1, + "learning_rate": 4.838765423728814e-05, + "loss": 0.1191, + "step": 1427500 + }, + { + "epoch": 0.1, + "learning_rate": 4.838708926553673e-05, + "loss": 0.121, + "step": 1428000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8386524293785315e-05, + "loss": 0.131, + "step": 1428500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8385959322033905e-05, + "loss": 0.1272, + "step": 1429000 + }, + { + "epoch": 0.1, + "learning_rate": 4.838539435028249e-05, + "loss": 0.1227, + "step": 1429500 + }, + { + "epoch": 0.1, + "learning_rate": 4.838482937853108e-05, + "loss": 0.1236, + "step": 1430000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8384264406779663e-05, + "loss": 0.129, + "step": 1430500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8383699435028254e-05, + "loss": 0.1214, + "step": 1431000 + }, + { + "epoch": 0.1, + "learning_rate": 4.838313446327684e-05, + "loss": 0.123, + "step": 1431500 + }, + { + "epoch": 0.1, + "learning_rate": 4.838256949152543e-05, + "loss": 0.1246, + "step": 1432000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8382005649717516e-05, + "loss": 0.12, + "step": 1432500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8381440677966106e-05, + "loss": 0.1181, + "step": 1433000 + }, + { + "epoch": 0.1, + "learning_rate": 4.838087570621469e-05, + "loss": 0.1218, + "step": 1433500 + }, + { + "epoch": 0.1, + "learning_rate": 4.838031073446328e-05, + "loss": 0.1364, + "step": 1434000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8379745762711864e-05, + "loss": 0.1262, + "step": 1434500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8379180790960455e-05, + "loss": 0.1191, + "step": 1435000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8378615819209046e-05, + "loss": 0.1228, + "step": 1435500 + }, + { + "epoch": 0.1, + "learning_rate": 4.837805084745763e-05, + "loss": 0.1175, + "step": 1436000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8377487005649717e-05, + "loss": 0.1206, + "step": 1436500 + }, + { + "epoch": 0.1, + "learning_rate": 4.837692203389831e-05, + "loss": 0.1279, + "step": 1437000 + }, + { + "epoch": 0.1, + "learning_rate": 4.837635706214689e-05, + "loss": 0.1273, + "step": 1437500 + }, + { + "epoch": 0.1, + "learning_rate": 4.837579209039548e-05, + "loss": 0.13, + "step": 1438000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8375228248587575e-05, + "loss": 0.1241, + "step": 1438500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8374663276836166e-05, + "loss": 0.1217, + "step": 1439000 + }, + { + "epoch": 0.1, + "learning_rate": 4.837409830508475e-05, + "loss": 0.1265, + "step": 1439500 + }, + { + "epoch": 0.1, + "learning_rate": 4.837353333333334e-05, + "loss": 0.1179, + "step": 1440000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8372968361581924e-05, + "loss": 0.1211, + "step": 1440500 + }, + { + "epoch": 0.1, + "learning_rate": 4.837240451977401e-05, + "loss": 0.1279, + "step": 1441000 + }, + { + "epoch": 0.1, + "learning_rate": 4.83718395480226e-05, + "loss": 0.1198, + "step": 1441500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8371274576271186e-05, + "loss": 0.1239, + "step": 1442000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8370709604519776e-05, + "loss": 0.1265, + "step": 1442500 + }, + { + "epoch": 0.1, + "learning_rate": 4.837014463276837e-05, + "loss": 0.117, + "step": 1443000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8369580790960454e-05, + "loss": 0.1161, + "step": 1443500 + }, + { + "epoch": 0.1, + "learning_rate": 4.836901581920904e-05, + "loss": 0.1219, + "step": 1444000 + }, + { + "epoch": 0.1, + "learning_rate": 4.836845084745763e-05, + "loss": 0.1223, + "step": 1444500 + }, + { + "epoch": 0.1, + "learning_rate": 4.836788587570621e-05, + "loss": 0.1194, + "step": 1445000 + }, + { + "epoch": 0.1, + "learning_rate": 4.83673209039548e-05, + "loss": 0.1197, + "step": 1445500 + }, + { + "epoch": 0.1, + "learning_rate": 4.83667570621469e-05, + "loss": 0.1269, + "step": 1446000 + }, + { + "epoch": 0.1, + "learning_rate": 4.836619209039549e-05, + "loss": 0.1267, + "step": 1446500 + }, + { + "epoch": 0.1, + "learning_rate": 4.836562711864407e-05, + "loss": 0.1205, + "step": 1447000 + }, + { + "epoch": 0.1, + "learning_rate": 4.836506214689266e-05, + "loss": 0.1218, + "step": 1447500 + }, + { + "epoch": 0.1, + "learning_rate": 4.836449830508475e-05, + "loss": 0.1206, + "step": 1448000 + }, + { + "epoch": 0.1, + "learning_rate": 4.836393333333333e-05, + "loss": 0.1234, + "step": 1448500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8363368361581924e-05, + "loss": 0.1251, + "step": 1449000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8362803389830514e-05, + "loss": 0.1232, + "step": 1449500 + }, + { + "epoch": 0.1, + "learning_rate": 4.83622384180791e-05, + "loss": 0.1173, + "step": 1450000 + }, + { + "epoch": 0.1, + "learning_rate": 4.836167344632769e-05, + "loss": 0.1217, + "step": 1450500 + }, + { + "epoch": 0.1, + "learning_rate": 4.836110847457627e-05, + "loss": 0.1229, + "step": 1451000 + }, + { + "epoch": 0.1, + "learning_rate": 4.836054350282486e-05, + "loss": 0.1192, + "step": 1451500 + }, + { + "epoch": 0.1, + "learning_rate": 4.835997966101695e-05, + "loss": 0.1231, + "step": 1452000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8359415819209044e-05, + "loss": 0.1217, + "step": 1452500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8358850847457635e-05, + "loss": 0.1262, + "step": 1453000 + }, + { + "epoch": 0.1, + "learning_rate": 4.835828587570622e-05, + "loss": 0.1198, + "step": 1453500 + }, + { + "epoch": 0.1, + "learning_rate": 4.835772090395481e-05, + "loss": 0.1196, + "step": 1454000 + }, + { + "epoch": 0.1, + "learning_rate": 4.835715593220339e-05, + "loss": 0.1319, + "step": 1454500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8356590960451983e-05, + "loss": 0.1195, + "step": 1455000 + }, + { + "epoch": 0.1, + "learning_rate": 4.835602711864407e-05, + "loss": 0.1213, + "step": 1455500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8355462146892654e-05, + "loss": 0.1249, + "step": 1456000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8354897175141245e-05, + "loss": 0.1179, + "step": 1456500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8354332203389836e-05, + "loss": 0.1138, + "step": 1457000 + }, + { + "epoch": 0.1, + "learning_rate": 4.835376723163842e-05, + "loss": 0.1207, + "step": 1457500 + }, + { + "epoch": 0.1, + "learning_rate": 4.835320338983051e-05, + "loss": 0.1214, + "step": 1458000 + }, + { + "epoch": 0.1, + "learning_rate": 4.83526384180791e-05, + "loss": 0.1213, + "step": 1458500 + }, + { + "epoch": 0.1, + "learning_rate": 4.835207457627119e-05, + "loss": 0.1203, + "step": 1459000 + }, + { + "epoch": 0.1, + "learning_rate": 4.835150960451978e-05, + "loss": 0.1202, + "step": 1459500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8350944632768366e-05, + "loss": 0.1171, + "step": 1460000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8350379661016956e-05, + "loss": 0.1193, + "step": 1460500 + }, + { + "epoch": 0.1, + "learning_rate": 4.834981468926554e-05, + "loss": 0.1255, + "step": 1461000 + }, + { + "epoch": 0.1, + "learning_rate": 4.834924971751413e-05, + "loss": 0.1239, + "step": 1461500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8348684745762714e-05, + "loss": 0.1178, + "step": 1462000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8348119774011305e-05, + "loss": 0.1249, + "step": 1462500 + }, + { + "epoch": 0.1, + "learning_rate": 4.834755480225989e-05, + "loss": 0.1141, + "step": 1463000 + }, + { + "epoch": 0.1, + "learning_rate": 4.834699096045198e-05, + "loss": 0.1208, + "step": 1463500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8346425988700567e-05, + "loss": 0.1202, + "step": 1464000 + }, + { + "epoch": 0.1, + "learning_rate": 4.834586101694916e-05, + "loss": 0.126, + "step": 1464500 + }, + { + "epoch": 0.1, + "learning_rate": 4.834529604519774e-05, + "loss": 0.1202, + "step": 1465000 + }, + { + "epoch": 0.1, + "learning_rate": 4.834473107344633e-05, + "loss": 0.1261, + "step": 1465500 + }, + { + "epoch": 0.1, + "learning_rate": 4.834416723163842e-05, + "loss": 0.1177, + "step": 1466000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8343602259887e-05, + "loss": 0.1257, + "step": 1466500 + }, + { + "epoch": 0.1, + "learning_rate": 4.834303728813559e-05, + "loss": 0.1236, + "step": 1467000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8342472316384184e-05, + "loss": 0.1115, + "step": 1467500 + }, + { + "epoch": 0.1, + "learning_rate": 4.834190734463277e-05, + "loss": 0.1203, + "step": 1468000 + }, + { + "epoch": 0.1, + "learning_rate": 4.834134350282486e-05, + "loss": 0.1239, + "step": 1468500 + }, + { + "epoch": 0.1, + "learning_rate": 4.834077853107345e-05, + "loss": 0.1227, + "step": 1469000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8340213559322036e-05, + "loss": 0.1158, + "step": 1469500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8339648587570626e-05, + "loss": 0.1246, + "step": 1470000 + }, + { + "epoch": 0.1, + "learning_rate": 4.833908361581922e-05, + "loss": 0.1231, + "step": 1470500 + }, + { + "epoch": 0.1, + "learning_rate": 4.83385186440678e-05, + "loss": 0.1205, + "step": 1471000 + }, + { + "epoch": 0.1, + "learning_rate": 4.833795367231639e-05, + "loss": 0.1241, + "step": 1471500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8337388700564975e-05, + "loss": 0.1148, + "step": 1472000 + }, + { + "epoch": 0.1, + "learning_rate": 4.833682485875706e-05, + "loss": 0.122, + "step": 1472500 + }, + { + "epoch": 0.1, + "learning_rate": 4.833625988700565e-05, + "loss": 0.1296, + "step": 1473000 + }, + { + "epoch": 0.1, + "learning_rate": 4.833569491525424e-05, + "loss": 0.1212, + "step": 1473500 + }, + { + "epoch": 0.1, + "learning_rate": 4.833512994350283e-05, + "loss": 0.1196, + "step": 1474000 + }, + { + "epoch": 0.1, + "learning_rate": 4.833456497175142e-05, + "loss": 0.1153, + "step": 1474500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8334e-05, + "loss": 0.1286, + "step": 1475000 + }, + { + "epoch": 0.1, + "learning_rate": 4.833343502824859e-05, + "loss": 0.1258, + "step": 1475500 + }, + { + "epoch": 0.1, + "learning_rate": 4.833287118644068e-05, + "loss": 0.1275, + "step": 1476000 + }, + { + "epoch": 0.1, + "learning_rate": 4.833230621468926e-05, + "loss": 0.1229, + "step": 1476500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8331741242937854e-05, + "loss": 0.1139, + "step": 1477000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8331176271186445e-05, + "loss": 0.1179, + "step": 1477500 + }, + { + "epoch": 0.1, + "learning_rate": 4.833061242937854e-05, + "loss": 0.116, + "step": 1478000 + }, + { + "epoch": 0.1, + "learning_rate": 4.833004745762712e-05, + "loss": 0.117, + "step": 1478500 + }, + { + "epoch": 0.1, + "learning_rate": 4.832948248587571e-05, + "loss": 0.1211, + "step": 1479000 + }, + { + "epoch": 0.1, + "learning_rate": 4.83289175141243e-05, + "loss": 0.1201, + "step": 1479500 + }, + { + "epoch": 0.1, + "learning_rate": 4.832835254237289e-05, + "loss": 0.1207, + "step": 1480000 + }, + { + "epoch": 0.1, + "learning_rate": 4.832778757062147e-05, + "loss": 0.1168, + "step": 1480500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8327223728813565e-05, + "loss": 0.1187, + "step": 1481000 + }, + { + "epoch": 0.1, + "learning_rate": 4.832665875706215e-05, + "loss": 0.1208, + "step": 1481500 + }, + { + "epoch": 0.1, + "learning_rate": 4.832609378531074e-05, + "loss": 0.1281, + "step": 1482000 + }, + { + "epoch": 0.1, + "learning_rate": 4.832552881355932e-05, + "loss": 0.1306, + "step": 1482500 + }, + { + "epoch": 0.1, + "learning_rate": 4.832496497175141e-05, + "loss": 0.1196, + "step": 1483000 + }, + { + "epoch": 0.1, + "learning_rate": 4.83244e-05, + "loss": 0.125, + "step": 1483500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8323835028248585e-05, + "loss": 0.1208, + "step": 1484000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8323270056497175e-05, + "loss": 0.1169, + "step": 1484500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8322705084745766e-05, + "loss": 0.1197, + "step": 1485000 + }, + { + "epoch": 0.1, + "learning_rate": 4.832214124293786e-05, + "loss": 0.1247, + "step": 1485500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8321576271186444e-05, + "loss": 0.1168, + "step": 1486000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8321011299435034e-05, + "loss": 0.1282, + "step": 1486500 + }, + { + "epoch": 0.1, + "learning_rate": 4.832044632768362e-05, + "loss": 0.1162, + "step": 1487000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8319882485875705e-05, + "loss": 0.1191, + "step": 1487500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8319317514124296e-05, + "loss": 0.1243, + "step": 1488000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8318752542372887e-05, + "loss": 0.1137, + "step": 1488500 + }, + { + "epoch": 0.1, + "learning_rate": 4.831818757062147e-05, + "loss": 0.128, + "step": 1489000 + }, + { + "epoch": 0.1, + "learning_rate": 4.831762259887006e-05, + "loss": 0.1211, + "step": 1489500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8317057627118645e-05, + "loss": 0.1187, + "step": 1490000 + }, + { + "epoch": 0.1, + "learning_rate": 4.831649378531073e-05, + "loss": 0.1221, + "step": 1490500 + }, + { + "epoch": 0.1, + "learning_rate": 4.831592881355932e-05, + "loss": 0.1246, + "step": 1491000 + }, + { + "epoch": 0.1, + "learning_rate": 4.831536384180791e-05, + "loss": 0.1145, + "step": 1491500 + }, + { + "epoch": 0.1, + "learning_rate": 4.83147988700565e-05, + "loss": 0.1219, + "step": 1492000 + }, + { + "epoch": 0.1, + "learning_rate": 4.831423502824859e-05, + "loss": 0.129, + "step": 1492500 + }, + { + "epoch": 0.1, + "learning_rate": 4.831367005649718e-05, + "loss": 0.12, + "step": 1493000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8313105084745765e-05, + "loss": 0.1179, + "step": 1493500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8312540112994356e-05, + "loss": 0.1235, + "step": 1494000 + }, + { + "epoch": 0.1, + "learning_rate": 4.831197514124294e-05, + "loss": 0.1237, + "step": 1494500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8311411299435034e-05, + "loss": 0.112, + "step": 1495000 + }, + { + "epoch": 0.1, + "learning_rate": 4.831084632768362e-05, + "loss": 0.1169, + "step": 1495500 + }, + { + "epoch": 0.1, + "learning_rate": 4.831028135593221e-05, + "loss": 0.1266, + "step": 1496000 + }, + { + "epoch": 0.1, + "learning_rate": 4.830971638418079e-05, + "loss": 0.1272, + "step": 1496500 + }, + { + "epoch": 0.1, + "learning_rate": 4.830915141242938e-05, + "loss": 0.1219, + "step": 1497000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8308586440677966e-05, + "loss": 0.1229, + "step": 1497500 + }, + { + "epoch": 0.1, + "learning_rate": 4.830802146892656e-05, + "loss": 0.1158, + "step": 1498000 + }, + { + "epoch": 0.1, + "learning_rate": 4.830745649717514e-05, + "loss": 0.1169, + "step": 1498500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8306892655367235e-05, + "loss": 0.1218, + "step": 1499000 + }, + { + "epoch": 0.1, + "learning_rate": 4.830632881355933e-05, + "loss": 0.1238, + "step": 1499500 + }, + { + "epoch": 0.1, + "learning_rate": 4.830576384180791e-05, + "loss": 0.119, + "step": 1500000 + }, + { + "epoch": 0.1, + "learning_rate": 4.83051988700565e-05, + "loss": 0.1214, + "step": 1500500 + }, + { + "epoch": 0.1, + "learning_rate": 4.830463389830509e-05, + "loss": 0.1243, + "step": 1501000 + }, + { + "epoch": 0.1, + "learning_rate": 4.830406892655368e-05, + "loss": 0.1241, + "step": 1501500 + }, + { + "epoch": 0.1, + "learning_rate": 4.830350395480226e-05, + "loss": 0.1173, + "step": 1502000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8302940112994355e-05, + "loss": 0.1221, + "step": 1502500 + }, + { + "epoch": 0.1, + "learning_rate": 4.830237514124294e-05, + "loss": 0.1192, + "step": 1503000 + }, + { + "epoch": 0.1, + "learning_rate": 4.830181016949153e-05, + "loss": 0.1164, + "step": 1503500 + }, + { + "epoch": 0.1, + "learning_rate": 4.830124519774011e-05, + "loss": 0.1175, + "step": 1504000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8300680225988704e-05, + "loss": 0.1186, + "step": 1504500 + }, + { + "epoch": 0.1, + "learning_rate": 4.830011638418079e-05, + "loss": 0.115, + "step": 1505000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8299552542372885e-05, + "loss": 0.122, + "step": 1505500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8298987570621476e-05, + "loss": 0.1258, + "step": 1506000 + }, + { + "epoch": 0.1, + "learning_rate": 4.829842259887006e-05, + "loss": 0.1205, + "step": 1506500 + }, + { + "epoch": 0.1, + "learning_rate": 4.829785762711865e-05, + "loss": 0.1225, + "step": 1507000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8297292655367234e-05, + "loss": 0.1212, + "step": 1507500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8296727683615824e-05, + "loss": 0.1223, + "step": 1508000 + }, + { + "epoch": 0.1, + "learning_rate": 4.829616271186441e-05, + "loss": 0.118, + "step": 1508500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8295597740113e-05, + "loss": 0.1178, + "step": 1509000 + }, + { + "epoch": 0.1, + "learning_rate": 4.829503276836159e-05, + "loss": 0.1235, + "step": 1509500 + }, + { + "epoch": 0.1, + "learning_rate": 4.829446779661017e-05, + "loss": 0.1189, + "step": 1510000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8293902824858764e-05, + "loss": 0.1262, + "step": 1510500 + }, + { + "epoch": 0.1, + "learning_rate": 4.829333785310735e-05, + "loss": 0.1203, + "step": 1511000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8292774011299435e-05, + "loss": 0.1262, + "step": 1511500 + }, + { + "epoch": 0.1, + "learning_rate": 4.829221016949152e-05, + "loss": 0.1159, + "step": 1512000 + }, + { + "epoch": 0.1, + "learning_rate": 4.829164519774011e-05, + "loss": 0.1217, + "step": 1512500 + }, + { + "epoch": 0.1, + "learning_rate": 4.82910802259887e-05, + "loss": 0.1177, + "step": 1513000 + }, + { + "epoch": 0.1, + "learning_rate": 4.829051525423729e-05, + "loss": 0.1202, + "step": 1513500 + }, + { + "epoch": 0.1, + "learning_rate": 4.828995028248588e-05, + "loss": 0.1232, + "step": 1514000 + }, + { + "epoch": 0.1, + "learning_rate": 4.828938644067797e-05, + "loss": 0.1207, + "step": 1514500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8288821468926555e-05, + "loss": 0.124, + "step": 1515000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8288256497175146e-05, + "loss": 0.1231, + "step": 1515500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8287691525423737e-05, + "loss": 0.1193, + "step": 1516000 + }, + { + "epoch": 0.1, + "learning_rate": 4.828712655367232e-05, + "loss": 0.1197, + "step": 1516500 + }, + { + "epoch": 0.1, + "learning_rate": 4.828656158192091e-05, + "loss": 0.118, + "step": 1517000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8285997740113e-05, + "loss": 0.1251, + "step": 1517500 + }, + { + "epoch": 0.1, + "learning_rate": 4.828543276836158e-05, + "loss": 0.1222, + "step": 1518000 + }, + { + "epoch": 0.1, + "learning_rate": 4.828486779661017e-05, + "loss": 0.1182, + "step": 1518500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8284302824858756e-05, + "loss": 0.1269, + "step": 1519000 + }, + { + "epoch": 0.1, + "learning_rate": 4.828373785310735e-05, + "loss": 0.1132, + "step": 1519500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8283174011299434e-05, + "loss": 0.1217, + "step": 1520000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8282609039548025e-05, + "loss": 0.1223, + "step": 1520500 + }, + { + "epoch": 0.1, + "learning_rate": 4.828204406779661e-05, + "loss": 0.1294, + "step": 1521000 + }, + { + "epoch": 0.1, + "learning_rate": 4.82814790960452e-05, + "loss": 0.1234, + "step": 1521500 + }, + { + "epoch": 0.1, + "learning_rate": 4.828091638418079e-05, + "loss": 0.1274, + "step": 1522000 + }, + { + "epoch": 0.1, + "learning_rate": 4.828035141242938e-05, + "loss": 0.1231, + "step": 1522500 + }, + { + "epoch": 0.1, + "learning_rate": 4.827978644067797e-05, + "loss": 0.1205, + "step": 1523000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8279221468926555e-05, + "loss": 0.1204, + "step": 1523500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8278656497175145e-05, + "loss": 0.118, + "step": 1524000 + }, + { + "epoch": 0.1, + "learning_rate": 4.827809152542373e-05, + "loss": 0.1162, + "step": 1524500 + }, + { + "epoch": 0.1, + "learning_rate": 4.827752655367232e-05, + "loss": 0.115, + "step": 1525000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8276961581920903e-05, + "loss": 0.1184, + "step": 1525500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8276396610169494e-05, + "loss": 0.1197, + "step": 1526000 + }, + { + "epoch": 0.1, + "learning_rate": 4.827583276836158e-05, + "loss": 0.1185, + "step": 1526500 + }, + { + "epoch": 0.1, + "learning_rate": 4.827526779661017e-05, + "loss": 0.1236, + "step": 1527000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8274702824858756e-05, + "loss": 0.126, + "step": 1527500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8274137853107346e-05, + "loss": 0.119, + "step": 1528000 + }, + { + "epoch": 0.1, + "learning_rate": 4.827357288135593e-05, + "loss": 0.1219, + "step": 1528500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8273009039548024e-05, + "loss": 0.1195, + "step": 1529000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8272444067796615e-05, + "loss": 0.1149, + "step": 1529500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8271879096045205e-05, + "loss": 0.1268, + "step": 1530000 + }, + { + "epoch": 0.1, + "learning_rate": 4.827131412429379e-05, + "loss": 0.1181, + "step": 1530500 + }, + { + "epoch": 0.1, + "learning_rate": 4.827074915254238e-05, + "loss": 0.122, + "step": 1531000 + }, + { + "epoch": 0.1, + "learning_rate": 4.827018531073447e-05, + "loss": 0.1219, + "step": 1531500 + }, + { + "epoch": 0.1, + "learning_rate": 4.826962033898305e-05, + "loss": 0.1236, + "step": 1532000 + }, + { + "epoch": 0.1, + "learning_rate": 4.826905536723164e-05, + "loss": 0.1189, + "step": 1532500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8268490395480225e-05, + "loss": 0.1246, + "step": 1533000 + }, + { + "epoch": 0.1, + "learning_rate": 4.826792655367232e-05, + "loss": 0.1191, + "step": 1533500 + }, + { + "epoch": 0.1, + "learning_rate": 4.82673615819209e-05, + "loss": 0.1169, + "step": 1534000 + }, + { + "epoch": 0.1, + "learning_rate": 4.826679661016949e-05, + "loss": 0.1178, + "step": 1534500 + }, + { + "epoch": 0.1, + "learning_rate": 4.826623163841808e-05, + "loss": 0.1141, + "step": 1535000 + }, + { + "epoch": 0.1, + "learning_rate": 4.826566666666667e-05, + "loss": 0.117, + "step": 1535500 + }, + { + "epoch": 0.1, + "learning_rate": 4.826510282485876e-05, + "loss": 0.123, + "step": 1536000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8264537853107345e-05, + "loss": 0.1192, + "step": 1536500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8263972881355936e-05, + "loss": 0.1188, + "step": 1537000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8263407909604527e-05, + "loss": 0.12, + "step": 1537500 + }, + { + "epoch": 0.1, + "learning_rate": 4.826284293785311e-05, + "loss": 0.126, + "step": 1538000 + }, + { + "epoch": 0.1, + "learning_rate": 4.82622779661017e-05, + "loss": 0.1104, + "step": 1538500 + }, + { + "epoch": 0.1, + "learning_rate": 4.826171412429379e-05, + "loss": 0.1192, + "step": 1539000 + }, + { + "epoch": 0.1, + "learning_rate": 4.826114915254237e-05, + "loss": 0.1194, + "step": 1539500 + }, + { + "epoch": 0.1, + "learning_rate": 4.826058418079096e-05, + "loss": 0.1185, + "step": 1540000 + }, + { + "epoch": 0.1, + "learning_rate": 4.826001920903955e-05, + "loss": 0.118, + "step": 1540500 + }, + { + "epoch": 0.1, + "learning_rate": 4.825945423728814e-05, + "loss": 0.1306, + "step": 1541000 + }, + { + "epoch": 0.1, + "learning_rate": 4.825888926553673e-05, + "loss": 0.1145, + "step": 1541500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8258325423728815e-05, + "loss": 0.1224, + "step": 1542000 + }, + { + "epoch": 0.1, + "learning_rate": 4.82577604519774e-05, + "loss": 0.1216, + "step": 1542500 + }, + { + "epoch": 0.1, + "learning_rate": 4.825719548022599e-05, + "loss": 0.1214, + "step": 1543000 + }, + { + "epoch": 0.1, + "learning_rate": 4.825663050847457e-05, + "loss": 0.1185, + "step": 1543500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8256066666666674e-05, + "loss": 0.1193, + "step": 1544000 + }, + { + "epoch": 0.1, + "learning_rate": 4.825550169491526e-05, + "loss": 0.1239, + "step": 1544500 + }, + { + "epoch": 0.1, + "learning_rate": 4.825493672316385e-05, + "loss": 0.1165, + "step": 1545000 + }, + { + "epoch": 0.1, + "learning_rate": 4.825437175141243e-05, + "loss": 0.1236, + "step": 1545500 + }, + { + "epoch": 0.1, + "learning_rate": 4.825380790960452e-05, + "loss": 0.1202, + "step": 1546000 + }, + { + "epoch": 0.1, + "learning_rate": 4.825324293785311e-05, + "loss": 0.1173, + "step": 1546500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8252677966101693e-05, + "loss": 0.1168, + "step": 1547000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8252112994350284e-05, + "loss": 0.1184, + "step": 1547500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8251548022598875e-05, + "loss": 0.1164, + "step": 1548000 + }, + { + "epoch": 0.1, + "learning_rate": 4.825098305084746e-05, + "loss": 0.1226, + "step": 1548500 + }, + { + "epoch": 0.11, + "learning_rate": 4.825041807909605e-05, + "loss": 0.1161, + "step": 1549000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8249854237288136e-05, + "loss": 0.1192, + "step": 1549500 + }, + { + "epoch": 0.11, + "learning_rate": 4.824928926553673e-05, + "loss": 0.1196, + "step": 1550000 + }, + { + "epoch": 0.11, + "learning_rate": 4.824872429378531e-05, + "loss": 0.1211, + "step": 1550500 + }, + { + "epoch": 0.11, + "learning_rate": 4.82481593220339e-05, + "loss": 0.1226, + "step": 1551000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8247594350282485e-05, + "loss": 0.1203, + "step": 1551500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8247029378531076e-05, + "loss": 0.1183, + "step": 1552000 + }, + { + "epoch": 0.11, + "learning_rate": 4.824646553672317e-05, + "loss": 0.1224, + "step": 1552500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8245900564971753e-05, + "loss": 0.122, + "step": 1553000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8245335593220344e-05, + "loss": 0.1213, + "step": 1553500 + }, + { + "epoch": 0.11, + "learning_rate": 4.824477062146893e-05, + "loss": 0.1198, + "step": 1554000 + }, + { + "epoch": 0.11, + "learning_rate": 4.824420564971752e-05, + "loss": 0.1122, + "step": 1554500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8243641807909606e-05, + "loss": 0.1229, + "step": 1555000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8243076836158196e-05, + "loss": 0.1162, + "step": 1555500 + }, + { + "epoch": 0.11, + "learning_rate": 4.824251186440678e-05, + "loss": 0.1221, + "step": 1556000 + }, + { + "epoch": 0.11, + "learning_rate": 4.824194689265537e-05, + "loss": 0.1174, + "step": 1556500 + }, + { + "epoch": 0.11, + "learning_rate": 4.824138305084746e-05, + "loss": 0.1179, + "step": 1557000 + }, + { + "epoch": 0.11, + "learning_rate": 4.824081807909605e-05, + "loss": 0.1265, + "step": 1557500 + }, + { + "epoch": 0.11, + "learning_rate": 4.824025310734463e-05, + "loss": 0.122, + "step": 1558000 + }, + { + "epoch": 0.11, + "learning_rate": 4.823968813559322e-05, + "loss": 0.1266, + "step": 1558500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8239123163841807e-05, + "loss": 0.116, + "step": 1559000 + }, + { + "epoch": 0.11, + "learning_rate": 4.82385581920904e-05, + "loss": 0.1167, + "step": 1559500 + }, + { + "epoch": 0.11, + "learning_rate": 4.823799322033898e-05, + "loss": 0.114, + "step": 1560000 + }, + { + "epoch": 0.11, + "learning_rate": 4.823742824858757e-05, + "loss": 0.1193, + "step": 1560500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8236864406779665e-05, + "loss": 0.1167, + "step": 1561000 + }, + { + "epoch": 0.11, + "learning_rate": 4.823630056497175e-05, + "loss": 0.1195, + "step": 1561500 + }, + { + "epoch": 0.11, + "learning_rate": 4.823573559322034e-05, + "loss": 0.1165, + "step": 1562000 + }, + { + "epoch": 0.11, + "learning_rate": 4.823517062146893e-05, + "loss": 0.1195, + "step": 1562500 + }, + { + "epoch": 0.11, + "learning_rate": 4.823460564971752e-05, + "loss": 0.1221, + "step": 1563000 + }, + { + "epoch": 0.11, + "learning_rate": 4.82340406779661e-05, + "loss": 0.1205, + "step": 1563500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8233476836158195e-05, + "loss": 0.1172, + "step": 1564000 + }, + { + "epoch": 0.11, + "learning_rate": 4.823291186440678e-05, + "loss": 0.1252, + "step": 1564500 + }, + { + "epoch": 0.11, + "learning_rate": 4.823234689265537e-05, + "loss": 0.1229, + "step": 1565000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8231781920903954e-05, + "loss": 0.1229, + "step": 1565500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8231216949152544e-05, + "loss": 0.1185, + "step": 1566000 + }, + { + "epoch": 0.11, + "learning_rate": 4.823065197740113e-05, + "loss": 0.1166, + "step": 1566500 + }, + { + "epoch": 0.11, + "learning_rate": 4.823008700564972e-05, + "loss": 0.1234, + "step": 1567000 + }, + { + "epoch": 0.11, + "learning_rate": 4.82295220338983e-05, + "loss": 0.1232, + "step": 1567500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8228958192090396e-05, + "loss": 0.1199, + "step": 1568000 + }, + { + "epoch": 0.11, + "learning_rate": 4.822839435028249e-05, + "loss": 0.1146, + "step": 1568500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8227829378531074e-05, + "loss": 0.1144, + "step": 1569000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8227264406779665e-05, + "loss": 0.1214, + "step": 1569500 + }, + { + "epoch": 0.11, + "learning_rate": 4.822669943502825e-05, + "loss": 0.1175, + "step": 1570000 + }, + { + "epoch": 0.11, + "learning_rate": 4.822613446327684e-05, + "loss": 0.1151, + "step": 1570500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8225570621468926e-05, + "loss": 0.1279, + "step": 1571000 + }, + { + "epoch": 0.11, + "learning_rate": 4.822500564971752e-05, + "loss": 0.117, + "step": 1571500 + }, + { + "epoch": 0.11, + "learning_rate": 4.822444180790961e-05, + "loss": 0.1162, + "step": 1572000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8223876836158195e-05, + "loss": 0.1254, + "step": 1572500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8223311864406785e-05, + "loss": 0.1177, + "step": 1573000 + }, + { + "epoch": 0.11, + "learning_rate": 4.822274689265537e-05, + "loss": 0.1199, + "step": 1573500 + }, + { + "epoch": 0.11, + "learning_rate": 4.822218192090396e-05, + "loss": 0.1152, + "step": 1574000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8221616949152543e-05, + "loss": 0.1193, + "step": 1574500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8221051977401134e-05, + "loss": 0.1196, + "step": 1575000 + }, + { + "epoch": 0.11, + "learning_rate": 4.822048700564972e-05, + "loss": 0.1262, + "step": 1575500 + }, + { + "epoch": 0.11, + "learning_rate": 4.821992203389831e-05, + "loss": 0.1192, + "step": 1576000 + }, + { + "epoch": 0.11, + "learning_rate": 4.82193570621469e-05, + "loss": 0.1139, + "step": 1576500 + }, + { + "epoch": 0.11, + "learning_rate": 4.821879209039548e-05, + "loss": 0.1279, + "step": 1577000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8218227118644073e-05, + "loss": 0.1171, + "step": 1577500 + }, + { + "epoch": 0.11, + "learning_rate": 4.821766214689266e-05, + "loss": 0.1142, + "step": 1578000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8217098305084744e-05, + "loss": 0.1207, + "step": 1578500 + }, + { + "epoch": 0.11, + "learning_rate": 4.821653446327684e-05, + "loss": 0.1174, + "step": 1579000 + }, + { + "epoch": 0.11, + "learning_rate": 4.821596949152543e-05, + "loss": 0.122, + "step": 1579500 + }, + { + "epoch": 0.11, + "learning_rate": 4.821540451977401e-05, + "loss": 0.1228, + "step": 1580000 + }, + { + "epoch": 0.11, + "learning_rate": 4.82148395480226e-05, + "loss": 0.1181, + "step": 1580500 + }, + { + "epoch": 0.11, + "learning_rate": 4.821427457627119e-05, + "loss": 0.12, + "step": 1581000 + }, + { + "epoch": 0.11, + "learning_rate": 4.821371073446328e-05, + "loss": 0.1174, + "step": 1581500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8213145762711865e-05, + "loss": 0.1201, + "step": 1582000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8212580790960456e-05, + "loss": 0.1192, + "step": 1582500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8212015819209046e-05, + "loss": 0.1145, + "step": 1583000 + }, + { + "epoch": 0.11, + "learning_rate": 4.821145197740113e-05, + "loss": 0.1248, + "step": 1583500 + }, + { + "epoch": 0.11, + "learning_rate": 4.821088700564972e-05, + "loss": 0.1217, + "step": 1584000 + }, + { + "epoch": 0.11, + "learning_rate": 4.821032203389831e-05, + "loss": 0.1158, + "step": 1584500 + }, + { + "epoch": 0.11, + "learning_rate": 4.820975706214689e-05, + "loss": 0.1237, + "step": 1585000 + }, + { + "epoch": 0.11, + "learning_rate": 4.820919209039548e-05, + "loss": 0.1203, + "step": 1585500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8208627118644066e-05, + "loss": 0.1217, + "step": 1586000 + }, + { + "epoch": 0.11, + "learning_rate": 4.820806327683616e-05, + "loss": 0.1258, + "step": 1586500 + }, + { + "epoch": 0.11, + "learning_rate": 4.820749830508475e-05, + "loss": 0.1188, + "step": 1587000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8206933333333334e-05, + "loss": 0.1146, + "step": 1587500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8206368361581925e-05, + "loss": 0.1179, + "step": 1588000 + }, + { + "epoch": 0.11, + "learning_rate": 4.820580338983051e-05, + "loss": 0.1141, + "step": 1588500 + }, + { + "epoch": 0.11, + "learning_rate": 4.82052395480226e-05, + "loss": 0.1186, + "step": 1589000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8204674576271186e-05, + "loss": 0.1151, + "step": 1589500 + }, + { + "epoch": 0.11, + "learning_rate": 4.820410960451978e-05, + "loss": 0.116, + "step": 1590000 + }, + { + "epoch": 0.11, + "learning_rate": 4.820354463276837e-05, + "loss": 0.1158, + "step": 1590500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8202980790960455e-05, + "loss": 0.1229, + "step": 1591000 + }, + { + "epoch": 0.11, + "learning_rate": 4.820241581920904e-05, + "loss": 0.113, + "step": 1591500 + }, + { + "epoch": 0.11, + "learning_rate": 4.820185084745763e-05, + "loss": 0.1135, + "step": 1592000 + }, + { + "epoch": 0.11, + "learning_rate": 4.820128587570621e-05, + "loss": 0.1208, + "step": 1592500 + }, + { + "epoch": 0.11, + "learning_rate": 4.820072203389831e-05, + "loss": 0.1216, + "step": 1593000 + }, + { + "epoch": 0.11, + "learning_rate": 4.82001570621469e-05, + "loss": 0.1155, + "step": 1593500 + }, + { + "epoch": 0.11, + "learning_rate": 4.819959209039548e-05, + "loss": 0.1221, + "step": 1594000 + }, + { + "epoch": 0.11, + "learning_rate": 4.819902711864407e-05, + "loss": 0.1153, + "step": 1594500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8198462146892656e-05, + "loss": 0.1233, + "step": 1595000 + }, + { + "epoch": 0.11, + "learning_rate": 4.819789830508475e-05, + "loss": 0.1164, + "step": 1595500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8197333333333334e-05, + "loss": 0.1236, + "step": 1596000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8196768361581924e-05, + "loss": 0.1225, + "step": 1596500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8196203389830515e-05, + "loss": 0.1244, + "step": 1597000 + }, + { + "epoch": 0.11, + "learning_rate": 4.81956395480226e-05, + "loss": 0.1156, + "step": 1597500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8195074576271186e-05, + "loss": 0.1223, + "step": 1598000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8194509604519776e-05, + "loss": 0.1162, + "step": 1598500 + }, + { + "epoch": 0.11, + "learning_rate": 4.819394463276836e-05, + "loss": 0.112, + "step": 1599000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8193380790960454e-05, + "loss": 0.1128, + "step": 1599500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8192815819209045e-05, + "loss": 0.119, + "step": 1600000 + }, + { + "epoch": 0.11, + "learning_rate": 4.819225084745763e-05, + "loss": 0.1148, + "step": 1600500 + }, + { + "epoch": 0.11, + "learning_rate": 4.819168587570622e-05, + "loss": 0.1212, + "step": 1601000 + }, + { + "epoch": 0.11, + "learning_rate": 4.81911209039548e-05, + "loss": 0.1174, + "step": 1601500 + }, + { + "epoch": 0.11, + "learning_rate": 4.81905570621469e-05, + "loss": 0.1258, + "step": 1602000 + }, + { + "epoch": 0.11, + "learning_rate": 4.818999209039548e-05, + "loss": 0.1137, + "step": 1602500 + }, + { + "epoch": 0.11, + "learning_rate": 4.818942711864407e-05, + "loss": 0.1196, + "step": 1603000 + }, + { + "epoch": 0.11, + "learning_rate": 4.818886214689266e-05, + "loss": 0.1269, + "step": 1603500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8188297175141246e-05, + "loss": 0.1134, + "step": 1604000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8187732203389836e-05, + "loss": 0.1161, + "step": 1604500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8187168361581923e-05, + "loss": 0.1116, + "step": 1605000 + }, + { + "epoch": 0.11, + "learning_rate": 4.818660338983051e-05, + "loss": 0.118, + "step": 1605500 + }, + { + "epoch": 0.11, + "learning_rate": 4.81860384180791e-05, + "loss": 0.1224, + "step": 1606000 + }, + { + "epoch": 0.11, + "learning_rate": 4.818547344632768e-05, + "loss": 0.122, + "step": 1606500 + }, + { + "epoch": 0.11, + "learning_rate": 4.818490847457627e-05, + "loss": 0.1222, + "step": 1607000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8184344632768366e-05, + "loss": 0.1138, + "step": 1607500 + }, + { + "epoch": 0.11, + "learning_rate": 4.818377966101696e-05, + "loss": 0.1158, + "step": 1608000 + }, + { + "epoch": 0.11, + "learning_rate": 4.818321468926554e-05, + "loss": 0.1263, + "step": 1608500 + }, + { + "epoch": 0.11, + "learning_rate": 4.818264971751413e-05, + "loss": 0.1142, + "step": 1609000 + }, + { + "epoch": 0.11, + "learning_rate": 4.818208587570622e-05, + "loss": 0.1205, + "step": 1609500 + }, + { + "epoch": 0.11, + "learning_rate": 4.81815209039548e-05, + "loss": 0.1151, + "step": 1610000 + }, + { + "epoch": 0.11, + "learning_rate": 4.818095593220339e-05, + "loss": 0.1239, + "step": 1610500 + }, + { + "epoch": 0.11, + "learning_rate": 4.818039096045198e-05, + "loss": 0.1211, + "step": 1611000 + }, + { + "epoch": 0.11, + "learning_rate": 4.817982598870057e-05, + "loss": 0.1213, + "step": 1611500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8179262146892654e-05, + "loss": 0.1206, + "step": 1612000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8178697175141245e-05, + "loss": 0.128, + "step": 1612500 + }, + { + "epoch": 0.11, + "learning_rate": 4.817813220338983e-05, + "loss": 0.1171, + "step": 1613000 + }, + { + "epoch": 0.11, + "learning_rate": 4.817756723163842e-05, + "loss": 0.1192, + "step": 1613500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8177002259887e-05, + "loss": 0.1212, + "step": 1614000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8176437288135594e-05, + "loss": 0.1238, + "step": 1614500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8175872316384184e-05, + "loss": 0.1256, + "step": 1615000 + }, + { + "epoch": 0.11, + "learning_rate": 4.817530847457628e-05, + "loss": 0.118, + "step": 1615500 + }, + { + "epoch": 0.11, + "learning_rate": 4.817474350282486e-05, + "loss": 0.1156, + "step": 1616000 + }, + { + "epoch": 0.11, + "learning_rate": 4.817417853107345e-05, + "loss": 0.1169, + "step": 1616500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8173613559322036e-05, + "loss": 0.1172, + "step": 1617000 + }, + { + "epoch": 0.11, + "learning_rate": 4.817304971751413e-05, + "loss": 0.1145, + "step": 1617500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8172484745762714e-05, + "loss": 0.1269, + "step": 1618000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8171919774011305e-05, + "loss": 0.1164, + "step": 1618500 + }, + { + "epoch": 0.11, + "learning_rate": 4.817135480225989e-05, + "loss": 0.1252, + "step": 1619000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8170790960451976e-05, + "loss": 0.1277, + "step": 1619500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8170225988700566e-05, + "loss": 0.1163, + "step": 1620000 + }, + { + "epoch": 0.11, + "learning_rate": 4.816966101694915e-05, + "loss": 0.1289, + "step": 1620500 + }, + { + "epoch": 0.11, + "learning_rate": 4.816909604519774e-05, + "loss": 0.1191, + "step": 1621000 + }, + { + "epoch": 0.11, + "learning_rate": 4.816853107344633e-05, + "loss": 0.1192, + "step": 1621500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8167967231638425e-05, + "loss": 0.1162, + "step": 1622000 + }, + { + "epoch": 0.11, + "learning_rate": 4.816740225988701e-05, + "loss": 0.1214, + "step": 1622500 + }, + { + "epoch": 0.11, + "learning_rate": 4.81668372881356e-05, + "loss": 0.119, + "step": 1623000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8166272316384184e-05, + "loss": 0.12, + "step": 1623500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8165707344632774e-05, + "loss": 0.1136, + "step": 1624000 + }, + { + "epoch": 0.11, + "learning_rate": 4.816514237288136e-05, + "loss": 0.1182, + "step": 1624500 + }, + { + "epoch": 0.11, + "learning_rate": 4.816457740112995e-05, + "loss": 0.1253, + "step": 1625000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8164013559322036e-05, + "loss": 0.1191, + "step": 1625500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8163448587570626e-05, + "loss": 0.1126, + "step": 1626000 + }, + { + "epoch": 0.11, + "learning_rate": 4.816288361581921e-05, + "loss": 0.1194, + "step": 1626500 + }, + { + "epoch": 0.11, + "learning_rate": 4.81623186440678e-05, + "loss": 0.1189, + "step": 1627000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8161753672316384e-05, + "loss": 0.1174, + "step": 1627500 + }, + { + "epoch": 0.11, + "learning_rate": 4.816118983050848e-05, + "loss": 0.1301, + "step": 1628000 + }, + { + "epoch": 0.11, + "learning_rate": 4.816062485875706e-05, + "loss": 0.1177, + "step": 1628500 + }, + { + "epoch": 0.11, + "learning_rate": 4.816005988700565e-05, + "loss": 0.1235, + "step": 1629000 + }, + { + "epoch": 0.11, + "learning_rate": 4.815949491525424e-05, + "loss": 0.1153, + "step": 1629500 + }, + { + "epoch": 0.11, + "learning_rate": 4.815892994350283e-05, + "loss": 0.1131, + "step": 1630000 + }, + { + "epoch": 0.11, + "learning_rate": 4.815836497175141e-05, + "loss": 0.1279, + "step": 1630500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8157801129943505e-05, + "loss": 0.1186, + "step": 1631000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8157236158192096e-05, + "loss": 0.1207, + "step": 1631500 + }, + { + "epoch": 0.11, + "learning_rate": 4.815667118644068e-05, + "loss": 0.1164, + "step": 1632000 + }, + { + "epoch": 0.11, + "learning_rate": 4.815610621468927e-05, + "loss": 0.1166, + "step": 1632500 + }, + { + "epoch": 0.11, + "learning_rate": 4.815554237288136e-05, + "loss": 0.1194, + "step": 1633000 + }, + { + "epoch": 0.11, + "learning_rate": 4.815497740112995e-05, + "loss": 0.1128, + "step": 1633500 + }, + { + "epoch": 0.11, + "learning_rate": 4.815441242937853e-05, + "loss": 0.1189, + "step": 1634000 + }, + { + "epoch": 0.11, + "learning_rate": 4.815384745762712e-05, + "loss": 0.1232, + "step": 1634500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8153282485875706e-05, + "loss": 0.1242, + "step": 1635000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8152717514124297e-05, + "loss": 0.1265, + "step": 1635500 + }, + { + "epoch": 0.11, + "learning_rate": 4.815215254237289e-05, + "loss": 0.1226, + "step": 1636000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8151588700564974e-05, + "loss": 0.1247, + "step": 1636500 + }, + { + "epoch": 0.11, + "learning_rate": 4.815102372881356e-05, + "loss": 0.114, + "step": 1637000 + }, + { + "epoch": 0.11, + "learning_rate": 4.815045875706215e-05, + "loss": 0.1172, + "step": 1637500 + }, + { + "epoch": 0.11, + "learning_rate": 4.814989378531073e-05, + "loss": 0.1144, + "step": 1638000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8149329943502827e-05, + "loss": 0.1224, + "step": 1638500 + }, + { + "epoch": 0.11, + "learning_rate": 4.814876497175142e-05, + "loss": 0.1209, + "step": 1639000 + }, + { + "epoch": 0.11, + "learning_rate": 4.814820000000001e-05, + "loss": 0.1154, + "step": 1639500 + }, + { + "epoch": 0.11, + "learning_rate": 4.814763502824859e-05, + "loss": 0.114, + "step": 1640000 + }, + { + "epoch": 0.11, + "learning_rate": 4.814707005649718e-05, + "loss": 0.121, + "step": 1640500 + }, + { + "epoch": 0.11, + "learning_rate": 4.814650621468927e-05, + "loss": 0.1188, + "step": 1641000 + }, + { + "epoch": 0.11, + "learning_rate": 4.814594124293785e-05, + "loss": 0.122, + "step": 1641500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8145376271186444e-05, + "loss": 0.121, + "step": 1642000 + }, + { + "epoch": 0.11, + "learning_rate": 4.814481242937853e-05, + "loss": 0.1225, + "step": 1642500 + }, + { + "epoch": 0.11, + "learning_rate": 4.814424745762712e-05, + "loss": 0.1223, + "step": 1643000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8143682485875705e-05, + "loss": 0.1227, + "step": 1643500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8143117514124296e-05, + "loss": 0.1175, + "step": 1644000 + }, + { + "epoch": 0.11, + "learning_rate": 4.814255254237288e-05, + "loss": 0.1125, + "step": 1644500 + }, + { + "epoch": 0.11, + "learning_rate": 4.814198757062147e-05, + "loss": 0.119, + "step": 1645000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8141422598870054e-05, + "loss": 0.1156, + "step": 1645500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8140857627118645e-05, + "loss": 0.1141, + "step": 1646000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8140292655367235e-05, + "loss": 0.1143, + "step": 1646500 + }, + { + "epoch": 0.11, + "learning_rate": 4.813972768361582e-05, + "loss": 0.122, + "step": 1647000 + }, + { + "epoch": 0.11, + "learning_rate": 4.813916271186441e-05, + "loss": 0.1127, + "step": 1647500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8138598870056504e-05, + "loss": 0.1156, + "step": 1648000 + }, + { + "epoch": 0.11, + "learning_rate": 4.813803389830509e-05, + "loss": 0.1073, + "step": 1648500 + }, + { + "epoch": 0.11, + "learning_rate": 4.813746892655368e-05, + "loss": 0.1198, + "step": 1649000 + }, + { + "epoch": 0.11, + "learning_rate": 4.813690395480226e-05, + "loss": 0.1213, + "step": 1649500 + }, + { + "epoch": 0.11, + "learning_rate": 4.813633898305085e-05, + "loss": 0.12, + "step": 1650000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8135774011299436e-05, + "loss": 0.1209, + "step": 1650500 + }, + { + "epoch": 0.11, + "learning_rate": 4.813521016949153e-05, + "loss": 0.1182, + "step": 1651000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8134645197740114e-05, + "loss": 0.1226, + "step": 1651500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8134080225988705e-05, + "loss": 0.1147, + "step": 1652000 + }, + { + "epoch": 0.11, + "learning_rate": 4.813351525423729e-05, + "loss": 0.1247, + "step": 1652500 + }, + { + "epoch": 0.11, + "learning_rate": 4.813295028248588e-05, + "loss": 0.1162, + "step": 1653000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8132386440677966e-05, + "loss": 0.1151, + "step": 1653500 + }, + { + "epoch": 0.11, + "learning_rate": 4.813182146892656e-05, + "loss": 0.1171, + "step": 1654000 + }, + { + "epoch": 0.11, + "learning_rate": 4.813125649717514e-05, + "loss": 0.1257, + "step": 1654500 + }, + { + "epoch": 0.11, + "learning_rate": 4.813069152542373e-05, + "loss": 0.1167, + "step": 1655000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8130127683615825e-05, + "loss": 0.1228, + "step": 1655500 + }, + { + "epoch": 0.11, + "learning_rate": 4.812956271186441e-05, + "loss": 0.1199, + "step": 1656000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8128997740113e-05, + "loss": 0.1157, + "step": 1656500 + }, + { + "epoch": 0.11, + "learning_rate": 4.812843276836158e-05, + "loss": 0.1174, + "step": 1657000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8127867796610174e-05, + "loss": 0.1142, + "step": 1657500 + }, + { + "epoch": 0.11, + "learning_rate": 4.812730282485876e-05, + "loss": 0.117, + "step": 1658000 + }, + { + "epoch": 0.11, + "learning_rate": 4.812673785310735e-05, + "loss": 0.1248, + "step": 1658500 + }, + { + "epoch": 0.11, + "learning_rate": 4.812617288135593e-05, + "loss": 0.1263, + "step": 1659000 + }, + { + "epoch": 0.11, + "learning_rate": 4.812561016949152e-05, + "loss": 0.1248, + "step": 1659500 + }, + { + "epoch": 0.11, + "learning_rate": 4.812504519774011e-05, + "loss": 0.1179, + "step": 1660000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8124480225988704e-05, + "loss": 0.1152, + "step": 1660500 + }, + { + "epoch": 0.11, + "learning_rate": 4.812391525423729e-05, + "loss": 0.1199, + "step": 1661000 + }, + { + "epoch": 0.11, + "learning_rate": 4.812335028248588e-05, + "loss": 0.1195, + "step": 1661500 + }, + { + "epoch": 0.11, + "learning_rate": 4.812278644067797e-05, + "loss": 0.1247, + "step": 1662000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8122221468926556e-05, + "loss": 0.1119, + "step": 1662500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8121656497175147e-05, + "loss": 0.1141, + "step": 1663000 + }, + { + "epoch": 0.11, + "learning_rate": 4.812109152542374e-05, + "loss": 0.1174, + "step": 1663500 + }, + { + "epoch": 0.11, + "learning_rate": 4.812052655367232e-05, + "loss": 0.1273, + "step": 1664000 + }, + { + "epoch": 0.11, + "learning_rate": 4.811996158192091e-05, + "loss": 0.1119, + "step": 1664500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8119396610169495e-05, + "loss": 0.1173, + "step": 1665000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8118831638418086e-05, + "loss": 0.1173, + "step": 1665500 + }, + { + "epoch": 0.11, + "learning_rate": 4.811826779661017e-05, + "loss": 0.1103, + "step": 1666000 + }, + { + "epoch": 0.11, + "learning_rate": 4.811770395480226e-05, + "loss": 0.1122, + "step": 1666500 + }, + { + "epoch": 0.11, + "learning_rate": 4.811713898305085e-05, + "loss": 0.1192, + "step": 1667000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8116574011299435e-05, + "loss": 0.1115, + "step": 1667500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8116009039548025e-05, + "loss": 0.1221, + "step": 1668000 + }, + { + "epoch": 0.11, + "learning_rate": 4.811544519774012e-05, + "loss": 0.1198, + "step": 1668500 + }, + { + "epoch": 0.11, + "learning_rate": 4.81148802259887e-05, + "loss": 0.1175, + "step": 1669000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8114315254237294e-05, + "loss": 0.1192, + "step": 1669500 + }, + { + "epoch": 0.11, + "learning_rate": 4.811375028248588e-05, + "loss": 0.1074, + "step": 1670000 + }, + { + "epoch": 0.11, + "learning_rate": 4.811318531073447e-05, + "loss": 0.1179, + "step": 1670500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8112621468926555e-05, + "loss": 0.1195, + "step": 1671000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8112056497175146e-05, + "loss": 0.1147, + "step": 1671500 + }, + { + "epoch": 0.11, + "learning_rate": 4.811149152542373e-05, + "loss": 0.1165, + "step": 1672000 + }, + { + "epoch": 0.11, + "learning_rate": 4.811092655367232e-05, + "loss": 0.1207, + "step": 1672500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8110361581920904e-05, + "loss": 0.1157, + "step": 1673000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8109796610169495e-05, + "loss": 0.1181, + "step": 1673500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8109231638418085e-05, + "loss": 0.1155, + "step": 1674000 + }, + { + "epoch": 0.11, + "learning_rate": 4.810866779661017e-05, + "loss": 0.1143, + "step": 1674500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8108102824858756e-05, + "loss": 0.1183, + "step": 1675000 + }, + { + "epoch": 0.11, + "learning_rate": 4.810753785310735e-05, + "loss": 0.1165, + "step": 1675500 + }, + { + "epoch": 0.11, + "learning_rate": 4.810697288135593e-05, + "loss": 0.1179, + "step": 1676000 + }, + { + "epoch": 0.11, + "learning_rate": 4.810640790960452e-05, + "loss": 0.1175, + "step": 1676500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8105842937853105e-05, + "loss": 0.1168, + "step": 1677000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8105279096045206e-05, + "loss": 0.1183, + "step": 1677500 + }, + { + "epoch": 0.11, + "learning_rate": 4.810471412429379e-05, + "loss": 0.1232, + "step": 1678000 + }, + { + "epoch": 0.11, + "learning_rate": 4.810414915254238e-05, + "loss": 0.1181, + "step": 1678500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8103584180790964e-05, + "loss": 0.1139, + "step": 1679000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8103019209039554e-05, + "loss": 0.1205, + "step": 1679500 + }, + { + "epoch": 0.11, + "learning_rate": 4.810245536723164e-05, + "loss": 0.1286, + "step": 1680000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8101890395480226e-05, + "loss": 0.1157, + "step": 1680500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8101325423728816e-05, + "loss": 0.1179, + "step": 1681000 + }, + { + "epoch": 0.11, + "learning_rate": 4.810076045197741e-05, + "loss": 0.1221, + "step": 1681500 + }, + { + "epoch": 0.11, + "learning_rate": 4.810019548022599e-05, + "loss": 0.1142, + "step": 1682000 + }, + { + "epoch": 0.11, + "learning_rate": 4.809963050847458e-05, + "loss": 0.121, + "step": 1682500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8099065536723165e-05, + "loss": 0.1181, + "step": 1683000 + }, + { + "epoch": 0.11, + "learning_rate": 4.809850169491525e-05, + "loss": 0.112, + "step": 1683500 + }, + { + "epoch": 0.11, + "learning_rate": 4.809793672316384e-05, + "loss": 0.1158, + "step": 1684000 + }, + { + "epoch": 0.11, + "learning_rate": 4.809737175141243e-05, + "loss": 0.1093, + "step": 1684500 + }, + { + "epoch": 0.11, + "learning_rate": 4.809680677966102e-05, + "loss": 0.1109, + "step": 1685000 + }, + { + "epoch": 0.11, + "learning_rate": 4.809624293785311e-05, + "loss": 0.1152, + "step": 1685500 + }, + { + "epoch": 0.11, + "learning_rate": 4.80956779661017e-05, + "loss": 0.1151, + "step": 1686000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8095112994350285e-05, + "loss": 0.1061, + "step": 1686500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8094548022598876e-05, + "loss": 0.1216, + "step": 1687000 + }, + { + "epoch": 0.11, + "learning_rate": 4.809398305084746e-05, + "loss": 0.1229, + "step": 1687500 + }, + { + "epoch": 0.11, + "learning_rate": 4.809341807909605e-05, + "loss": 0.1216, + "step": 1688000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8092853107344634e-05, + "loss": 0.1171, + "step": 1688500 + }, + { + "epoch": 0.11, + "learning_rate": 4.809228926553673e-05, + "loss": 0.1169, + "step": 1689000 + }, + { + "epoch": 0.11, + "learning_rate": 4.809172429378531e-05, + "loss": 0.1157, + "step": 1689500 + }, + { + "epoch": 0.11, + "learning_rate": 4.80911593220339e-05, + "loss": 0.1211, + "step": 1690000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8090594350282486e-05, + "loss": 0.1181, + "step": 1690500 + }, + { + "epoch": 0.11, + "learning_rate": 4.809002937853108e-05, + "loss": 0.1138, + "step": 1691000 + }, + { + "epoch": 0.11, + "learning_rate": 4.808946440677966e-05, + "loss": 0.1169, + "step": 1691500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8088900564971755e-05, + "loss": 0.1156, + "step": 1692000 + }, + { + "epoch": 0.11, + "learning_rate": 4.808833559322034e-05, + "loss": 0.1164, + "step": 1692500 + }, + { + "epoch": 0.11, + "learning_rate": 4.808777062146893e-05, + "loss": 0.1227, + "step": 1693000 + }, + { + "epoch": 0.11, + "learning_rate": 4.808720564971751e-05, + "loss": 0.121, + "step": 1693500 + }, + { + "epoch": 0.11, + "learning_rate": 4.808664180790961e-05, + "loss": 0.1151, + "step": 1694000 + }, + { + "epoch": 0.11, + "learning_rate": 4.80860768361582e-05, + "loss": 0.1173, + "step": 1694500 + }, + { + "epoch": 0.11, + "learning_rate": 4.808551186440678e-05, + "loss": 0.1244, + "step": 1695000 + }, + { + "epoch": 0.11, + "learning_rate": 4.808494689265537e-05, + "loss": 0.1206, + "step": 1695500 + }, + { + "epoch": 0.11, + "learning_rate": 4.808438192090396e-05, + "loss": 0.1174, + "step": 1696000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8083816949152546e-05, + "loss": 0.1222, + "step": 1696500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8083253107344633e-05, + "loss": 0.1183, + "step": 1697000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8082688135593224e-05, + "loss": 0.1183, + "step": 1697500 + }, + { + "epoch": 0.12, + "learning_rate": 4.808212316384181e-05, + "loss": 0.1176, + "step": 1698000 + }, + { + "epoch": 0.12, + "learning_rate": 4.80815581920904e-05, + "loss": 0.1142, + "step": 1698500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8080994350282486e-05, + "loss": 0.1226, + "step": 1699000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8080429378531076e-05, + "loss": 0.1158, + "step": 1699500 + }, + { + "epoch": 0.12, + "learning_rate": 4.807986440677966e-05, + "loss": 0.115, + "step": 1700000 + }, + { + "epoch": 0.12, + "learning_rate": 4.807929943502825e-05, + "loss": 0.1157, + "step": 1700500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8078734463276834e-05, + "loss": 0.115, + "step": 1701000 + }, + { + "epoch": 0.12, + "learning_rate": 4.807817062146893e-05, + "loss": 0.1209, + "step": 1701500 + }, + { + "epoch": 0.12, + "learning_rate": 4.807760564971752e-05, + "loss": 0.1174, + "step": 1702000 + }, + { + "epoch": 0.12, + "learning_rate": 4.807704067796611e-05, + "loss": 0.1122, + "step": 1702500 + }, + { + "epoch": 0.12, + "learning_rate": 4.807647570621469e-05, + "loss": 0.1149, + "step": 1703000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8075910734463284e-05, + "loss": 0.1168, + "step": 1703500 + }, + { + "epoch": 0.12, + "learning_rate": 4.807534689265537e-05, + "loss": 0.1217, + "step": 1704000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8074781920903955e-05, + "loss": 0.1174, + "step": 1704500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8074216949152546e-05, + "loss": 0.1175, + "step": 1705000 + }, + { + "epoch": 0.12, + "learning_rate": 4.807365197740113e-05, + "loss": 0.1174, + "step": 1705500 + }, + { + "epoch": 0.12, + "learning_rate": 4.807308700564972e-05, + "loss": 0.1125, + "step": 1706000 + }, + { + "epoch": 0.12, + "learning_rate": 4.807252316384181e-05, + "loss": 0.1154, + "step": 1706500 + }, + { + "epoch": 0.12, + "learning_rate": 4.80719581920904e-05, + "loss": 0.125, + "step": 1707000 + }, + { + "epoch": 0.12, + "learning_rate": 4.807139322033898e-05, + "loss": 0.1189, + "step": 1707500 + }, + { + "epoch": 0.12, + "learning_rate": 4.807082824858757e-05, + "loss": 0.118, + "step": 1708000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8070264406779666e-05, + "loss": 0.1151, + "step": 1708500 + }, + { + "epoch": 0.12, + "learning_rate": 4.806969943502825e-05, + "loss": 0.1171, + "step": 1709000 + }, + { + "epoch": 0.12, + "learning_rate": 4.806913446327684e-05, + "loss": 0.1159, + "step": 1709500 + }, + { + "epoch": 0.12, + "learning_rate": 4.806856949152543e-05, + "loss": 0.1173, + "step": 1710000 + }, + { + "epoch": 0.12, + "learning_rate": 4.806800564971752e-05, + "loss": 0.1238, + "step": 1710500 + }, + { + "epoch": 0.12, + "learning_rate": 4.80674406779661e-05, + "loss": 0.1183, + "step": 1711000 + }, + { + "epoch": 0.12, + "learning_rate": 4.806687570621469e-05, + "loss": 0.1164, + "step": 1711500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8066310734463276e-05, + "loss": 0.1176, + "step": 1712000 + }, + { + "epoch": 0.12, + "learning_rate": 4.806574576271187e-05, + "loss": 0.1213, + "step": 1712500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8065181920903954e-05, + "loss": 0.1149, + "step": 1713000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8064616949152545e-05, + "loss": 0.1206, + "step": 1713500 + }, + { + "epoch": 0.12, + "learning_rate": 4.806405197740113e-05, + "loss": 0.1167, + "step": 1714000 + }, + { + "epoch": 0.12, + "learning_rate": 4.806348700564972e-05, + "loss": 0.1231, + "step": 1714500 + }, + { + "epoch": 0.12, + "learning_rate": 4.80629220338983e-05, + "loss": 0.1142, + "step": 1715000 + }, + { + "epoch": 0.12, + "learning_rate": 4.80623581920904e-05, + "loss": 0.1193, + "step": 1715500 + }, + { + "epoch": 0.12, + "learning_rate": 4.806179322033899e-05, + "loss": 0.1125, + "step": 1716000 + }, + { + "epoch": 0.12, + "learning_rate": 4.806122824858758e-05, + "loss": 0.1106, + "step": 1716500 + }, + { + "epoch": 0.12, + "learning_rate": 4.806066327683616e-05, + "loss": 0.1162, + "step": 1717000 + }, + { + "epoch": 0.12, + "learning_rate": 4.806009943502825e-05, + "loss": 0.1191, + "step": 1717500 + }, + { + "epoch": 0.12, + "learning_rate": 4.805953446327684e-05, + "loss": 0.1214, + "step": 1718000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8058969491525424e-05, + "loss": 0.1205, + "step": 1718500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8058404519774014e-05, + "loss": 0.117, + "step": 1719000 + }, + { + "epoch": 0.12, + "learning_rate": 4.80578395480226e-05, + "loss": 0.1177, + "step": 1719500 + }, + { + "epoch": 0.12, + "learning_rate": 4.805727570621469e-05, + "loss": 0.1159, + "step": 1720000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8056710734463276e-05, + "loss": 0.1218, + "step": 1720500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8056145762711866e-05, + "loss": 0.1108, + "step": 1721000 + }, + { + "epoch": 0.12, + "learning_rate": 4.805558079096045e-05, + "loss": 0.1118, + "step": 1721500 + }, + { + "epoch": 0.12, + "learning_rate": 4.805501581920904e-05, + "loss": 0.1145, + "step": 1722000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8054450847457624e-05, + "loss": 0.1133, + "step": 1722500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8053887005649725e-05, + "loss": 0.1215, + "step": 1723000 + }, + { + "epoch": 0.12, + "learning_rate": 4.805332203389831e-05, + "loss": 0.1167, + "step": 1723500 + }, + { + "epoch": 0.12, + "learning_rate": 4.80527570621469e-05, + "loss": 0.1122, + "step": 1724000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8052192090395483e-05, + "loss": 0.1234, + "step": 1724500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8051627118644074e-05, + "loss": 0.1221, + "step": 1725000 + }, + { + "epoch": 0.12, + "learning_rate": 4.805106327683616e-05, + "loss": 0.1112, + "step": 1725500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8050498305084745e-05, + "loss": 0.1214, + "step": 1726000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8049933333333336e-05, + "loss": 0.11, + "step": 1726500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8049368361581926e-05, + "loss": 0.1202, + "step": 1727000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8048804519774013e-05, + "loss": 0.1188, + "step": 1727500 + }, + { + "epoch": 0.12, + "learning_rate": 4.80482395480226e-05, + "loss": 0.1087, + "step": 1728000 + }, + { + "epoch": 0.12, + "learning_rate": 4.804767457627119e-05, + "loss": 0.1138, + "step": 1728500 + }, + { + "epoch": 0.12, + "learning_rate": 4.804710960451977e-05, + "loss": 0.1237, + "step": 1729000 + }, + { + "epoch": 0.12, + "learning_rate": 4.804654463276836e-05, + "loss": 0.1137, + "step": 1729500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8045979661016946e-05, + "loss": 0.1173, + "step": 1730000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8045414689265537e-05, + "loss": 0.1191, + "step": 1730500 + }, + { + "epoch": 0.12, + "learning_rate": 4.804485084745763e-05, + "loss": 0.119, + "step": 1731000 + }, + { + "epoch": 0.12, + "learning_rate": 4.804428587570622e-05, + "loss": 0.1161, + "step": 1731500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8043720903954805e-05, + "loss": 0.117, + "step": 1732000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8043155932203396e-05, + "loss": 0.1218, + "step": 1732500 + }, + { + "epoch": 0.12, + "learning_rate": 4.804259209039548e-05, + "loss": 0.1187, + "step": 1733000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8042027118644067e-05, + "loss": 0.1178, + "step": 1733500 + }, + { + "epoch": 0.12, + "learning_rate": 4.804146214689266e-05, + "loss": 0.1143, + "step": 1734000 + }, + { + "epoch": 0.12, + "learning_rate": 4.804089717514125e-05, + "loss": 0.1269, + "step": 1734500 + }, + { + "epoch": 0.12, + "learning_rate": 4.804033220338983e-05, + "loss": 0.123, + "step": 1735000 + }, + { + "epoch": 0.12, + "learning_rate": 4.803976836158192e-05, + "loss": 0.1183, + "step": 1735500 + }, + { + "epoch": 0.12, + "learning_rate": 4.803920338983051e-05, + "loss": 0.1134, + "step": 1736000 + }, + { + "epoch": 0.12, + "learning_rate": 4.803863841807909e-05, + "loss": 0.1126, + "step": 1736500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8038073446327684e-05, + "loss": 0.112, + "step": 1737000 + }, + { + "epoch": 0.12, + "learning_rate": 4.803750960451978e-05, + "loss": 0.1124, + "step": 1737500 + }, + { + "epoch": 0.12, + "learning_rate": 4.803694463276837e-05, + "loss": 0.1169, + "step": 1738000 + }, + { + "epoch": 0.12, + "learning_rate": 4.803637966101695e-05, + "loss": 0.1193, + "step": 1738500 + }, + { + "epoch": 0.12, + "learning_rate": 4.803581468926554e-05, + "loss": 0.1141, + "step": 1739000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8035249717514126e-05, + "loss": 0.1183, + "step": 1739500 + }, + { + "epoch": 0.12, + "learning_rate": 4.803468474576272e-05, + "loss": 0.1139, + "step": 1740000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8034120903954804e-05, + "loss": 0.1194, + "step": 1740500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8033555932203395e-05, + "loss": 0.1168, + "step": 1741000 + }, + { + "epoch": 0.12, + "learning_rate": 4.803299096045198e-05, + "loss": 0.1198, + "step": 1741500 + }, + { + "epoch": 0.12, + "learning_rate": 4.803242598870057e-05, + "loss": 0.1214, + "step": 1742000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8031862146892656e-05, + "loss": 0.1139, + "step": 1742500 + }, + { + "epoch": 0.12, + "learning_rate": 4.803129830508475e-05, + "loss": 0.1225, + "step": 1743000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8030733333333334e-05, + "loss": 0.1152, + "step": 1743500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8030168361581925e-05, + "loss": 0.1187, + "step": 1744000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8029603389830515e-05, + "loss": 0.1142, + "step": 1744500 + }, + { + "epoch": 0.12, + "learning_rate": 4.80290384180791e-05, + "loss": 0.126, + "step": 1745000 + }, + { + "epoch": 0.12, + "learning_rate": 4.802847344632769e-05, + "loss": 0.1155, + "step": 1745500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8027908474576274e-05, + "loss": 0.1178, + "step": 1746000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8027343502824864e-05, + "loss": 0.1185, + "step": 1746500 + }, + { + "epoch": 0.12, + "learning_rate": 4.802677853107345e-05, + "loss": 0.1199, + "step": 1747000 + }, + { + "epoch": 0.12, + "learning_rate": 4.802621468926554e-05, + "loss": 0.1159, + "step": 1747500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8025649717514126e-05, + "loss": 0.1146, + "step": 1748000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8025084745762716e-05, + "loss": 0.1176, + "step": 1748500 + }, + { + "epoch": 0.12, + "learning_rate": 4.80245197740113e-05, + "loss": 0.1173, + "step": 1749000 + }, + { + "epoch": 0.12, + "learning_rate": 4.802395480225989e-05, + "loss": 0.1194, + "step": 1749500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8023389830508474e-05, + "loss": 0.1241, + "step": 1750000 + }, + { + "epoch": 0.12, + "learning_rate": 4.802282598870056e-05, + "loss": 0.1134, + "step": 1750500 + }, + { + "epoch": 0.12, + "learning_rate": 4.802226101694915e-05, + "loss": 0.119, + "step": 1751000 + }, + { + "epoch": 0.12, + "learning_rate": 4.802169604519774e-05, + "loss": 0.1182, + "step": 1751500 + }, + { + "epoch": 0.12, + "learning_rate": 4.802113107344633e-05, + "loss": 0.1222, + "step": 1752000 + }, + { + "epoch": 0.12, + "learning_rate": 4.802056723163842e-05, + "loss": 0.1138, + "step": 1752500 + }, + { + "epoch": 0.12, + "learning_rate": 4.802000225988701e-05, + "loss": 0.1231, + "step": 1753000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8019437288135595e-05, + "loss": 0.1174, + "step": 1753500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8018872316384186e-05, + "loss": 0.1099, + "step": 1754000 + }, + { + "epoch": 0.12, + "learning_rate": 4.801830734463277e-05, + "loss": 0.1214, + "step": 1754500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8017743502824863e-05, + "loss": 0.119, + "step": 1755000 + }, + { + "epoch": 0.12, + "learning_rate": 4.801717853107345e-05, + "loss": 0.1124, + "step": 1755500 + }, + { + "epoch": 0.12, + "learning_rate": 4.801661355932204e-05, + "loss": 0.1199, + "step": 1756000 + }, + { + "epoch": 0.12, + "learning_rate": 4.801604858757062e-05, + "loss": 0.1252, + "step": 1756500 + }, + { + "epoch": 0.12, + "learning_rate": 4.801548361581921e-05, + "loss": 0.1173, + "step": 1757000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8014918644067796e-05, + "loss": 0.1178, + "step": 1757500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8014353672316387e-05, + "loss": 0.1178, + "step": 1758000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8013789830508474e-05, + "loss": 0.1108, + "step": 1758500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8013224858757064e-05, + "loss": 0.1182, + "step": 1759000 + }, + { + "epoch": 0.12, + "learning_rate": 4.801265988700565e-05, + "loss": 0.1205, + "step": 1759500 + }, + { + "epoch": 0.12, + "learning_rate": 4.801209491525424e-05, + "loss": 0.116, + "step": 1760000 + }, + { + "epoch": 0.12, + "learning_rate": 4.801152994350282e-05, + "loss": 0.121, + "step": 1760500 + }, + { + "epoch": 0.12, + "learning_rate": 4.801096497175141e-05, + "loss": 0.1096, + "step": 1761000 + }, + { + "epoch": 0.12, + "learning_rate": 4.801040112994351e-05, + "loss": 0.1179, + "step": 1761500 + }, + { + "epoch": 0.12, + "learning_rate": 4.80098361581921e-05, + "loss": 0.1159, + "step": 1762000 + }, + { + "epoch": 0.12, + "learning_rate": 4.800927118644068e-05, + "loss": 0.1144, + "step": 1762500 + }, + { + "epoch": 0.12, + "learning_rate": 4.800870621468927e-05, + "loss": 0.1234, + "step": 1763000 + }, + { + "epoch": 0.12, + "learning_rate": 4.800814237288136e-05, + "loss": 0.1134, + "step": 1763500 + }, + { + "epoch": 0.12, + "learning_rate": 4.800757740112994e-05, + "loss": 0.1118, + "step": 1764000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8007012429378534e-05, + "loss": 0.1139, + "step": 1764500 + }, + { + "epoch": 0.12, + "learning_rate": 4.800644745762712e-05, + "loss": 0.1107, + "step": 1765000 + }, + { + "epoch": 0.12, + "learning_rate": 4.800588248587571e-05, + "loss": 0.1119, + "step": 1765500 + }, + { + "epoch": 0.12, + "learning_rate": 4.80053175141243e-05, + "loss": 0.1132, + "step": 1766000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8004753672316386e-05, + "loss": 0.1182, + "step": 1766500 + }, + { + "epoch": 0.12, + "learning_rate": 4.800418870056497e-05, + "loss": 0.1146, + "step": 1767000 + }, + { + "epoch": 0.12, + "learning_rate": 4.800362372881356e-05, + "loss": 0.1207, + "step": 1767500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8003058757062144e-05, + "loss": 0.1155, + "step": 1768000 + }, + { + "epoch": 0.12, + "learning_rate": 4.800249491525424e-05, + "loss": 0.1213, + "step": 1768500 + }, + { + "epoch": 0.12, + "learning_rate": 4.800192994350283e-05, + "loss": 0.1174, + "step": 1769000 + }, + { + "epoch": 0.12, + "learning_rate": 4.800136497175142e-05, + "loss": 0.114, + "step": 1769500 + }, + { + "epoch": 0.12, + "learning_rate": 4.80008e-05, + "loss": 0.1094, + "step": 1770000 + }, + { + "epoch": 0.12, + "learning_rate": 4.800023615819209e-05, + "loss": 0.1135, + "step": 1770500 + }, + { + "epoch": 0.12, + "learning_rate": 4.799967118644068e-05, + "loss": 0.1156, + "step": 1771000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7999106214689265e-05, + "loss": 0.1156, + "step": 1771500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7998541242937855e-05, + "loss": 0.1208, + "step": 1772000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7997976271186446e-05, + "loss": 0.1108, + "step": 1772500 + }, + { + "epoch": 0.12, + "learning_rate": 4.799741242937853e-05, + "loss": 0.1133, + "step": 1773000 + }, + { + "epoch": 0.12, + "learning_rate": 4.799684745762712e-05, + "loss": 0.1139, + "step": 1773500 + }, + { + "epoch": 0.12, + "learning_rate": 4.799628248587571e-05, + "loss": 0.1138, + "step": 1774000 + }, + { + "epoch": 0.12, + "learning_rate": 4.799571751412429e-05, + "loss": 0.1139, + "step": 1774500 + }, + { + "epoch": 0.12, + "learning_rate": 4.799515254237288e-05, + "loss": 0.1148, + "step": 1775000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7994587570621466e-05, + "loss": 0.1108, + "step": 1775500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7994023728813566e-05, + "loss": 0.1228, + "step": 1776000 + }, + { + "epoch": 0.12, + "learning_rate": 4.799345875706215e-05, + "loss": 0.1173, + "step": 1776500 + }, + { + "epoch": 0.12, + "learning_rate": 4.799289491525424e-05, + "loss": 0.1089, + "step": 1777000 + }, + { + "epoch": 0.12, + "learning_rate": 4.799232994350283e-05, + "loss": 0.1164, + "step": 1777500 + }, + { + "epoch": 0.12, + "learning_rate": 4.799176497175141e-05, + "loss": 0.1129, + "step": 1778000 + }, + { + "epoch": 0.12, + "learning_rate": 4.79912e-05, + "loss": 0.1246, + "step": 1778500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7990635028248586e-05, + "loss": 0.1099, + "step": 1779000 + }, + { + "epoch": 0.12, + "learning_rate": 4.799007005649718e-05, + "loss": 0.1197, + "step": 1779500 + }, + { + "epoch": 0.12, + "learning_rate": 4.798950508474577e-05, + "loss": 0.1188, + "step": 1780000 + }, + { + "epoch": 0.12, + "learning_rate": 4.798894011299435e-05, + "loss": 0.114, + "step": 1780500 + }, + { + "epoch": 0.12, + "learning_rate": 4.798837627118644e-05, + "loss": 0.125, + "step": 1781000 + }, + { + "epoch": 0.12, + "learning_rate": 4.798781129943503e-05, + "loss": 0.1155, + "step": 1781500 + }, + { + "epoch": 0.12, + "learning_rate": 4.798724632768361e-05, + "loss": 0.1229, + "step": 1782000 + }, + { + "epoch": 0.12, + "learning_rate": 4.79866813559322e-05, + "loss": 0.1192, + "step": 1782500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7986116384180794e-05, + "loss": 0.1071, + "step": 1783000 + }, + { + "epoch": 0.12, + "learning_rate": 4.798555254237289e-05, + "loss": 0.1131, + "step": 1783500 + }, + { + "epoch": 0.12, + "learning_rate": 4.798498757062147e-05, + "loss": 0.1166, + "step": 1784000 + }, + { + "epoch": 0.12, + "learning_rate": 4.798442259887006e-05, + "loss": 0.1141, + "step": 1784500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7983857627118646e-05, + "loss": 0.1195, + "step": 1785000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7983292655367237e-05, + "loss": 0.1188, + "step": 1785500 + }, + { + "epoch": 0.12, + "learning_rate": 4.798272768361582e-05, + "loss": 0.1163, + "step": 1786000 + }, + { + "epoch": 0.12, + "learning_rate": 4.798216271186441e-05, + "loss": 0.1177, + "step": 1786500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7981597740113e-05, + "loss": 0.1189, + "step": 1787000 + }, + { + "epoch": 0.12, + "learning_rate": 4.798103389830509e-05, + "loss": 0.1076, + "step": 1787500 + }, + { + "epoch": 0.12, + "learning_rate": 4.798046892655367e-05, + "loss": 0.1167, + "step": 1788000 + }, + { + "epoch": 0.12, + "learning_rate": 4.797990395480226e-05, + "loss": 0.1204, + "step": 1788500 + }, + { + "epoch": 0.12, + "learning_rate": 4.797933898305085e-05, + "loss": 0.1182, + "step": 1789000 + }, + { + "epoch": 0.12, + "learning_rate": 4.797877514124294e-05, + "loss": 0.1224, + "step": 1789500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7978210169491525e-05, + "loss": 0.1168, + "step": 1790000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7977645197740115e-05, + "loss": 0.1139, + "step": 1790500 + }, + { + "epoch": 0.12, + "learning_rate": 4.79770802259887e-05, + "loss": 0.1139, + "step": 1791000 + }, + { + "epoch": 0.12, + "learning_rate": 4.797651525423729e-05, + "loss": 0.1202, + "step": 1791500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7975951412429384e-05, + "loss": 0.1168, + "step": 1792000 + }, + { + "epoch": 0.12, + "learning_rate": 4.797538644067797e-05, + "loss": 0.1146, + "step": 1792500 + }, + { + "epoch": 0.12, + "learning_rate": 4.797482146892656e-05, + "loss": 0.1119, + "step": 1793000 + }, + { + "epoch": 0.12, + "learning_rate": 4.797425649717515e-05, + "loss": 0.1098, + "step": 1793500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7973692655367236e-05, + "loss": 0.1155, + "step": 1794000 + }, + { + "epoch": 0.12, + "learning_rate": 4.797312768361582e-05, + "loss": 0.1189, + "step": 1794500 + }, + { + "epoch": 0.12, + "learning_rate": 4.797256271186441e-05, + "loss": 0.1162, + "step": 1795000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7971997740112994e-05, + "loss": 0.1074, + "step": 1795500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7971432768361585e-05, + "loss": 0.1102, + "step": 1796000 + }, + { + "epoch": 0.12, + "learning_rate": 4.797086779661017e-05, + "loss": 0.116, + "step": 1796500 + }, + { + "epoch": 0.12, + "learning_rate": 4.797030282485876e-05, + "loss": 0.1113, + "step": 1797000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7969738983050846e-05, + "loss": 0.1089, + "step": 1797500 + }, + { + "epoch": 0.12, + "learning_rate": 4.796917401129944e-05, + "loss": 0.1174, + "step": 1798000 + }, + { + "epoch": 0.12, + "learning_rate": 4.796860903954802e-05, + "loss": 0.1126, + "step": 1798500 + }, + { + "epoch": 0.12, + "learning_rate": 4.796804406779661e-05, + "loss": 0.1155, + "step": 1799000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7967480225988705e-05, + "loss": 0.1134, + "step": 1799500 + }, + { + "epoch": 0.12, + "learning_rate": 4.796691525423729e-05, + "loss": 0.1159, + "step": 1800000 + }, + { + "epoch": 0.12, + "learning_rate": 4.796635028248588e-05, + "loss": 0.1205, + "step": 1800500 + }, + { + "epoch": 0.12, + "learning_rate": 4.796578531073447e-05, + "loss": 0.1073, + "step": 1801000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7965220338983054e-05, + "loss": 0.1139, + "step": 1801500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7964655367231644e-05, + "loss": 0.1182, + "step": 1802000 + }, + { + "epoch": 0.12, + "learning_rate": 4.796409152542373e-05, + "loss": 0.1211, + "step": 1802500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7963526553672315e-05, + "loss": 0.1122, + "step": 1803000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7962961581920906e-05, + "loss": 0.1181, + "step": 1803500 + }, + { + "epoch": 0.12, + "learning_rate": 4.79623966101695e-05, + "loss": 0.123, + "step": 1804000 + }, + { + "epoch": 0.12, + "learning_rate": 4.796183163841808e-05, + "loss": 0.1153, + "step": 1804500 + }, + { + "epoch": 0.12, + "learning_rate": 4.796126779661017e-05, + "loss": 0.1221, + "step": 1805000 + }, + { + "epoch": 0.12, + "learning_rate": 4.796070282485876e-05, + "loss": 0.1116, + "step": 1805500 + }, + { + "epoch": 0.12, + "learning_rate": 4.796013785310734e-05, + "loss": 0.1208, + "step": 1806000 + }, + { + "epoch": 0.12, + "learning_rate": 4.795957288135593e-05, + "loss": 0.1095, + "step": 1806500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7959007909604516e-05, + "loss": 0.1242, + "step": 1807000 + }, + { + "epoch": 0.12, + "learning_rate": 4.795844406779662e-05, + "loss": 0.119, + "step": 1807500 + }, + { + "epoch": 0.12, + "learning_rate": 4.79578790960452e-05, + "loss": 0.1122, + "step": 1808000 + }, + { + "epoch": 0.12, + "learning_rate": 4.795731412429379e-05, + "loss": 0.1173, + "step": 1808500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7956749152542375e-05, + "loss": 0.1115, + "step": 1809000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7956184180790966e-05, + "loss": 0.1193, + "step": 1809500 + }, + { + "epoch": 0.12, + "learning_rate": 4.795562033898305e-05, + "loss": 0.1143, + "step": 1810000 + }, + { + "epoch": 0.12, + "learning_rate": 4.795505536723164e-05, + "loss": 0.1143, + "step": 1810500 + }, + { + "epoch": 0.12, + "learning_rate": 4.795449039548023e-05, + "loss": 0.116, + "step": 1811000 + }, + { + "epoch": 0.12, + "learning_rate": 4.795392542372882e-05, + "loss": 0.1093, + "step": 1811500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7953361581920905e-05, + "loss": 0.1125, + "step": 1812000 + }, + { + "epoch": 0.12, + "learning_rate": 4.795279661016949e-05, + "loss": 0.1088, + "step": 1812500 + }, + { + "epoch": 0.12, + "learning_rate": 4.795223163841808e-05, + "loss": 0.1232, + "step": 1813000 + }, + { + "epoch": 0.12, + "learning_rate": 4.795166666666667e-05, + "loss": 0.1235, + "step": 1813500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7951101694915254e-05, + "loss": 0.119, + "step": 1814000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7950536723163845e-05, + "loss": 0.1162, + "step": 1814500 + }, + { + "epoch": 0.12, + "learning_rate": 4.794997288135594e-05, + "loss": 0.1121, + "step": 1815000 + }, + { + "epoch": 0.12, + "learning_rate": 4.794940790960452e-05, + "loss": 0.1205, + "step": 1815500 + }, + { + "epoch": 0.12, + "learning_rate": 4.794884293785311e-05, + "loss": 0.1174, + "step": 1816000 + }, + { + "epoch": 0.12, + "learning_rate": 4.79482779661017e-05, + "loss": 0.1134, + "step": 1816500 + }, + { + "epoch": 0.12, + "learning_rate": 4.794771299435029e-05, + "loss": 0.1123, + "step": 1817000 + }, + { + "epoch": 0.12, + "learning_rate": 4.794714802259887e-05, + "loss": 0.1134, + "step": 1817500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7946584180790965e-05, + "loss": 0.1179, + "step": 1818000 + }, + { + "epoch": 0.12, + "learning_rate": 4.794601920903955e-05, + "loss": 0.1095, + "step": 1818500 + }, + { + "epoch": 0.12, + "learning_rate": 4.794545423728814e-05, + "loss": 0.1196, + "step": 1819000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7944889265536723e-05, + "loss": 0.1202, + "step": 1819500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7944324293785314e-05, + "loss": 0.1212, + "step": 1820000 + }, + { + "epoch": 0.12, + "learning_rate": 4.79437593220339e-05, + "loss": 0.1156, + "step": 1820500 + }, + { + "epoch": 0.12, + "learning_rate": 4.794319548022599e-05, + "loss": 0.1161, + "step": 1821000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7942630508474576e-05, + "loss": 0.1195, + "step": 1821500 + }, + { + "epoch": 0.12, + "learning_rate": 4.794206666666667e-05, + "loss": 0.1162, + "step": 1822000 + }, + { + "epoch": 0.12, + "learning_rate": 4.794150169491526e-05, + "loss": 0.121, + "step": 1822500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7940936723163844e-05, + "loss": 0.1129, + "step": 1823000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7940371751412435e-05, + "loss": 0.1177, + "step": 1823500 + }, + { + "epoch": 0.12, + "learning_rate": 4.793980677966102e-05, + "loss": 0.1134, + "step": 1824000 + }, + { + "epoch": 0.12, + "learning_rate": 4.793924180790961e-05, + "loss": 0.1112, + "step": 1824500 + }, + { + "epoch": 0.12, + "learning_rate": 4.793867683615819e-05, + "loss": 0.1168, + "step": 1825000 + }, + { + "epoch": 0.12, + "learning_rate": 4.793811186440678e-05, + "loss": 0.1171, + "step": 1825500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7937546892655374e-05, + "loss": 0.1152, + "step": 1826000 + }, + { + "epoch": 0.12, + "learning_rate": 4.793698192090396e-05, + "loss": 0.1153, + "step": 1826500 + }, + { + "epoch": 0.12, + "learning_rate": 4.793641694915255e-05, + "loss": 0.1177, + "step": 1827000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7935853107344636e-05, + "loss": 0.1153, + "step": 1827500 + }, + { + "epoch": 0.12, + "learning_rate": 4.793528813559322e-05, + "loss": 0.1226, + "step": 1828000 + }, + { + "epoch": 0.12, + "learning_rate": 4.793472316384181e-05, + "loss": 0.1136, + "step": 1828500 + }, + { + "epoch": 0.12, + "learning_rate": 4.79341581920904e-05, + "loss": 0.1168, + "step": 1829000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7933593220338984e-05, + "loss": 0.1151, + "step": 1829500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7933028248587575e-05, + "loss": 0.1159, + "step": 1830000 + }, + { + "epoch": 0.12, + "learning_rate": 4.793246327683616e-05, + "loss": 0.114, + "step": 1830500 + }, + { + "epoch": 0.12, + "learning_rate": 4.793189830508475e-05, + "loss": 0.1121, + "step": 1831000 + }, + { + "epoch": 0.12, + "learning_rate": 4.793133559322034e-05, + "loss": 0.1157, + "step": 1831500 + }, + { + "epoch": 0.12, + "learning_rate": 4.793077062146893e-05, + "loss": 0.1131, + "step": 1832000 + }, + { + "epoch": 0.12, + "learning_rate": 4.793020564971752e-05, + "loss": 0.1145, + "step": 1832500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7929640677966105e-05, + "loss": 0.1161, + "step": 1833000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7929075706214695e-05, + "loss": 0.1154, + "step": 1833500 + }, + { + "epoch": 0.12, + "learning_rate": 4.792851073446328e-05, + "loss": 0.1086, + "step": 1834000 + }, + { + "epoch": 0.12, + "learning_rate": 4.792794576271187e-05, + "loss": 0.1218, + "step": 1834500 + }, + { + "epoch": 0.12, + "learning_rate": 4.792738192090396e-05, + "loss": 0.1147, + "step": 1835000 + }, + { + "epoch": 0.12, + "learning_rate": 4.792681694915254e-05, + "loss": 0.1161, + "step": 1835500 + }, + { + "epoch": 0.12, + "learning_rate": 4.792625197740113e-05, + "loss": 0.1168, + "step": 1836000 + }, + { + "epoch": 0.12, + "learning_rate": 4.792568700564972e-05, + "loss": 0.1184, + "step": 1836500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7925122033898306e-05, + "loss": 0.1242, + "step": 1837000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7924557062146896e-05, + "loss": 0.116, + "step": 1837500 + }, + { + "epoch": 0.12, + "learning_rate": 4.792399209039548e-05, + "loss": 0.1113, + "step": 1838000 + }, + { + "epoch": 0.12, + "learning_rate": 4.792342711864407e-05, + "loss": 0.1201, + "step": 1838500 + }, + { + "epoch": 0.12, + "learning_rate": 4.792286327683616e-05, + "loss": 0.1115, + "step": 1839000 + }, + { + "epoch": 0.12, + "learning_rate": 4.792229943502825e-05, + "loss": 0.1166, + "step": 1839500 + }, + { + "epoch": 0.12, + "learning_rate": 4.792173446327684e-05, + "loss": 0.1124, + "step": 1840000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7921169491525426e-05, + "loss": 0.1132, + "step": 1840500 + }, + { + "epoch": 0.12, + "learning_rate": 4.792060451977402e-05, + "loss": 0.1086, + "step": 1841000 + }, + { + "epoch": 0.12, + "learning_rate": 4.79200395480226e-05, + "loss": 0.1177, + "step": 1841500 + }, + { + "epoch": 0.12, + "learning_rate": 4.791947457627119e-05, + "loss": 0.1137, + "step": 1842000 + }, + { + "epoch": 0.12, + "learning_rate": 4.791891073446328e-05, + "loss": 0.1166, + "step": 1842500 + }, + { + "epoch": 0.12, + "learning_rate": 4.791834576271187e-05, + "loss": 0.1118, + "step": 1843000 + }, + { + "epoch": 0.12, + "learning_rate": 4.791778079096045e-05, + "loss": 0.1139, + "step": 1843500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7917215819209043e-05, + "loss": 0.1066, + "step": 1844000 + }, + { + "epoch": 0.13, + "learning_rate": 4.791665084745763e-05, + "loss": 0.1133, + "step": 1844500 + }, + { + "epoch": 0.13, + "learning_rate": 4.791608587570622e-05, + "loss": 0.1135, + "step": 1845000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7915522033898305e-05, + "loss": 0.1048, + "step": 1845500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7914957062146896e-05, + "loss": 0.1167, + "step": 1846000 + }, + { + "epoch": 0.13, + "learning_rate": 4.791439209039548e-05, + "loss": 0.111, + "step": 1846500 + }, + { + "epoch": 0.13, + "learning_rate": 4.791382711864407e-05, + "loss": 0.1107, + "step": 1847000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7913262146892654e-05, + "loss": 0.1099, + "step": 1847500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7912697175141244e-05, + "loss": 0.1143, + "step": 1848000 + }, + { + "epoch": 0.13, + "learning_rate": 4.791213333333334e-05, + "loss": 0.1101, + "step": 1848500 + }, + { + "epoch": 0.13, + "learning_rate": 4.791156836158192e-05, + "loss": 0.1085, + "step": 1849000 + }, + { + "epoch": 0.13, + "learning_rate": 4.791100338983051e-05, + "loss": 0.1203, + "step": 1849500 + }, + { + "epoch": 0.13, + "learning_rate": 4.79104384180791e-05, + "loss": 0.1232, + "step": 1850000 + }, + { + "epoch": 0.13, + "learning_rate": 4.790987457627119e-05, + "loss": 0.1124, + "step": 1850500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7909309604519774e-05, + "loss": 0.1137, + "step": 1851000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7908744632768365e-05, + "loss": 0.1173, + "step": 1851500 + }, + { + "epoch": 0.13, + "learning_rate": 4.790817966101695e-05, + "loss": 0.1106, + "step": 1852000 + }, + { + "epoch": 0.13, + "learning_rate": 4.790761468926554e-05, + "loss": 0.1192, + "step": 1852500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7907050847457627e-05, + "loss": 0.1058, + "step": 1853000 + }, + { + "epoch": 0.13, + "learning_rate": 4.790648587570622e-05, + "loss": 0.1153, + "step": 1853500 + }, + { + "epoch": 0.13, + "learning_rate": 4.79059209039548e-05, + "loss": 0.1153, + "step": 1854000 + }, + { + "epoch": 0.13, + "learning_rate": 4.790535593220339e-05, + "loss": 0.1232, + "step": 1854500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7904792090395485e-05, + "loss": 0.1185, + "step": 1855000 + }, + { + "epoch": 0.13, + "learning_rate": 4.790422711864407e-05, + "loss": 0.1198, + "step": 1855500 + }, + { + "epoch": 0.13, + "learning_rate": 4.790366214689266e-05, + "loss": 0.1114, + "step": 1856000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7903097175141244e-05, + "loss": 0.1189, + "step": 1856500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7902532203389834e-05, + "loss": 0.1166, + "step": 1857000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7901967231638425e-05, + "loss": 0.1155, + "step": 1857500 + }, + { + "epoch": 0.13, + "learning_rate": 4.790140338983051e-05, + "loss": 0.1124, + "step": 1858000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7900838418079096e-05, + "loss": 0.1131, + "step": 1858500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7900273446327686e-05, + "loss": 0.1117, + "step": 1859000 + }, + { + "epoch": 0.13, + "learning_rate": 4.789970847457627e-05, + "loss": 0.119, + "step": 1859500 + }, + { + "epoch": 0.13, + "learning_rate": 4.789914350282486e-05, + "loss": 0.1182, + "step": 1860000 + }, + { + "epoch": 0.13, + "learning_rate": 4.789857966101695e-05, + "loss": 0.1179, + "step": 1860500 + }, + { + "epoch": 0.13, + "learning_rate": 4.789801468926554e-05, + "loss": 0.1181, + "step": 1861000 + }, + { + "epoch": 0.13, + "learning_rate": 4.789744971751412e-05, + "loss": 0.1175, + "step": 1861500 + }, + { + "epoch": 0.13, + "learning_rate": 4.789688474576271e-05, + "loss": 0.1091, + "step": 1862000 + }, + { + "epoch": 0.13, + "learning_rate": 4.789632090395481e-05, + "loss": 0.117, + "step": 1862500 + }, + { + "epoch": 0.13, + "learning_rate": 4.789575593220339e-05, + "loss": 0.1223, + "step": 1863000 + }, + { + "epoch": 0.13, + "learning_rate": 4.789519096045198e-05, + "loss": 0.1159, + "step": 1863500 + }, + { + "epoch": 0.13, + "learning_rate": 4.789462598870057e-05, + "loss": 0.1089, + "step": 1864000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7894061016949156e-05, + "loss": 0.1144, + "step": 1864500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7893496045197746e-05, + "loss": 0.1074, + "step": 1865000 + }, + { + "epoch": 0.13, + "learning_rate": 4.789293107344633e-05, + "loss": 0.1139, + "step": 1865500 + }, + { + "epoch": 0.13, + "learning_rate": 4.789236723163842e-05, + "loss": 0.1184, + "step": 1866000 + }, + { + "epoch": 0.13, + "learning_rate": 4.789180225988701e-05, + "loss": 0.1054, + "step": 1866500 + }, + { + "epoch": 0.13, + "learning_rate": 4.789123728813559e-05, + "loss": 0.1174, + "step": 1867000 + }, + { + "epoch": 0.13, + "learning_rate": 4.789067231638418e-05, + "loss": 0.1151, + "step": 1867500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7890108474576276e-05, + "loss": 0.1115, + "step": 1868000 + }, + { + "epoch": 0.13, + "learning_rate": 4.788954350282486e-05, + "loss": 0.1184, + "step": 1868500 + }, + { + "epoch": 0.13, + "learning_rate": 4.788897853107345e-05, + "loss": 0.1139, + "step": 1869000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7888413559322034e-05, + "loss": 0.118, + "step": 1869500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7887848587570625e-05, + "loss": 0.1112, + "step": 1870000 + }, + { + "epoch": 0.13, + "learning_rate": 4.788728474576271e-05, + "loss": 0.1123, + "step": 1870500 + }, + { + "epoch": 0.13, + "learning_rate": 4.78867197740113e-05, + "loss": 0.1167, + "step": 1871000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7886154802259893e-05, + "loss": 0.1131, + "step": 1871500 + }, + { + "epoch": 0.13, + "learning_rate": 4.788558983050848e-05, + "loss": 0.1136, + "step": 1872000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7885025988700564e-05, + "loss": 0.1196, + "step": 1872500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7884461016949155e-05, + "loss": 0.1208, + "step": 1873000 + }, + { + "epoch": 0.13, + "learning_rate": 4.788389604519774e-05, + "loss": 0.1072, + "step": 1873500 + }, + { + "epoch": 0.13, + "learning_rate": 4.788333107344633e-05, + "loss": 0.1172, + "step": 1874000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7882767231638423e-05, + "loss": 0.1157, + "step": 1874500 + }, + { + "epoch": 0.13, + "learning_rate": 4.788220225988701e-05, + "loss": 0.1181, + "step": 1875000 + }, + { + "epoch": 0.13, + "learning_rate": 4.78816372881356e-05, + "loss": 0.1135, + "step": 1875500 + }, + { + "epoch": 0.13, + "learning_rate": 4.788107231638418e-05, + "loss": 0.116, + "step": 1876000 + }, + { + "epoch": 0.13, + "learning_rate": 4.788050734463277e-05, + "loss": 0.1149, + "step": 1876500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7879942372881356e-05, + "loss": 0.1056, + "step": 1877000 + }, + { + "epoch": 0.13, + "learning_rate": 4.787937853107345e-05, + "loss": 0.1056, + "step": 1877500 + }, + { + "epoch": 0.13, + "learning_rate": 4.787881355932204e-05, + "loss": 0.1063, + "step": 1878000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7878248587570624e-05, + "loss": 0.1135, + "step": 1878500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7877683615819215e-05, + "loss": 0.1148, + "step": 1879000 + }, + { + "epoch": 0.13, + "learning_rate": 4.78771186440678e-05, + "loss": 0.1138, + "step": 1879500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7876554802259886e-05, + "loss": 0.1155, + "step": 1880000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7875989830508477e-05, + "loss": 0.1117, + "step": 1880500 + }, + { + "epoch": 0.13, + "learning_rate": 4.787542485875706e-05, + "loss": 0.12, + "step": 1881000 + }, + { + "epoch": 0.13, + "learning_rate": 4.787485988700565e-05, + "loss": 0.1099, + "step": 1881500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7874296045197745e-05, + "loss": 0.119, + "step": 1882000 + }, + { + "epoch": 0.13, + "learning_rate": 4.787373107344633e-05, + "loss": 0.1154, + "step": 1882500 + }, + { + "epoch": 0.13, + "learning_rate": 4.787316610169492e-05, + "loss": 0.1181, + "step": 1883000 + }, + { + "epoch": 0.13, + "learning_rate": 4.78726011299435e-05, + "loss": 0.1246, + "step": 1883500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7872036158192094e-05, + "loss": 0.1171, + "step": 1884000 + }, + { + "epoch": 0.13, + "learning_rate": 4.787147118644068e-05, + "loss": 0.1159, + "step": 1884500 + }, + { + "epoch": 0.13, + "learning_rate": 4.787090734463277e-05, + "loss": 0.1171, + "step": 1885000 + }, + { + "epoch": 0.13, + "learning_rate": 4.787034237288136e-05, + "loss": 0.1152, + "step": 1885500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7869777401129946e-05, + "loss": 0.1094, + "step": 1886000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7869212429378536e-05, + "loss": 0.1182, + "step": 1886500 + }, + { + "epoch": 0.13, + "learning_rate": 4.786864971751413e-05, + "loss": 0.1178, + "step": 1887000 + }, + { + "epoch": 0.13, + "learning_rate": 4.786808474576272e-05, + "loss": 0.1252, + "step": 1887500 + }, + { + "epoch": 0.13, + "learning_rate": 4.78675197740113e-05, + "loss": 0.1127, + "step": 1888000 + }, + { + "epoch": 0.13, + "learning_rate": 4.786695480225989e-05, + "loss": 0.1131, + "step": 1888500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7866389830508476e-05, + "loss": 0.1175, + "step": 1889000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7865824858757066e-05, + "loss": 0.1127, + "step": 1889500 + }, + { + "epoch": 0.13, + "learning_rate": 4.786525988700565e-05, + "loss": 0.1086, + "step": 1890000 + }, + { + "epoch": 0.13, + "learning_rate": 4.786469491525424e-05, + "loss": 0.1136, + "step": 1890500 + }, + { + "epoch": 0.13, + "learning_rate": 4.786413107344633e-05, + "loss": 0.1087, + "step": 1891000 + }, + { + "epoch": 0.13, + "learning_rate": 4.786356610169492e-05, + "loss": 0.1168, + "step": 1891500 + }, + { + "epoch": 0.13, + "learning_rate": 4.786300112994351e-05, + "loss": 0.1147, + "step": 1892000 + }, + { + "epoch": 0.13, + "learning_rate": 4.786243615819209e-05, + "loss": 0.1124, + "step": 1892500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7861871186440684e-05, + "loss": 0.1045, + "step": 1893000 + }, + { + "epoch": 0.13, + "learning_rate": 4.786130734463277e-05, + "loss": 0.1148, + "step": 1893500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7860742372881355e-05, + "loss": 0.1142, + "step": 1894000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7860177401129945e-05, + "loss": 0.1068, + "step": 1894500 + }, + { + "epoch": 0.13, + "learning_rate": 4.785961242937853e-05, + "loss": 0.1117, + "step": 1895000 + }, + { + "epoch": 0.13, + "learning_rate": 4.785904745762712e-05, + "loss": 0.1155, + "step": 1895500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7858483615819213e-05, + "loss": 0.1067, + "step": 1896000 + }, + { + "epoch": 0.13, + "learning_rate": 4.78579186440678e-05, + "loss": 0.1043, + "step": 1896500 + }, + { + "epoch": 0.13, + "learning_rate": 4.785735367231639e-05, + "loss": 0.1098, + "step": 1897000 + }, + { + "epoch": 0.13, + "learning_rate": 4.785678870056498e-05, + "loss": 0.109, + "step": 1897500 + }, + { + "epoch": 0.13, + "learning_rate": 4.785622372881356e-05, + "loss": 0.1141, + "step": 1898000 + }, + { + "epoch": 0.13, + "learning_rate": 4.785565875706215e-05, + "loss": 0.1075, + "step": 1898500 + }, + { + "epoch": 0.13, + "learning_rate": 4.785509378531074e-05, + "loss": 0.1177, + "step": 1899000 + }, + { + "epoch": 0.13, + "learning_rate": 4.785452881355933e-05, + "loss": 0.1134, + "step": 1899500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7853964971751414e-05, + "loss": 0.1168, + "step": 1900000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7853400000000005e-05, + "loss": 0.1118, + "step": 1900500 + }, + { + "epoch": 0.13, + "learning_rate": 4.785283502824859e-05, + "loss": 0.117, + "step": 1901000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7852271186440676e-05, + "loss": 0.1124, + "step": 1901500 + }, + { + "epoch": 0.13, + "learning_rate": 4.785170621468927e-05, + "loss": 0.1146, + "step": 1902000 + }, + { + "epoch": 0.13, + "learning_rate": 4.785114124293786e-05, + "loss": 0.1134, + "step": 1902500 + }, + { + "epoch": 0.13, + "learning_rate": 4.785057627118644e-05, + "loss": 0.1149, + "step": 1903000 + }, + { + "epoch": 0.13, + "learning_rate": 4.785001129943503e-05, + "loss": 0.1107, + "step": 1903500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7849446327683615e-05, + "loss": 0.1142, + "step": 1904000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7848881355932206e-05, + "loss": 0.1133, + "step": 1904500 + }, + { + "epoch": 0.13, + "learning_rate": 4.784831638418079e-05, + "loss": 0.1121, + "step": 1905000 + }, + { + "epoch": 0.13, + "learning_rate": 4.784775367231639e-05, + "loss": 0.1154, + "step": 1905500 + }, + { + "epoch": 0.13, + "learning_rate": 4.784718870056498e-05, + "loss": 0.1145, + "step": 1906000 + }, + { + "epoch": 0.13, + "learning_rate": 4.784662372881356e-05, + "loss": 0.1145, + "step": 1906500 + }, + { + "epoch": 0.13, + "learning_rate": 4.784605875706215e-05, + "loss": 0.1157, + "step": 1907000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7845493785310736e-05, + "loss": 0.1153, + "step": 1907500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7844928813559327e-05, + "loss": 0.1075, + "step": 1908000 + }, + { + "epoch": 0.13, + "learning_rate": 4.784436384180791e-05, + "loss": 0.1183, + "step": 1908500 + }, + { + "epoch": 0.13, + "learning_rate": 4.78437988700565e-05, + "loss": 0.1159, + "step": 1909000 + }, + { + "epoch": 0.13, + "learning_rate": 4.784323502824859e-05, + "loss": 0.1126, + "step": 1909500 + }, + { + "epoch": 0.13, + "learning_rate": 4.784267005649718e-05, + "loss": 0.1135, + "step": 1910000 + }, + { + "epoch": 0.13, + "learning_rate": 4.784210508474576e-05, + "loss": 0.1154, + "step": 1910500 + }, + { + "epoch": 0.13, + "learning_rate": 4.784154011299435e-05, + "loss": 0.1039, + "step": 1911000 + }, + { + "epoch": 0.13, + "learning_rate": 4.784097514124294e-05, + "loss": 0.1095, + "step": 1911500 + }, + { + "epoch": 0.13, + "learning_rate": 4.784041129943503e-05, + "loss": 0.1161, + "step": 1912000 + }, + { + "epoch": 0.13, + "learning_rate": 4.783984632768362e-05, + "loss": 0.1093, + "step": 1912500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7839281355932205e-05, + "loss": 0.1175, + "step": 1913000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7838716384180796e-05, + "loss": 0.1115, + "step": 1913500 + }, + { + "epoch": 0.13, + "learning_rate": 4.783815254237288e-05, + "loss": 0.1182, + "step": 1914000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7837587570621474e-05, + "loss": 0.1116, + "step": 1914500 + }, + { + "epoch": 0.13, + "learning_rate": 4.783702259887006e-05, + "loss": 0.112, + "step": 1915000 + }, + { + "epoch": 0.13, + "learning_rate": 4.783645762711865e-05, + "loss": 0.115, + "step": 1915500 + }, + { + "epoch": 0.13, + "learning_rate": 4.783589265536723e-05, + "loss": 0.1168, + "step": 1916000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7835328813559326e-05, + "loss": 0.1085, + "step": 1916500 + }, + { + "epoch": 0.13, + "learning_rate": 4.783476384180791e-05, + "loss": 0.1151, + "step": 1917000 + }, + { + "epoch": 0.13, + "learning_rate": 4.78341988700565e-05, + "loss": 0.1195, + "step": 1917500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7833633898305084e-05, + "loss": 0.1147, + "step": 1918000 + }, + { + "epoch": 0.13, + "learning_rate": 4.783307005649718e-05, + "loss": 0.1093, + "step": 1918500 + }, + { + "epoch": 0.13, + "learning_rate": 4.783250508474577e-05, + "loss": 0.1125, + "step": 1919000 + }, + { + "epoch": 0.13, + "learning_rate": 4.783194011299435e-05, + "loss": 0.1101, + "step": 1919500 + }, + { + "epoch": 0.13, + "learning_rate": 4.783137514124294e-05, + "loss": 0.1184, + "step": 1920000 + }, + { + "epoch": 0.13, + "learning_rate": 4.783081016949153e-05, + "loss": 0.1156, + "step": 1920500 + }, + { + "epoch": 0.13, + "learning_rate": 4.783024632768362e-05, + "loss": 0.112, + "step": 1921000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7829681355932205e-05, + "loss": 0.1166, + "step": 1921500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7829116384180795e-05, + "loss": 0.1092, + "step": 1922000 + }, + { + "epoch": 0.13, + "learning_rate": 4.782855141242938e-05, + "loss": 0.1124, + "step": 1922500 + }, + { + "epoch": 0.13, + "learning_rate": 4.782798644067797e-05, + "loss": 0.1097, + "step": 1923000 + }, + { + "epoch": 0.13, + "learning_rate": 4.782742259887006e-05, + "loss": 0.1154, + "step": 1923500 + }, + { + "epoch": 0.13, + "learning_rate": 4.782685762711865e-05, + "loss": 0.1163, + "step": 1924000 + }, + { + "epoch": 0.13, + "learning_rate": 4.782629265536723e-05, + "loss": 0.1135, + "step": 1924500 + }, + { + "epoch": 0.13, + "learning_rate": 4.782572768361582e-05, + "loss": 0.1095, + "step": 1925000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7825162711864405e-05, + "loss": 0.1081, + "step": 1925500 + }, + { + "epoch": 0.13, + "learning_rate": 4.78245988700565e-05, + "loss": 0.1122, + "step": 1926000 + }, + { + "epoch": 0.13, + "learning_rate": 4.782403389830509e-05, + "loss": 0.1142, + "step": 1926500 + }, + { + "epoch": 0.13, + "learning_rate": 4.782346892655368e-05, + "loss": 0.1124, + "step": 1927000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7822903954802264e-05, + "loss": 0.1079, + "step": 1927500 + }, + { + "epoch": 0.13, + "learning_rate": 4.782234011299435e-05, + "loss": 0.1076, + "step": 1928000 + }, + { + "epoch": 0.13, + "learning_rate": 4.782177514124294e-05, + "loss": 0.1125, + "step": 1928500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7821210169491526e-05, + "loss": 0.1199, + "step": 1929000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7820645197740117e-05, + "loss": 0.1075, + "step": 1929500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7820081355932204e-05, + "loss": 0.1176, + "step": 1930000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7819516384180794e-05, + "loss": 0.1137, + "step": 1930500 + }, + { + "epoch": 0.13, + "learning_rate": 4.781895141242938e-05, + "loss": 0.1148, + "step": 1931000 + }, + { + "epoch": 0.13, + "learning_rate": 4.781838644067797e-05, + "loss": 0.1097, + "step": 1931500 + }, + { + "epoch": 0.13, + "learning_rate": 4.781782259887006e-05, + "loss": 0.1139, + "step": 1932000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7817257627118647e-05, + "loss": 0.1156, + "step": 1932500 + }, + { + "epoch": 0.13, + "learning_rate": 4.781669265536724e-05, + "loss": 0.1125, + "step": 1933000 + }, + { + "epoch": 0.13, + "learning_rate": 4.781612768361582e-05, + "loss": 0.1149, + "step": 1933500 + }, + { + "epoch": 0.13, + "learning_rate": 4.781556271186441e-05, + "loss": 0.1213, + "step": 1934000 + }, + { + "epoch": 0.13, + "learning_rate": 4.78149988700565e-05, + "loss": 0.1188, + "step": 1934500 + }, + { + "epoch": 0.13, + "learning_rate": 4.781443389830509e-05, + "loss": 0.1168, + "step": 1935000 + }, + { + "epoch": 0.13, + "learning_rate": 4.781386892655367e-05, + "loss": 0.1166, + "step": 1935500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7813303954802264e-05, + "loss": 0.1182, + "step": 1936000 + }, + { + "epoch": 0.13, + "learning_rate": 4.781273898305085e-05, + "loss": 0.1175, + "step": 1936500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7812175141242935e-05, + "loss": 0.1153, + "step": 1937000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7811610169491525e-05, + "loss": 0.1157, + "step": 1937500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7811045197740116e-05, + "loss": 0.1098, + "step": 1938000 + }, + { + "epoch": 0.13, + "learning_rate": 4.78104802259887e-05, + "loss": 0.118, + "step": 1938500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7809916384180794e-05, + "loss": 0.112, + "step": 1939000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7809351412429384e-05, + "loss": 0.109, + "step": 1939500 + }, + { + "epoch": 0.13, + "learning_rate": 4.780878644067797e-05, + "loss": 0.1111, + "step": 1940000 + }, + { + "epoch": 0.13, + "learning_rate": 4.780822146892656e-05, + "loss": 0.1096, + "step": 1940500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7807657627118646e-05, + "loss": 0.1174, + "step": 1941000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7807092655367236e-05, + "loss": 0.1189, + "step": 1941500 + }, + { + "epoch": 0.13, + "learning_rate": 4.780652768361582e-05, + "loss": 0.1081, + "step": 1942000 + }, + { + "epoch": 0.13, + "learning_rate": 4.780596271186441e-05, + "loss": 0.1153, + "step": 1942500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7805397740112995e-05, + "loss": 0.1106, + "step": 1943000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7804832768361585e-05, + "loss": 0.1191, + "step": 1943500 + }, + { + "epoch": 0.13, + "learning_rate": 4.780426779661017e-05, + "loss": 0.1149, + "step": 1944000 + }, + { + "epoch": 0.13, + "learning_rate": 4.780370282485876e-05, + "loss": 0.1123, + "step": 1944500 + }, + { + "epoch": 0.13, + "learning_rate": 4.780313898305085e-05, + "loss": 0.1138, + "step": 1945000 + }, + { + "epoch": 0.13, + "learning_rate": 4.780257401129944e-05, + "loss": 0.11, + "step": 1945500 + }, + { + "epoch": 0.13, + "learning_rate": 4.780200903954802e-05, + "loss": 0.111, + "step": 1946000 + }, + { + "epoch": 0.13, + "learning_rate": 4.780144406779661e-05, + "loss": 0.1221, + "step": 1946500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7800879096045196e-05, + "loss": 0.112, + "step": 1947000 + }, + { + "epoch": 0.13, + "learning_rate": 4.780031525423729e-05, + "loss": 0.1145, + "step": 1947500 + }, + { + "epoch": 0.13, + "learning_rate": 4.779975028248588e-05, + "loss": 0.116, + "step": 1948000 + }, + { + "epoch": 0.13, + "learning_rate": 4.779918531073447e-05, + "loss": 0.1077, + "step": 1948500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7798620338983055e-05, + "loss": 0.1198, + "step": 1949000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7798055367231645e-05, + "loss": 0.1255, + "step": 1949500 + }, + { + "epoch": 0.13, + "learning_rate": 4.779749152542373e-05, + "loss": 0.1134, + "step": 1950000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7796926553672316e-05, + "loss": 0.1089, + "step": 1950500 + }, + { + "epoch": 0.13, + "learning_rate": 4.77963627118644e-05, + "loss": 0.1192, + "step": 1951000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7795797740112994e-05, + "loss": 0.1151, + "step": 1951500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7795232768361584e-05, + "loss": 0.1088, + "step": 1952000 + }, + { + "epoch": 0.13, + "learning_rate": 4.779466779661017e-05, + "loss": 0.115, + "step": 1952500 + }, + { + "epoch": 0.13, + "learning_rate": 4.779410282485876e-05, + "loss": 0.1109, + "step": 1953000 + }, + { + "epoch": 0.13, + "learning_rate": 4.779353785310734e-05, + "loss": 0.1115, + "step": 1953500 + }, + { + "epoch": 0.13, + "learning_rate": 4.779297288135593e-05, + "loss": 0.1131, + "step": 1954000 + }, + { + "epoch": 0.13, + "learning_rate": 4.779240790960452e-05, + "loss": 0.1135, + "step": 1954500 + }, + { + "epoch": 0.13, + "learning_rate": 4.779184406779662e-05, + "loss": 0.1156, + "step": 1955000 + }, + { + "epoch": 0.13, + "learning_rate": 4.77912790960452e-05, + "loss": 0.1113, + "step": 1955500 + }, + { + "epoch": 0.13, + "learning_rate": 4.779071412429379e-05, + "loss": 0.1128, + "step": 1956000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7790149152542376e-05, + "loss": 0.113, + "step": 1956500 + }, + { + "epoch": 0.13, + "learning_rate": 4.778958531073446e-05, + "loss": 0.1134, + "step": 1957000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7789020338983054e-05, + "loss": 0.1126, + "step": 1957500 + }, + { + "epoch": 0.13, + "learning_rate": 4.778845536723164e-05, + "loss": 0.1158, + "step": 1958000 + }, + { + "epoch": 0.13, + "learning_rate": 4.778789039548023e-05, + "loss": 0.1121, + "step": 1958500 + }, + { + "epoch": 0.13, + "learning_rate": 4.778732542372882e-05, + "loss": 0.1138, + "step": 1959000 + }, + { + "epoch": 0.13, + "learning_rate": 4.77867604519774e-05, + "loss": 0.1151, + "step": 1959500 + }, + { + "epoch": 0.13, + "learning_rate": 4.778619661016949e-05, + "loss": 0.1194, + "step": 1960000 + }, + { + "epoch": 0.13, + "learning_rate": 4.778563163841808e-05, + "loss": 0.1191, + "step": 1960500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7785066666666664e-05, + "loss": 0.1122, + "step": 1961000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7784501694915255e-05, + "loss": 0.1102, + "step": 1961500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7783936723163845e-05, + "loss": 0.1108, + "step": 1962000 + }, + { + "epoch": 0.13, + "learning_rate": 4.778337288135594e-05, + "loss": 0.1108, + "step": 1962500 + }, + { + "epoch": 0.13, + "learning_rate": 4.778280790960452e-05, + "loss": 0.1115, + "step": 1963000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7782242937853114e-05, + "loss": 0.1097, + "step": 1963500 + }, + { + "epoch": 0.13, + "learning_rate": 4.77816779661017e-05, + "loss": 0.1125, + "step": 1964000 + }, + { + "epoch": 0.13, + "learning_rate": 4.778111299435029e-05, + "loss": 0.1187, + "step": 1964500 + }, + { + "epoch": 0.13, + "learning_rate": 4.778054802259887e-05, + "loss": 0.1176, + "step": 1965000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7779984180790966e-05, + "loss": 0.1145, + "step": 1965500 + }, + { + "epoch": 0.13, + "learning_rate": 4.777941920903955e-05, + "loss": 0.1102, + "step": 1966000 + }, + { + "epoch": 0.13, + "learning_rate": 4.777885423728814e-05, + "loss": 0.1179, + "step": 1966500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7778289265536724e-05, + "loss": 0.1101, + "step": 1967000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7777724293785315e-05, + "loss": 0.1073, + "step": 1967500 + }, + { + "epoch": 0.13, + "learning_rate": 4.77771604519774e-05, + "loss": 0.1164, + "step": 1968000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7776595480225986e-05, + "loss": 0.1109, + "step": 1968500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7776030508474576e-05, + "loss": 0.112, + "step": 1969000 + }, + { + "epoch": 0.13, + "learning_rate": 4.777546553672317e-05, + "loss": 0.1263, + "step": 1969500 + }, + { + "epoch": 0.13, + "learning_rate": 4.777490169491526e-05, + "loss": 0.1161, + "step": 1970000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7774336723163845e-05, + "loss": 0.1188, + "step": 1970500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7773771751412435e-05, + "loss": 0.1114, + "step": 1971000 + }, + { + "epoch": 0.13, + "learning_rate": 4.777320677966102e-05, + "loss": 0.1138, + "step": 1971500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7772642937853106e-05, + "loss": 0.1134, + "step": 1972000 + }, + { + "epoch": 0.13, + "learning_rate": 4.77720779661017e-05, + "loss": 0.1161, + "step": 1972500 + }, + { + "epoch": 0.13, + "learning_rate": 4.777151299435029e-05, + "loss": 0.1118, + "step": 1973000 + }, + { + "epoch": 0.13, + "learning_rate": 4.777094802259887e-05, + "loss": 0.12, + "step": 1973500 + }, + { + "epoch": 0.13, + "learning_rate": 4.777038305084746e-05, + "loss": 0.1117, + "step": 1974000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7769818079096046e-05, + "loss": 0.1097, + "step": 1974500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7769253107344636e-05, + "loss": 0.1052, + "step": 1975000 + }, + { + "epoch": 0.13, + "learning_rate": 4.776868926553672e-05, + "loss": 0.1155, + "step": 1975500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7768124293785314e-05, + "loss": 0.1082, + "step": 1976000 + }, + { + "epoch": 0.13, + "learning_rate": 4.77675593220339e-05, + "loss": 0.1163, + "step": 1976500 + }, + { + "epoch": 0.13, + "learning_rate": 4.776699435028249e-05, + "loss": 0.112, + "step": 1977000 + }, + { + "epoch": 0.13, + "learning_rate": 4.776642937853107e-05, + "loss": 0.1073, + "step": 1977500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7765865536723166e-05, + "loss": 0.1059, + "step": 1978000 + }, + { + "epoch": 0.13, + "learning_rate": 4.776530056497176e-05, + "loss": 0.1104, + "step": 1978500 + }, + { + "epoch": 0.13, + "learning_rate": 4.776473559322034e-05, + "loss": 0.1067, + "step": 1979000 + }, + { + "epoch": 0.13, + "learning_rate": 4.776417062146893e-05, + "loss": 0.1124, + "step": 1979500 + }, + { + "epoch": 0.13, + "learning_rate": 4.776360564971752e-05, + "loss": 0.1166, + "step": 1980000 + }, + { + "epoch": 0.13, + "learning_rate": 4.776304180790961e-05, + "loss": 0.1168, + "step": 1980500 + }, + { + "epoch": 0.13, + "learning_rate": 4.776247683615819e-05, + "loss": 0.1121, + "step": 1981000 + }, + { + "epoch": 0.13, + "learning_rate": 4.776191186440678e-05, + "loss": 0.1111, + "step": 1981500 + }, + { + "epoch": 0.13, + "learning_rate": 4.776134689265537e-05, + "loss": 0.1108, + "step": 1982000 + }, + { + "epoch": 0.13, + "learning_rate": 4.776078192090396e-05, + "loss": 0.1098, + "step": 1982500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7760218079096045e-05, + "loss": 0.114, + "step": 1983000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7759653107344635e-05, + "loss": 0.1184, + "step": 1983500 + }, + { + "epoch": 0.13, + "learning_rate": 4.775908813559322e-05, + "loss": 0.1111, + "step": 1984000 + }, + { + "epoch": 0.13, + "learning_rate": 4.775852316384181e-05, + "loss": 0.112, + "step": 1984500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7757958192090394e-05, + "loss": 0.1165, + "step": 1985000 + }, + { + "epoch": 0.13, + "learning_rate": 4.775739435028249e-05, + "loss": 0.115, + "step": 1985500 + }, + { + "epoch": 0.13, + "learning_rate": 4.775682937853108e-05, + "loss": 0.1165, + "step": 1986000 + }, + { + "epoch": 0.13, + "learning_rate": 4.775626440677967e-05, + "loss": 0.1087, + "step": 1986500 + }, + { + "epoch": 0.13, + "learning_rate": 4.775569943502825e-05, + "loss": 0.1104, + "step": 1987000 + }, + { + "epoch": 0.13, + "learning_rate": 4.775513559322034e-05, + "loss": 0.1138, + "step": 1987500 + }, + { + "epoch": 0.13, + "learning_rate": 4.775457062146893e-05, + "loss": 0.1078, + "step": 1988000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7754005649717514e-05, + "loss": 0.1124, + "step": 1988500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7753440677966105e-05, + "loss": 0.1127, + "step": 1989000 + }, + { + "epoch": 0.13, + "learning_rate": 4.775287570621469e-05, + "loss": 0.1164, + "step": 1989500 + }, + { + "epoch": 0.13, + "learning_rate": 4.775231186440678e-05, + "loss": 0.114, + "step": 1990000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7751746892655366e-05, + "loss": 0.1141, + "step": 1990500 + }, + { + "epoch": 0.13, + "learning_rate": 4.775118192090396e-05, + "loss": 0.1135, + "step": 1991000 + }, + { + "epoch": 0.14, + "learning_rate": 4.775061694915254e-05, + "loss": 0.1137, + "step": 1991500 + }, + { + "epoch": 0.14, + "learning_rate": 4.775005197740113e-05, + "loss": 0.1126, + "step": 1992000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7749487005649715e-05, + "loss": 0.1064, + "step": 1992500 + }, + { + "epoch": 0.14, + "learning_rate": 4.774892316384181e-05, + "loss": 0.116, + "step": 1993000 + }, + { + "epoch": 0.14, + "learning_rate": 4.77483581920904e-05, + "loss": 0.1154, + "step": 1993500 + }, + { + "epoch": 0.14, + "learning_rate": 4.774779322033899e-05, + "loss": 0.115, + "step": 1994000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7747228248587574e-05, + "loss": 0.1169, + "step": 1994500 + }, + { + "epoch": 0.14, + "learning_rate": 4.774666440677966e-05, + "loss": 0.1132, + "step": 1995000 + }, + { + "epoch": 0.14, + "learning_rate": 4.774609943502825e-05, + "loss": 0.1148, + "step": 1995500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7745534463276836e-05, + "loss": 0.1137, + "step": 1996000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7744969491525426e-05, + "loss": 0.1138, + "step": 1996500 + }, + { + "epoch": 0.14, + "learning_rate": 4.774440451977401e-05, + "loss": 0.1075, + "step": 1997000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7743840677966104e-05, + "loss": 0.1149, + "step": 1997500 + }, + { + "epoch": 0.14, + "learning_rate": 4.774327570621469e-05, + "loss": 0.1229, + "step": 1998000 + }, + { + "epoch": 0.14, + "learning_rate": 4.774271073446328e-05, + "loss": 0.1154, + "step": 1998500 + }, + { + "epoch": 0.14, + "learning_rate": 4.774214576271186e-05, + "loss": 0.1116, + "step": 1999000 + }, + { + "epoch": 0.14, + "learning_rate": 4.774158079096045e-05, + "loss": 0.1075, + "step": 1999500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7741015819209037e-05, + "loss": 0.1138, + "step": 2000000 + }, + { + "epoch": 0.14, + "learning_rate": 4.774045197740114e-05, + "loss": 0.109, + "step": 2000500 + }, + { + "epoch": 0.14, + "learning_rate": 4.773988700564972e-05, + "loss": 0.1105, + "step": 2001000 + }, + { + "epoch": 0.14, + "learning_rate": 4.773932203389831e-05, + "loss": 0.1106, + "step": 2001500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7738757062146896e-05, + "loss": 0.1138, + "step": 2002000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7738192090395486e-05, + "loss": 0.1105, + "step": 2002500 + }, + { + "epoch": 0.14, + "learning_rate": 4.773762824858757e-05, + "loss": 0.1062, + "step": 2003000 + }, + { + "epoch": 0.14, + "learning_rate": 4.773706327683616e-05, + "loss": 0.1127, + "step": 2003500 + }, + { + "epoch": 0.14, + "learning_rate": 4.773649830508475e-05, + "loss": 0.1047, + "step": 2004000 + }, + { + "epoch": 0.14, + "learning_rate": 4.773593333333334e-05, + "loss": 0.1124, + "step": 2004500 + }, + { + "epoch": 0.14, + "learning_rate": 4.773536836158192e-05, + "loss": 0.1119, + "step": 2005000 + }, + { + "epoch": 0.14, + "learning_rate": 4.773480338983051e-05, + "loss": 0.112, + "step": 2005500 + }, + { + "epoch": 0.14, + "learning_rate": 4.77342395480226e-05, + "loss": 0.1107, + "step": 2006000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7733674576271184e-05, + "loss": 0.1098, + "step": 2006500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7733109604519774e-05, + "loss": 0.1155, + "step": 2007000 + }, + { + "epoch": 0.14, + "learning_rate": 4.773254463276836e-05, + "loss": 0.1112, + "step": 2007500 + }, + { + "epoch": 0.14, + "learning_rate": 4.773197966101695e-05, + "loss": 0.1115, + "step": 2008000 + }, + { + "epoch": 0.14, + "learning_rate": 4.773141468926554e-05, + "loss": 0.1031, + "step": 2008500 + }, + { + "epoch": 0.14, + "learning_rate": 4.773085084745763e-05, + "loss": 0.1141, + "step": 2009000 + }, + { + "epoch": 0.14, + "learning_rate": 4.773028587570622e-05, + "loss": 0.105, + "step": 2009500 + }, + { + "epoch": 0.14, + "learning_rate": 4.772972090395481e-05, + "loss": 0.1098, + "step": 2010000 + }, + { + "epoch": 0.14, + "learning_rate": 4.772915593220339e-05, + "loss": 0.111, + "step": 2010500 + }, + { + "epoch": 0.14, + "learning_rate": 4.772859096045198e-05, + "loss": 0.1184, + "step": 2011000 + }, + { + "epoch": 0.14, + "learning_rate": 4.772802598870057e-05, + "loss": 0.1116, + "step": 2011500 + }, + { + "epoch": 0.14, + "learning_rate": 4.772746214689266e-05, + "loss": 0.1126, + "step": 2012000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7726897175141244e-05, + "loss": 0.101, + "step": 2012500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7726332203389834e-05, + "loss": 0.1118, + "step": 2013000 + }, + { + "epoch": 0.14, + "learning_rate": 4.772576723163842e-05, + "loss": 0.1067, + "step": 2013500 + }, + { + "epoch": 0.14, + "learning_rate": 4.772520225988701e-05, + "loss": 0.1063, + "step": 2014000 + }, + { + "epoch": 0.14, + "learning_rate": 4.772463728813559e-05, + "loss": 0.1167, + "step": 2014500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7724073446327686e-05, + "loss": 0.1113, + "step": 2015000 + }, + { + "epoch": 0.14, + "learning_rate": 4.772350847457627e-05, + "loss": 0.1103, + "step": 2015500 + }, + { + "epoch": 0.14, + "learning_rate": 4.772294350282486e-05, + "loss": 0.1128, + "step": 2016000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7722378531073445e-05, + "loss": 0.1132, + "step": 2016500 + }, + { + "epoch": 0.14, + "learning_rate": 4.772181468926554e-05, + "loss": 0.1158, + "step": 2017000 + }, + { + "epoch": 0.14, + "learning_rate": 4.772124971751413e-05, + "loss": 0.111, + "step": 2017500 + }, + { + "epoch": 0.14, + "learning_rate": 4.772068474576271e-05, + "loss": 0.1142, + "step": 2018000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7720119774011303e-05, + "loss": 0.1131, + "step": 2018500 + }, + { + "epoch": 0.14, + "learning_rate": 4.771955593220339e-05, + "loss": 0.1116, + "step": 2019000 + }, + { + "epoch": 0.14, + "learning_rate": 4.771899096045198e-05, + "loss": 0.1168, + "step": 2019500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7718425988700565e-05, + "loss": 0.1128, + "step": 2020000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7717861016949156e-05, + "loss": 0.1114, + "step": 2020500 + }, + { + "epoch": 0.14, + "learning_rate": 4.771729604519774e-05, + "loss": 0.11, + "step": 2021000 + }, + { + "epoch": 0.14, + "learning_rate": 4.771673107344633e-05, + "loss": 0.1141, + "step": 2021500 + }, + { + "epoch": 0.14, + "learning_rate": 4.771616610169492e-05, + "loss": 0.1155, + "step": 2022000 + }, + { + "epoch": 0.14, + "learning_rate": 4.771560225988701e-05, + "loss": 0.1165, + "step": 2022500 + }, + { + "epoch": 0.14, + "learning_rate": 4.771503728813559e-05, + "loss": 0.1151, + "step": 2023000 + }, + { + "epoch": 0.14, + "learning_rate": 4.771447231638418e-05, + "loss": 0.1097, + "step": 2023500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7713907344632766e-05, + "loss": 0.1114, + "step": 2024000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7713342372881357e-05, + "loss": 0.1079, + "step": 2024500 + }, + { + "epoch": 0.14, + "learning_rate": 4.771277853107345e-05, + "loss": 0.1141, + "step": 2025000 + }, + { + "epoch": 0.14, + "learning_rate": 4.771221355932204e-05, + "loss": 0.1252, + "step": 2025500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7711648587570625e-05, + "loss": 0.1163, + "step": 2026000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7711083615819216e-05, + "loss": 0.1136, + "step": 2026500 + }, + { + "epoch": 0.14, + "learning_rate": 4.77105197740113e-05, + "loss": 0.1057, + "step": 2027000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7709954802259887e-05, + "loss": 0.1045, + "step": 2027500 + }, + { + "epoch": 0.14, + "learning_rate": 4.770938983050848e-05, + "loss": 0.1127, + "step": 2028000 + }, + { + "epoch": 0.14, + "learning_rate": 4.770882485875706e-05, + "loss": 0.1141, + "step": 2028500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7708261016949155e-05, + "loss": 0.112, + "step": 2029000 + }, + { + "epoch": 0.14, + "learning_rate": 4.770769604519774e-05, + "loss": 0.1162, + "step": 2029500 + }, + { + "epoch": 0.14, + "learning_rate": 4.770713107344633e-05, + "loss": 0.1124, + "step": 2030000 + }, + { + "epoch": 0.14, + "learning_rate": 4.770656610169491e-05, + "loss": 0.1142, + "step": 2030500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7706001129943504e-05, + "loss": 0.1132, + "step": 2031000 + }, + { + "epoch": 0.14, + "learning_rate": 4.770543615819209e-05, + "loss": 0.1164, + "step": 2031500 + }, + { + "epoch": 0.14, + "learning_rate": 4.770487118644068e-05, + "loss": 0.1132, + "step": 2032000 + }, + { + "epoch": 0.14, + "learning_rate": 4.770430734463277e-05, + "loss": 0.1207, + "step": 2032500 + }, + { + "epoch": 0.14, + "learning_rate": 4.770374237288136e-05, + "loss": 0.1118, + "step": 2033000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7703177401129946e-05, + "loss": 0.1119, + "step": 2033500 + }, + { + "epoch": 0.14, + "learning_rate": 4.770261242937854e-05, + "loss": 0.1081, + "step": 2034000 + }, + { + "epoch": 0.14, + "learning_rate": 4.770204745762712e-05, + "loss": 0.1141, + "step": 2034500 + }, + { + "epoch": 0.14, + "learning_rate": 4.770148361581921e-05, + "loss": 0.1099, + "step": 2035000 + }, + { + "epoch": 0.14, + "learning_rate": 4.77009186440678e-05, + "loss": 0.1115, + "step": 2035500 + }, + { + "epoch": 0.14, + "learning_rate": 4.770035367231639e-05, + "loss": 0.1172, + "step": 2036000 + }, + { + "epoch": 0.14, + "learning_rate": 4.769978870056497e-05, + "loss": 0.112, + "step": 2036500 + }, + { + "epoch": 0.14, + "learning_rate": 4.769922485875706e-05, + "loss": 0.1149, + "step": 2037000 + }, + { + "epoch": 0.14, + "learning_rate": 4.769865988700565e-05, + "loss": 0.1079, + "step": 2037500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7698094915254235e-05, + "loss": 0.1054, + "step": 2038000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7697529943502825e-05, + "loss": 0.11, + "step": 2038500 + }, + { + "epoch": 0.14, + "learning_rate": 4.769696497175141e-05, + "loss": 0.1152, + "step": 2039000 + }, + { + "epoch": 0.14, + "learning_rate": 4.769640112994351e-05, + "loss": 0.1152, + "step": 2039500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7695836158192094e-05, + "loss": 0.1172, + "step": 2040000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7695271186440684e-05, + "loss": 0.1073, + "step": 2040500 + }, + { + "epoch": 0.14, + "learning_rate": 4.769470621468927e-05, + "loss": 0.1111, + "step": 2041000 + }, + { + "epoch": 0.14, + "learning_rate": 4.769414124293786e-05, + "loss": 0.1043, + "step": 2041500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7693577401129946e-05, + "loss": 0.1152, + "step": 2042000 + }, + { + "epoch": 0.14, + "learning_rate": 4.769301242937853e-05, + "loss": 0.1138, + "step": 2042500 + }, + { + "epoch": 0.14, + "learning_rate": 4.769244745762712e-05, + "loss": 0.1098, + "step": 2043000 + }, + { + "epoch": 0.14, + "learning_rate": 4.769188248587571e-05, + "loss": 0.1029, + "step": 2043500 + }, + { + "epoch": 0.14, + "learning_rate": 4.76913186440678e-05, + "loss": 0.1152, + "step": 2044000 + }, + { + "epoch": 0.14, + "learning_rate": 4.769075367231638e-05, + "loss": 0.1053, + "step": 2044500 + }, + { + "epoch": 0.14, + "learning_rate": 4.769018870056497e-05, + "loss": 0.1157, + "step": 2045000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7689623728813556e-05, + "loss": 0.108, + "step": 2045500 + }, + { + "epoch": 0.14, + "learning_rate": 4.768905875706215e-05, + "loss": 0.1044, + "step": 2046000 + }, + { + "epoch": 0.14, + "learning_rate": 4.768849378531074e-05, + "loss": 0.1131, + "step": 2046500 + }, + { + "epoch": 0.14, + "learning_rate": 4.768792881355932e-05, + "loss": 0.1085, + "step": 2047000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7687364971751415e-05, + "loss": 0.1057, + "step": 2047500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7686800000000006e-05, + "loss": 0.1068, + "step": 2048000 + }, + { + "epoch": 0.14, + "learning_rate": 4.768623502824859e-05, + "loss": 0.1125, + "step": 2048500 + }, + { + "epoch": 0.14, + "learning_rate": 4.768567005649718e-05, + "loss": 0.1143, + "step": 2049000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7685105084745764e-05, + "loss": 0.1094, + "step": 2049500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7684540112994354e-05, + "loss": 0.1147, + "step": 2050000 + }, + { + "epoch": 0.14, + "learning_rate": 4.768397627118644e-05, + "loss": 0.1179, + "step": 2050500 + }, + { + "epoch": 0.14, + "learning_rate": 4.768341129943503e-05, + "loss": 0.1179, + "step": 2051000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7682846327683616e-05, + "loss": 0.1118, + "step": 2051500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7682281355932207e-05, + "loss": 0.1193, + "step": 2052000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7681717514124294e-05, + "loss": 0.1153, + "step": 2052500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7681152542372884e-05, + "loss": 0.1144, + "step": 2053000 + }, + { + "epoch": 0.14, + "learning_rate": 4.768058757062147e-05, + "loss": 0.1182, + "step": 2053500 + }, + { + "epoch": 0.14, + "learning_rate": 4.768002259887006e-05, + "loss": 0.1111, + "step": 2054000 + }, + { + "epoch": 0.14, + "learning_rate": 4.767945875706215e-05, + "loss": 0.1208, + "step": 2054500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7678893785310737e-05, + "loss": 0.1078, + "step": 2055000 + }, + { + "epoch": 0.14, + "learning_rate": 4.767832881355933e-05, + "loss": 0.1077, + "step": 2055500 + }, + { + "epoch": 0.14, + "learning_rate": 4.767776384180791e-05, + "loss": 0.1084, + "step": 2056000 + }, + { + "epoch": 0.14, + "learning_rate": 4.76771988700565e-05, + "loss": 0.1076, + "step": 2056500 + }, + { + "epoch": 0.14, + "learning_rate": 4.767663389830509e-05, + "loss": 0.1148, + "step": 2057000 + }, + { + "epoch": 0.14, + "learning_rate": 4.767607005649718e-05, + "loss": 0.1171, + "step": 2057500 + }, + { + "epoch": 0.14, + "learning_rate": 4.767550508474576e-05, + "loss": 0.1092, + "step": 2058000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7674940112994354e-05, + "loss": 0.1099, + "step": 2058500 + }, + { + "epoch": 0.14, + "learning_rate": 4.767437514124294e-05, + "loss": 0.1109, + "step": 2059000 + }, + { + "epoch": 0.14, + "learning_rate": 4.767381129943503e-05, + "loss": 0.104, + "step": 2059500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7673246327683615e-05, + "loss": 0.102, + "step": 2060000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7672681355932206e-05, + "loss": 0.1175, + "step": 2060500 + }, + { + "epoch": 0.14, + "learning_rate": 4.767211638418079e-05, + "loss": 0.1182, + "step": 2061000 + }, + { + "epoch": 0.14, + "learning_rate": 4.767155141242938e-05, + "loss": 0.1203, + "step": 2061500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7670987570621474e-05, + "loss": 0.112, + "step": 2062000 + }, + { + "epoch": 0.14, + "learning_rate": 4.767042259887006e-05, + "loss": 0.1184, + "step": 2062500 + }, + { + "epoch": 0.14, + "learning_rate": 4.766985762711865e-05, + "loss": 0.1127, + "step": 2063000 + }, + { + "epoch": 0.14, + "learning_rate": 4.766929265536723e-05, + "loss": 0.1106, + "step": 2063500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7668728813559326e-05, + "loss": 0.1109, + "step": 2064000 + }, + { + "epoch": 0.14, + "learning_rate": 4.766816384180791e-05, + "loss": 0.1084, + "step": 2064500 + }, + { + "epoch": 0.14, + "learning_rate": 4.76675988700565e-05, + "loss": 0.1065, + "step": 2065000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7667033898305085e-05, + "loss": 0.1146, + "step": 2065500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7666468926553675e-05, + "loss": 0.109, + "step": 2066000 + }, + { + "epoch": 0.14, + "learning_rate": 4.766590508474576e-05, + "loss": 0.1117, + "step": 2066500 + }, + { + "epoch": 0.14, + "learning_rate": 4.766534011299435e-05, + "loss": 0.1141, + "step": 2067000 + }, + { + "epoch": 0.14, + "learning_rate": 4.766477514124294e-05, + "loss": 0.1099, + "step": 2067500 + }, + { + "epoch": 0.14, + "learning_rate": 4.766421016949153e-05, + "loss": 0.1061, + "step": 2068000 + }, + { + "epoch": 0.14, + "learning_rate": 4.766364519774011e-05, + "loss": 0.1122, + "step": 2068500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7663081355932205e-05, + "loss": 0.1145, + "step": 2069000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7662516384180796e-05, + "loss": 0.1161, + "step": 2069500 + }, + { + "epoch": 0.14, + "learning_rate": 4.766195141242938e-05, + "loss": 0.1058, + "step": 2070000 + }, + { + "epoch": 0.14, + "learning_rate": 4.766138644067797e-05, + "loss": 0.1055, + "step": 2070500 + }, + { + "epoch": 0.14, + "learning_rate": 4.766082146892656e-05, + "loss": 0.1106, + "step": 2071000 + }, + { + "epoch": 0.14, + "learning_rate": 4.766025762711865e-05, + "loss": 0.1095, + "step": 2071500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7659693785310735e-05, + "loss": 0.1116, + "step": 2072000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7659128813559326e-05, + "loss": 0.1094, + "step": 2072500 + }, + { + "epoch": 0.14, + "learning_rate": 4.765856384180791e-05, + "loss": 0.1144, + "step": 2073000 + }, + { + "epoch": 0.14, + "learning_rate": 4.76579988700565e-05, + "loss": 0.1146, + "step": 2073500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7657433898305084e-05, + "loss": 0.1056, + "step": 2074000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7656868926553674e-05, + "loss": 0.1045, + "step": 2074500 + }, + { + "epoch": 0.14, + "learning_rate": 4.765630395480226e-05, + "loss": 0.1089, + "step": 2075000 + }, + { + "epoch": 0.14, + "learning_rate": 4.765573898305085e-05, + "loss": 0.1137, + "step": 2075500 + }, + { + "epoch": 0.14, + "learning_rate": 4.765517514124294e-05, + "loss": 0.1107, + "step": 2076000 + }, + { + "epoch": 0.14, + "learning_rate": 4.765461016949153e-05, + "loss": 0.1127, + "step": 2076500 + }, + { + "epoch": 0.14, + "learning_rate": 4.765404519774012e-05, + "loss": 0.1156, + "step": 2077000 + }, + { + "epoch": 0.14, + "learning_rate": 4.76534802259887e-05, + "loss": 0.1134, + "step": 2077500 + }, + { + "epoch": 0.14, + "learning_rate": 4.765291525423729e-05, + "loss": 0.1122, + "step": 2078000 + }, + { + "epoch": 0.14, + "learning_rate": 4.765235141242938e-05, + "loss": 0.1064, + "step": 2078500 + }, + { + "epoch": 0.14, + "learning_rate": 4.765178757062147e-05, + "loss": 0.1067, + "step": 2079000 + }, + { + "epoch": 0.14, + "learning_rate": 4.765122259887006e-05, + "loss": 0.1037, + "step": 2079500 + }, + { + "epoch": 0.14, + "learning_rate": 4.765065762711865e-05, + "loss": 0.1092, + "step": 2080000 + }, + { + "epoch": 0.14, + "learning_rate": 4.765009265536724e-05, + "loss": 0.118, + "step": 2080500 + }, + { + "epoch": 0.14, + "learning_rate": 4.764952768361582e-05, + "loss": 0.1117, + "step": 2081000 + }, + { + "epoch": 0.14, + "learning_rate": 4.764896271186441e-05, + "loss": 0.1106, + "step": 2081500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7648397740112996e-05, + "loss": 0.1054, + "step": 2082000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7647832768361587e-05, + "loss": 0.1125, + "step": 2082500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7647268926553674e-05, + "loss": 0.1078, + "step": 2083000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7646703954802264e-05, + "loss": 0.1107, + "step": 2083500 + }, + { + "epoch": 0.14, + "learning_rate": 4.764613898305085e-05, + "loss": 0.1159, + "step": 2084000 + }, + { + "epoch": 0.14, + "learning_rate": 4.764557401129944e-05, + "loss": 0.1083, + "step": 2084500 + }, + { + "epoch": 0.14, + "learning_rate": 4.764500903954803e-05, + "loss": 0.112, + "step": 2085000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7644445197740116e-05, + "loss": 0.1166, + "step": 2085500 + }, + { + "epoch": 0.14, + "learning_rate": 4.76438802259887e-05, + "loss": 0.1209, + "step": 2086000 + }, + { + "epoch": 0.14, + "learning_rate": 4.764331525423729e-05, + "loss": 0.1109, + "step": 2086500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7642750282485875e-05, + "loss": 0.1104, + "step": 2087000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7642185310734465e-05, + "loss": 0.11, + "step": 2087500 + }, + { + "epoch": 0.14, + "learning_rate": 4.764162146892656e-05, + "loss": 0.1073, + "step": 2088000 + }, + { + "epoch": 0.14, + "learning_rate": 4.764105649717514e-05, + "loss": 0.111, + "step": 2088500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7640491525423734e-05, + "loss": 0.1154, + "step": 2089000 + }, + { + "epoch": 0.14, + "learning_rate": 4.763992655367232e-05, + "loss": 0.1073, + "step": 2089500 + }, + { + "epoch": 0.14, + "learning_rate": 4.763936271186441e-05, + "loss": 0.109, + "step": 2090000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7638797740112995e-05, + "loss": 0.1102, + "step": 2090500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7638232768361586e-05, + "loss": 0.111, + "step": 2091000 + }, + { + "epoch": 0.14, + "learning_rate": 4.763766779661017e-05, + "loss": 0.1083, + "step": 2091500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7637103954802264e-05, + "loss": 0.1105, + "step": 2092000 + }, + { + "epoch": 0.14, + "learning_rate": 4.763653898305085e-05, + "loss": 0.1172, + "step": 2092500 + }, + { + "epoch": 0.14, + "learning_rate": 4.763597401129944e-05, + "loss": 0.1103, + "step": 2093000 + }, + { + "epoch": 0.14, + "learning_rate": 4.763540903954802e-05, + "loss": 0.109, + "step": 2093500 + }, + { + "epoch": 0.14, + "learning_rate": 4.763484406779661e-05, + "loss": 0.113, + "step": 2094000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7634279096045196e-05, + "loss": 0.1146, + "step": 2094500 + }, + { + "epoch": 0.14, + "learning_rate": 4.763371525423729e-05, + "loss": 0.1033, + "step": 2095000 + }, + { + "epoch": 0.14, + "learning_rate": 4.763315028248588e-05, + "loss": 0.1152, + "step": 2095500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7632585310734465e-05, + "loss": 0.1064, + "step": 2096000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7632020338983055e-05, + "loss": 0.1102, + "step": 2096500 + }, + { + "epoch": 0.14, + "learning_rate": 4.763145536723164e-05, + "loss": 0.1196, + "step": 2097000 + }, + { + "epoch": 0.14, + "learning_rate": 4.763089152542373e-05, + "loss": 0.1098, + "step": 2097500 + }, + { + "epoch": 0.14, + "learning_rate": 4.763032655367232e-05, + "loss": 0.108, + "step": 2098000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762976158192091e-05, + "loss": 0.1077, + "step": 2098500 + }, + { + "epoch": 0.14, + "learning_rate": 4.76291966101695e-05, + "loss": 0.1063, + "step": 2099000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762863163841808e-05, + "loss": 0.1139, + "step": 2099500 + }, + { + "epoch": 0.14, + "learning_rate": 4.762806666666667e-05, + "loss": 0.114, + "step": 2100000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762750282485876e-05, + "loss": 0.1197, + "step": 2100500 + }, + { + "epoch": 0.14, + "learning_rate": 4.762693785310734e-05, + "loss": 0.1124, + "step": 2101000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7626372881355934e-05, + "loss": 0.1149, + "step": 2101500 + }, + { + "epoch": 0.14, + "learning_rate": 4.762580790960452e-05, + "loss": 0.1117, + "step": 2102000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762524293785311e-05, + "loss": 0.1176, + "step": 2102500 + }, + { + "epoch": 0.14, + "learning_rate": 4.76246779661017e-05, + "loss": 0.1127, + "step": 2103000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7624114124293786e-05, + "loss": 0.1075, + "step": 2103500 + }, + { + "epoch": 0.14, + "learning_rate": 4.762354915254238e-05, + "loss": 0.1121, + "step": 2104000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762298418079096e-05, + "loss": 0.1093, + "step": 2104500 + }, + { + "epoch": 0.14, + "learning_rate": 4.762241920903955e-05, + "loss": 0.1112, + "step": 2105000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762185423728814e-05, + "loss": 0.1082, + "step": 2105500 + }, + { + "epoch": 0.14, + "learning_rate": 4.762129039548023e-05, + "loss": 0.1143, + "step": 2106000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762072542372882e-05, + "loss": 0.1074, + "step": 2106500 + }, + { + "epoch": 0.14, + "learning_rate": 4.76201604519774e-05, + "loss": 0.1113, + "step": 2107000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7619595480225994e-05, + "loss": 0.1069, + "step": 2107500 + }, + { + "epoch": 0.14, + "learning_rate": 4.761903050847458e-05, + "loss": 0.1069, + "step": 2108000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7618466666666665e-05, + "loss": 0.1148, + "step": 2108500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7617901694915255e-05, + "loss": 0.1096, + "step": 2109000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7617336723163846e-05, + "loss": 0.1127, + "step": 2109500 + }, + { + "epoch": 0.14, + "learning_rate": 4.761677175141243e-05, + "loss": 0.1074, + "step": 2110000 + }, + { + "epoch": 0.14, + "learning_rate": 4.761620677966102e-05, + "loss": 0.1056, + "step": 2110500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7615641807909604e-05, + "loss": 0.1077, + "step": 2111000 + }, + { + "epoch": 0.14, + "learning_rate": 4.76150779661017e-05, + "loss": 0.104, + "step": 2111500 + }, + { + "epoch": 0.14, + "learning_rate": 4.761451299435029e-05, + "loss": 0.1118, + "step": 2112000 + }, + { + "epoch": 0.14, + "learning_rate": 4.761394802259887e-05, + "loss": 0.1071, + "step": 2112500 + }, + { + "epoch": 0.14, + "learning_rate": 4.761338305084746e-05, + "loss": 0.1121, + "step": 2113000 + }, + { + "epoch": 0.14, + "learning_rate": 4.761281920903955e-05, + "loss": 0.1118, + "step": 2113500 + }, + { + "epoch": 0.14, + "learning_rate": 4.761225423728814e-05, + "loss": 0.1058, + "step": 2114000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7611689265536725e-05, + "loss": 0.1101, + "step": 2114500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7611124293785315e-05, + "loss": 0.1103, + "step": 2115000 + }, + { + "epoch": 0.14, + "learning_rate": 4.76105593220339e-05, + "loss": 0.1107, + "step": 2115500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7609995480225986e-05, + "loss": 0.113, + "step": 2116000 + }, + { + "epoch": 0.14, + "learning_rate": 4.760943050847458e-05, + "loss": 0.1095, + "step": 2116500 + }, + { + "epoch": 0.14, + "learning_rate": 4.760886553672317e-05, + "loss": 0.1118, + "step": 2117000 + }, + { + "epoch": 0.14, + "learning_rate": 4.760830056497175e-05, + "loss": 0.1097, + "step": 2117500 + }, + { + "epoch": 0.14, + "learning_rate": 4.760773559322034e-05, + "loss": 0.1138, + "step": 2118000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7607171751412436e-05, + "loss": 0.1097, + "step": 2118500 + }, + { + "epoch": 0.14, + "learning_rate": 4.760660677966102e-05, + "loss": 0.1151, + "step": 2119000 + }, + { + "epoch": 0.14, + "learning_rate": 4.760604180790961e-05, + "loss": 0.1074, + "step": 2119500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7605476836158194e-05, + "loss": 0.1153, + "step": 2120000 + }, + { + "epoch": 0.14, + "learning_rate": 4.760491299435029e-05, + "loss": 0.1067, + "step": 2120500 + }, + { + "epoch": 0.14, + "learning_rate": 4.760434802259887e-05, + "loss": 0.1092, + "step": 2121000 + }, + { + "epoch": 0.14, + "learning_rate": 4.760378305084746e-05, + "loss": 0.1091, + "step": 2121500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7603218079096046e-05, + "loss": 0.1112, + "step": 2122000 + }, + { + "epoch": 0.14, + "learning_rate": 4.760265423728813e-05, + "loss": 0.1185, + "step": 2122500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7602089265536724e-05, + "loss": 0.113, + "step": 2123000 + }, + { + "epoch": 0.14, + "learning_rate": 4.760152542372882e-05, + "loss": 0.1115, + "step": 2123500 + }, + { + "epoch": 0.14, + "learning_rate": 4.760096045197741e-05, + "loss": 0.1139, + "step": 2124000 + }, + { + "epoch": 0.14, + "learning_rate": 4.760039548022599e-05, + "loss": 0.1116, + "step": 2124500 + }, + { + "epoch": 0.14, + "learning_rate": 4.759983163841808e-05, + "loss": 0.111, + "step": 2125000 + }, + { + "epoch": 0.14, + "learning_rate": 4.759926666666667e-05, + "loss": 0.1068, + "step": 2125500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7598701694915254e-05, + "loss": 0.1068, + "step": 2126000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7598136723163844e-05, + "loss": 0.1066, + "step": 2126500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7597571751412435e-05, + "loss": 0.1138, + "step": 2127000 + }, + { + "epoch": 0.14, + "learning_rate": 4.759700677966102e-05, + "loss": 0.1023, + "step": 2127500 + }, + { + "epoch": 0.14, + "learning_rate": 4.759644180790961e-05, + "loss": 0.1079, + "step": 2128000 + }, + { + "epoch": 0.14, + "learning_rate": 4.759587683615819e-05, + "loss": 0.1118, + "step": 2128500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7595311864406784e-05, + "loss": 0.1115, + "step": 2129000 + }, + { + "epoch": 0.14, + "learning_rate": 4.759474689265537e-05, + "loss": 0.1115, + "step": 2129500 + }, + { + "epoch": 0.14, + "learning_rate": 4.759418192090396e-05, + "loss": 0.1129, + "step": 2130000 + }, + { + "epoch": 0.14, + "learning_rate": 4.759361694915255e-05, + "loss": 0.1116, + "step": 2130500 + }, + { + "epoch": 0.14, + "learning_rate": 4.759305197740113e-05, + "loss": 0.1084, + "step": 2131000 + }, + { + "epoch": 0.14, + "learning_rate": 4.759248700564972e-05, + "loss": 0.1063, + "step": 2131500 + }, + { + "epoch": 0.14, + "learning_rate": 4.759192203389831e-05, + "loss": 0.1135, + "step": 2132000 + }, + { + "epoch": 0.14, + "learning_rate": 4.75913570621469e-05, + "loss": 0.117, + "step": 2132500 + }, + { + "epoch": 0.14, + "learning_rate": 4.759079209039548e-05, + "loss": 0.1045, + "step": 2133000 + }, + { + "epoch": 0.14, + "learning_rate": 4.759022824858757e-05, + "loss": 0.1114, + "step": 2133500 + }, + { + "epoch": 0.14, + "learning_rate": 4.758966440677967e-05, + "loss": 0.1076, + "step": 2134000 + }, + { + "epoch": 0.14, + "learning_rate": 4.758909943502825e-05, + "loss": 0.1077, + "step": 2134500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7588534463276844e-05, + "loss": 0.1106, + "step": 2135000 + }, + { + "epoch": 0.14, + "learning_rate": 4.758796949152543e-05, + "loss": 0.113, + "step": 2135500 + }, + { + "epoch": 0.14, + "learning_rate": 4.758740451977402e-05, + "loss": 0.1126, + "step": 2136000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7586840677966105e-05, + "loss": 0.1053, + "step": 2136500 + }, + { + "epoch": 0.14, + "learning_rate": 4.758627570621469e-05, + "loss": 0.1156, + "step": 2137000 + }, + { + "epoch": 0.14, + "learning_rate": 4.758571073446328e-05, + "loss": 0.108, + "step": 2137500 + }, + { + "epoch": 0.14, + "learning_rate": 4.758514576271187e-05, + "loss": 0.1116, + "step": 2138000 + }, + { + "epoch": 0.14, + "learning_rate": 4.758458192090396e-05, + "loss": 0.1097, + "step": 2138500 + }, + { + "epoch": 0.15, + "learning_rate": 4.758401694915254e-05, + "loss": 0.1115, + "step": 2139000 + }, + { + "epoch": 0.15, + "learning_rate": 4.758345197740113e-05, + "loss": 0.1078, + "step": 2139500 + }, + { + "epoch": 0.15, + "learning_rate": 4.7582887005649716e-05, + "loss": 0.1174, + "step": 2140000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7582322033898306e-05, + "loss": 0.1081, + "step": 2140500 + }, + { + "epoch": 0.15, + "learning_rate": 4.758175706214689e-05, + "loss": 0.1146, + "step": 2141000 + }, + { + "epoch": 0.15, + "learning_rate": 4.758119322033899e-05, + "loss": 0.1148, + "step": 2141500 + }, + { + "epoch": 0.15, + "learning_rate": 4.7580628248587575e-05, + "loss": 0.1119, + "step": 2142000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7580063276836165e-05, + "loss": 0.1136, + "step": 2142500 + }, + { + "epoch": 0.15, + "learning_rate": 4.757949830508475e-05, + "loss": 0.109, + "step": 2143000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7578934463276836e-05, + "loss": 0.1089, + "step": 2143500 + }, + { + "epoch": 0.15, + "learning_rate": 4.757836949152543e-05, + "loss": 0.1091, + "step": 2144000 + }, + { + "epoch": 0.15, + "learning_rate": 4.757780451977402e-05, + "loss": 0.1073, + "step": 2144500 + }, + { + "epoch": 0.15, + "learning_rate": 4.75772395480226e-05, + "loss": 0.1068, + "step": 2145000 + }, + { + "epoch": 0.15, + "learning_rate": 4.757667570621469e-05, + "loss": 0.1076, + "step": 2145500 + }, + { + "epoch": 0.15, + "learning_rate": 4.757611073446328e-05, + "loss": 0.1085, + "step": 2146000 + }, + { + "epoch": 0.15, + "learning_rate": 4.757554576271186e-05, + "loss": 0.107, + "step": 2146500 + }, + { + "epoch": 0.15, + "learning_rate": 4.757498079096045e-05, + "loss": 0.1083, + "step": 2147000 + }, + { + "epoch": 0.15, + "learning_rate": 4.757441581920904e-05, + "loss": 0.1113, + "step": 2147500 + }, + { + "epoch": 0.15, + "learning_rate": 4.757385197740114e-05, + "loss": 0.1068, + "step": 2148000 + }, + { + "epoch": 0.15, + "learning_rate": 4.757328700564972e-05, + "loss": 0.1097, + "step": 2148500 + }, + { + "epoch": 0.15, + "learning_rate": 4.757272203389831e-05, + "loss": 0.0999, + "step": 2149000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7572157062146896e-05, + "loss": 0.1024, + "step": 2149500 + }, + { + "epoch": 0.15, + "learning_rate": 4.757159322033898e-05, + "loss": 0.1047, + "step": 2150000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7571028248587574e-05, + "loss": 0.1108, + "step": 2150500 + }, + { + "epoch": 0.15, + "learning_rate": 4.757046327683616e-05, + "loss": 0.1139, + "step": 2151000 + } + ], + "max_steps": 44250000, + "num_train_epochs": 3, + "total_flos": 1.530035227432059e+17, + "trial_name": null, + "trial_params": null +}