diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18998 @@ +{ + "best_metric": 1.1188170909881592, + "best_model_checkpoint": "/work/twskvfb446/facebook/wav2vec2-large-lv60_voidful/phoneme_byt5_SpeechMixEEDT5_w2v2-large_t5lephone-small_lrdiff2_bs64/checkpoint-31521", + "epoch": 7.0, + "global_step": 31521, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.714285714285715e-07, + "loss": 4.8338, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.142857142857143e-06, + "loss": 4.8403, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 1.7142857142857145e-06, + "loss": 4.6805, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 2.285714285714286e-06, + "loss": 4.5098, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 2.8571428571428573e-06, + "loss": 4.3628, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 3.428571428571429e-06, + "loss": 4.3564, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 4.000000000000001e-06, + "loss": 4.1557, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 4.571428571428572e-06, + "loss": 4.0656, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 5.142857142857142e-06, + "loss": 3.8855, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 5.7142857142857145e-06, + "loss": 3.7873, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 6.285714285714286e-06, + "loss": 3.685, + "step": 110 + }, + { + "epoch": 0.03, + "learning_rate": 6.857142857142858e-06, + "loss": 3.5988, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 7.428571428571429e-06, + "loss": 3.4437, + "step": 130 + }, + { + "epoch": 0.03, + "learning_rate": 8.000000000000001e-06, + "loss": 3.3101, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 8.571428571428571e-06, + "loss": 3.2639, + "step": 150 + }, + { + "epoch": 0.04, + "learning_rate": 9.142857142857144e-06, + "loss": 3.1035, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 9.714285714285715e-06, + "loss": 2.9991, + "step": 170 + }, + { + "epoch": 0.04, + "learning_rate": 1.0285714285714285e-05, + "loss": 2.9539, + "step": 180 + }, + { + "epoch": 0.04, + "learning_rate": 1.0857142857142858e-05, + "loss": 2.941, + "step": 190 + }, + { + "epoch": 0.04, + "learning_rate": 1.1428571428571429e-05, + "loss": 2.8688, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 1.2e-05, + "loss": 2.8255, + "step": 210 + }, + { + "epoch": 0.05, + "learning_rate": 1.2571428571428572e-05, + "loss": 2.7809, + "step": 220 + }, + { + "epoch": 0.05, + "learning_rate": 1.3142857142857145e-05, + "loss": 2.7848, + "step": 230 + }, + { + "epoch": 0.05, + "learning_rate": 1.3714285714285716e-05, + "loss": 2.6878, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 1.4285714285714287e-05, + "loss": 2.6746, + "step": 250 + }, + { + "epoch": 0.06, + "learning_rate": 1.4857142857142858e-05, + "loss": 2.6664, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 1.542857142857143e-05, + "loss": 2.5992, + "step": 270 + }, + { + "epoch": 0.06, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.5776, + "step": 280 + }, + { + "epoch": 0.06, + "learning_rate": 1.6571428571428574e-05, + "loss": 2.5659, + "step": 290 + }, + { + "epoch": 0.07, + "learning_rate": 1.7142857142857142e-05, + "loss": 2.5339, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 1.7714285714285717e-05, + "loss": 2.5111, + "step": 310 + }, + { + "epoch": 0.07, + "learning_rate": 1.8285714285714288e-05, + "loss": 2.5012, + "step": 320 + }, + { + "epoch": 0.07, + "learning_rate": 1.885714285714286e-05, + "loss": 2.4786, + "step": 330 + }, + { + "epoch": 0.08, + "learning_rate": 1.942857142857143e-05, + "loss": 2.4416, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 2e-05, + "loss": 2.4435, + "step": 350 + }, + { + "epoch": 0.08, + "learning_rate": 2.057142857142857e-05, + "loss": 2.4357, + "step": 360 + }, + { + "epoch": 0.08, + "learning_rate": 2.1142857142857144e-05, + "loss": 2.397, + "step": 370 + }, + { + "epoch": 0.08, + "learning_rate": 2.1714285714285715e-05, + "loss": 2.401, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 2.2285714285714287e-05, + "loss": 2.375, + "step": 390 + }, + { + "epoch": 0.09, + "learning_rate": 2.2857142857142858e-05, + "loss": 2.3784, + "step": 400 + }, + { + "epoch": 0.09, + "learning_rate": 2.3428571428571433e-05, + "loss": 2.3549, + "step": 410 + }, + { + "epoch": 0.09, + "learning_rate": 2.4e-05, + "loss": 2.3334, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 2.4571428571428575e-05, + "loss": 2.3282, + "step": 430 + }, + { + "epoch": 0.1, + "learning_rate": 2.5142857142857143e-05, + "loss": 2.3087, + "step": 440 + }, + { + "epoch": 0.1, + "learning_rate": 2.5714285714285718e-05, + "loss": 2.3273, + "step": 450 + }, + { + "epoch": 0.1, + "learning_rate": 2.628571428571429e-05, + "loss": 2.2403, + "step": 460 + }, + { + "epoch": 0.1, + "learning_rate": 2.6857142857142857e-05, + "loss": 2.1895, + "step": 470 + }, + { + "epoch": 0.11, + "learning_rate": 2.742857142857143e-05, + "loss": 2.1575, + "step": 480 + }, + { + "epoch": 0.11, + "learning_rate": 2.8e-05, + "loss": 2.1236, + "step": 490 + }, + { + "epoch": 0.11, + "learning_rate": 2.8571428571428574e-05, + "loss": 2.0825, + "step": 500 + }, + { + "epoch": 0.11, + "learning_rate": 2.9142857142857146e-05, + "loss": 2.0988, + "step": 510 + }, + { + "epoch": 0.12, + "learning_rate": 2.9714285714285717e-05, + "loss": 2.0474, + "step": 520 + }, + { + "epoch": 0.12, + "learning_rate": 3.0285714285714288e-05, + "loss": 2.0416, + "step": 530 + }, + { + "epoch": 0.12, + "learning_rate": 3.085714285714286e-05, + "loss": 2.002, + "step": 540 + }, + { + "epoch": 0.12, + "learning_rate": 3.142857142857143e-05, + "loss": 2.0199, + "step": 550 + }, + { + "epoch": 0.12, + "learning_rate": 3.2000000000000005e-05, + "loss": 1.9793, + "step": 560 + }, + { + "epoch": 0.13, + "learning_rate": 3.257142857142857e-05, + "loss": 1.9666, + "step": 570 + }, + { + "epoch": 0.13, + "learning_rate": 3.314285714285715e-05, + "loss": 1.9722, + "step": 580 + }, + { + "epoch": 0.13, + "learning_rate": 3.3714285714285716e-05, + "loss": 1.9398, + "step": 590 + }, + { + "epoch": 0.13, + "learning_rate": 3.4285714285714284e-05, + "loss": 1.9595, + "step": 600 + }, + { + "epoch": 0.14, + "learning_rate": 3.485714285714286e-05, + "loss": 1.97, + "step": 610 + }, + { + "epoch": 0.14, + "learning_rate": 3.542857142857143e-05, + "loss": 1.9175, + "step": 620 + }, + { + "epoch": 0.14, + "learning_rate": 3.6e-05, + "loss": 1.9196, + "step": 630 + }, + { + "epoch": 0.14, + "learning_rate": 3.6571428571428576e-05, + "loss": 1.9297, + "step": 640 + }, + { + "epoch": 0.14, + "learning_rate": 3.714285714285715e-05, + "loss": 1.8824, + "step": 650 + }, + { + "epoch": 0.15, + "learning_rate": 3.771428571428572e-05, + "loss": 1.9058, + "step": 660 + }, + { + "epoch": 0.15, + "learning_rate": 3.828571428571429e-05, + "loss": 1.8983, + "step": 670 + }, + { + "epoch": 0.15, + "learning_rate": 3.885714285714286e-05, + "loss": 1.8565, + "step": 680 + }, + { + "epoch": 0.15, + "learning_rate": 3.9428571428571435e-05, + "loss": 1.8626, + "step": 690 + }, + { + "epoch": 0.16, + "learning_rate": 4e-05, + "loss": 1.8494, + "step": 700 + }, + { + "epoch": 0.16, + "learning_rate": 3.999702358806459e-05, + "loss": 1.8598, + "step": 710 + }, + { + "epoch": 0.16, + "learning_rate": 3.999404717612918e-05, + "loss": 1.8434, + "step": 720 + }, + { + "epoch": 0.16, + "learning_rate": 3.999107076419377e-05, + "loss": 1.8494, + "step": 730 + }, + { + "epoch": 0.16, + "learning_rate": 3.998809435225836e-05, + "loss": 1.7983, + "step": 740 + }, + { + "epoch": 0.17, + "learning_rate": 3.9985117940322945e-05, + "loss": 1.8372, + "step": 750 + }, + { + "epoch": 0.17, + "learning_rate": 3.998214152838753e-05, + "loss": 1.8138, + "step": 760 + }, + { + "epoch": 0.17, + "learning_rate": 3.997916511645212e-05, + "loss": 1.8005, + "step": 770 + }, + { + "epoch": 0.17, + "learning_rate": 3.9976188704516705e-05, + "loss": 1.8083, + "step": 780 + }, + { + "epoch": 0.18, + "learning_rate": 3.99732122925813e-05, + "loss": 1.8047, + "step": 790 + }, + { + "epoch": 0.18, + "learning_rate": 3.9970235880645886e-05, + "loss": 1.8119, + "step": 800 + }, + { + "epoch": 0.18, + "learning_rate": 3.996725946871047e-05, + "loss": 1.7993, + "step": 810 + }, + { + "epoch": 0.18, + "learning_rate": 3.996428305677506e-05, + "loss": 1.8174, + "step": 820 + }, + { + "epoch": 0.18, + "learning_rate": 3.996130664483965e-05, + "loss": 1.7907, + "step": 830 + }, + { + "epoch": 0.19, + "learning_rate": 3.995833023290424e-05, + "loss": 1.7951, + "step": 840 + }, + { + "epoch": 0.19, + "learning_rate": 3.995535382096883e-05, + "loss": 1.7833, + "step": 850 + }, + { + "epoch": 0.19, + "learning_rate": 3.9952377409033414e-05, + "loss": 1.7893, + "step": 860 + }, + { + "epoch": 0.19, + "learning_rate": 3.9949400997098e-05, + "loss": 1.7739, + "step": 870 + }, + { + "epoch": 0.2, + "learning_rate": 3.994642458516259e-05, + "loss": 1.7959, + "step": 880 + }, + { + "epoch": 0.2, + "learning_rate": 3.9943448173227175e-05, + "loss": 1.7773, + "step": 890 + }, + { + "epoch": 0.2, + "learning_rate": 3.994047176129177e-05, + "loss": 1.769, + "step": 900 + }, + { + "epoch": 0.2, + "learning_rate": 3.9937495349356355e-05, + "loss": 1.7596, + "step": 910 + }, + { + "epoch": 0.2, + "learning_rate": 3.993451893742094e-05, + "loss": 1.7659, + "step": 920 + }, + { + "epoch": 0.21, + "learning_rate": 3.993154252548553e-05, + "loss": 1.7592, + "step": 930 + }, + { + "epoch": 0.21, + "learning_rate": 3.992856611355012e-05, + "loss": 1.7695, + "step": 940 + }, + { + "epoch": 0.21, + "learning_rate": 3.992558970161471e-05, + "loss": 1.7539, + "step": 950 + }, + { + "epoch": 0.21, + "learning_rate": 3.992261328967929e-05, + "loss": 1.7511, + "step": 960 + }, + { + "epoch": 0.22, + "learning_rate": 3.9919636877743883e-05, + "loss": 1.7402, + "step": 970 + }, + { + "epoch": 0.22, + "learning_rate": 3.991666046580847e-05, + "loss": 1.7562, + "step": 980 + }, + { + "epoch": 0.22, + "learning_rate": 3.991368405387306e-05, + "loss": 1.7448, + "step": 990 + }, + { + "epoch": 0.22, + "learning_rate": 3.9910707641937644e-05, + "loss": 1.751, + "step": 1000 + }, + { + "epoch": 0.22, + "learning_rate": 3.990773123000224e-05, + "loss": 1.735, + "step": 1010 + }, + { + "epoch": 0.23, + "learning_rate": 3.9904754818066825e-05, + "loss": 1.7277, + "step": 1020 + }, + { + "epoch": 0.23, + "learning_rate": 3.990177840613141e-05, + "loss": 1.743, + "step": 1030 + }, + { + "epoch": 0.23, + "learning_rate": 3.9898801994196e-05, + "loss": 1.7349, + "step": 1040 + }, + { + "epoch": 0.23, + "learning_rate": 3.989582558226059e-05, + "loss": 1.735, + "step": 1050 + }, + { + "epoch": 0.24, + "learning_rate": 3.989284917032517e-05, + "loss": 1.7349, + "step": 1060 + }, + { + "epoch": 0.24, + "learning_rate": 3.9889872758389766e-05, + "loss": 1.7154, + "step": 1070 + }, + { + "epoch": 0.24, + "learning_rate": 3.988689634645435e-05, + "loss": 1.7371, + "step": 1080 + }, + { + "epoch": 0.24, + "learning_rate": 3.988391993451894e-05, + "loss": 1.7301, + "step": 1090 + }, + { + "epoch": 0.24, + "learning_rate": 3.988094352258353e-05, + "loss": 1.7114, + "step": 1100 + }, + { + "epoch": 0.25, + "learning_rate": 3.987796711064812e-05, + "loss": 1.7207, + "step": 1110 + }, + { + "epoch": 0.25, + "learning_rate": 3.987499069871271e-05, + "loss": 1.7162, + "step": 1120 + }, + { + "epoch": 0.25, + "learning_rate": 3.9872014286777294e-05, + "loss": 1.715, + "step": 1130 + }, + { + "epoch": 0.25, + "learning_rate": 3.986903787484188e-05, + "loss": 1.728, + "step": 1140 + }, + { + "epoch": 0.26, + "learning_rate": 3.986606146290647e-05, + "loss": 1.7043, + "step": 1150 + }, + { + "epoch": 0.26, + "learning_rate": 3.986308505097106e-05, + "loss": 1.6983, + "step": 1160 + }, + { + "epoch": 0.26, + "learning_rate": 3.986010863903564e-05, + "loss": 1.6977, + "step": 1170 + }, + { + "epoch": 0.26, + "learning_rate": 3.9857132227100235e-05, + "loss": 1.7026, + "step": 1180 + }, + { + "epoch": 0.26, + "learning_rate": 3.985415581516482e-05, + "loss": 1.719, + "step": 1190 + }, + { + "epoch": 0.27, + "learning_rate": 3.985117940322941e-05, + "loss": 1.6955, + "step": 1200 + }, + { + "epoch": 0.27, + "learning_rate": 3.9848202991293996e-05, + "loss": 1.6891, + "step": 1210 + }, + { + "epoch": 0.27, + "learning_rate": 3.984522657935859e-05, + "loss": 1.7063, + "step": 1220 + }, + { + "epoch": 0.27, + "learning_rate": 3.984225016742318e-05, + "loss": 1.6837, + "step": 1230 + }, + { + "epoch": 0.28, + "learning_rate": 3.9839273755487764e-05, + "loss": 1.7061, + "step": 1240 + }, + { + "epoch": 0.28, + "learning_rate": 3.983629734355235e-05, + "loss": 1.693, + "step": 1250 + }, + { + "epoch": 0.28, + "learning_rate": 3.9833320931616944e-05, + "loss": 1.6938, + "step": 1260 + }, + { + "epoch": 0.28, + "learning_rate": 3.9830344519681524e-05, + "loss": 1.6636, + "step": 1270 + }, + { + "epoch": 0.28, + "learning_rate": 3.982736810774611e-05, + "loss": 1.6865, + "step": 1280 + }, + { + "epoch": 0.29, + "learning_rate": 3.9824391695810705e-05, + "loss": 1.682, + "step": 1290 + }, + { + "epoch": 0.29, + "learning_rate": 3.982141528387529e-05, + "loss": 1.6979, + "step": 1300 + }, + { + "epoch": 0.29, + "learning_rate": 3.981843887193988e-05, + "loss": 1.6958, + "step": 1310 + }, + { + "epoch": 0.29, + "learning_rate": 3.9815462460004466e-05, + "loss": 1.6674, + "step": 1320 + }, + { + "epoch": 0.3, + "learning_rate": 3.981248604806906e-05, + "loss": 1.6978, + "step": 1330 + }, + { + "epoch": 0.3, + "learning_rate": 3.9809509636133646e-05, + "loss": 1.6704, + "step": 1340 + }, + { + "epoch": 0.3, + "learning_rate": 3.980653322419823e-05, + "loss": 1.6825, + "step": 1350 + }, + { + "epoch": 0.3, + "learning_rate": 3.980355681226282e-05, + "loss": 1.6565, + "step": 1360 + }, + { + "epoch": 0.3, + "learning_rate": 3.980058040032741e-05, + "loss": 1.6619, + "step": 1370 + }, + { + "epoch": 0.31, + "learning_rate": 3.9797603988391994e-05, + "loss": 1.6971, + "step": 1380 + }, + { + "epoch": 0.31, + "learning_rate": 3.979462757645658e-05, + "loss": 1.6852, + "step": 1390 + }, + { + "epoch": 0.31, + "learning_rate": 3.9791651164521174e-05, + "loss": 1.6619, + "step": 1400 + }, + { + "epoch": 0.31, + "learning_rate": 3.978867475258576e-05, + "loss": 1.6702, + "step": 1410 + }, + { + "epoch": 0.32, + "learning_rate": 3.978569834065035e-05, + "loss": 1.6727, + "step": 1420 + }, + { + "epoch": 0.32, + "learning_rate": 3.9782721928714935e-05, + "loss": 1.6758, + "step": 1430 + }, + { + "epoch": 0.32, + "learning_rate": 3.977974551677953e-05, + "loss": 1.6611, + "step": 1440 + }, + { + "epoch": 0.32, + "learning_rate": 3.9776769104844116e-05, + "loss": 1.6655, + "step": 1450 + }, + { + "epoch": 0.32, + "learning_rate": 3.97737926929087e-05, + "loss": 1.6648, + "step": 1460 + }, + { + "epoch": 0.33, + "learning_rate": 3.977081628097329e-05, + "loss": 1.6597, + "step": 1470 + }, + { + "epoch": 0.33, + "learning_rate": 3.9767839869037876e-05, + "loss": 1.6605, + "step": 1480 + }, + { + "epoch": 0.33, + "learning_rate": 3.976486345710246e-05, + "loss": 1.6496, + "step": 1490 + }, + { + "epoch": 0.33, + "learning_rate": 3.976188704516706e-05, + "loss": 1.6576, + "step": 1500 + }, + { + "epoch": 0.34, + "learning_rate": 3.9758910633231644e-05, + "loss": 1.6491, + "step": 1510 + }, + { + "epoch": 0.34, + "learning_rate": 3.975593422129623e-05, + "loss": 1.6556, + "step": 1520 + }, + { + "epoch": 0.34, + "learning_rate": 3.975295780936082e-05, + "loss": 1.6661, + "step": 1530 + }, + { + "epoch": 0.34, + "learning_rate": 3.974998139742541e-05, + "loss": 1.6495, + "step": 1540 + }, + { + "epoch": 0.34, + "learning_rate": 3.974700498549e-05, + "loss": 1.6522, + "step": 1550 + }, + { + "epoch": 0.35, + "learning_rate": 3.9744028573554585e-05, + "loss": 1.6375, + "step": 1560 + }, + { + "epoch": 0.35, + "learning_rate": 3.974105216161917e-05, + "loss": 1.6383, + "step": 1570 + }, + { + "epoch": 0.35, + "learning_rate": 3.973807574968376e-05, + "loss": 1.6532, + "step": 1580 + }, + { + "epoch": 0.35, + "learning_rate": 3.9735099337748346e-05, + "loss": 1.6463, + "step": 1590 + }, + { + "epoch": 0.36, + "learning_rate": 3.973212292581293e-05, + "loss": 1.649, + "step": 1600 + }, + { + "epoch": 0.36, + "learning_rate": 3.9729146513877526e-05, + "loss": 1.6093, + "step": 1610 + }, + { + "epoch": 0.36, + "learning_rate": 3.972617010194211e-05, + "loss": 1.6211, + "step": 1620 + }, + { + "epoch": 0.36, + "learning_rate": 3.97231936900067e-05, + "loss": 1.6387, + "step": 1630 + }, + { + "epoch": 0.36, + "learning_rate": 3.972021727807129e-05, + "loss": 1.6311, + "step": 1640 + }, + { + "epoch": 0.37, + "learning_rate": 3.971724086613588e-05, + "loss": 1.6282, + "step": 1650 + }, + { + "epoch": 0.37, + "learning_rate": 3.971426445420047e-05, + "loss": 1.6344, + "step": 1660 + }, + { + "epoch": 0.37, + "learning_rate": 3.9711288042265054e-05, + "loss": 1.6427, + "step": 1670 + }, + { + "epoch": 0.37, + "learning_rate": 3.970831163032964e-05, + "loss": 1.6177, + "step": 1680 + }, + { + "epoch": 0.38, + "learning_rate": 3.970533521839423e-05, + "loss": 1.6341, + "step": 1690 + }, + { + "epoch": 0.38, + "learning_rate": 3.9702358806458815e-05, + "loss": 1.6331, + "step": 1700 + }, + { + "epoch": 0.38, + "learning_rate": 3.96993823945234e-05, + "loss": 1.6227, + "step": 1710 + }, + { + "epoch": 0.38, + "learning_rate": 3.9696405982587996e-05, + "loss": 1.6084, + "step": 1720 + }, + { + "epoch": 0.38, + "learning_rate": 3.969342957065258e-05, + "loss": 1.6176, + "step": 1730 + }, + { + "epoch": 0.39, + "learning_rate": 3.969045315871717e-05, + "loss": 1.6214, + "step": 1740 + }, + { + "epoch": 0.39, + "learning_rate": 3.9687476746781756e-05, + "loss": 1.6157, + "step": 1750 + }, + { + "epoch": 0.39, + "learning_rate": 3.968450033484635e-05, + "loss": 1.6166, + "step": 1760 + }, + { + "epoch": 0.39, + "learning_rate": 3.968152392291094e-05, + "loss": 1.6362, + "step": 1770 + }, + { + "epoch": 0.4, + "learning_rate": 3.967854751097552e-05, + "loss": 1.617, + "step": 1780 + }, + { + "epoch": 0.4, + "learning_rate": 3.967557109904011e-05, + "loss": 1.6175, + "step": 1790 + }, + { + "epoch": 0.4, + "learning_rate": 3.96725946871047e-05, + "loss": 1.6246, + "step": 1800 + }, + { + "epoch": 0.4, + "learning_rate": 3.9669618275169285e-05, + "loss": 1.6406, + "step": 1810 + }, + { + "epoch": 0.4, + "learning_rate": 3.966664186323387e-05, + "loss": 1.6243, + "step": 1820 + }, + { + "epoch": 0.41, + "learning_rate": 3.9663665451298465e-05, + "loss": 1.6223, + "step": 1830 + }, + { + "epoch": 0.41, + "learning_rate": 3.966068903936305e-05, + "loss": 1.6198, + "step": 1840 + }, + { + "epoch": 0.41, + "learning_rate": 3.965771262742764e-05, + "loss": 1.6278, + "step": 1850 + }, + { + "epoch": 0.41, + "learning_rate": 3.9654736215492226e-05, + "loss": 1.6187, + "step": 1860 + }, + { + "epoch": 0.42, + "learning_rate": 3.965175980355682e-05, + "loss": 1.6135, + "step": 1870 + }, + { + "epoch": 0.42, + "learning_rate": 3.96487833916214e-05, + "loss": 1.6055, + "step": 1880 + }, + { + "epoch": 0.42, + "learning_rate": 3.964580697968599e-05, + "loss": 1.6063, + "step": 1890 + }, + { + "epoch": 0.42, + "learning_rate": 3.964283056775058e-05, + "loss": 1.6158, + "step": 1900 + }, + { + "epoch": 0.42, + "learning_rate": 3.963985415581517e-05, + "loss": 1.6267, + "step": 1910 + }, + { + "epoch": 0.43, + "learning_rate": 3.9636877743879754e-05, + "loss": 1.5936, + "step": 1920 + }, + { + "epoch": 0.43, + "learning_rate": 3.963390133194435e-05, + "loss": 1.6001, + "step": 1930 + }, + { + "epoch": 0.43, + "learning_rate": 3.9630924920008935e-05, + "loss": 1.5773, + "step": 1940 + }, + { + "epoch": 0.43, + "learning_rate": 3.962794850807352e-05, + "loss": 1.6196, + "step": 1950 + }, + { + "epoch": 0.44, + "learning_rate": 3.962497209613811e-05, + "loss": 1.5988, + "step": 1960 + }, + { + "epoch": 0.44, + "learning_rate": 3.96219956842027e-05, + "loss": 1.606, + "step": 1970 + }, + { + "epoch": 0.44, + "learning_rate": 3.961901927226728e-05, + "loss": 1.5979, + "step": 1980 + }, + { + "epoch": 0.44, + "learning_rate": 3.961604286033187e-05, + "loss": 1.6236, + "step": 1990 + }, + { + "epoch": 0.44, + "learning_rate": 3.961306644839646e-05, + "loss": 1.6158, + "step": 2000 + }, + { + "epoch": 0.45, + "learning_rate": 3.961009003646105e-05, + "loss": 1.6075, + "step": 2010 + }, + { + "epoch": 0.45, + "learning_rate": 3.9607113624525637e-05, + "loss": 1.5907, + "step": 2020 + }, + { + "epoch": 0.45, + "learning_rate": 3.9604137212590223e-05, + "loss": 1.5795, + "step": 2030 + }, + { + "epoch": 0.45, + "learning_rate": 3.960116080065482e-05, + "loss": 1.5991, + "step": 2040 + }, + { + "epoch": 0.46, + "learning_rate": 3.9598184388719404e-05, + "loss": 1.6036, + "step": 2050 + }, + { + "epoch": 0.46, + "learning_rate": 3.959520797678399e-05, + "loss": 1.5972, + "step": 2060 + }, + { + "epoch": 0.46, + "learning_rate": 3.959223156484858e-05, + "loss": 1.5929, + "step": 2070 + }, + { + "epoch": 0.46, + "learning_rate": 3.958925515291317e-05, + "loss": 1.6135, + "step": 2080 + }, + { + "epoch": 0.46, + "learning_rate": 3.958627874097775e-05, + "loss": 1.5965, + "step": 2090 + }, + { + "epoch": 0.47, + "learning_rate": 3.958330232904234e-05, + "loss": 1.5721, + "step": 2100 + }, + { + "epoch": 0.47, + "learning_rate": 3.958032591710693e-05, + "loss": 1.6161, + "step": 2110 + }, + { + "epoch": 0.47, + "learning_rate": 3.957734950517152e-05, + "loss": 1.5773, + "step": 2120 + }, + { + "epoch": 0.47, + "learning_rate": 3.9574373093236106e-05, + "loss": 1.5842, + "step": 2130 + }, + { + "epoch": 0.48, + "learning_rate": 3.957139668130069e-05, + "loss": 1.5768, + "step": 2140 + }, + { + "epoch": 0.48, + "learning_rate": 3.9568420269365287e-05, + "loss": 1.5952, + "step": 2150 + }, + { + "epoch": 0.48, + "learning_rate": 3.9565443857429873e-05, + "loss": 1.5914, + "step": 2160 + }, + { + "epoch": 0.48, + "learning_rate": 3.956246744549446e-05, + "loss": 1.5858, + "step": 2170 + }, + { + "epoch": 0.48, + "learning_rate": 3.955949103355905e-05, + "loss": 1.5981, + "step": 2180 + }, + { + "epoch": 0.49, + "learning_rate": 3.9556514621623634e-05, + "loss": 1.5979, + "step": 2190 + }, + { + "epoch": 0.49, + "learning_rate": 3.955353820968822e-05, + "loss": 1.5776, + "step": 2200 + }, + { + "epoch": 0.49, + "learning_rate": 3.955056179775281e-05, + "loss": 1.5858, + "step": 2210 + }, + { + "epoch": 0.49, + "learning_rate": 3.95475853858174e-05, + "loss": 1.5765, + "step": 2220 + }, + { + "epoch": 0.5, + "learning_rate": 3.954460897388199e-05, + "loss": 1.5679, + "step": 2230 + }, + { + "epoch": 0.5, + "learning_rate": 3.9541632561946575e-05, + "loss": 1.5826, + "step": 2240 + }, + { + "epoch": 0.5, + "learning_rate": 3.953865615001116e-05, + "loss": 1.5584, + "step": 2250 + }, + { + "epoch": 0.5, + "learning_rate": 3.9535679738075756e-05, + "loss": 1.5761, + "step": 2260 + }, + { + "epoch": 0.5, + "learning_rate": 3.953270332614034e-05, + "loss": 1.5726, + "step": 2270 + }, + { + "epoch": 0.51, + "learning_rate": 3.952972691420493e-05, + "loss": 1.5794, + "step": 2280 + }, + { + "epoch": 0.51, + "learning_rate": 3.952675050226952e-05, + "loss": 1.5713, + "step": 2290 + }, + { + "epoch": 0.51, + "learning_rate": 3.9523774090334104e-05, + "loss": 1.5826, + "step": 2300 + }, + { + "epoch": 0.51, + "learning_rate": 3.952079767839869e-05, + "loss": 1.5817, + "step": 2310 + }, + { + "epoch": 0.52, + "learning_rate": 3.9517821266463284e-05, + "loss": 1.5893, + "step": 2320 + }, + { + "epoch": 0.52, + "learning_rate": 3.951484485452787e-05, + "loss": 1.5868, + "step": 2330 + }, + { + "epoch": 0.52, + "learning_rate": 3.951186844259246e-05, + "loss": 1.5877, + "step": 2340 + }, + { + "epoch": 0.52, + "learning_rate": 3.9508892030657045e-05, + "loss": 1.5833, + "step": 2350 + }, + { + "epoch": 0.52, + "learning_rate": 3.950591561872164e-05, + "loss": 1.5799, + "step": 2360 + }, + { + "epoch": 0.53, + "learning_rate": 3.9502939206786225e-05, + "loss": 1.5572, + "step": 2370 + }, + { + "epoch": 0.53, + "learning_rate": 3.949996279485081e-05, + "loss": 1.556, + "step": 2380 + }, + { + "epoch": 0.53, + "learning_rate": 3.94969863829154e-05, + "loss": 1.5751, + "step": 2390 + }, + { + "epoch": 0.53, + "learning_rate": 3.9494009970979986e-05, + "loss": 1.5972, + "step": 2400 + }, + { + "epoch": 0.54, + "learning_rate": 3.949103355904457e-05, + "loss": 1.5636, + "step": 2410 + }, + { + "epoch": 0.54, + "learning_rate": 3.948805714710916e-05, + "loss": 1.5675, + "step": 2420 + }, + { + "epoch": 0.54, + "learning_rate": 3.9485080735173754e-05, + "loss": 1.5717, + "step": 2430 + }, + { + "epoch": 0.54, + "learning_rate": 3.948210432323834e-05, + "loss": 1.5882, + "step": 2440 + }, + { + "epoch": 0.54, + "learning_rate": 3.947912791130293e-05, + "loss": 1.57, + "step": 2450 + }, + { + "epoch": 0.55, + "learning_rate": 3.9476151499367514e-05, + "loss": 1.5653, + "step": 2460 + }, + { + "epoch": 0.55, + "learning_rate": 3.947317508743211e-05, + "loss": 1.5688, + "step": 2470 + }, + { + "epoch": 0.55, + "learning_rate": 3.9470198675496695e-05, + "loss": 1.5653, + "step": 2480 + }, + { + "epoch": 0.55, + "learning_rate": 3.946722226356128e-05, + "loss": 1.5763, + "step": 2490 + }, + { + "epoch": 0.56, + "learning_rate": 3.946424585162587e-05, + "loss": 1.5734, + "step": 2500 + }, + { + "epoch": 0.56, + "learning_rate": 3.9461269439690456e-05, + "loss": 1.5544, + "step": 2510 + }, + { + "epoch": 0.56, + "learning_rate": 3.945829302775504e-05, + "loss": 1.5677, + "step": 2520 + }, + { + "epoch": 0.56, + "learning_rate": 3.945531661581963e-05, + "loss": 1.566, + "step": 2530 + }, + { + "epoch": 0.56, + "learning_rate": 3.945234020388422e-05, + "loss": 1.5546, + "step": 2540 + }, + { + "epoch": 0.57, + "learning_rate": 3.944936379194881e-05, + "loss": 1.5657, + "step": 2550 + }, + { + "epoch": 0.57, + "learning_rate": 3.94463873800134e-05, + "loss": 1.5427, + "step": 2560 + }, + { + "epoch": 0.57, + "learning_rate": 3.9443410968077984e-05, + "loss": 1.5625, + "step": 2570 + }, + { + "epoch": 0.57, + "learning_rate": 3.944043455614258e-05, + "loss": 1.5843, + "step": 2580 + }, + { + "epoch": 0.58, + "learning_rate": 3.9437458144207164e-05, + "loss": 1.5627, + "step": 2590 + }, + { + "epoch": 0.58, + "learning_rate": 3.9434481732271744e-05, + "loss": 1.5557, + "step": 2600 + }, + { + "epoch": 0.58, + "learning_rate": 3.943150532033634e-05, + "loss": 1.5608, + "step": 2610 + }, + { + "epoch": 0.58, + "learning_rate": 3.9428528908400925e-05, + "loss": 1.5534, + "step": 2620 + }, + { + "epoch": 0.58, + "learning_rate": 3.942555249646551e-05, + "loss": 1.5554, + "step": 2630 + }, + { + "epoch": 0.59, + "learning_rate": 3.94225760845301e-05, + "loss": 1.5498, + "step": 2640 + }, + { + "epoch": 0.59, + "learning_rate": 3.941959967259469e-05, + "loss": 1.5577, + "step": 2650 + }, + { + "epoch": 0.59, + "learning_rate": 3.941662326065928e-05, + "loss": 1.5533, + "step": 2660 + }, + { + "epoch": 0.59, + "learning_rate": 3.9413646848723866e-05, + "loss": 1.5644, + "step": 2670 + }, + { + "epoch": 0.6, + "learning_rate": 3.941067043678845e-05, + "loss": 1.5648, + "step": 2680 + }, + { + "epoch": 0.6, + "learning_rate": 3.940769402485305e-05, + "loss": 1.5518, + "step": 2690 + }, + { + "epoch": 0.6, + "learning_rate": 3.940471761291763e-05, + "loss": 1.5476, + "step": 2700 + }, + { + "epoch": 0.6, + "learning_rate": 3.940174120098222e-05, + "loss": 1.5732, + "step": 2710 + }, + { + "epoch": 0.6, + "learning_rate": 3.939876478904681e-05, + "loss": 1.5329, + "step": 2720 + }, + { + "epoch": 0.61, + "learning_rate": 3.9395788377111394e-05, + "loss": 1.5428, + "step": 2730 + }, + { + "epoch": 0.61, + "learning_rate": 3.939281196517598e-05, + "loss": 1.56, + "step": 2740 + }, + { + "epoch": 0.61, + "learning_rate": 3.9389835553240575e-05, + "loss": 1.5553, + "step": 2750 + }, + { + "epoch": 0.61, + "learning_rate": 3.938685914130516e-05, + "loss": 1.55, + "step": 2760 + }, + { + "epoch": 0.62, + "learning_rate": 3.938388272936975e-05, + "loss": 1.5454, + "step": 2770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9380906317434336e-05, + "loss": 1.5549, + "step": 2780 + }, + { + "epoch": 0.62, + "learning_rate": 3.937792990549893e-05, + "loss": 1.5397, + "step": 2790 + }, + { + "epoch": 0.62, + "learning_rate": 3.937495349356351e-05, + "loss": 1.5402, + "step": 2800 + }, + { + "epoch": 0.62, + "learning_rate": 3.9371977081628096e-05, + "loss": 1.5646, + "step": 2810 + }, + { + "epoch": 0.63, + "learning_rate": 3.936900066969269e-05, + "loss": 1.5459, + "step": 2820 + }, + { + "epoch": 0.63, + "learning_rate": 3.936602425775728e-05, + "loss": 1.539, + "step": 2830 + }, + { + "epoch": 0.63, + "learning_rate": 3.9363047845821864e-05, + "loss": 1.5546, + "step": 2840 + }, + { + "epoch": 0.63, + "learning_rate": 3.936007143388645e-05, + "loss": 1.5612, + "step": 2850 + }, + { + "epoch": 0.64, + "learning_rate": 3.9357095021951044e-05, + "loss": 1.5497, + "step": 2860 + }, + { + "epoch": 0.64, + "learning_rate": 3.935411861001563e-05, + "loss": 1.5549, + "step": 2870 + }, + { + "epoch": 0.64, + "learning_rate": 3.935114219808022e-05, + "loss": 1.553, + "step": 2880 + }, + { + "epoch": 0.64, + "learning_rate": 3.9348165786144805e-05, + "loss": 1.5347, + "step": 2890 + }, + { + "epoch": 0.64, + "learning_rate": 3.93451893742094e-05, + "loss": 1.5542, + "step": 2900 + }, + { + "epoch": 0.65, + "learning_rate": 3.934221296227398e-05, + "loss": 1.5547, + "step": 2910 + }, + { + "epoch": 0.65, + "learning_rate": 3.9339236550338566e-05, + "loss": 1.5431, + "step": 2920 + }, + { + "epoch": 0.65, + "learning_rate": 3.933626013840316e-05, + "loss": 1.5507, + "step": 2930 + }, + { + "epoch": 0.65, + "learning_rate": 3.9333283726467746e-05, + "loss": 1.5369, + "step": 2940 + }, + { + "epoch": 0.66, + "learning_rate": 3.933030731453233e-05, + "loss": 1.5259, + "step": 2950 + }, + { + "epoch": 0.66, + "learning_rate": 3.932733090259692e-05, + "loss": 1.5419, + "step": 2960 + }, + { + "epoch": 0.66, + "learning_rate": 3.9324354490661514e-05, + "loss": 1.546, + "step": 2970 + }, + { + "epoch": 0.66, + "learning_rate": 3.93213780787261e-05, + "loss": 1.5264, + "step": 2980 + }, + { + "epoch": 0.66, + "learning_rate": 3.931840166679069e-05, + "loss": 1.5362, + "step": 2990 + }, + { + "epoch": 0.67, + "learning_rate": 3.9315425254855275e-05, + "loss": 1.5343, + "step": 3000 + }, + { + "epoch": 0.67, + "learning_rate": 3.931244884291986e-05, + "loss": 1.534, + "step": 3010 + }, + { + "epoch": 0.67, + "learning_rate": 3.930947243098445e-05, + "loss": 1.5508, + "step": 3020 + }, + { + "epoch": 0.67, + "learning_rate": 3.9306496019049035e-05, + "loss": 1.5422, + "step": 3030 + }, + { + "epoch": 0.68, + "learning_rate": 3.930351960711363e-05, + "loss": 1.5345, + "step": 3040 + }, + { + "epoch": 0.68, + "learning_rate": 3.9300543195178216e-05, + "loss": 1.5369, + "step": 3050 + }, + { + "epoch": 0.68, + "learning_rate": 3.92975667832428e-05, + "loss": 1.5227, + "step": 3060 + }, + { + "epoch": 0.68, + "learning_rate": 3.929459037130739e-05, + "loss": 1.5311, + "step": 3070 + }, + { + "epoch": 0.68, + "learning_rate": 3.929161395937198e-05, + "loss": 1.5557, + "step": 3080 + }, + { + "epoch": 0.69, + "learning_rate": 3.928863754743657e-05, + "loss": 1.5382, + "step": 3090 + }, + { + "epoch": 0.69, + "learning_rate": 3.928566113550116e-05, + "loss": 1.5438, + "step": 3100 + }, + { + "epoch": 0.69, + "learning_rate": 3.9282684723565744e-05, + "loss": 1.5174, + "step": 3110 + }, + { + "epoch": 0.69, + "learning_rate": 3.927970831163033e-05, + "loss": 1.5197, + "step": 3120 + }, + { + "epoch": 0.7, + "learning_rate": 3.927673189969492e-05, + "loss": 1.5343, + "step": 3130 + }, + { + "epoch": 0.7, + "learning_rate": 3.927375548775951e-05, + "loss": 1.5259, + "step": 3140 + }, + { + "epoch": 0.7, + "learning_rate": 3.92707790758241e-05, + "loss": 1.5244, + "step": 3150 + }, + { + "epoch": 0.7, + "learning_rate": 3.9267802663888685e-05, + "loss": 1.531, + "step": 3160 + }, + { + "epoch": 0.7, + "learning_rate": 3.926482625195327e-05, + "loss": 1.5356, + "step": 3170 + }, + { + "epoch": 0.71, + "learning_rate": 3.9261849840017866e-05, + "loss": 1.5448, + "step": 3180 + }, + { + "epoch": 0.71, + "learning_rate": 3.925887342808245e-05, + "loss": 1.5287, + "step": 3190 + }, + { + "epoch": 0.71, + "learning_rate": 3.925589701614704e-05, + "loss": 1.5417, + "step": 3200 + }, + { + "epoch": 0.71, + "learning_rate": 3.9252920604211627e-05, + "loss": 1.5189, + "step": 3210 + }, + { + "epoch": 0.72, + "learning_rate": 3.9249944192276213e-05, + "loss": 1.5303, + "step": 3220 + }, + { + "epoch": 0.72, + "learning_rate": 3.92469677803408e-05, + "loss": 1.5242, + "step": 3230 + }, + { + "epoch": 0.72, + "learning_rate": 3.924399136840539e-05, + "loss": 1.5226, + "step": 3240 + }, + { + "epoch": 0.72, + "learning_rate": 3.924101495646998e-05, + "loss": 1.5257, + "step": 3250 + }, + { + "epoch": 0.72, + "learning_rate": 3.923803854453457e-05, + "loss": 1.5339, + "step": 3260 + }, + { + "epoch": 0.73, + "learning_rate": 3.9235062132599155e-05, + "loss": 1.5305, + "step": 3270 + }, + { + "epoch": 0.73, + "learning_rate": 3.923208572066374e-05, + "loss": 1.5434, + "step": 3280 + }, + { + "epoch": 0.73, + "learning_rate": 3.9229109308728335e-05, + "loss": 1.5342, + "step": 3290 + }, + { + "epoch": 0.73, + "learning_rate": 3.922613289679292e-05, + "loss": 1.5278, + "step": 3300 + }, + { + "epoch": 0.74, + "learning_rate": 3.922315648485751e-05, + "loss": 1.5316, + "step": 3310 + }, + { + "epoch": 0.74, + "learning_rate": 3.9220180072922096e-05, + "loss": 1.519, + "step": 3320 + }, + { + "epoch": 0.74, + "learning_rate": 3.921720366098668e-05, + "loss": 1.5412, + "step": 3330 + }, + { + "epoch": 0.74, + "learning_rate": 3.921422724905127e-05, + "loss": 1.5168, + "step": 3340 + }, + { + "epoch": 0.74, + "learning_rate": 3.921125083711586e-05, + "loss": 1.5461, + "step": 3350 + }, + { + "epoch": 0.75, + "learning_rate": 3.920827442518045e-05, + "loss": 1.523, + "step": 3360 + }, + { + "epoch": 0.75, + "learning_rate": 3.920529801324504e-05, + "loss": 1.5262, + "step": 3370 + }, + { + "epoch": 0.75, + "learning_rate": 3.9202321601309624e-05, + "loss": 1.5234, + "step": 3380 + }, + { + "epoch": 0.75, + "learning_rate": 3.919934518937421e-05, + "loss": 1.5145, + "step": 3390 + }, + { + "epoch": 0.76, + "learning_rate": 3.9196368777438805e-05, + "loss": 1.5092, + "step": 3400 + }, + { + "epoch": 0.76, + "learning_rate": 3.919339236550339e-05, + "loss": 1.5192, + "step": 3410 + }, + { + "epoch": 0.76, + "learning_rate": 3.919041595356797e-05, + "loss": 1.5141, + "step": 3420 + }, + { + "epoch": 0.76, + "learning_rate": 3.9187439541632565e-05, + "loss": 1.5101, + "step": 3430 + }, + { + "epoch": 0.76, + "learning_rate": 3.918446312969715e-05, + "loss": 1.5207, + "step": 3440 + }, + { + "epoch": 0.77, + "learning_rate": 3.918148671776174e-05, + "loss": 1.518, + "step": 3450 + }, + { + "epoch": 0.77, + "learning_rate": 3.9178510305826326e-05, + "loss": 1.5381, + "step": 3460 + }, + { + "epoch": 0.77, + "learning_rate": 3.917553389389092e-05, + "loss": 1.5193, + "step": 3470 + }, + { + "epoch": 0.77, + "learning_rate": 3.917255748195551e-05, + "loss": 1.5152, + "step": 3480 + }, + { + "epoch": 0.78, + "learning_rate": 3.9169581070020094e-05, + "loss": 1.5376, + "step": 3490 + }, + { + "epoch": 0.78, + "learning_rate": 3.916660465808468e-05, + "loss": 1.5163, + "step": 3500 + }, + { + "epoch": 0.78, + "learning_rate": 3.9163628246149274e-05, + "loss": 1.5132, + "step": 3510 + }, + { + "epoch": 0.78, + "learning_rate": 3.9160651834213854e-05, + "loss": 1.5107, + "step": 3520 + }, + { + "epoch": 0.78, + "learning_rate": 3.915767542227845e-05, + "loss": 1.5345, + "step": 3530 + }, + { + "epoch": 0.79, + "learning_rate": 3.9154699010343035e-05, + "loss": 1.5292, + "step": 3540 + }, + { + "epoch": 0.79, + "learning_rate": 3.915172259840762e-05, + "loss": 1.5181, + "step": 3550 + }, + { + "epoch": 0.79, + "learning_rate": 3.914874618647221e-05, + "loss": 1.5143, + "step": 3560 + }, + { + "epoch": 0.79, + "learning_rate": 3.91457697745368e-05, + "loss": 1.4955, + "step": 3570 + }, + { + "epoch": 0.8, + "learning_rate": 3.914279336260139e-05, + "loss": 1.5133, + "step": 3580 + }, + { + "epoch": 0.8, + "learning_rate": 3.9139816950665976e-05, + "loss": 1.5087, + "step": 3590 + }, + { + "epoch": 0.8, + "learning_rate": 3.913684053873056e-05, + "loss": 1.5068, + "step": 3600 + }, + { + "epoch": 0.8, + "learning_rate": 3.913386412679516e-05, + "loss": 1.5199, + "step": 3610 + }, + { + "epoch": 0.8, + "learning_rate": 3.913088771485974e-05, + "loss": 1.5251, + "step": 3620 + }, + { + "epoch": 0.81, + "learning_rate": 3.9127911302924324e-05, + "loss": 1.5158, + "step": 3630 + }, + { + "epoch": 0.81, + "learning_rate": 3.912493489098892e-05, + "loss": 1.5103, + "step": 3640 + }, + { + "epoch": 0.81, + "learning_rate": 3.9121958479053504e-05, + "loss": 1.5286, + "step": 3650 + }, + { + "epoch": 0.81, + "learning_rate": 3.911898206711809e-05, + "loss": 1.5275, + "step": 3660 + }, + { + "epoch": 0.82, + "learning_rate": 3.911600565518268e-05, + "loss": 1.5083, + "step": 3670 + }, + { + "epoch": 0.82, + "learning_rate": 3.911302924324727e-05, + "loss": 1.525, + "step": 3680 + }, + { + "epoch": 0.82, + "learning_rate": 3.911005283131186e-05, + "loss": 1.5235, + "step": 3690 + }, + { + "epoch": 0.82, + "learning_rate": 3.9107076419376446e-05, + "loss": 1.5002, + "step": 3700 + }, + { + "epoch": 0.82, + "learning_rate": 3.910410000744103e-05, + "loss": 1.5282, + "step": 3710 + }, + { + "epoch": 0.83, + "learning_rate": 3.910112359550562e-05, + "loss": 1.5188, + "step": 3720 + }, + { + "epoch": 0.83, + "learning_rate": 3.9098147183570206e-05, + "loss": 1.5043, + "step": 3730 + }, + { + "epoch": 0.83, + "learning_rate": 3.909517077163479e-05, + "loss": 1.5069, + "step": 3740 + }, + { + "epoch": 0.83, + "learning_rate": 3.909219435969939e-05, + "loss": 1.5304, + "step": 3750 + }, + { + "epoch": 0.83, + "learning_rate": 3.9089217947763974e-05, + "loss": 1.5199, + "step": 3760 + }, + { + "epoch": 0.84, + "learning_rate": 3.908624153582856e-05, + "loss": 1.5137, + "step": 3770 + }, + { + "epoch": 0.84, + "learning_rate": 3.908326512389315e-05, + "loss": 1.5223, + "step": 3780 + }, + { + "epoch": 0.84, + "learning_rate": 3.908028871195774e-05, + "loss": 1.517, + "step": 3790 + }, + { + "epoch": 0.84, + "learning_rate": 3.907731230002233e-05, + "loss": 1.5034, + "step": 3800 + }, + { + "epoch": 0.85, + "learning_rate": 3.9074335888086915e-05, + "loss": 1.517, + "step": 3810 + }, + { + "epoch": 0.85, + "learning_rate": 3.90713594761515e-05, + "loss": 1.5099, + "step": 3820 + }, + { + "epoch": 0.85, + "learning_rate": 3.906838306421609e-05, + "loss": 1.498, + "step": 3830 + }, + { + "epoch": 0.85, + "learning_rate": 3.9065406652280676e-05, + "loss": 1.5118, + "step": 3840 + }, + { + "epoch": 0.85, + "learning_rate": 3.906243024034526e-05, + "loss": 1.4996, + "step": 3850 + }, + { + "epoch": 0.86, + "learning_rate": 3.9059453828409856e-05, + "loss": 1.5113, + "step": 3860 + }, + { + "epoch": 0.86, + "learning_rate": 3.905647741647444e-05, + "loss": 1.494, + "step": 3870 + }, + { + "epoch": 0.86, + "learning_rate": 3.905350100453903e-05, + "loss": 1.5024, + "step": 3880 + }, + { + "epoch": 0.86, + "learning_rate": 3.905052459260362e-05, + "loss": 1.5118, + "step": 3890 + }, + { + "epoch": 0.87, + "learning_rate": 3.904754818066821e-05, + "loss": 1.488, + "step": 3900 + }, + { + "epoch": 0.87, + "learning_rate": 3.90445717687328e-05, + "loss": 1.5065, + "step": 3910 + }, + { + "epoch": 0.87, + "learning_rate": 3.9041595356797385e-05, + "loss": 1.5061, + "step": 3920 + }, + { + "epoch": 0.87, + "learning_rate": 3.903861894486197e-05, + "loss": 1.508, + "step": 3930 + }, + { + "epoch": 0.87, + "learning_rate": 3.903564253292656e-05, + "loss": 1.4942, + "step": 3940 + }, + { + "epoch": 0.88, + "learning_rate": 3.9032666120991145e-05, + "loss": 1.5033, + "step": 3950 + }, + { + "epoch": 0.88, + "learning_rate": 3.902968970905574e-05, + "loss": 1.5048, + "step": 3960 + }, + { + "epoch": 0.88, + "learning_rate": 3.9026713297120326e-05, + "loss": 1.4964, + "step": 3970 + }, + { + "epoch": 0.88, + "learning_rate": 3.902373688518491e-05, + "loss": 1.5118, + "step": 3980 + }, + { + "epoch": 0.89, + "learning_rate": 3.90207604732495e-05, + "loss": 1.5213, + "step": 3990 + }, + { + "epoch": 0.89, + "learning_rate": 3.901778406131409e-05, + "loss": 1.5256, + "step": 4000 + }, + { + "epoch": 0.89, + "learning_rate": 3.901480764937868e-05, + "loss": 1.4907, + "step": 4010 + }, + { + "epoch": 0.89, + "learning_rate": 3.901183123744327e-05, + "loss": 1.5063, + "step": 4020 + }, + { + "epoch": 0.89, + "learning_rate": 3.9008854825507854e-05, + "loss": 1.5242, + "step": 4030 + }, + { + "epoch": 0.9, + "learning_rate": 3.900587841357244e-05, + "loss": 1.4974, + "step": 4040 + }, + { + "epoch": 0.9, + "learning_rate": 3.900290200163703e-05, + "loss": 1.5151, + "step": 4050 + }, + { + "epoch": 0.9, + "learning_rate": 3.8999925589701615e-05, + "loss": 1.4985, + "step": 4060 + }, + { + "epoch": 0.9, + "learning_rate": 3.899694917776621e-05, + "loss": 1.4966, + "step": 4070 + }, + { + "epoch": 0.91, + "learning_rate": 3.8993972765830795e-05, + "loss": 1.4988, + "step": 4080 + }, + { + "epoch": 0.91, + "learning_rate": 3.899099635389538e-05, + "loss": 1.4973, + "step": 4090 + }, + { + "epoch": 0.91, + "learning_rate": 3.898801994195997e-05, + "loss": 1.498, + "step": 4100 + }, + { + "epoch": 0.91, + "learning_rate": 3.898504353002456e-05, + "loss": 1.4953, + "step": 4110 + }, + { + "epoch": 0.91, + "learning_rate": 3.898206711808915e-05, + "loss": 1.5003, + "step": 4120 + }, + { + "epoch": 0.92, + "learning_rate": 3.897909070615373e-05, + "loss": 1.5168, + "step": 4130 + }, + { + "epoch": 0.92, + "learning_rate": 3.897611429421832e-05, + "loss": 1.5111, + "step": 4140 + }, + { + "epoch": 0.92, + "learning_rate": 3.897313788228291e-05, + "loss": 1.498, + "step": 4150 + }, + { + "epoch": 0.92, + "learning_rate": 3.89701614703475e-05, + "loss": 1.4998, + "step": 4160 + }, + { + "epoch": 0.93, + "learning_rate": 3.8967185058412084e-05, + "loss": 1.495, + "step": 4170 + }, + { + "epoch": 0.93, + "learning_rate": 3.896420864647668e-05, + "loss": 1.4861, + "step": 4180 + }, + { + "epoch": 0.93, + "learning_rate": 3.8961232234541265e-05, + "loss": 1.4928, + "step": 4190 + }, + { + "epoch": 0.93, + "learning_rate": 3.895825582260585e-05, + "loss": 1.4995, + "step": 4200 + }, + { + "epoch": 0.93, + "learning_rate": 3.895527941067044e-05, + "loss": 1.4965, + "step": 4210 + }, + { + "epoch": 0.94, + "learning_rate": 3.895230299873503e-05, + "loss": 1.4994, + "step": 4220 + }, + { + "epoch": 0.94, + "learning_rate": 3.894932658679962e-05, + "loss": 1.4907, + "step": 4230 + }, + { + "epoch": 0.94, + "learning_rate": 3.8946350174864206e-05, + "loss": 1.5004, + "step": 4240 + }, + { + "epoch": 0.94, + "learning_rate": 3.894337376292879e-05, + "loss": 1.4992, + "step": 4250 + }, + { + "epoch": 0.95, + "learning_rate": 3.894039735099338e-05, + "loss": 1.503, + "step": 4260 + }, + { + "epoch": 0.95, + "learning_rate": 3.893742093905797e-05, + "loss": 1.4895, + "step": 4270 + }, + { + "epoch": 0.95, + "learning_rate": 3.8934444527122554e-05, + "loss": 1.4848, + "step": 4280 + }, + { + "epoch": 0.95, + "learning_rate": 3.893146811518715e-05, + "loss": 1.4815, + "step": 4290 + }, + { + "epoch": 0.95, + "learning_rate": 3.8928491703251734e-05, + "loss": 1.4897, + "step": 4300 + }, + { + "epoch": 0.96, + "learning_rate": 3.892551529131632e-05, + "loss": 1.4727, + "step": 4310 + }, + { + "epoch": 0.96, + "learning_rate": 3.892253887938091e-05, + "loss": 1.4977, + "step": 4320 + }, + { + "epoch": 0.96, + "learning_rate": 3.89195624674455e-05, + "loss": 1.5028, + "step": 4330 + }, + { + "epoch": 0.96, + "learning_rate": 3.891658605551008e-05, + "loss": 1.4847, + "step": 4340 + }, + { + "epoch": 0.97, + "learning_rate": 3.8913609643574675e-05, + "loss": 1.4878, + "step": 4350 + }, + { + "epoch": 0.97, + "learning_rate": 3.891063323163926e-05, + "loss": 1.478, + "step": 4360 + }, + { + "epoch": 0.97, + "learning_rate": 3.890765681970385e-05, + "loss": 1.494, + "step": 4370 + }, + { + "epoch": 0.97, + "learning_rate": 3.8904680407768436e-05, + "loss": 1.4794, + "step": 4380 + }, + { + "epoch": 0.97, + "learning_rate": 3.890170399583303e-05, + "loss": 1.4735, + "step": 4390 + }, + { + "epoch": 0.98, + "learning_rate": 3.889872758389762e-05, + "loss": 1.4782, + "step": 4400 + }, + { + "epoch": 0.98, + "learning_rate": 3.8895751171962204e-05, + "loss": 1.4703, + "step": 4410 + }, + { + "epoch": 0.98, + "learning_rate": 3.889277476002679e-05, + "loss": 1.5031, + "step": 4420 + }, + { + "epoch": 0.98, + "learning_rate": 3.8889798348091384e-05, + "loss": 1.4813, + "step": 4430 + }, + { + "epoch": 0.99, + "learning_rate": 3.8886821936155964e-05, + "loss": 1.4947, + "step": 4440 + }, + { + "epoch": 0.99, + "learning_rate": 3.888384552422055e-05, + "loss": 1.4931, + "step": 4450 + }, + { + "epoch": 0.99, + "learning_rate": 3.8880869112285145e-05, + "loss": 1.4838, + "step": 4460 + }, + { + "epoch": 0.99, + "learning_rate": 3.887789270034973e-05, + "loss": 1.4984, + "step": 4470 + }, + { + "epoch": 0.99, + "learning_rate": 3.887491628841432e-05, + "loss": 1.4859, + "step": 4480 + }, + { + "epoch": 1.0, + "learning_rate": 3.8871939876478906e-05, + "loss": 1.4872, + "step": 4490 + }, + { + "epoch": 1.0, + "learning_rate": 3.88689634645435e-05, + "loss": 1.4909, + "step": 4500 + }, + { + "epoch": 1.0, + "eval_cer": 4.69017094017094, + "eval_loss": 1.3148901462554932, + "eval_runtime": 5.6408, + "eval_samples_per_second": 1.773, + "eval_steps_per_second": 0.177, + "eval_wer": 1.1168831168831168, + "step": 4503 + }, + { + "epoch": 1.0, + "learning_rate": 3.8865987052608086e-05, + "loss": 1.4926, + "step": 4510 + }, + { + "epoch": 1.0, + "learning_rate": 3.886301064067267e-05, + "loss": 1.4731, + "step": 4520 + }, + { + "epoch": 1.01, + "learning_rate": 3.886003422873726e-05, + "loss": 1.4858, + "step": 4530 + }, + { + "epoch": 1.01, + "learning_rate": 3.885705781680185e-05, + "loss": 1.4486, + "step": 4540 + }, + { + "epoch": 1.01, + "learning_rate": 3.8854081404866434e-05, + "loss": 1.4782, + "step": 4550 + }, + { + "epoch": 1.01, + "learning_rate": 3.885110499293102e-05, + "loss": 1.4846, + "step": 4560 + }, + { + "epoch": 1.01, + "learning_rate": 3.8848128580995614e-05, + "loss": 1.47, + "step": 4570 + }, + { + "epoch": 1.02, + "learning_rate": 3.88451521690602e-05, + "loss": 1.4743, + "step": 4580 + }, + { + "epoch": 1.02, + "learning_rate": 3.884217575712479e-05, + "loss": 1.4752, + "step": 4590 + }, + { + "epoch": 1.02, + "learning_rate": 3.8839199345189375e-05, + "loss": 1.475, + "step": 4600 + }, + { + "epoch": 1.02, + "learning_rate": 3.883622293325397e-05, + "loss": 1.4657, + "step": 4610 + }, + { + "epoch": 1.03, + "learning_rate": 3.8833246521318556e-05, + "loss": 1.4593, + "step": 4620 + }, + { + "epoch": 1.03, + "learning_rate": 3.883027010938314e-05, + "loss": 1.4578, + "step": 4630 + }, + { + "epoch": 1.03, + "learning_rate": 3.882729369744773e-05, + "loss": 1.4776, + "step": 4640 + }, + { + "epoch": 1.03, + "learning_rate": 3.8824317285512316e-05, + "loss": 1.4621, + "step": 4650 + }, + { + "epoch": 1.03, + "learning_rate": 3.88213408735769e-05, + "loss": 1.4662, + "step": 4660 + }, + { + "epoch": 1.04, + "learning_rate": 3.88183644616415e-05, + "loss": 1.4679, + "step": 4670 + }, + { + "epoch": 1.04, + "learning_rate": 3.8815388049706084e-05, + "loss": 1.4585, + "step": 4680 + }, + { + "epoch": 1.04, + "learning_rate": 3.881241163777067e-05, + "loss": 1.465, + "step": 4690 + }, + { + "epoch": 1.04, + "learning_rate": 3.880943522583526e-05, + "loss": 1.4631, + "step": 4700 + }, + { + "epoch": 1.05, + "learning_rate": 3.8806458813899844e-05, + "loss": 1.4629, + "step": 4710 + }, + { + "epoch": 1.05, + "learning_rate": 3.880348240196444e-05, + "loss": 1.4545, + "step": 4720 + }, + { + "epoch": 1.05, + "learning_rate": 3.8800505990029025e-05, + "loss": 1.459, + "step": 4730 + }, + { + "epoch": 1.05, + "learning_rate": 3.879752957809361e-05, + "loss": 1.4657, + "step": 4740 + }, + { + "epoch": 1.05, + "learning_rate": 3.87945531661582e-05, + "loss": 1.4664, + "step": 4750 + }, + { + "epoch": 1.06, + "learning_rate": 3.8791576754222786e-05, + "loss": 1.444, + "step": 4760 + }, + { + "epoch": 1.06, + "learning_rate": 3.878860034228737e-05, + "loss": 1.4447, + "step": 4770 + }, + { + "epoch": 1.06, + "learning_rate": 3.8785623930351966e-05, + "loss": 1.4506, + "step": 4780 + }, + { + "epoch": 1.06, + "learning_rate": 3.878264751841655e-05, + "loss": 1.4517, + "step": 4790 + }, + { + "epoch": 1.07, + "learning_rate": 3.877967110648114e-05, + "loss": 1.4575, + "step": 4800 + }, + { + "epoch": 1.07, + "learning_rate": 3.877669469454573e-05, + "loss": 1.469, + "step": 4810 + }, + { + "epoch": 1.07, + "learning_rate": 3.877371828261032e-05, + "loss": 1.4533, + "step": 4820 + }, + { + "epoch": 1.07, + "learning_rate": 3.877074187067491e-05, + "loss": 1.4464, + "step": 4830 + }, + { + "epoch": 1.07, + "learning_rate": 3.8767765458739494e-05, + "loss": 1.4299, + "step": 4840 + }, + { + "epoch": 1.08, + "learning_rate": 3.876478904680408e-05, + "loss": 1.4515, + "step": 4850 + }, + { + "epoch": 1.08, + "learning_rate": 3.876181263486867e-05, + "loss": 1.4467, + "step": 4860 + }, + { + "epoch": 1.08, + "learning_rate": 3.8758836222933255e-05, + "loss": 1.4511, + "step": 4870 + }, + { + "epoch": 1.08, + "learning_rate": 3.875585981099784e-05, + "loss": 1.4352, + "step": 4880 + }, + { + "epoch": 1.09, + "learning_rate": 3.8752883399062436e-05, + "loss": 1.4481, + "step": 4890 + }, + { + "epoch": 1.09, + "learning_rate": 3.874990698712702e-05, + "loss": 1.4364, + "step": 4900 + }, + { + "epoch": 1.09, + "learning_rate": 3.874693057519161e-05, + "loss": 1.4314, + "step": 4910 + }, + { + "epoch": 1.09, + "learning_rate": 3.8743954163256196e-05, + "loss": 1.4184, + "step": 4920 + }, + { + "epoch": 1.09, + "learning_rate": 3.874097775132079e-05, + "loss": 1.4295, + "step": 4930 + }, + { + "epoch": 1.1, + "learning_rate": 3.873800133938538e-05, + "loss": 1.4238, + "step": 4940 + }, + { + "epoch": 1.1, + "learning_rate": 3.873502492744996e-05, + "loss": 1.4238, + "step": 4950 + }, + { + "epoch": 1.1, + "learning_rate": 3.873204851551455e-05, + "loss": 1.4335, + "step": 4960 + }, + { + "epoch": 1.1, + "learning_rate": 3.872907210357914e-05, + "loss": 1.4314, + "step": 4970 + }, + { + "epoch": 1.11, + "learning_rate": 3.8726095691643725e-05, + "loss": 1.419, + "step": 4980 + }, + { + "epoch": 1.11, + "learning_rate": 3.872311927970831e-05, + "loss": 1.422, + "step": 4990 + }, + { + "epoch": 1.11, + "learning_rate": 3.8720142867772905e-05, + "loss": 1.4268, + "step": 5000 + }, + { + "epoch": 1.11, + "learning_rate": 3.871716645583749e-05, + "loss": 1.4154, + "step": 5010 + }, + { + "epoch": 1.11, + "learning_rate": 3.871419004390208e-05, + "loss": 1.4218, + "step": 5020 + }, + { + "epoch": 1.12, + "learning_rate": 3.8711213631966666e-05, + "loss": 1.4336, + "step": 5030 + }, + { + "epoch": 1.12, + "learning_rate": 3.870823722003126e-05, + "loss": 1.4207, + "step": 5040 + }, + { + "epoch": 1.12, + "learning_rate": 3.8705260808095846e-05, + "loss": 1.4342, + "step": 5050 + }, + { + "epoch": 1.12, + "learning_rate": 3.870228439616043e-05, + "loss": 1.4181, + "step": 5060 + }, + { + "epoch": 1.13, + "learning_rate": 3.869930798422502e-05, + "loss": 1.4022, + "step": 5070 + }, + { + "epoch": 1.13, + "learning_rate": 3.869633157228961e-05, + "loss": 1.4094, + "step": 5080 + }, + { + "epoch": 1.13, + "learning_rate": 3.8693355160354194e-05, + "loss": 1.4184, + "step": 5090 + }, + { + "epoch": 1.13, + "learning_rate": 3.869037874841879e-05, + "loss": 1.417, + "step": 5100 + }, + { + "epoch": 1.13, + "learning_rate": 3.8687402336483375e-05, + "loss": 1.4027, + "step": 5110 + }, + { + "epoch": 1.14, + "learning_rate": 3.868442592454796e-05, + "loss": 1.4239, + "step": 5120 + }, + { + "epoch": 1.14, + "learning_rate": 3.868144951261255e-05, + "loss": 1.4125, + "step": 5130 + }, + { + "epoch": 1.14, + "learning_rate": 3.8678473100677135e-05, + "loss": 1.4163, + "step": 5140 + }, + { + "epoch": 1.14, + "learning_rate": 3.867549668874173e-05, + "loss": 1.4191, + "step": 5150 + }, + { + "epoch": 1.15, + "learning_rate": 3.867252027680631e-05, + "loss": 1.4077, + "step": 5160 + }, + { + "epoch": 1.15, + "learning_rate": 3.86695438648709e-05, + "loss": 1.4004, + "step": 5170 + }, + { + "epoch": 1.15, + "learning_rate": 3.866656745293549e-05, + "loss": 1.4075, + "step": 5180 + }, + { + "epoch": 1.15, + "learning_rate": 3.8663591041000077e-05, + "loss": 1.3929, + "step": 5190 + }, + { + "epoch": 1.15, + "learning_rate": 3.8660614629064663e-05, + "loss": 1.4008, + "step": 5200 + }, + { + "epoch": 1.16, + "learning_rate": 3.865763821712926e-05, + "loss": 1.3993, + "step": 5210 + }, + { + "epoch": 1.16, + "learning_rate": 3.8654661805193844e-05, + "loss": 1.4076, + "step": 5220 + }, + { + "epoch": 1.16, + "learning_rate": 3.865168539325843e-05, + "loss": 1.4128, + "step": 5230 + }, + { + "epoch": 1.16, + "learning_rate": 3.864870898132302e-05, + "loss": 1.4082, + "step": 5240 + }, + { + "epoch": 1.17, + "learning_rate": 3.864573256938761e-05, + "loss": 1.4015, + "step": 5250 + }, + { + "epoch": 1.17, + "learning_rate": 3.864275615745219e-05, + "loss": 1.4157, + "step": 5260 + }, + { + "epoch": 1.17, + "learning_rate": 3.863977974551678e-05, + "loss": 1.409, + "step": 5270 + }, + { + "epoch": 1.17, + "learning_rate": 3.863680333358137e-05, + "loss": 1.4193, + "step": 5280 + }, + { + "epoch": 1.17, + "learning_rate": 3.863382692164596e-05, + "loss": 1.4031, + "step": 5290 + }, + { + "epoch": 1.18, + "learning_rate": 3.8630850509710546e-05, + "loss": 1.3987, + "step": 5300 + }, + { + "epoch": 1.18, + "learning_rate": 3.862787409777513e-05, + "loss": 1.3973, + "step": 5310 + }, + { + "epoch": 1.18, + "learning_rate": 3.8624897685839727e-05, + "loss": 1.3985, + "step": 5320 + }, + { + "epoch": 1.18, + "learning_rate": 3.8621921273904313e-05, + "loss": 1.3926, + "step": 5330 + }, + { + "epoch": 1.19, + "learning_rate": 3.86189448619689e-05, + "loss": 1.3898, + "step": 5340 + }, + { + "epoch": 1.19, + "learning_rate": 3.861596845003349e-05, + "loss": 1.3939, + "step": 5350 + }, + { + "epoch": 1.19, + "learning_rate": 3.8612992038098074e-05, + "loss": 1.4008, + "step": 5360 + }, + { + "epoch": 1.19, + "learning_rate": 3.861001562616266e-05, + "loss": 1.4226, + "step": 5370 + }, + { + "epoch": 1.19, + "learning_rate": 3.860703921422725e-05, + "loss": 1.3995, + "step": 5380 + }, + { + "epoch": 1.2, + "learning_rate": 3.860406280229184e-05, + "loss": 1.4043, + "step": 5390 + }, + { + "epoch": 1.2, + "learning_rate": 3.860108639035643e-05, + "loss": 1.3828, + "step": 5400 + }, + { + "epoch": 1.2, + "learning_rate": 3.8598109978421015e-05, + "loss": 1.3749, + "step": 5410 + }, + { + "epoch": 1.2, + "learning_rate": 3.85951335664856e-05, + "loss": 1.3977, + "step": 5420 + }, + { + "epoch": 1.21, + "learning_rate": 3.8592157154550196e-05, + "loss": 1.4125, + "step": 5430 + }, + { + "epoch": 1.21, + "learning_rate": 3.858918074261478e-05, + "loss": 1.4094, + "step": 5440 + }, + { + "epoch": 1.21, + "learning_rate": 3.858620433067937e-05, + "loss": 1.3844, + "step": 5450 + }, + { + "epoch": 1.21, + "learning_rate": 3.858322791874396e-05, + "loss": 1.3934, + "step": 5460 + }, + { + "epoch": 1.21, + "learning_rate": 3.8580251506808544e-05, + "loss": 1.4037, + "step": 5470 + }, + { + "epoch": 1.22, + "learning_rate": 3.857727509487313e-05, + "loss": 1.3893, + "step": 5480 + }, + { + "epoch": 1.22, + "learning_rate": 3.8574298682937724e-05, + "loss": 1.389, + "step": 5490 + }, + { + "epoch": 1.22, + "learning_rate": 3.857132227100231e-05, + "loss": 1.4043, + "step": 5500 + }, + { + "epoch": 1.22, + "learning_rate": 3.85683458590669e-05, + "loss": 1.3974, + "step": 5510 + }, + { + "epoch": 1.23, + "learning_rate": 3.8565369447131485e-05, + "loss": 1.3681, + "step": 5520 + }, + { + "epoch": 1.23, + "learning_rate": 3.856239303519608e-05, + "loss": 1.3802, + "step": 5530 + }, + { + "epoch": 1.23, + "learning_rate": 3.8559416623260665e-05, + "loss": 1.3917, + "step": 5540 + }, + { + "epoch": 1.23, + "learning_rate": 3.855644021132525e-05, + "loss": 1.3793, + "step": 5550 + }, + { + "epoch": 1.23, + "learning_rate": 3.855346379938984e-05, + "loss": 1.3902, + "step": 5560 + }, + { + "epoch": 1.24, + "learning_rate": 3.8550487387454426e-05, + "loss": 1.3861, + "step": 5570 + }, + { + "epoch": 1.24, + "learning_rate": 3.854751097551901e-05, + "loss": 1.3954, + "step": 5580 + }, + { + "epoch": 1.24, + "learning_rate": 3.85445345635836e-05, + "loss": 1.3891, + "step": 5590 + }, + { + "epoch": 1.24, + "learning_rate": 3.8541558151648194e-05, + "loss": 1.3867, + "step": 5600 + }, + { + "epoch": 1.25, + "learning_rate": 3.853858173971278e-05, + "loss": 1.3819, + "step": 5610 + }, + { + "epoch": 1.25, + "learning_rate": 3.853560532777737e-05, + "loss": 1.3841, + "step": 5620 + }, + { + "epoch": 1.25, + "learning_rate": 3.8532628915841954e-05, + "loss": 1.369, + "step": 5630 + }, + { + "epoch": 1.25, + "learning_rate": 3.852965250390655e-05, + "loss": 1.3736, + "step": 5640 + }, + { + "epoch": 1.25, + "learning_rate": 3.8526676091971135e-05, + "loss": 1.3788, + "step": 5650 + }, + { + "epoch": 1.26, + "learning_rate": 3.852369968003572e-05, + "loss": 1.3843, + "step": 5660 + }, + { + "epoch": 1.26, + "learning_rate": 3.852072326810031e-05, + "loss": 1.3877, + "step": 5670 + }, + { + "epoch": 1.26, + "learning_rate": 3.8517746856164896e-05, + "loss": 1.382, + "step": 5680 + }, + { + "epoch": 1.26, + "learning_rate": 3.851477044422948e-05, + "loss": 1.3705, + "step": 5690 + }, + { + "epoch": 1.27, + "learning_rate": 3.851179403229407e-05, + "loss": 1.3871, + "step": 5700 + }, + { + "epoch": 1.27, + "learning_rate": 3.850881762035866e-05, + "loss": 1.3757, + "step": 5710 + }, + { + "epoch": 1.27, + "learning_rate": 3.850584120842325e-05, + "loss": 1.3762, + "step": 5720 + }, + { + "epoch": 1.27, + "learning_rate": 3.850286479648784e-05, + "loss": 1.3688, + "step": 5730 + }, + { + "epoch": 1.27, + "learning_rate": 3.8499888384552424e-05, + "loss": 1.3845, + "step": 5740 + }, + { + "epoch": 1.28, + "learning_rate": 3.849691197261702e-05, + "loss": 1.3674, + "step": 5750 + }, + { + "epoch": 1.28, + "learning_rate": 3.8493935560681604e-05, + "loss": 1.3719, + "step": 5760 + }, + { + "epoch": 1.28, + "learning_rate": 3.8490959148746184e-05, + "loss": 1.3572, + "step": 5770 + }, + { + "epoch": 1.28, + "learning_rate": 3.848798273681078e-05, + "loss": 1.3708, + "step": 5780 + }, + { + "epoch": 1.29, + "learning_rate": 3.8485006324875365e-05, + "loss": 1.3611, + "step": 5790 + }, + { + "epoch": 1.29, + "learning_rate": 3.848202991293995e-05, + "loss": 1.3665, + "step": 5800 + }, + { + "epoch": 1.29, + "learning_rate": 3.847905350100454e-05, + "loss": 1.3625, + "step": 5810 + }, + { + "epoch": 1.29, + "learning_rate": 3.847607708906913e-05, + "loss": 1.3655, + "step": 5820 + }, + { + "epoch": 1.29, + "learning_rate": 3.847310067713372e-05, + "loss": 1.3733, + "step": 5830 + }, + { + "epoch": 1.3, + "learning_rate": 3.8470124265198306e-05, + "loss": 1.364, + "step": 5840 + }, + { + "epoch": 1.3, + "learning_rate": 3.846714785326289e-05, + "loss": 1.3572, + "step": 5850 + }, + { + "epoch": 1.3, + "learning_rate": 3.846417144132749e-05, + "loss": 1.3562, + "step": 5860 + }, + { + "epoch": 1.3, + "learning_rate": 3.846119502939207e-05, + "loss": 1.3629, + "step": 5870 + }, + { + "epoch": 1.31, + "learning_rate": 3.845821861745666e-05, + "loss": 1.3697, + "step": 5880 + }, + { + "epoch": 1.31, + "learning_rate": 3.845524220552125e-05, + "loss": 1.3615, + "step": 5890 + }, + { + "epoch": 1.31, + "learning_rate": 3.8452265793585834e-05, + "loss": 1.3758, + "step": 5900 + }, + { + "epoch": 1.31, + "learning_rate": 3.844928938165042e-05, + "loss": 1.3569, + "step": 5910 + }, + { + "epoch": 1.31, + "learning_rate": 3.8446312969715015e-05, + "loss": 1.3677, + "step": 5920 + }, + { + "epoch": 1.32, + "learning_rate": 3.84433365577796e-05, + "loss": 1.376, + "step": 5930 + }, + { + "epoch": 1.32, + "learning_rate": 3.844036014584419e-05, + "loss": 1.3629, + "step": 5940 + }, + { + "epoch": 1.32, + "learning_rate": 3.8437383733908776e-05, + "loss": 1.3652, + "step": 5950 + }, + { + "epoch": 1.32, + "learning_rate": 3.843440732197337e-05, + "loss": 1.3707, + "step": 5960 + }, + { + "epoch": 1.33, + "learning_rate": 3.8431430910037956e-05, + "loss": 1.3635, + "step": 5970 + }, + { + "epoch": 1.33, + "learning_rate": 3.8428454498102536e-05, + "loss": 1.3577, + "step": 5980 + }, + { + "epoch": 1.33, + "learning_rate": 3.842547808616713e-05, + "loss": 1.3464, + "step": 5990 + }, + { + "epoch": 1.33, + "learning_rate": 3.842250167423172e-05, + "loss": 1.3467, + "step": 6000 + }, + { + "epoch": 1.33, + "learning_rate": 3.8419525262296304e-05, + "loss": 1.3635, + "step": 6010 + }, + { + "epoch": 1.34, + "learning_rate": 3.841654885036089e-05, + "loss": 1.3531, + "step": 6020 + }, + { + "epoch": 1.34, + "learning_rate": 3.8413572438425484e-05, + "loss": 1.3516, + "step": 6030 + }, + { + "epoch": 1.34, + "learning_rate": 3.841059602649007e-05, + "loss": 1.3668, + "step": 6040 + }, + { + "epoch": 1.34, + "learning_rate": 3.840761961455466e-05, + "loss": 1.3615, + "step": 6050 + }, + { + "epoch": 1.35, + "learning_rate": 3.8404643202619245e-05, + "loss": 1.3707, + "step": 6060 + }, + { + "epoch": 1.35, + "learning_rate": 3.840166679068384e-05, + "loss": 1.3546, + "step": 6070 + }, + { + "epoch": 1.35, + "learning_rate": 3.839869037874842e-05, + "loss": 1.355, + "step": 6080 + }, + { + "epoch": 1.35, + "learning_rate": 3.8395713966813006e-05, + "loss": 1.3625, + "step": 6090 + }, + { + "epoch": 1.35, + "learning_rate": 3.83927375548776e-05, + "loss": 1.3644, + "step": 6100 + }, + { + "epoch": 1.36, + "learning_rate": 3.8389761142942186e-05, + "loss": 1.3525, + "step": 6110 + }, + { + "epoch": 1.36, + "learning_rate": 3.838678473100677e-05, + "loss": 1.3383, + "step": 6120 + }, + { + "epoch": 1.36, + "learning_rate": 3.838380831907136e-05, + "loss": 1.3394, + "step": 6130 + }, + { + "epoch": 1.36, + "learning_rate": 3.8380831907135954e-05, + "loss": 1.3548, + "step": 6140 + }, + { + "epoch": 1.37, + "learning_rate": 3.837785549520054e-05, + "loss": 1.3565, + "step": 6150 + }, + { + "epoch": 1.37, + "learning_rate": 3.837487908326513e-05, + "loss": 1.3586, + "step": 6160 + }, + { + "epoch": 1.37, + "learning_rate": 3.8371902671329715e-05, + "loss": 1.3674, + "step": 6170 + }, + { + "epoch": 1.37, + "learning_rate": 3.83689262593943e-05, + "loss": 1.3495, + "step": 6180 + }, + { + "epoch": 1.37, + "learning_rate": 3.836594984745889e-05, + "loss": 1.3432, + "step": 6190 + }, + { + "epoch": 1.38, + "learning_rate": 3.8362973435523475e-05, + "loss": 1.349, + "step": 6200 + }, + { + "epoch": 1.38, + "learning_rate": 3.835999702358807e-05, + "loss": 1.354, + "step": 6210 + }, + { + "epoch": 1.38, + "learning_rate": 3.8357020611652656e-05, + "loss": 1.3522, + "step": 6220 + }, + { + "epoch": 1.38, + "learning_rate": 3.835404419971724e-05, + "loss": 1.3609, + "step": 6230 + }, + { + "epoch": 1.39, + "learning_rate": 3.835106778778183e-05, + "loss": 1.3296, + "step": 6240 + }, + { + "epoch": 1.39, + "learning_rate": 3.834809137584642e-05, + "loss": 1.3521, + "step": 6250 + }, + { + "epoch": 1.39, + "learning_rate": 3.834511496391101e-05, + "loss": 1.3495, + "step": 6260 + }, + { + "epoch": 1.39, + "learning_rate": 3.83421385519756e-05, + "loss": 1.3502, + "step": 6270 + }, + { + "epoch": 1.39, + "learning_rate": 3.8339162140040184e-05, + "loss": 1.3451, + "step": 6280 + }, + { + "epoch": 1.4, + "learning_rate": 3.833618572810477e-05, + "loss": 1.3381, + "step": 6290 + }, + { + "epoch": 1.4, + "learning_rate": 3.833320931616936e-05, + "loss": 1.335, + "step": 6300 + }, + { + "epoch": 1.4, + "learning_rate": 3.833023290423395e-05, + "loss": 1.3475, + "step": 6310 + }, + { + "epoch": 1.4, + "learning_rate": 3.832725649229854e-05, + "loss": 1.3534, + "step": 6320 + }, + { + "epoch": 1.41, + "learning_rate": 3.8324280080363125e-05, + "loss": 1.349, + "step": 6330 + }, + { + "epoch": 1.41, + "learning_rate": 3.832130366842771e-05, + "loss": 1.3437, + "step": 6340 + }, + { + "epoch": 1.41, + "learning_rate": 3.8318327256492306e-05, + "loss": 1.3464, + "step": 6350 + }, + { + "epoch": 1.41, + "learning_rate": 3.831535084455689e-05, + "loss": 1.3517, + "step": 6360 + }, + { + "epoch": 1.41, + "learning_rate": 3.831237443262148e-05, + "loss": 1.348, + "step": 6370 + }, + { + "epoch": 1.42, + "learning_rate": 3.8309398020686067e-05, + "loss": 1.3504, + "step": 6380 + }, + { + "epoch": 1.42, + "learning_rate": 3.8306421608750653e-05, + "loss": 1.3401, + "step": 6390 + }, + { + "epoch": 1.42, + "learning_rate": 3.830344519681524e-05, + "loss": 1.3453, + "step": 6400 + }, + { + "epoch": 1.42, + "learning_rate": 3.830046878487983e-05, + "loss": 1.34, + "step": 6410 + }, + { + "epoch": 1.43, + "learning_rate": 3.829749237294442e-05, + "loss": 1.3435, + "step": 6420 + }, + { + "epoch": 1.43, + "learning_rate": 3.829451596100901e-05, + "loss": 1.3427, + "step": 6430 + }, + { + "epoch": 1.43, + "learning_rate": 3.8291539549073595e-05, + "loss": 1.3379, + "step": 6440 + }, + { + "epoch": 1.43, + "learning_rate": 3.828856313713818e-05, + "loss": 1.3315, + "step": 6450 + }, + { + "epoch": 1.43, + "learning_rate": 3.8285586725202775e-05, + "loss": 1.3286, + "step": 6460 + }, + { + "epoch": 1.44, + "learning_rate": 3.828261031326736e-05, + "loss": 1.3378, + "step": 6470 + }, + { + "epoch": 1.44, + "learning_rate": 3.827963390133195e-05, + "loss": 1.3354, + "step": 6480 + }, + { + "epoch": 1.44, + "learning_rate": 3.8276657489396536e-05, + "loss": 1.3238, + "step": 6490 + }, + { + "epoch": 1.44, + "learning_rate": 3.827368107746112e-05, + "loss": 1.334, + "step": 6500 + }, + { + "epoch": 1.45, + "learning_rate": 3.827070466552571e-05, + "loss": 1.3356, + "step": 6510 + }, + { + "epoch": 1.45, + "learning_rate": 3.82677282535903e-05, + "loss": 1.3249, + "step": 6520 + }, + { + "epoch": 1.45, + "learning_rate": 3.826475184165489e-05, + "loss": 1.3282, + "step": 6530 + }, + { + "epoch": 1.45, + "learning_rate": 3.826177542971948e-05, + "loss": 1.3448, + "step": 6540 + }, + { + "epoch": 1.45, + "learning_rate": 3.8258799017784064e-05, + "loss": 1.3322, + "step": 6550 + }, + { + "epoch": 1.46, + "learning_rate": 3.825582260584865e-05, + "loss": 1.3375, + "step": 6560 + }, + { + "epoch": 1.46, + "learning_rate": 3.8252846193913245e-05, + "loss": 1.3331, + "step": 6570 + }, + { + "epoch": 1.46, + "learning_rate": 3.824986978197783e-05, + "loss": 1.3254, + "step": 6580 + }, + { + "epoch": 1.46, + "learning_rate": 3.824689337004241e-05, + "loss": 1.3414, + "step": 6590 + }, + { + "epoch": 1.47, + "learning_rate": 3.8243916958107005e-05, + "loss": 1.3346, + "step": 6600 + }, + { + "epoch": 1.47, + "learning_rate": 3.824094054617159e-05, + "loss": 1.3497, + "step": 6610 + }, + { + "epoch": 1.47, + "learning_rate": 3.823796413423618e-05, + "loss": 1.3214, + "step": 6620 + }, + { + "epoch": 1.47, + "learning_rate": 3.8234987722300766e-05, + "loss": 1.3413, + "step": 6630 + }, + { + "epoch": 1.47, + "learning_rate": 3.823201131036536e-05, + "loss": 1.3294, + "step": 6640 + }, + { + "epoch": 1.48, + "learning_rate": 3.822903489842995e-05, + "loss": 1.3275, + "step": 6650 + }, + { + "epoch": 1.48, + "learning_rate": 3.8226058486494534e-05, + "loss": 1.3325, + "step": 6660 + }, + { + "epoch": 1.48, + "learning_rate": 3.822308207455912e-05, + "loss": 1.3388, + "step": 6670 + }, + { + "epoch": 1.48, + "learning_rate": 3.8220105662623714e-05, + "loss": 1.3361, + "step": 6680 + }, + { + "epoch": 1.49, + "learning_rate": 3.8217129250688294e-05, + "loss": 1.3263, + "step": 6690 + }, + { + "epoch": 1.49, + "learning_rate": 3.821415283875289e-05, + "loss": 1.333, + "step": 6700 + }, + { + "epoch": 1.49, + "learning_rate": 3.8211176426817475e-05, + "loss": 1.3163, + "step": 6710 + }, + { + "epoch": 1.49, + "learning_rate": 3.820820001488206e-05, + "loss": 1.3371, + "step": 6720 + }, + { + "epoch": 1.49, + "learning_rate": 3.820522360294665e-05, + "loss": 1.3405, + "step": 6730 + }, + { + "epoch": 1.5, + "learning_rate": 3.820224719101124e-05, + "loss": 1.3207, + "step": 6740 + }, + { + "epoch": 1.5, + "learning_rate": 3.819927077907583e-05, + "loss": 1.3358, + "step": 6750 + }, + { + "epoch": 1.5, + "learning_rate": 3.8196294367140416e-05, + "loss": 1.3252, + "step": 6760 + }, + { + "epoch": 1.5, + "learning_rate": 3.8193317955205e-05, + "loss": 1.3308, + "step": 6770 + }, + { + "epoch": 1.51, + "learning_rate": 3.81903415432696e-05, + "loss": 1.3392, + "step": 6780 + }, + { + "epoch": 1.51, + "learning_rate": 3.8187365131334184e-05, + "loss": 1.3205, + "step": 6790 + }, + { + "epoch": 1.51, + "learning_rate": 3.8184388719398764e-05, + "loss": 1.3354, + "step": 6800 + }, + { + "epoch": 1.51, + "learning_rate": 3.818141230746336e-05, + "loss": 1.3209, + "step": 6810 + }, + { + "epoch": 1.51, + "learning_rate": 3.8178435895527944e-05, + "loss": 1.3408, + "step": 6820 + }, + { + "epoch": 1.52, + "learning_rate": 3.817545948359253e-05, + "loss": 1.3272, + "step": 6830 + }, + { + "epoch": 1.52, + "learning_rate": 3.817248307165712e-05, + "loss": 1.3233, + "step": 6840 + }, + { + "epoch": 1.52, + "learning_rate": 3.816950665972171e-05, + "loss": 1.3371, + "step": 6850 + }, + { + "epoch": 1.52, + "learning_rate": 3.81665302477863e-05, + "loss": 1.339, + "step": 6860 + }, + { + "epoch": 1.53, + "learning_rate": 3.8163553835850886e-05, + "loss": 1.3347, + "step": 6870 + }, + { + "epoch": 1.53, + "learning_rate": 3.816057742391547e-05, + "loss": 1.3167, + "step": 6880 + }, + { + "epoch": 1.53, + "learning_rate": 3.8157601011980066e-05, + "loss": 1.3208, + "step": 6890 + }, + { + "epoch": 1.53, + "learning_rate": 3.8154624600044646e-05, + "loss": 1.3216, + "step": 6900 + }, + { + "epoch": 1.53, + "learning_rate": 3.815164818810923e-05, + "loss": 1.323, + "step": 6910 + }, + { + "epoch": 1.54, + "learning_rate": 3.814867177617383e-05, + "loss": 1.3258, + "step": 6920 + }, + { + "epoch": 1.54, + "learning_rate": 3.8145695364238414e-05, + "loss": 1.3157, + "step": 6930 + }, + { + "epoch": 1.54, + "learning_rate": 3.8142718952303e-05, + "loss": 1.3214, + "step": 6940 + }, + { + "epoch": 1.54, + "learning_rate": 3.813974254036759e-05, + "loss": 1.3354, + "step": 6950 + }, + { + "epoch": 1.55, + "learning_rate": 3.813676612843218e-05, + "loss": 1.3185, + "step": 6960 + }, + { + "epoch": 1.55, + "learning_rate": 3.813378971649677e-05, + "loss": 1.3367, + "step": 6970 + }, + { + "epoch": 1.55, + "learning_rate": 3.8130813304561355e-05, + "loss": 1.3354, + "step": 6980 + }, + { + "epoch": 1.55, + "learning_rate": 3.812783689262594e-05, + "loss": 1.3301, + "step": 6990 + }, + { + "epoch": 1.55, + "learning_rate": 3.812486048069053e-05, + "loss": 1.3191, + "step": 7000 + }, + { + "epoch": 1.56, + "learning_rate": 3.8121884068755116e-05, + "loss": 1.3136, + "step": 7010 + }, + { + "epoch": 1.56, + "learning_rate": 3.81189076568197e-05, + "loss": 1.3258, + "step": 7020 + }, + { + "epoch": 1.56, + "learning_rate": 3.8115931244884296e-05, + "loss": 1.3247, + "step": 7030 + }, + { + "epoch": 1.56, + "learning_rate": 3.811295483294888e-05, + "loss": 1.3105, + "step": 7040 + }, + { + "epoch": 1.57, + "learning_rate": 3.810997842101347e-05, + "loss": 1.3231, + "step": 7050 + }, + { + "epoch": 1.57, + "learning_rate": 3.810700200907806e-05, + "loss": 1.3237, + "step": 7060 + }, + { + "epoch": 1.57, + "learning_rate": 3.810402559714265e-05, + "loss": 1.3178, + "step": 7070 + }, + { + "epoch": 1.57, + "learning_rate": 3.810104918520724e-05, + "loss": 1.317, + "step": 7080 + }, + { + "epoch": 1.57, + "learning_rate": 3.8098072773271824e-05, + "loss": 1.3293, + "step": 7090 + }, + { + "epoch": 1.58, + "learning_rate": 3.809509636133641e-05, + "loss": 1.3291, + "step": 7100 + }, + { + "epoch": 1.58, + "learning_rate": 3.8092119949401e-05, + "loss": 1.3165, + "step": 7110 + }, + { + "epoch": 1.58, + "learning_rate": 3.8089143537465585e-05, + "loss": 1.3253, + "step": 7120 + }, + { + "epoch": 1.58, + "learning_rate": 3.808616712553018e-05, + "loss": 1.3076, + "step": 7130 + }, + { + "epoch": 1.59, + "learning_rate": 3.8083190713594766e-05, + "loss": 1.3237, + "step": 7140 + }, + { + "epoch": 1.59, + "learning_rate": 3.808021430165935e-05, + "loss": 1.3155, + "step": 7150 + }, + { + "epoch": 1.59, + "learning_rate": 3.807723788972394e-05, + "loss": 1.3141, + "step": 7160 + }, + { + "epoch": 1.59, + "learning_rate": 3.807426147778853e-05, + "loss": 1.3134, + "step": 7170 + }, + { + "epoch": 1.59, + "learning_rate": 3.807128506585312e-05, + "loss": 1.3274, + "step": 7180 + }, + { + "epoch": 1.6, + "learning_rate": 3.806830865391771e-05, + "loss": 1.309, + "step": 7190 + }, + { + "epoch": 1.6, + "learning_rate": 3.8065332241982294e-05, + "loss": 1.3044, + "step": 7200 + }, + { + "epoch": 1.6, + "learning_rate": 3.806235583004688e-05, + "loss": 1.3058, + "step": 7210 + }, + { + "epoch": 1.6, + "learning_rate": 3.805937941811147e-05, + "loss": 1.3185, + "step": 7220 + }, + { + "epoch": 1.61, + "learning_rate": 3.8056403006176055e-05, + "loss": 1.3152, + "step": 7230 + }, + { + "epoch": 1.61, + "learning_rate": 3.805342659424065e-05, + "loss": 1.3186, + "step": 7240 + }, + { + "epoch": 1.61, + "learning_rate": 3.8050450182305235e-05, + "loss": 1.3153, + "step": 7250 + }, + { + "epoch": 1.61, + "learning_rate": 3.804747377036982e-05, + "loss": 1.3061, + "step": 7260 + }, + { + "epoch": 1.61, + "learning_rate": 3.804449735843441e-05, + "loss": 1.3144, + "step": 7270 + }, + { + "epoch": 1.62, + "learning_rate": 3.8041520946499e-05, + "loss": 1.3165, + "step": 7280 + }, + { + "epoch": 1.62, + "learning_rate": 3.803854453456359e-05, + "loss": 1.3091, + "step": 7290 + }, + { + "epoch": 1.62, + "learning_rate": 3.8035568122628176e-05, + "loss": 1.3199, + "step": 7300 + }, + { + "epoch": 1.62, + "learning_rate": 3.803259171069276e-05, + "loss": 1.3201, + "step": 7310 + }, + { + "epoch": 1.63, + "learning_rate": 3.802961529875735e-05, + "loss": 1.3095, + "step": 7320 + }, + { + "epoch": 1.63, + "learning_rate": 3.802663888682194e-05, + "loss": 1.3148, + "step": 7330 + }, + { + "epoch": 1.63, + "learning_rate": 3.8023662474886524e-05, + "loss": 1.3008, + "step": 7340 + }, + { + "epoch": 1.63, + "learning_rate": 3.802068606295112e-05, + "loss": 1.3027, + "step": 7350 + }, + { + "epoch": 1.63, + "learning_rate": 3.8017709651015705e-05, + "loss": 1.3095, + "step": 7360 + }, + { + "epoch": 1.64, + "learning_rate": 3.801473323908029e-05, + "loss": 1.3031, + "step": 7370 + }, + { + "epoch": 1.64, + "learning_rate": 3.801175682714488e-05, + "loss": 1.3165, + "step": 7380 + }, + { + "epoch": 1.64, + "learning_rate": 3.800878041520947e-05, + "loss": 1.3206, + "step": 7390 + }, + { + "epoch": 1.64, + "learning_rate": 3.800580400327406e-05, + "loss": 1.3076, + "step": 7400 + }, + { + "epoch": 1.65, + "learning_rate": 3.800282759133864e-05, + "loss": 1.3143, + "step": 7410 + }, + { + "epoch": 1.65, + "learning_rate": 3.799985117940323e-05, + "loss": 1.3227, + "step": 7420 + }, + { + "epoch": 1.65, + "learning_rate": 3.799687476746782e-05, + "loss": 1.3159, + "step": 7430 + }, + { + "epoch": 1.65, + "learning_rate": 3.7993898355532407e-05, + "loss": 1.2961, + "step": 7440 + }, + { + "epoch": 1.65, + "learning_rate": 3.7990921943596993e-05, + "loss": 1.3064, + "step": 7450 + }, + { + "epoch": 1.66, + "learning_rate": 3.798794553166159e-05, + "loss": 1.309, + "step": 7460 + }, + { + "epoch": 1.66, + "learning_rate": 3.7984969119726174e-05, + "loss": 1.3039, + "step": 7470 + }, + { + "epoch": 1.66, + "learning_rate": 3.798199270779076e-05, + "loss": 1.3091, + "step": 7480 + }, + { + "epoch": 1.66, + "learning_rate": 3.797901629585535e-05, + "loss": 1.299, + "step": 7490 + }, + { + "epoch": 1.67, + "learning_rate": 3.797603988391994e-05, + "loss": 1.302, + "step": 7500 + }, + { + "epoch": 1.67, + "learning_rate": 3.797306347198452e-05, + "loss": 1.3035, + "step": 7510 + }, + { + "epoch": 1.67, + "learning_rate": 3.7970087060049115e-05, + "loss": 1.3283, + "step": 7520 + }, + { + "epoch": 1.67, + "learning_rate": 3.79671106481137e-05, + "loss": 1.3098, + "step": 7530 + }, + { + "epoch": 1.67, + "learning_rate": 3.796413423617829e-05, + "loss": 1.3179, + "step": 7540 + }, + { + "epoch": 1.68, + "learning_rate": 3.7961157824242876e-05, + "loss": 1.3247, + "step": 7550 + }, + { + "epoch": 1.68, + "learning_rate": 3.795818141230747e-05, + "loss": 1.2883, + "step": 7560 + }, + { + "epoch": 1.68, + "learning_rate": 3.7955205000372057e-05, + "loss": 1.296, + "step": 7570 + }, + { + "epoch": 1.68, + "learning_rate": 3.7952228588436644e-05, + "loss": 1.3131, + "step": 7580 + }, + { + "epoch": 1.69, + "learning_rate": 3.794925217650123e-05, + "loss": 1.3031, + "step": 7590 + }, + { + "epoch": 1.69, + "learning_rate": 3.7946275764565824e-05, + "loss": 1.3067, + "step": 7600 + }, + { + "epoch": 1.69, + "learning_rate": 3.7943299352630404e-05, + "loss": 1.2966, + "step": 7610 + }, + { + "epoch": 1.69, + "learning_rate": 3.794032294069499e-05, + "loss": 1.3065, + "step": 7620 + }, + { + "epoch": 1.69, + "learning_rate": 3.7937346528759585e-05, + "loss": 1.3114, + "step": 7630 + }, + { + "epoch": 1.7, + "learning_rate": 3.793437011682417e-05, + "loss": 1.2946, + "step": 7640 + }, + { + "epoch": 1.7, + "learning_rate": 3.793139370488876e-05, + "loss": 1.3103, + "step": 7650 + }, + { + "epoch": 1.7, + "learning_rate": 3.7928417292953345e-05, + "loss": 1.2927, + "step": 7660 + }, + { + "epoch": 1.7, + "learning_rate": 3.792544088101794e-05, + "loss": 1.3005, + "step": 7670 + }, + { + "epoch": 1.71, + "learning_rate": 3.7922464469082526e-05, + "loss": 1.2924, + "step": 7680 + }, + { + "epoch": 1.71, + "learning_rate": 3.791948805714711e-05, + "loss": 1.3107, + "step": 7690 + }, + { + "epoch": 1.71, + "learning_rate": 3.79165116452117e-05, + "loss": 1.3135, + "step": 7700 + }, + { + "epoch": 1.71, + "learning_rate": 3.7913535233276294e-05, + "loss": 1.3041, + "step": 7710 + }, + { + "epoch": 1.71, + "learning_rate": 3.7910558821340874e-05, + "loss": 1.3062, + "step": 7720 + }, + { + "epoch": 1.72, + "learning_rate": 3.790758240940546e-05, + "loss": 1.3002, + "step": 7730 + }, + { + "epoch": 1.72, + "learning_rate": 3.7904605997470054e-05, + "loss": 1.2952, + "step": 7740 + }, + { + "epoch": 1.72, + "learning_rate": 3.790162958553464e-05, + "loss": 1.2926, + "step": 7750 + }, + { + "epoch": 1.72, + "learning_rate": 3.789865317359923e-05, + "loss": 1.3107, + "step": 7760 + }, + { + "epoch": 1.73, + "learning_rate": 3.7895676761663815e-05, + "loss": 1.3047, + "step": 7770 + }, + { + "epoch": 1.73, + "learning_rate": 3.789270034972841e-05, + "loss": 1.3062, + "step": 7780 + }, + { + "epoch": 1.73, + "learning_rate": 3.7889723937792995e-05, + "loss": 1.2938, + "step": 7790 + }, + { + "epoch": 1.73, + "learning_rate": 3.788674752585758e-05, + "loss": 1.3065, + "step": 7800 + }, + { + "epoch": 1.73, + "learning_rate": 3.788377111392217e-05, + "loss": 1.2996, + "step": 7810 + }, + { + "epoch": 1.74, + "learning_rate": 3.7880794701986756e-05, + "loss": 1.2945, + "step": 7820 + }, + { + "epoch": 1.74, + "learning_rate": 3.787781829005134e-05, + "loss": 1.3094, + "step": 7830 + }, + { + "epoch": 1.74, + "learning_rate": 3.787484187811593e-05, + "loss": 1.2981, + "step": 7840 + }, + { + "epoch": 1.74, + "learning_rate": 3.7871865466180524e-05, + "loss": 1.3001, + "step": 7850 + }, + { + "epoch": 1.75, + "learning_rate": 3.786888905424511e-05, + "loss": 1.3054, + "step": 7860 + }, + { + "epoch": 1.75, + "learning_rate": 3.78659126423097e-05, + "loss": 1.2942, + "step": 7870 + }, + { + "epoch": 1.75, + "learning_rate": 3.7862936230374284e-05, + "loss": 1.2877, + "step": 7880 + }, + { + "epoch": 1.75, + "learning_rate": 3.785995981843888e-05, + "loss": 1.3055, + "step": 7890 + }, + { + "epoch": 1.75, + "learning_rate": 3.7856983406503465e-05, + "loss": 1.2837, + "step": 7900 + }, + { + "epoch": 1.76, + "learning_rate": 3.785400699456805e-05, + "loss": 1.2919, + "step": 7910 + }, + { + "epoch": 1.76, + "learning_rate": 3.785103058263264e-05, + "loss": 1.2827, + "step": 7920 + }, + { + "epoch": 1.76, + "learning_rate": 3.7848054170697226e-05, + "loss": 1.2806, + "step": 7930 + }, + { + "epoch": 1.76, + "learning_rate": 3.784507775876181e-05, + "loss": 1.2827, + "step": 7940 + }, + { + "epoch": 1.77, + "learning_rate": 3.7842101346826406e-05, + "loss": 1.2921, + "step": 7950 + }, + { + "epoch": 1.77, + "learning_rate": 3.783912493489099e-05, + "loss": 1.2928, + "step": 7960 + }, + { + "epoch": 1.77, + "learning_rate": 3.783614852295558e-05, + "loss": 1.3016, + "step": 7970 + }, + { + "epoch": 1.77, + "learning_rate": 3.783317211102017e-05, + "loss": 1.2841, + "step": 7980 + }, + { + "epoch": 1.77, + "learning_rate": 3.783019569908476e-05, + "loss": 1.3047, + "step": 7990 + }, + { + "epoch": 1.78, + "learning_rate": 3.782721928714935e-05, + "loss": 1.2971, + "step": 8000 + }, + { + "epoch": 1.78, + "learning_rate": 3.7824242875213934e-05, + "loss": 1.2813, + "step": 8010 + }, + { + "epoch": 1.78, + "learning_rate": 3.782126646327852e-05, + "loss": 1.2758, + "step": 8020 + }, + { + "epoch": 1.78, + "learning_rate": 3.781829005134311e-05, + "loss": 1.2962, + "step": 8030 + }, + { + "epoch": 1.79, + "learning_rate": 3.7815313639407695e-05, + "loss": 1.287, + "step": 8040 + }, + { + "epoch": 1.79, + "learning_rate": 3.781233722747228e-05, + "loss": 1.3027, + "step": 8050 + }, + { + "epoch": 1.79, + "learning_rate": 3.7809360815536876e-05, + "loss": 1.2932, + "step": 8060 + }, + { + "epoch": 1.79, + "learning_rate": 3.780638440360146e-05, + "loss": 1.2765, + "step": 8070 + }, + { + "epoch": 1.79, + "learning_rate": 3.780340799166605e-05, + "loss": 1.2905, + "step": 8080 + }, + { + "epoch": 1.8, + "learning_rate": 3.7800431579730636e-05, + "loss": 1.2983, + "step": 8090 + }, + { + "epoch": 1.8, + "learning_rate": 3.779745516779523e-05, + "loss": 1.2811, + "step": 8100 + }, + { + "epoch": 1.8, + "learning_rate": 3.779447875585982e-05, + "loss": 1.2961, + "step": 8110 + }, + { + "epoch": 1.8, + "learning_rate": 3.7791502343924404e-05, + "loss": 1.2794, + "step": 8120 + }, + { + "epoch": 1.81, + "learning_rate": 3.778852593198899e-05, + "loss": 1.287, + "step": 8130 + }, + { + "epoch": 1.81, + "learning_rate": 3.778554952005358e-05, + "loss": 1.3107, + "step": 8140 + }, + { + "epoch": 1.81, + "learning_rate": 3.7782573108118165e-05, + "loss": 1.2854, + "step": 8150 + }, + { + "epoch": 1.81, + "learning_rate": 3.777959669618275e-05, + "loss": 1.2832, + "step": 8160 + }, + { + "epoch": 1.81, + "learning_rate": 3.7776620284247345e-05, + "loss": 1.2763, + "step": 8170 + }, + { + "epoch": 1.82, + "learning_rate": 3.777364387231193e-05, + "loss": 1.2793, + "step": 8180 + }, + { + "epoch": 1.82, + "learning_rate": 3.777066746037652e-05, + "loss": 1.2963, + "step": 8190 + }, + { + "epoch": 1.82, + "learning_rate": 3.7767691048441106e-05, + "loss": 1.2826, + "step": 8200 + }, + { + "epoch": 1.82, + "learning_rate": 3.77647146365057e-05, + "loss": 1.2796, + "step": 8210 + }, + { + "epoch": 1.83, + "learning_rate": 3.7761738224570286e-05, + "loss": 1.2846, + "step": 8220 + }, + { + "epoch": 1.83, + "learning_rate": 3.7758761812634866e-05, + "loss": 1.2826, + "step": 8230 + }, + { + "epoch": 1.83, + "learning_rate": 3.775578540069946e-05, + "loss": 1.2809, + "step": 8240 + }, + { + "epoch": 1.83, + "learning_rate": 3.775280898876405e-05, + "loss": 1.2859, + "step": 8250 + }, + { + "epoch": 1.83, + "learning_rate": 3.7749832576828634e-05, + "loss": 1.2897, + "step": 8260 + }, + { + "epoch": 1.84, + "learning_rate": 3.774685616489322e-05, + "loss": 1.2835, + "step": 8270 + }, + { + "epoch": 1.84, + "learning_rate": 3.7743879752957815e-05, + "loss": 1.2798, + "step": 8280 + }, + { + "epoch": 1.84, + "learning_rate": 3.77409033410224e-05, + "loss": 1.2706, + "step": 8290 + }, + { + "epoch": 1.84, + "learning_rate": 3.773792692908699e-05, + "loss": 1.2853, + "step": 8300 + }, + { + "epoch": 1.85, + "learning_rate": 3.7734950517151575e-05, + "loss": 1.2937, + "step": 8310 + }, + { + "epoch": 1.85, + "learning_rate": 3.773197410521617e-05, + "loss": 1.2922, + "step": 8320 + }, + { + "epoch": 1.85, + "learning_rate": 3.772899769328075e-05, + "loss": 1.2876, + "step": 8330 + }, + { + "epoch": 1.85, + "learning_rate": 3.772602128134534e-05, + "loss": 1.2746, + "step": 8340 + }, + { + "epoch": 1.85, + "learning_rate": 3.772304486940993e-05, + "loss": 1.2877, + "step": 8350 + }, + { + "epoch": 1.86, + "learning_rate": 3.7720068457474516e-05, + "loss": 1.2732, + "step": 8360 + }, + { + "epoch": 1.86, + "learning_rate": 3.77170920455391e-05, + "loss": 1.2755, + "step": 8370 + }, + { + "epoch": 1.86, + "learning_rate": 3.77141156336037e-05, + "loss": 1.2722, + "step": 8380 + }, + { + "epoch": 1.86, + "learning_rate": 3.7711139221668284e-05, + "loss": 1.2914, + "step": 8390 + }, + { + "epoch": 1.87, + "learning_rate": 3.770816280973287e-05, + "loss": 1.2845, + "step": 8400 + }, + { + "epoch": 1.87, + "learning_rate": 3.770518639779746e-05, + "loss": 1.2751, + "step": 8410 + }, + { + "epoch": 1.87, + "learning_rate": 3.770220998586205e-05, + "loss": 1.2764, + "step": 8420 + }, + { + "epoch": 1.87, + "learning_rate": 3.769923357392663e-05, + "loss": 1.2789, + "step": 8430 + }, + { + "epoch": 1.87, + "learning_rate": 3.769625716199122e-05, + "loss": 1.2916, + "step": 8440 + }, + { + "epoch": 1.88, + "learning_rate": 3.769328075005581e-05, + "loss": 1.2739, + "step": 8450 + }, + { + "epoch": 1.88, + "learning_rate": 3.76903043381204e-05, + "loss": 1.2705, + "step": 8460 + }, + { + "epoch": 1.88, + "learning_rate": 3.7687327926184986e-05, + "loss": 1.2793, + "step": 8470 + }, + { + "epoch": 1.88, + "learning_rate": 3.768435151424957e-05, + "loss": 1.2734, + "step": 8480 + }, + { + "epoch": 1.89, + "learning_rate": 3.7681375102314166e-05, + "loss": 1.2813, + "step": 8490 + }, + { + "epoch": 1.89, + "learning_rate": 3.7678398690378753e-05, + "loss": 1.2794, + "step": 8500 + }, + { + "epoch": 1.89, + "learning_rate": 3.767542227844334e-05, + "loss": 1.278, + "step": 8510 + }, + { + "epoch": 1.89, + "learning_rate": 3.767244586650793e-05, + "loss": 1.2579, + "step": 8520 + }, + { + "epoch": 1.89, + "learning_rate": 3.766946945457252e-05, + "loss": 1.2898, + "step": 8530 + }, + { + "epoch": 1.9, + "learning_rate": 3.76664930426371e-05, + "loss": 1.2864, + "step": 8540 + }, + { + "epoch": 1.9, + "learning_rate": 3.766351663070169e-05, + "loss": 1.2807, + "step": 8550 + }, + { + "epoch": 1.9, + "learning_rate": 3.766054021876628e-05, + "loss": 1.2759, + "step": 8560 + }, + { + "epoch": 1.9, + "learning_rate": 3.765756380683087e-05, + "loss": 1.2835, + "step": 8570 + }, + { + "epoch": 1.91, + "learning_rate": 3.7654587394895455e-05, + "loss": 1.2992, + "step": 8580 + }, + { + "epoch": 1.91, + "learning_rate": 3.765161098296004e-05, + "loss": 1.2739, + "step": 8590 + }, + { + "epoch": 1.91, + "learning_rate": 3.7648634571024636e-05, + "loss": 1.2866, + "step": 8600 + }, + { + "epoch": 1.91, + "learning_rate": 3.764565815908922e-05, + "loss": 1.2804, + "step": 8610 + }, + { + "epoch": 1.91, + "learning_rate": 3.764268174715381e-05, + "loss": 1.2865, + "step": 8620 + }, + { + "epoch": 1.92, + "learning_rate": 3.76397053352184e-05, + "loss": 1.2904, + "step": 8630 + }, + { + "epoch": 1.92, + "learning_rate": 3.7636728923282984e-05, + "loss": 1.2872, + "step": 8640 + }, + { + "epoch": 1.92, + "learning_rate": 3.763375251134757e-05, + "loss": 1.2725, + "step": 8650 + }, + { + "epoch": 1.92, + "learning_rate": 3.763077609941216e-05, + "loss": 1.2874, + "step": 8660 + }, + { + "epoch": 1.93, + "learning_rate": 3.762779968747675e-05, + "loss": 1.2714, + "step": 8670 + }, + { + "epoch": 1.93, + "learning_rate": 3.762482327554134e-05, + "loss": 1.2699, + "step": 8680 + }, + { + "epoch": 1.93, + "learning_rate": 3.7621846863605925e-05, + "loss": 1.2727, + "step": 8690 + }, + { + "epoch": 1.93, + "learning_rate": 3.761887045167051e-05, + "loss": 1.2732, + "step": 8700 + }, + { + "epoch": 1.93, + "learning_rate": 3.7615894039735105e-05, + "loss": 1.2711, + "step": 8710 + }, + { + "epoch": 1.94, + "learning_rate": 3.761291762779969e-05, + "loss": 1.267, + "step": 8720 + }, + { + "epoch": 1.94, + "learning_rate": 3.760994121586428e-05, + "loss": 1.2725, + "step": 8730 + }, + { + "epoch": 1.94, + "learning_rate": 3.7606964803928866e-05, + "loss": 1.273, + "step": 8740 + }, + { + "epoch": 1.94, + "learning_rate": 3.760398839199345e-05, + "loss": 1.2794, + "step": 8750 + }, + { + "epoch": 1.95, + "learning_rate": 3.760101198005804e-05, + "loss": 1.279, + "step": 8760 + }, + { + "epoch": 1.95, + "learning_rate": 3.7598035568122634e-05, + "loss": 1.2688, + "step": 8770 + }, + { + "epoch": 1.95, + "learning_rate": 3.759505915618722e-05, + "loss": 1.2731, + "step": 8780 + }, + { + "epoch": 1.95, + "learning_rate": 3.759208274425181e-05, + "loss": 1.2664, + "step": 8790 + }, + { + "epoch": 1.95, + "learning_rate": 3.7589106332316394e-05, + "loss": 1.2708, + "step": 8800 + }, + { + "epoch": 1.96, + "learning_rate": 3.758612992038099e-05, + "loss": 1.2672, + "step": 8810 + }, + { + "epoch": 1.96, + "learning_rate": 3.7583153508445575e-05, + "loss": 1.2617, + "step": 8820 + }, + { + "epoch": 1.96, + "learning_rate": 3.758017709651016e-05, + "loss": 1.2721, + "step": 8830 + }, + { + "epoch": 1.96, + "learning_rate": 3.757720068457475e-05, + "loss": 1.2667, + "step": 8840 + }, + { + "epoch": 1.97, + "learning_rate": 3.7574224272639336e-05, + "loss": 1.2706, + "step": 8850 + }, + { + "epoch": 1.97, + "learning_rate": 3.757124786070392e-05, + "loss": 1.2882, + "step": 8860 + }, + { + "epoch": 1.97, + "learning_rate": 3.756827144876851e-05, + "loss": 1.2705, + "step": 8870 + }, + { + "epoch": 1.97, + "learning_rate": 3.75652950368331e-05, + "loss": 1.2737, + "step": 8880 + }, + { + "epoch": 1.97, + "learning_rate": 3.756231862489769e-05, + "loss": 1.2712, + "step": 8890 + }, + { + "epoch": 1.98, + "learning_rate": 3.755934221296228e-05, + "loss": 1.2758, + "step": 8900 + }, + { + "epoch": 1.98, + "learning_rate": 3.7556365801026864e-05, + "loss": 1.2616, + "step": 8910 + }, + { + "epoch": 1.98, + "learning_rate": 3.755338938909146e-05, + "loss": 1.2658, + "step": 8920 + }, + { + "epoch": 1.98, + "learning_rate": 3.7550412977156044e-05, + "loss": 1.2787, + "step": 8930 + }, + { + "epoch": 1.99, + "learning_rate": 3.7547436565220624e-05, + "loss": 1.2606, + "step": 8940 + }, + { + "epoch": 1.99, + "learning_rate": 3.754446015328522e-05, + "loss": 1.2642, + "step": 8950 + }, + { + "epoch": 1.99, + "learning_rate": 3.7541483741349805e-05, + "loss": 1.2595, + "step": 8960 + }, + { + "epoch": 1.99, + "learning_rate": 3.753850732941439e-05, + "loss": 1.2627, + "step": 8970 + }, + { + "epoch": 1.99, + "learning_rate": 3.753553091747898e-05, + "loss": 1.2674, + "step": 8980 + }, + { + "epoch": 2.0, + "learning_rate": 3.753255450554357e-05, + "loss": 1.2737, + "step": 8990 + }, + { + "epoch": 2.0, + "learning_rate": 3.752957809360816e-05, + "loss": 1.2818, + "step": 9000 + }, + { + "epoch": 2.0, + "eval_cer": 4.782051282051282, + "eval_loss": 1.1977766752243042, + "eval_runtime": 5.6706, + "eval_samples_per_second": 1.763, + "eval_steps_per_second": 0.176, + "eval_wer": 1.0389610389610389, + "step": 9006 + }, + { + "epoch": 2.0, + "learning_rate": 3.7526601681672746e-05, + "loss": 1.2688, + "step": 9010 + }, + { + "epoch": 2.0, + "learning_rate": 3.752362526973733e-05, + "loss": 1.2758, + "step": 9020 + }, + { + "epoch": 2.01, + "learning_rate": 3.752064885780193e-05, + "loss": 1.2735, + "step": 9030 + }, + { + "epoch": 2.01, + "learning_rate": 3.7517672445866514e-05, + "loss": 1.2591, + "step": 9040 + }, + { + "epoch": 2.01, + "learning_rate": 3.75146960339311e-05, + "loss": 1.2773, + "step": 9050 + }, + { + "epoch": 2.01, + "learning_rate": 3.751171962199569e-05, + "loss": 1.263, + "step": 9060 + }, + { + "epoch": 2.01, + "learning_rate": 3.7508743210060274e-05, + "loss": 1.2699, + "step": 9070 + }, + { + "epoch": 2.02, + "learning_rate": 3.750576679812486e-05, + "loss": 1.2631, + "step": 9080 + }, + { + "epoch": 2.02, + "learning_rate": 3.750279038618945e-05, + "loss": 1.2583, + "step": 9090 + }, + { + "epoch": 2.02, + "learning_rate": 3.749981397425404e-05, + "loss": 1.2732, + "step": 9100 + }, + { + "epoch": 2.02, + "learning_rate": 3.749683756231863e-05, + "loss": 1.2596, + "step": 9110 + }, + { + "epoch": 2.03, + "learning_rate": 3.7493861150383216e-05, + "loss": 1.2627, + "step": 9120 + }, + { + "epoch": 2.03, + "learning_rate": 3.74908847384478e-05, + "loss": 1.273, + "step": 9130 + }, + { + "epoch": 2.03, + "learning_rate": 3.7487908326512396e-05, + "loss": 1.2677, + "step": 9140 + }, + { + "epoch": 2.03, + "learning_rate": 3.7484931914576976e-05, + "loss": 1.2619, + "step": 9150 + }, + { + "epoch": 2.03, + "learning_rate": 3.748195550264157e-05, + "loss": 1.2703, + "step": 9160 + }, + { + "epoch": 2.04, + "learning_rate": 3.747897909070616e-05, + "loss": 1.2778, + "step": 9170 + }, + { + "epoch": 2.04, + "learning_rate": 3.7476002678770744e-05, + "loss": 1.2628, + "step": 9180 + }, + { + "epoch": 2.04, + "learning_rate": 3.747302626683533e-05, + "loss": 1.2509, + "step": 9190 + }, + { + "epoch": 2.04, + "learning_rate": 3.7470049854899924e-05, + "loss": 1.2606, + "step": 9200 + }, + { + "epoch": 2.05, + "learning_rate": 3.746707344296451e-05, + "loss": 1.266, + "step": 9210 + }, + { + "epoch": 2.05, + "learning_rate": 3.74640970310291e-05, + "loss": 1.2697, + "step": 9220 + }, + { + "epoch": 2.05, + "learning_rate": 3.7461120619093685e-05, + "loss": 1.2683, + "step": 9230 + }, + { + "epoch": 2.05, + "learning_rate": 3.745814420715828e-05, + "loss": 1.2571, + "step": 9240 + }, + { + "epoch": 2.05, + "learning_rate": 3.745516779522286e-05, + "loss": 1.2676, + "step": 9250 + }, + { + "epoch": 2.06, + "learning_rate": 3.7452191383287446e-05, + "loss": 1.2674, + "step": 9260 + }, + { + "epoch": 2.06, + "learning_rate": 3.744921497135204e-05, + "loss": 1.2688, + "step": 9270 + }, + { + "epoch": 2.06, + "learning_rate": 3.7446238559416626e-05, + "loss": 1.2669, + "step": 9280 + }, + { + "epoch": 2.06, + "learning_rate": 3.744326214748121e-05, + "loss": 1.2627, + "step": 9290 + }, + { + "epoch": 2.07, + "learning_rate": 3.74402857355458e-05, + "loss": 1.2678, + "step": 9300 + }, + { + "epoch": 2.07, + "learning_rate": 3.7437309323610394e-05, + "loss": 1.2664, + "step": 9310 + }, + { + "epoch": 2.07, + "learning_rate": 3.743433291167498e-05, + "loss": 1.2744, + "step": 9320 + }, + { + "epoch": 2.07, + "learning_rate": 3.743135649973957e-05, + "loss": 1.2556, + "step": 9330 + }, + { + "epoch": 2.07, + "learning_rate": 3.7428380087804155e-05, + "loss": 1.2537, + "step": 9340 + }, + { + "epoch": 2.08, + "learning_rate": 3.742540367586874e-05, + "loss": 1.2616, + "step": 9350 + }, + { + "epoch": 2.08, + "learning_rate": 3.742242726393333e-05, + "loss": 1.2558, + "step": 9360 + }, + { + "epoch": 2.08, + "learning_rate": 3.7419450851997915e-05, + "loss": 1.2721, + "step": 9370 + }, + { + "epoch": 2.08, + "learning_rate": 3.741647444006251e-05, + "loss": 1.2562, + "step": 9380 + }, + { + "epoch": 2.09, + "learning_rate": 3.7413498028127096e-05, + "loss": 1.2503, + "step": 9390 + }, + { + "epoch": 2.09, + "learning_rate": 3.741052161619168e-05, + "loss": 1.2612, + "step": 9400 + }, + { + "epoch": 2.09, + "learning_rate": 3.740754520425627e-05, + "loss": 1.2612, + "step": 9410 + }, + { + "epoch": 2.09, + "learning_rate": 3.740456879232086e-05, + "loss": 1.2567, + "step": 9420 + }, + { + "epoch": 2.09, + "learning_rate": 3.740159238038545e-05, + "loss": 1.2588, + "step": 9430 + }, + { + "epoch": 2.1, + "learning_rate": 3.739861596845004e-05, + "loss": 1.2572, + "step": 9440 + }, + { + "epoch": 2.1, + "learning_rate": 3.7395639556514624e-05, + "loss": 1.2568, + "step": 9450 + }, + { + "epoch": 2.1, + "learning_rate": 3.739266314457921e-05, + "loss": 1.2582, + "step": 9460 + }, + { + "epoch": 2.1, + "learning_rate": 3.73896867326438e-05, + "loss": 1.2638, + "step": 9470 + }, + { + "epoch": 2.11, + "learning_rate": 3.738671032070839e-05, + "loss": 1.2651, + "step": 9480 + }, + { + "epoch": 2.11, + "learning_rate": 3.738373390877298e-05, + "loss": 1.2555, + "step": 9490 + }, + { + "epoch": 2.11, + "learning_rate": 3.7380757496837565e-05, + "loss": 1.2537, + "step": 9500 + }, + { + "epoch": 2.11, + "learning_rate": 3.737778108490215e-05, + "loss": 1.2474, + "step": 9510 + }, + { + "epoch": 2.11, + "learning_rate": 3.737480467296674e-05, + "loss": 1.2725, + "step": 9520 + }, + { + "epoch": 2.12, + "learning_rate": 3.737182826103133e-05, + "loss": 1.2583, + "step": 9530 + }, + { + "epoch": 2.12, + "learning_rate": 3.736885184909592e-05, + "loss": 1.2605, + "step": 9540 + }, + { + "epoch": 2.12, + "learning_rate": 3.7365875437160507e-05, + "loss": 1.2681, + "step": 9550 + }, + { + "epoch": 2.12, + "learning_rate": 3.7362899025225093e-05, + "loss": 1.2609, + "step": 9560 + }, + { + "epoch": 2.13, + "learning_rate": 3.735992261328968e-05, + "loss": 1.2714, + "step": 9570 + }, + { + "epoch": 2.13, + "learning_rate": 3.735694620135427e-05, + "loss": 1.261, + "step": 9580 + }, + { + "epoch": 2.13, + "learning_rate": 3.735396978941886e-05, + "loss": 1.2623, + "step": 9590 + }, + { + "epoch": 2.13, + "learning_rate": 3.735099337748345e-05, + "loss": 1.2561, + "step": 9600 + }, + { + "epoch": 2.13, + "learning_rate": 3.7348016965548035e-05, + "loss": 1.2412, + "step": 9610 + }, + { + "epoch": 2.14, + "learning_rate": 3.734504055361262e-05, + "loss": 1.2623, + "step": 9620 + }, + { + "epoch": 2.14, + "learning_rate": 3.7342064141677215e-05, + "loss": 1.2684, + "step": 9630 + }, + { + "epoch": 2.14, + "learning_rate": 3.73390877297418e-05, + "loss": 1.2627, + "step": 9640 + }, + { + "epoch": 2.14, + "learning_rate": 3.733611131780639e-05, + "loss": 1.2657, + "step": 9650 + }, + { + "epoch": 2.15, + "learning_rate": 3.7333134905870976e-05, + "loss": 1.2496, + "step": 9660 + }, + { + "epoch": 2.15, + "learning_rate": 3.733015849393556e-05, + "loss": 1.2617, + "step": 9670 + }, + { + "epoch": 2.15, + "learning_rate": 3.732718208200015e-05, + "loss": 1.2403, + "step": 9680 + }, + { + "epoch": 2.15, + "learning_rate": 3.732420567006474e-05, + "loss": 1.2727, + "step": 9690 + }, + { + "epoch": 2.15, + "learning_rate": 3.732122925812933e-05, + "loss": 1.2525, + "step": 9700 + }, + { + "epoch": 2.16, + "learning_rate": 3.731825284619392e-05, + "loss": 1.2576, + "step": 9710 + }, + { + "epoch": 2.16, + "learning_rate": 3.7315276434258504e-05, + "loss": 1.2627, + "step": 9720 + }, + { + "epoch": 2.16, + "learning_rate": 3.731230002232309e-05, + "loss": 1.2637, + "step": 9730 + }, + { + "epoch": 2.16, + "learning_rate": 3.7309323610387685e-05, + "loss": 1.2594, + "step": 9740 + }, + { + "epoch": 2.17, + "learning_rate": 3.730634719845227e-05, + "loss": 1.2494, + "step": 9750 + }, + { + "epoch": 2.17, + "learning_rate": 3.730337078651685e-05, + "loss": 1.2612, + "step": 9760 + }, + { + "epoch": 2.17, + "learning_rate": 3.7300394374581445e-05, + "loss": 1.2578, + "step": 9770 + }, + { + "epoch": 2.17, + "learning_rate": 3.729741796264603e-05, + "loss": 1.2669, + "step": 9780 + }, + { + "epoch": 2.17, + "learning_rate": 3.729444155071062e-05, + "loss": 1.2731, + "step": 9790 + }, + { + "epoch": 2.18, + "learning_rate": 3.7291465138775206e-05, + "loss": 1.2397, + "step": 9800 + }, + { + "epoch": 2.18, + "learning_rate": 3.72884887268398e-05, + "loss": 1.2514, + "step": 9810 + }, + { + "epoch": 2.18, + "learning_rate": 3.728551231490439e-05, + "loss": 1.261, + "step": 9820 + }, + { + "epoch": 2.18, + "learning_rate": 3.7282535902968974e-05, + "loss": 1.2573, + "step": 9830 + }, + { + "epoch": 2.19, + "learning_rate": 3.727955949103356e-05, + "loss": 1.2488, + "step": 9840 + }, + { + "epoch": 2.19, + "learning_rate": 3.7276583079098154e-05, + "loss": 1.2482, + "step": 9850 + }, + { + "epoch": 2.19, + "learning_rate": 3.727360666716274e-05, + "loss": 1.2518, + "step": 9860 + }, + { + "epoch": 2.19, + "learning_rate": 3.727063025522733e-05, + "loss": 1.2574, + "step": 9870 + }, + { + "epoch": 2.19, + "learning_rate": 3.7267653843291915e-05, + "loss": 1.2535, + "step": 9880 + }, + { + "epoch": 2.2, + "learning_rate": 3.72646774313565e-05, + "loss": 1.2407, + "step": 9890 + }, + { + "epoch": 2.2, + "learning_rate": 3.726170101942109e-05, + "loss": 1.2641, + "step": 9900 + }, + { + "epoch": 2.2, + "learning_rate": 3.725872460748568e-05, + "loss": 1.2625, + "step": 9910 + }, + { + "epoch": 2.2, + "learning_rate": 3.725574819555027e-05, + "loss": 1.2705, + "step": 9920 + }, + { + "epoch": 2.21, + "learning_rate": 3.7252771783614856e-05, + "loss": 1.2598, + "step": 9930 + }, + { + "epoch": 2.21, + "learning_rate": 3.724979537167944e-05, + "loss": 1.2495, + "step": 9940 + }, + { + "epoch": 2.21, + "learning_rate": 3.724681895974403e-05, + "loss": 1.2514, + "step": 9950 + }, + { + "epoch": 2.21, + "learning_rate": 3.7243842547808624e-05, + "loss": 1.2362, + "step": 9960 + }, + { + "epoch": 2.21, + "learning_rate": 3.7240866135873204e-05, + "loss": 1.2595, + "step": 9970 + }, + { + "epoch": 2.22, + "learning_rate": 3.72378897239378e-05, + "loss": 1.2564, + "step": 9980 + }, + { + "epoch": 2.22, + "learning_rate": 3.7234913312002384e-05, + "loss": 1.2687, + "step": 9990 + }, + { + "epoch": 2.22, + "learning_rate": 3.723193690006697e-05, + "loss": 1.2474, + "step": 10000 + }, + { + "epoch": 2.22, + "learning_rate": 3.722896048813156e-05, + "loss": 1.25, + "step": 10010 + }, + { + "epoch": 2.23, + "learning_rate": 3.722598407619615e-05, + "loss": 1.2333, + "step": 10020 + }, + { + "epoch": 2.23, + "learning_rate": 3.722300766426074e-05, + "loss": 1.2758, + "step": 10030 + }, + { + "epoch": 2.23, + "learning_rate": 3.7220031252325326e-05, + "loss": 1.2446, + "step": 10040 + }, + { + "epoch": 2.23, + "learning_rate": 3.721705484038991e-05, + "loss": 1.2416, + "step": 10050 + }, + { + "epoch": 2.23, + "learning_rate": 3.7214078428454506e-05, + "loss": 1.245, + "step": 10060 + }, + { + "epoch": 2.24, + "learning_rate": 3.7211102016519086e-05, + "loss": 1.2408, + "step": 10070 + }, + { + "epoch": 2.24, + "learning_rate": 3.720812560458367e-05, + "loss": 1.2515, + "step": 10080 + }, + { + "epoch": 2.24, + "learning_rate": 3.720514919264827e-05, + "loss": 1.242, + "step": 10090 + }, + { + "epoch": 2.24, + "learning_rate": 3.7202172780712854e-05, + "loss": 1.264, + "step": 10100 + }, + { + "epoch": 2.25, + "learning_rate": 3.719919636877744e-05, + "loss": 1.2542, + "step": 10110 + }, + { + "epoch": 2.25, + "learning_rate": 3.719621995684203e-05, + "loss": 1.2416, + "step": 10120 + }, + { + "epoch": 2.25, + "learning_rate": 3.719324354490662e-05, + "loss": 1.253, + "step": 10130 + }, + { + "epoch": 2.25, + "learning_rate": 3.719026713297121e-05, + "loss": 1.2612, + "step": 10140 + }, + { + "epoch": 2.25, + "learning_rate": 3.7187290721035795e-05, + "loss": 1.2544, + "step": 10150 + }, + { + "epoch": 2.26, + "learning_rate": 3.718431430910038e-05, + "loss": 1.2511, + "step": 10160 + }, + { + "epoch": 2.26, + "learning_rate": 3.718133789716497e-05, + "loss": 1.2673, + "step": 10170 + }, + { + "epoch": 2.26, + "learning_rate": 3.7178361485229556e-05, + "loss": 1.256, + "step": 10180 + }, + { + "epoch": 2.26, + "learning_rate": 3.717538507329414e-05, + "loss": 1.2706, + "step": 10190 + }, + { + "epoch": 2.27, + "learning_rate": 3.7172408661358736e-05, + "loss": 1.2486, + "step": 10200 + }, + { + "epoch": 2.27, + "learning_rate": 3.716943224942332e-05, + "loss": 1.2476, + "step": 10210 + }, + { + "epoch": 2.27, + "learning_rate": 3.716645583748791e-05, + "loss": 1.2431, + "step": 10220 + }, + { + "epoch": 2.27, + "learning_rate": 3.71634794255525e-05, + "loss": 1.267, + "step": 10230 + }, + { + "epoch": 2.27, + "learning_rate": 3.716050301361709e-05, + "loss": 1.2573, + "step": 10240 + }, + { + "epoch": 2.28, + "learning_rate": 3.715752660168168e-05, + "loss": 1.2459, + "step": 10250 + }, + { + "epoch": 2.28, + "learning_rate": 3.7154550189746264e-05, + "loss": 1.2415, + "step": 10260 + }, + { + "epoch": 2.28, + "learning_rate": 3.715157377781085e-05, + "loss": 1.2614, + "step": 10270 + }, + { + "epoch": 2.28, + "learning_rate": 3.714859736587544e-05, + "loss": 1.2414, + "step": 10280 + }, + { + "epoch": 2.29, + "learning_rate": 3.7145620953940025e-05, + "loss": 1.2451, + "step": 10290 + }, + { + "epoch": 2.29, + "learning_rate": 3.714264454200462e-05, + "loss": 1.2462, + "step": 10300 + }, + { + "epoch": 2.29, + "learning_rate": 3.7139668130069206e-05, + "loss": 1.262, + "step": 10310 + }, + { + "epoch": 2.29, + "learning_rate": 3.713669171813379e-05, + "loss": 1.2543, + "step": 10320 + }, + { + "epoch": 2.29, + "learning_rate": 3.713371530619838e-05, + "loss": 1.2452, + "step": 10330 + }, + { + "epoch": 2.3, + "learning_rate": 3.713073889426297e-05, + "loss": 1.2509, + "step": 10340 + }, + { + "epoch": 2.3, + "learning_rate": 3.712776248232756e-05, + "loss": 1.2443, + "step": 10350 + }, + { + "epoch": 2.3, + "learning_rate": 3.712478607039215e-05, + "loss": 1.2502, + "step": 10360 + }, + { + "epoch": 2.3, + "learning_rate": 3.7121809658456734e-05, + "loss": 1.2459, + "step": 10370 + }, + { + "epoch": 2.31, + "learning_rate": 3.711883324652132e-05, + "loss": 1.2411, + "step": 10380 + }, + { + "epoch": 2.31, + "learning_rate": 3.711585683458591e-05, + "loss": 1.2533, + "step": 10390 + }, + { + "epoch": 2.31, + "learning_rate": 3.7112880422650495e-05, + "loss": 1.2383, + "step": 10400 + }, + { + "epoch": 2.31, + "learning_rate": 3.710990401071509e-05, + "loss": 1.2574, + "step": 10410 + }, + { + "epoch": 2.31, + "learning_rate": 3.7106927598779675e-05, + "loss": 1.2529, + "step": 10420 + }, + { + "epoch": 2.32, + "learning_rate": 3.710395118684426e-05, + "loss": 1.2569, + "step": 10430 + }, + { + "epoch": 2.32, + "learning_rate": 3.710097477490885e-05, + "loss": 1.2733, + "step": 10440 + }, + { + "epoch": 2.32, + "learning_rate": 3.709799836297344e-05, + "loss": 1.2558, + "step": 10450 + }, + { + "epoch": 2.32, + "learning_rate": 3.709502195103803e-05, + "loss": 1.2477, + "step": 10460 + }, + { + "epoch": 2.33, + "learning_rate": 3.7092045539102616e-05, + "loss": 1.2487, + "step": 10470 + }, + { + "epoch": 2.33, + "learning_rate": 3.70890691271672e-05, + "loss": 1.2505, + "step": 10480 + }, + { + "epoch": 2.33, + "learning_rate": 3.708609271523179e-05, + "loss": 1.2619, + "step": 10490 + }, + { + "epoch": 2.33, + "learning_rate": 3.708311630329638e-05, + "loss": 1.2463, + "step": 10500 + }, + { + "epoch": 2.33, + "learning_rate": 3.7080139891360964e-05, + "loss": 1.2534, + "step": 10510 + }, + { + "epoch": 2.34, + "learning_rate": 3.707716347942556e-05, + "loss": 1.2687, + "step": 10520 + }, + { + "epoch": 2.34, + "learning_rate": 3.7074187067490145e-05, + "loss": 1.2644, + "step": 10530 + }, + { + "epoch": 2.34, + "learning_rate": 3.707121065555473e-05, + "loss": 1.2634, + "step": 10540 + }, + { + "epoch": 2.34, + "learning_rate": 3.706823424361932e-05, + "loss": 1.2507, + "step": 10550 + }, + { + "epoch": 2.35, + "learning_rate": 3.706525783168391e-05, + "loss": 1.2382, + "step": 10560 + }, + { + "epoch": 2.35, + "learning_rate": 3.70622814197485e-05, + "loss": 1.2662, + "step": 10570 + }, + { + "epoch": 2.35, + "learning_rate": 3.705930500781308e-05, + "loss": 1.2534, + "step": 10580 + }, + { + "epoch": 2.35, + "learning_rate": 3.705632859587767e-05, + "loss": 1.2561, + "step": 10590 + }, + { + "epoch": 2.35, + "learning_rate": 3.705335218394226e-05, + "loss": 1.2401, + "step": 10600 + }, + { + "epoch": 2.36, + "learning_rate": 3.7050375772006847e-05, + "loss": 1.2461, + "step": 10610 + }, + { + "epoch": 2.36, + "learning_rate": 3.7047399360071433e-05, + "loss": 1.2454, + "step": 10620 + }, + { + "epoch": 2.36, + "learning_rate": 3.704442294813603e-05, + "loss": 1.2449, + "step": 10630 + }, + { + "epoch": 2.36, + "learning_rate": 3.7041446536200614e-05, + "loss": 1.2401, + "step": 10640 + }, + { + "epoch": 2.37, + "learning_rate": 3.70384701242652e-05, + "loss": 1.2365, + "step": 10650 + }, + { + "epoch": 2.37, + "learning_rate": 3.703549371232979e-05, + "loss": 1.2637, + "step": 10660 + }, + { + "epoch": 2.37, + "learning_rate": 3.703251730039438e-05, + "loss": 1.2526, + "step": 10670 + }, + { + "epoch": 2.37, + "learning_rate": 3.702954088845896e-05, + "loss": 1.2501, + "step": 10680 + }, + { + "epoch": 2.37, + "learning_rate": 3.7026564476523555e-05, + "loss": 1.2432, + "step": 10690 + }, + { + "epoch": 2.38, + "learning_rate": 3.702358806458814e-05, + "loss": 1.2649, + "step": 10700 + }, + { + "epoch": 2.38, + "learning_rate": 3.702061165265273e-05, + "loss": 1.2698, + "step": 10710 + }, + { + "epoch": 2.38, + "learning_rate": 3.7017635240717316e-05, + "loss": 1.2648, + "step": 10720 + }, + { + "epoch": 2.38, + "learning_rate": 3.701465882878191e-05, + "loss": 1.2587, + "step": 10730 + }, + { + "epoch": 2.39, + "learning_rate": 3.7011682416846497e-05, + "loss": 1.2565, + "step": 10740 + }, + { + "epoch": 2.39, + "learning_rate": 3.7008706004911083e-05, + "loss": 1.2418, + "step": 10750 + }, + { + "epoch": 2.39, + "learning_rate": 3.700572959297567e-05, + "loss": 1.2241, + "step": 10760 + }, + { + "epoch": 2.39, + "learning_rate": 3.7002753181040264e-05, + "loss": 1.2446, + "step": 10770 + }, + { + "epoch": 2.39, + "learning_rate": 3.699977676910485e-05, + "loss": 1.2379, + "step": 10780 + }, + { + "epoch": 2.4, + "learning_rate": 3.699680035716943e-05, + "loss": 1.2459, + "step": 10790 + }, + { + "epoch": 2.4, + "learning_rate": 3.6993823945234025e-05, + "loss": 1.2534, + "step": 10800 + }, + { + "epoch": 2.4, + "learning_rate": 3.699084753329861e-05, + "loss": 1.2536, + "step": 10810 + }, + { + "epoch": 2.4, + "learning_rate": 3.69878711213632e-05, + "loss": 1.2327, + "step": 10820 + }, + { + "epoch": 2.41, + "learning_rate": 3.6984894709427785e-05, + "loss": 1.24, + "step": 10830 + }, + { + "epoch": 2.41, + "learning_rate": 3.698191829749238e-05, + "loss": 1.242, + "step": 10840 + }, + { + "epoch": 2.41, + "learning_rate": 3.6978941885556966e-05, + "loss": 1.2355, + "step": 10850 + }, + { + "epoch": 2.41, + "learning_rate": 3.697596547362155e-05, + "loss": 1.2488, + "step": 10860 + }, + { + "epoch": 2.41, + "learning_rate": 3.697298906168614e-05, + "loss": 1.2404, + "step": 10870 + }, + { + "epoch": 2.42, + "learning_rate": 3.6970012649750733e-05, + "loss": 1.2336, + "step": 10880 + }, + { + "epoch": 2.42, + "learning_rate": 3.6967036237815314e-05, + "loss": 1.2524, + "step": 10890 + }, + { + "epoch": 2.42, + "learning_rate": 3.69640598258799e-05, + "loss": 1.2386, + "step": 10900 + }, + { + "epoch": 2.42, + "learning_rate": 3.6961083413944494e-05, + "loss": 1.2455, + "step": 10910 + }, + { + "epoch": 2.43, + "learning_rate": 3.695810700200908e-05, + "loss": 1.246, + "step": 10920 + }, + { + "epoch": 2.43, + "learning_rate": 3.695513059007367e-05, + "loss": 1.2494, + "step": 10930 + }, + { + "epoch": 2.43, + "learning_rate": 3.6952154178138255e-05, + "loss": 1.2576, + "step": 10940 + }, + { + "epoch": 2.43, + "learning_rate": 3.694917776620285e-05, + "loss": 1.2712, + "step": 10950 + }, + { + "epoch": 2.43, + "learning_rate": 3.6946201354267435e-05, + "loss": 1.2517, + "step": 10960 + }, + { + "epoch": 2.44, + "learning_rate": 3.694322494233202e-05, + "loss": 1.2471, + "step": 10970 + }, + { + "epoch": 2.44, + "learning_rate": 3.694024853039661e-05, + "loss": 1.2579, + "step": 10980 + }, + { + "epoch": 2.44, + "learning_rate": 3.6937272118461196e-05, + "loss": 1.2556, + "step": 10990 + }, + { + "epoch": 2.44, + "learning_rate": 3.693429570652578e-05, + "loss": 1.2626, + "step": 11000 + }, + { + "epoch": 2.45, + "learning_rate": 3.693131929459037e-05, + "loss": 1.2578, + "step": 11010 + }, + { + "epoch": 2.45, + "learning_rate": 3.6928342882654964e-05, + "loss": 1.2545, + "step": 11020 + }, + { + "epoch": 2.45, + "learning_rate": 3.692536647071955e-05, + "loss": 1.2467, + "step": 11030 + }, + { + "epoch": 2.45, + "learning_rate": 3.692239005878414e-05, + "loss": 1.2498, + "step": 11040 + }, + { + "epoch": 2.45, + "learning_rate": 3.6919413646848724e-05, + "loss": 1.2575, + "step": 11050 + }, + { + "epoch": 2.46, + "learning_rate": 3.691643723491332e-05, + "loss": 1.2624, + "step": 11060 + }, + { + "epoch": 2.46, + "learning_rate": 3.6913460822977905e-05, + "loss": 1.2586, + "step": 11070 + }, + { + "epoch": 2.46, + "learning_rate": 3.691048441104249e-05, + "loss": 1.2376, + "step": 11080 + }, + { + "epoch": 2.46, + "learning_rate": 3.690750799910708e-05, + "loss": 1.2565, + "step": 11090 + }, + { + "epoch": 2.47, + "learning_rate": 3.6904531587171666e-05, + "loss": 1.2547, + "step": 11100 + }, + { + "epoch": 2.47, + "learning_rate": 3.690155517523625e-05, + "loss": 1.2527, + "step": 11110 + }, + { + "epoch": 2.47, + "learning_rate": 3.6898578763300846e-05, + "loss": 1.2605, + "step": 11120 + }, + { + "epoch": 2.47, + "learning_rate": 3.689560235136543e-05, + "loss": 1.2571, + "step": 11130 + }, + { + "epoch": 2.47, + "learning_rate": 3.689262593943002e-05, + "loss": 1.2611, + "step": 11140 + }, + { + "epoch": 2.48, + "learning_rate": 3.688964952749461e-05, + "loss": 1.236, + "step": 11150 + }, + { + "epoch": 2.48, + "learning_rate": 3.68866731155592e-05, + "loss": 1.2416, + "step": 11160 + }, + { + "epoch": 2.48, + "learning_rate": 3.688369670362379e-05, + "loss": 1.2523, + "step": 11170 + }, + { + "epoch": 2.48, + "learning_rate": 3.6880720291688374e-05, + "loss": 1.2642, + "step": 11180 + }, + { + "epoch": 2.49, + "learning_rate": 3.687774387975296e-05, + "loss": 1.2432, + "step": 11190 + }, + { + "epoch": 2.49, + "learning_rate": 3.687476746781755e-05, + "loss": 1.245, + "step": 11200 + }, + { + "epoch": 2.49, + "learning_rate": 3.6871791055882135e-05, + "loss": 1.2443, + "step": 11210 + }, + { + "epoch": 2.49, + "learning_rate": 3.686881464394672e-05, + "loss": 1.2489, + "step": 11220 + }, + { + "epoch": 2.49, + "learning_rate": 3.6865838232011316e-05, + "loss": 1.2466, + "step": 11230 + }, + { + "epoch": 2.5, + "learning_rate": 3.68628618200759e-05, + "loss": 1.2258, + "step": 11240 + }, + { + "epoch": 2.5, + "learning_rate": 3.685988540814049e-05, + "loss": 1.2391, + "step": 11250 + }, + { + "epoch": 2.5, + "learning_rate": 3.6856908996205076e-05, + "loss": 1.2451, + "step": 11260 + }, + { + "epoch": 2.5, + "learning_rate": 3.685393258426967e-05, + "loss": 1.2452, + "step": 11270 + }, + { + "epoch": 2.5, + "learning_rate": 3.685095617233426e-05, + "loss": 1.2376, + "step": 11280 + }, + { + "epoch": 2.51, + "learning_rate": 3.6847979760398844e-05, + "loss": 1.2376, + "step": 11290 + }, + { + "epoch": 2.51, + "learning_rate": 3.684500334846343e-05, + "loss": 1.2461, + "step": 11300 + }, + { + "epoch": 2.51, + "learning_rate": 3.684202693652802e-05, + "loss": 1.232, + "step": 11310 + }, + { + "epoch": 2.51, + "learning_rate": 3.6839050524592604e-05, + "loss": 1.2349, + "step": 11320 + }, + { + "epoch": 2.52, + "learning_rate": 3.683607411265719e-05, + "loss": 1.2367, + "step": 11330 + }, + { + "epoch": 2.52, + "learning_rate": 3.6833097700721785e-05, + "loss": 1.2385, + "step": 11340 + }, + { + "epoch": 2.52, + "learning_rate": 3.683012128878637e-05, + "loss": 1.2354, + "step": 11350 + }, + { + "epoch": 2.52, + "learning_rate": 3.682714487685096e-05, + "loss": 1.2343, + "step": 11360 + }, + { + "epoch": 2.52, + "learning_rate": 3.6824168464915546e-05, + "loss": 1.2398, + "step": 11370 + }, + { + "epoch": 2.53, + "learning_rate": 3.682119205298014e-05, + "loss": 1.2346, + "step": 11380 + }, + { + "epoch": 2.53, + "learning_rate": 3.6818215641044726e-05, + "loss": 1.2526, + "step": 11390 + }, + { + "epoch": 2.53, + "learning_rate": 3.6815239229109306e-05, + "loss": 1.2589, + "step": 11400 + }, + { + "epoch": 2.53, + "learning_rate": 3.68122628171739e-05, + "loss": 1.2495, + "step": 11410 + }, + { + "epoch": 2.54, + "learning_rate": 3.680928640523849e-05, + "loss": 1.2361, + "step": 11420 + }, + { + "epoch": 2.54, + "learning_rate": 3.6806309993303074e-05, + "loss": 1.2428, + "step": 11430 + }, + { + "epoch": 2.54, + "learning_rate": 3.680333358136766e-05, + "loss": 1.2177, + "step": 11440 + }, + { + "epoch": 2.54, + "learning_rate": 3.6800357169432254e-05, + "loss": 1.2531, + "step": 11450 + }, + { + "epoch": 2.54, + "learning_rate": 3.679738075749684e-05, + "loss": 1.2292, + "step": 11460 + }, + { + "epoch": 2.55, + "learning_rate": 3.679440434556143e-05, + "loss": 1.2267, + "step": 11470 + }, + { + "epoch": 2.55, + "learning_rate": 3.6791427933626015e-05, + "loss": 1.2392, + "step": 11480 + }, + { + "epoch": 2.55, + "learning_rate": 3.678845152169061e-05, + "loss": 1.2413, + "step": 11490 + }, + { + "epoch": 2.55, + "learning_rate": 3.678547510975519e-05, + "loss": 1.2434, + "step": 11500 + }, + { + "epoch": 2.56, + "learning_rate": 3.678249869781978e-05, + "loss": 1.2287, + "step": 11510 + }, + { + "epoch": 2.56, + "learning_rate": 3.677952228588437e-05, + "loss": 1.2341, + "step": 11520 + }, + { + "epoch": 2.56, + "learning_rate": 3.6776545873948956e-05, + "loss": 1.2381, + "step": 11530 + }, + { + "epoch": 2.56, + "learning_rate": 3.677356946201354e-05, + "loss": 1.2676, + "step": 11540 + }, + { + "epoch": 2.56, + "learning_rate": 3.677059305007814e-05, + "loss": 1.2371, + "step": 11550 + }, + { + "epoch": 2.57, + "learning_rate": 3.6767616638142724e-05, + "loss": 1.2447, + "step": 11560 + }, + { + "epoch": 2.57, + "learning_rate": 3.676464022620731e-05, + "loss": 1.2219, + "step": 11570 + }, + { + "epoch": 2.57, + "learning_rate": 3.67616638142719e-05, + "loss": 1.2359, + "step": 11580 + }, + { + "epoch": 2.57, + "learning_rate": 3.675868740233649e-05, + "loss": 1.2286, + "step": 11590 + }, + { + "epoch": 2.58, + "learning_rate": 3.675571099040108e-05, + "loss": 1.2417, + "step": 11600 + }, + { + "epoch": 2.58, + "learning_rate": 3.675273457846566e-05, + "loss": 1.2229, + "step": 11610 + }, + { + "epoch": 2.58, + "learning_rate": 3.674975816653025e-05, + "loss": 1.239, + "step": 11620 + }, + { + "epoch": 2.58, + "learning_rate": 3.674678175459484e-05, + "loss": 1.2435, + "step": 11630 + }, + { + "epoch": 2.58, + "learning_rate": 3.6743805342659426e-05, + "loss": 1.229, + "step": 11640 + }, + { + "epoch": 2.59, + "learning_rate": 3.674082893072401e-05, + "loss": 1.2191, + "step": 11650 + }, + { + "epoch": 2.59, + "learning_rate": 3.6737852518788606e-05, + "loss": 1.2414, + "step": 11660 + }, + { + "epoch": 2.59, + "learning_rate": 3.673487610685319e-05, + "loss": 1.2254, + "step": 11670 + }, + { + "epoch": 2.59, + "learning_rate": 3.673189969491778e-05, + "loss": 1.2199, + "step": 11680 + }, + { + "epoch": 2.6, + "learning_rate": 3.672892328298237e-05, + "loss": 1.231, + "step": 11690 + }, + { + "epoch": 2.6, + "learning_rate": 3.672594687104696e-05, + "loss": 1.2246, + "step": 11700 + }, + { + "epoch": 2.6, + "learning_rate": 3.672297045911154e-05, + "loss": 1.2413, + "step": 11710 + }, + { + "epoch": 2.6, + "learning_rate": 3.671999404717613e-05, + "loss": 1.2359, + "step": 11720 + }, + { + "epoch": 2.6, + "learning_rate": 3.671701763524072e-05, + "loss": 1.2369, + "step": 11730 + }, + { + "epoch": 2.61, + "learning_rate": 3.671404122330531e-05, + "loss": 1.2442, + "step": 11740 + }, + { + "epoch": 2.61, + "learning_rate": 3.6711064811369895e-05, + "loss": 1.2302, + "step": 11750 + }, + { + "epoch": 2.61, + "learning_rate": 3.670808839943448e-05, + "loss": 1.2309, + "step": 11760 + }, + { + "epoch": 2.61, + "learning_rate": 3.6705111987499076e-05, + "loss": 1.2334, + "step": 11770 + }, + { + "epoch": 2.62, + "learning_rate": 3.670213557556366e-05, + "loss": 1.2213, + "step": 11780 + }, + { + "epoch": 2.62, + "learning_rate": 3.669915916362825e-05, + "loss": 1.2364, + "step": 11790 + }, + { + "epoch": 2.62, + "learning_rate": 3.6696182751692837e-05, + "loss": 1.2365, + "step": 11800 + }, + { + "epoch": 2.62, + "learning_rate": 3.6693206339757423e-05, + "loss": 1.2535, + "step": 11810 + }, + { + "epoch": 2.62, + "learning_rate": 3.669022992782201e-05, + "loss": 1.2428, + "step": 11820 + }, + { + "epoch": 2.63, + "learning_rate": 3.66872535158866e-05, + "loss": 1.2411, + "step": 11830 + }, + { + "epoch": 2.63, + "learning_rate": 3.668427710395119e-05, + "loss": 1.2534, + "step": 11840 + }, + { + "epoch": 2.63, + "learning_rate": 3.668130069201578e-05, + "loss": 1.2295, + "step": 11850 + }, + { + "epoch": 2.63, + "learning_rate": 3.6678324280080365e-05, + "loss": 1.2257, + "step": 11860 + }, + { + "epoch": 2.64, + "learning_rate": 3.667534786814495e-05, + "loss": 1.2379, + "step": 11870 + }, + { + "epoch": 2.64, + "learning_rate": 3.6672371456209545e-05, + "loss": 1.225, + "step": 11880 + }, + { + "epoch": 2.64, + "learning_rate": 3.666939504427413e-05, + "loss": 1.2338, + "step": 11890 + }, + { + "epoch": 2.64, + "learning_rate": 3.666641863233872e-05, + "loss": 1.232, + "step": 11900 + }, + { + "epoch": 2.64, + "learning_rate": 3.6663442220403306e-05, + "loss": 1.2355, + "step": 11910 + }, + { + "epoch": 2.65, + "learning_rate": 3.666046580846789e-05, + "loss": 1.2336, + "step": 11920 + }, + { + "epoch": 2.65, + "learning_rate": 3.665748939653248e-05, + "loss": 1.2403, + "step": 11930 + }, + { + "epoch": 2.65, + "learning_rate": 3.6654512984597074e-05, + "loss": 1.2178, + "step": 11940 + }, + { + "epoch": 2.65, + "learning_rate": 3.665153657266166e-05, + "loss": 1.235, + "step": 11950 + }, + { + "epoch": 2.66, + "learning_rate": 3.664856016072625e-05, + "loss": 1.2366, + "step": 11960 + }, + { + "epoch": 2.66, + "learning_rate": 3.6645583748790834e-05, + "loss": 1.2258, + "step": 11970 + }, + { + "epoch": 2.66, + "learning_rate": 3.664260733685543e-05, + "loss": 1.2286, + "step": 11980 + }, + { + "epoch": 2.66, + "learning_rate": 3.6639630924920015e-05, + "loss": 1.2458, + "step": 11990 + }, + { + "epoch": 2.66, + "learning_rate": 3.66366545129846e-05, + "loss": 1.2447, + "step": 12000 + }, + { + "epoch": 2.67, + "learning_rate": 3.663367810104919e-05, + "loss": 1.2407, + "step": 12010 + }, + { + "epoch": 2.67, + "learning_rate": 3.6630701689113775e-05, + "loss": 1.2314, + "step": 12020 + }, + { + "epoch": 2.67, + "learning_rate": 3.662772527717836e-05, + "loss": 1.2324, + "step": 12030 + }, + { + "epoch": 2.67, + "learning_rate": 3.662474886524295e-05, + "loss": 1.228, + "step": 12040 + }, + { + "epoch": 2.68, + "learning_rate": 3.662177245330754e-05, + "loss": 1.2236, + "step": 12050 + }, + { + "epoch": 2.68, + "learning_rate": 3.661879604137213e-05, + "loss": 1.2451, + "step": 12060 + }, + { + "epoch": 2.68, + "learning_rate": 3.661581962943672e-05, + "loss": 1.2387, + "step": 12070 + }, + { + "epoch": 2.68, + "learning_rate": 3.6612843217501304e-05, + "loss": 1.2152, + "step": 12080 + }, + { + "epoch": 2.68, + "learning_rate": 3.66098668055659e-05, + "loss": 1.2251, + "step": 12090 + }, + { + "epoch": 2.69, + "learning_rate": 3.6606890393630484e-05, + "loss": 1.2259, + "step": 12100 + }, + { + "epoch": 2.69, + "learning_rate": 3.660391398169507e-05, + "loss": 1.225, + "step": 12110 + }, + { + "epoch": 2.69, + "learning_rate": 3.660093756975966e-05, + "loss": 1.2268, + "step": 12120 + }, + { + "epoch": 2.69, + "learning_rate": 3.6597961157824245e-05, + "loss": 1.2385, + "step": 12130 + }, + { + "epoch": 2.7, + "learning_rate": 3.659498474588883e-05, + "loss": 1.2406, + "step": 12140 + }, + { + "epoch": 2.7, + "learning_rate": 3.659200833395342e-05, + "loss": 1.2266, + "step": 12150 + }, + { + "epoch": 2.7, + "learning_rate": 3.658903192201801e-05, + "loss": 1.2333, + "step": 12160 + }, + { + "epoch": 2.7, + "learning_rate": 3.65860555100826e-05, + "loss": 1.2511, + "step": 12170 + }, + { + "epoch": 2.7, + "learning_rate": 3.6583079098147186e-05, + "loss": 1.2364, + "step": 12180 + }, + { + "epoch": 2.71, + "learning_rate": 3.658010268621177e-05, + "loss": 1.2442, + "step": 12190 + }, + { + "epoch": 2.71, + "learning_rate": 3.657712627427637e-05, + "loss": 1.2492, + "step": 12200 + }, + { + "epoch": 2.71, + "learning_rate": 3.6574149862340954e-05, + "loss": 1.2322, + "step": 12210 + }, + { + "epoch": 2.71, + "learning_rate": 3.6571173450405534e-05, + "loss": 1.2385, + "step": 12220 + }, + { + "epoch": 2.72, + "learning_rate": 3.656819703847013e-05, + "loss": 1.2306, + "step": 12230 + }, + { + "epoch": 2.72, + "learning_rate": 3.6565220626534714e-05, + "loss": 1.2357, + "step": 12240 + }, + { + "epoch": 2.72, + "learning_rate": 3.65622442145993e-05, + "loss": 1.2364, + "step": 12250 + }, + { + "epoch": 2.72, + "learning_rate": 3.655926780266389e-05, + "loss": 1.2252, + "step": 12260 + }, + { + "epoch": 2.72, + "learning_rate": 3.655629139072848e-05, + "loss": 1.2346, + "step": 12270 + }, + { + "epoch": 2.73, + "learning_rate": 3.655331497879307e-05, + "loss": 1.2358, + "step": 12280 + }, + { + "epoch": 2.73, + "learning_rate": 3.6550338566857656e-05, + "loss": 1.2522, + "step": 12290 + }, + { + "epoch": 2.73, + "learning_rate": 3.654736215492224e-05, + "loss": 1.2272, + "step": 12300 + }, + { + "epoch": 2.73, + "learning_rate": 3.6544385742986836e-05, + "loss": 1.2382, + "step": 12310 + }, + { + "epoch": 2.74, + "learning_rate": 3.6541409331051416e-05, + "loss": 1.2342, + "step": 12320 + }, + { + "epoch": 2.74, + "learning_rate": 3.653843291911601e-05, + "loss": 1.24, + "step": 12330 + }, + { + "epoch": 2.74, + "learning_rate": 3.65354565071806e-05, + "loss": 1.2348, + "step": 12340 + }, + { + "epoch": 2.74, + "learning_rate": 3.6532480095245184e-05, + "loss": 1.238, + "step": 12350 + }, + { + "epoch": 2.74, + "learning_rate": 3.652950368330977e-05, + "loss": 1.2574, + "step": 12360 + }, + { + "epoch": 2.75, + "learning_rate": 3.6526527271374364e-05, + "loss": 1.233, + "step": 12370 + }, + { + "epoch": 2.75, + "learning_rate": 3.652355085943895e-05, + "loss": 1.2441, + "step": 12380 + }, + { + "epoch": 2.75, + "learning_rate": 3.652057444750354e-05, + "loss": 1.2206, + "step": 12390 + }, + { + "epoch": 2.75, + "learning_rate": 3.6517598035568125e-05, + "loss": 1.2248, + "step": 12400 + }, + { + "epoch": 2.76, + "learning_rate": 3.651462162363272e-05, + "loss": 1.2375, + "step": 12410 + }, + { + "epoch": 2.76, + "learning_rate": 3.65116452116973e-05, + "loss": 1.2331, + "step": 12420 + }, + { + "epoch": 2.76, + "learning_rate": 3.6508668799761886e-05, + "loss": 1.2266, + "step": 12430 + }, + { + "epoch": 2.76, + "learning_rate": 3.650569238782648e-05, + "loss": 1.2379, + "step": 12440 + }, + { + "epoch": 2.76, + "learning_rate": 3.6502715975891066e-05, + "loss": 1.2227, + "step": 12450 + }, + { + "epoch": 2.77, + "learning_rate": 3.649973956395565e-05, + "loss": 1.222, + "step": 12460 + }, + { + "epoch": 2.77, + "learning_rate": 3.649676315202024e-05, + "loss": 1.2242, + "step": 12470 + }, + { + "epoch": 2.77, + "learning_rate": 3.6493786740084834e-05, + "loss": 1.2301, + "step": 12480 + }, + { + "epoch": 2.77, + "learning_rate": 3.649081032814942e-05, + "loss": 1.2246, + "step": 12490 + }, + { + "epoch": 2.78, + "learning_rate": 3.648783391621401e-05, + "loss": 1.232, + "step": 12500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6484857504278595e-05, + "loss": 1.2185, + "step": 12510 + }, + { + "epoch": 2.78, + "learning_rate": 3.648188109234319e-05, + "loss": 1.2182, + "step": 12520 + }, + { + "epoch": 2.78, + "learning_rate": 3.647890468040777e-05, + "loss": 1.2057, + "step": 12530 + }, + { + "epoch": 2.78, + "learning_rate": 3.6475928268472355e-05, + "loss": 1.2351, + "step": 12540 + }, + { + "epoch": 2.79, + "learning_rate": 3.647295185653695e-05, + "loss": 1.2129, + "step": 12550 + }, + { + "epoch": 2.79, + "learning_rate": 3.6469975444601536e-05, + "loss": 1.2195, + "step": 12560 + }, + { + "epoch": 2.79, + "learning_rate": 3.646699903266612e-05, + "loss": 1.2167, + "step": 12570 + }, + { + "epoch": 2.79, + "learning_rate": 3.646402262073071e-05, + "loss": 1.2205, + "step": 12580 + }, + { + "epoch": 2.8, + "learning_rate": 3.64610462087953e-05, + "loss": 1.222, + "step": 12590 + }, + { + "epoch": 2.8, + "learning_rate": 3.645806979685989e-05, + "loss": 1.2334, + "step": 12600 + }, + { + "epoch": 2.8, + "learning_rate": 3.645509338492448e-05, + "loss": 1.2358, + "step": 12610 + }, + { + "epoch": 2.8, + "learning_rate": 3.6452116972989064e-05, + "loss": 1.2172, + "step": 12620 + }, + { + "epoch": 2.8, + "learning_rate": 3.644914056105365e-05, + "loss": 1.2186, + "step": 12630 + }, + { + "epoch": 2.81, + "learning_rate": 3.644616414911824e-05, + "loss": 1.2305, + "step": 12640 + }, + { + "epoch": 2.81, + "learning_rate": 3.6443187737182825e-05, + "loss": 1.2248, + "step": 12650 + }, + { + "epoch": 2.81, + "learning_rate": 3.644021132524742e-05, + "loss": 1.2135, + "step": 12660 + }, + { + "epoch": 2.81, + "learning_rate": 3.6437234913312005e-05, + "loss": 1.2152, + "step": 12670 + }, + { + "epoch": 2.82, + "learning_rate": 3.643425850137659e-05, + "loss": 1.2125, + "step": 12680 + }, + { + "epoch": 2.82, + "learning_rate": 3.643128208944118e-05, + "loss": 1.2235, + "step": 12690 + }, + { + "epoch": 2.82, + "learning_rate": 3.642830567750577e-05, + "loss": 1.2361, + "step": 12700 + }, + { + "epoch": 2.82, + "learning_rate": 3.642532926557036e-05, + "loss": 1.2135, + "step": 12710 + }, + { + "epoch": 2.82, + "learning_rate": 3.6422352853634946e-05, + "loss": 1.2292, + "step": 12720 + }, + { + "epoch": 2.83, + "learning_rate": 3.6419376441699533e-05, + "loss": 1.234, + "step": 12730 + }, + { + "epoch": 2.83, + "learning_rate": 3.641640002976412e-05, + "loss": 1.2145, + "step": 12740 + }, + { + "epoch": 2.83, + "learning_rate": 3.641342361782871e-05, + "loss": 1.2284, + "step": 12750 + }, + { + "epoch": 2.83, + "learning_rate": 3.64104472058933e-05, + "loss": 1.2216, + "step": 12760 + }, + { + "epoch": 2.84, + "learning_rate": 3.640747079395789e-05, + "loss": 1.228, + "step": 12770 + }, + { + "epoch": 2.84, + "learning_rate": 3.6404494382022475e-05, + "loss": 1.2217, + "step": 12780 + }, + { + "epoch": 2.84, + "learning_rate": 3.640151797008706e-05, + "loss": 1.2256, + "step": 12790 + }, + { + "epoch": 2.84, + "learning_rate": 3.6398541558151655e-05, + "loss": 1.2262, + "step": 12800 + }, + { + "epoch": 2.84, + "learning_rate": 3.639556514621624e-05, + "loss": 1.2169, + "step": 12810 + }, + { + "epoch": 2.85, + "learning_rate": 3.639258873428083e-05, + "loss": 1.222, + "step": 12820 + }, + { + "epoch": 2.85, + "learning_rate": 3.6389612322345416e-05, + "loss": 1.219, + "step": 12830 + }, + { + "epoch": 2.85, + "learning_rate": 3.638663591041e-05, + "loss": 1.2281, + "step": 12840 + }, + { + "epoch": 2.85, + "learning_rate": 3.638365949847459e-05, + "loss": 1.2131, + "step": 12850 + }, + { + "epoch": 2.86, + "learning_rate": 3.638068308653918e-05, + "loss": 1.2179, + "step": 12860 + }, + { + "epoch": 2.86, + "learning_rate": 3.637770667460377e-05, + "loss": 1.2305, + "step": 12870 + }, + { + "epoch": 2.86, + "learning_rate": 3.637473026266836e-05, + "loss": 1.2263, + "step": 12880 + }, + { + "epoch": 2.86, + "learning_rate": 3.6371753850732944e-05, + "loss": 1.217, + "step": 12890 + }, + { + "epoch": 2.86, + "learning_rate": 3.636877743879753e-05, + "loss": 1.2278, + "step": 12900 + }, + { + "epoch": 2.87, + "learning_rate": 3.6365801026862125e-05, + "loss": 1.2321, + "step": 12910 + }, + { + "epoch": 2.87, + "learning_rate": 3.636282461492671e-05, + "loss": 1.2163, + "step": 12920 + }, + { + "epoch": 2.87, + "learning_rate": 3.63598482029913e-05, + "loss": 1.2415, + "step": 12930 + }, + { + "epoch": 2.87, + "learning_rate": 3.6356871791055885e-05, + "loss": 1.225, + "step": 12940 + }, + { + "epoch": 2.88, + "learning_rate": 3.635389537912047e-05, + "loss": 1.2145, + "step": 12950 + }, + { + "epoch": 2.88, + "learning_rate": 3.635091896718506e-05, + "loss": 1.2261, + "step": 12960 + }, + { + "epoch": 2.88, + "learning_rate": 3.6347942555249646e-05, + "loss": 1.2114, + "step": 12970 + }, + { + "epoch": 2.88, + "learning_rate": 3.634496614331424e-05, + "loss": 1.2265, + "step": 12980 + }, + { + "epoch": 2.88, + "learning_rate": 3.634198973137883e-05, + "loss": 1.2239, + "step": 12990 + }, + { + "epoch": 2.89, + "learning_rate": 3.6339013319443414e-05, + "loss": 1.2336, + "step": 13000 + }, + { + "epoch": 2.89, + "learning_rate": 3.6336036907508e-05, + "loss": 1.211, + "step": 13010 + }, + { + "epoch": 2.89, + "learning_rate": 3.6333060495572594e-05, + "loss": 1.2109, + "step": 13020 + }, + { + "epoch": 2.89, + "learning_rate": 3.633008408363718e-05, + "loss": 1.2242, + "step": 13030 + }, + { + "epoch": 2.9, + "learning_rate": 3.632710767170177e-05, + "loss": 1.2168, + "step": 13040 + }, + { + "epoch": 2.9, + "learning_rate": 3.6324131259766355e-05, + "loss": 1.2141, + "step": 13050 + }, + { + "epoch": 2.9, + "learning_rate": 3.632115484783094e-05, + "loss": 1.224, + "step": 13060 + }, + { + "epoch": 2.9, + "learning_rate": 3.631817843589553e-05, + "loss": 1.2276, + "step": 13070 + }, + { + "epoch": 2.9, + "learning_rate": 3.6315202023960116e-05, + "loss": 1.2124, + "step": 13080 + }, + { + "epoch": 2.91, + "learning_rate": 3.631222561202471e-05, + "loss": 1.2196, + "step": 13090 + }, + { + "epoch": 2.91, + "learning_rate": 3.6309249200089296e-05, + "loss": 1.219, + "step": 13100 + }, + { + "epoch": 2.91, + "learning_rate": 3.630627278815388e-05, + "loss": 1.2121, + "step": 13110 + }, + { + "epoch": 2.91, + "learning_rate": 3.630329637621847e-05, + "loss": 1.2178, + "step": 13120 + }, + { + "epoch": 2.92, + "learning_rate": 3.6300319964283064e-05, + "loss": 1.2158, + "step": 13130 + }, + { + "epoch": 2.92, + "learning_rate": 3.6297343552347644e-05, + "loss": 1.2082, + "step": 13140 + }, + { + "epoch": 2.92, + "learning_rate": 3.629436714041224e-05, + "loss": 1.2043, + "step": 13150 + }, + { + "epoch": 2.92, + "learning_rate": 3.6291390728476824e-05, + "loss": 1.1986, + "step": 13160 + }, + { + "epoch": 2.92, + "learning_rate": 3.628841431654141e-05, + "loss": 1.223, + "step": 13170 + }, + { + "epoch": 2.93, + "learning_rate": 3.6285437904606e-05, + "loss": 1.2173, + "step": 13180 + }, + { + "epoch": 2.93, + "learning_rate": 3.628246149267059e-05, + "loss": 1.2209, + "step": 13190 + }, + { + "epoch": 2.93, + "learning_rate": 3.627948508073518e-05, + "loss": 1.2258, + "step": 13200 + }, + { + "epoch": 2.93, + "learning_rate": 3.6276508668799766e-05, + "loss": 1.2111, + "step": 13210 + }, + { + "epoch": 2.94, + "learning_rate": 3.627353225686435e-05, + "loss": 1.213, + "step": 13220 + }, + { + "epoch": 2.94, + "learning_rate": 3.6270555844928946e-05, + "loss": 1.2004, + "step": 13230 + }, + { + "epoch": 2.94, + "learning_rate": 3.6267579432993526e-05, + "loss": 1.2165, + "step": 13240 + }, + { + "epoch": 2.94, + "learning_rate": 3.626460302105811e-05, + "loss": 1.2248, + "step": 13250 + }, + { + "epoch": 2.94, + "learning_rate": 3.626162660912271e-05, + "loss": 1.2226, + "step": 13260 + }, + { + "epoch": 2.95, + "learning_rate": 3.6258650197187294e-05, + "loss": 1.2109, + "step": 13270 + }, + { + "epoch": 2.95, + "learning_rate": 3.625567378525188e-05, + "loss": 1.2355, + "step": 13280 + }, + { + "epoch": 2.95, + "learning_rate": 3.625269737331647e-05, + "loss": 1.2139, + "step": 13290 + }, + { + "epoch": 2.95, + "learning_rate": 3.624972096138106e-05, + "loss": 1.2093, + "step": 13300 + }, + { + "epoch": 2.96, + "learning_rate": 3.624674454944565e-05, + "loss": 1.2243, + "step": 13310 + }, + { + "epoch": 2.96, + "learning_rate": 3.6243768137510235e-05, + "loss": 1.2246, + "step": 13320 + }, + { + "epoch": 2.96, + "learning_rate": 3.624079172557482e-05, + "loss": 1.2117, + "step": 13330 + }, + { + "epoch": 2.96, + "learning_rate": 3.6237815313639416e-05, + "loss": 1.2253, + "step": 13340 + }, + { + "epoch": 2.96, + "learning_rate": 3.6234838901703996e-05, + "loss": 1.2243, + "step": 13350 + }, + { + "epoch": 2.97, + "learning_rate": 3.623186248976858e-05, + "loss": 1.2322, + "step": 13360 + }, + { + "epoch": 2.97, + "learning_rate": 3.6228886077833176e-05, + "loss": 1.2161, + "step": 13370 + }, + { + "epoch": 2.97, + "learning_rate": 3.622590966589776e-05, + "loss": 1.2116, + "step": 13380 + }, + { + "epoch": 2.97, + "learning_rate": 3.622293325396235e-05, + "loss": 1.2142, + "step": 13390 + }, + { + "epoch": 2.98, + "learning_rate": 3.621995684202694e-05, + "loss": 1.205, + "step": 13400 + }, + { + "epoch": 2.98, + "learning_rate": 3.621698043009153e-05, + "loss": 1.2235, + "step": 13410 + }, + { + "epoch": 2.98, + "learning_rate": 3.621400401815612e-05, + "loss": 1.2307, + "step": 13420 + }, + { + "epoch": 2.98, + "learning_rate": 3.6211027606220704e-05, + "loss": 1.2198, + "step": 13430 + }, + { + "epoch": 2.98, + "learning_rate": 3.620805119428529e-05, + "loss": 1.2243, + "step": 13440 + }, + { + "epoch": 2.99, + "learning_rate": 3.620507478234988e-05, + "loss": 1.2088, + "step": 13450 + }, + { + "epoch": 2.99, + "learning_rate": 3.6202098370414465e-05, + "loss": 1.2313, + "step": 13460 + }, + { + "epoch": 2.99, + "learning_rate": 3.619912195847906e-05, + "loss": 1.2202, + "step": 13470 + }, + { + "epoch": 2.99, + "learning_rate": 3.6196145546543646e-05, + "loss": 1.2126, + "step": 13480 + }, + { + "epoch": 3.0, + "learning_rate": 3.619316913460823e-05, + "loss": 1.2121, + "step": 13490 + }, + { + "epoch": 3.0, + "learning_rate": 3.619019272267282e-05, + "loss": 1.2244, + "step": 13500 + }, + { + "epoch": 3.0, + "eval_cer": 4.7756410256410255, + "eval_loss": 1.1598864793777466, + "eval_runtime": 5.7385, + "eval_samples_per_second": 1.743, + "eval_steps_per_second": 0.174, + "eval_wer": 1.0129870129870129, + "step": 13509 + }, + { + "epoch": 3.0, + "learning_rate": 3.6187216310737406e-05, + "loss": 1.2271, + "step": 13510 + }, + { + "epoch": 3.0, + "learning_rate": 3.6184239898802e-05, + "loss": 1.2057, + "step": 13520 + }, + { + "epoch": 3.0, + "learning_rate": 3.618126348686659e-05, + "loss": 1.2122, + "step": 13530 + }, + { + "epoch": 3.01, + "learning_rate": 3.6178287074931174e-05, + "loss": 1.1948, + "step": 13540 + }, + { + "epoch": 3.01, + "learning_rate": 3.617531066299576e-05, + "loss": 1.209, + "step": 13550 + }, + { + "epoch": 3.01, + "learning_rate": 3.617233425106035e-05, + "loss": 1.2247, + "step": 13560 + }, + { + "epoch": 3.01, + "learning_rate": 3.6169357839124935e-05, + "loss": 1.2267, + "step": 13570 + }, + { + "epoch": 3.02, + "learning_rate": 3.616638142718953e-05, + "loss": 1.2131, + "step": 13580 + }, + { + "epoch": 3.02, + "learning_rate": 3.6163405015254115e-05, + "loss": 1.2173, + "step": 13590 + }, + { + "epoch": 3.02, + "learning_rate": 3.61604286033187e-05, + "loss": 1.1959, + "step": 13600 + }, + { + "epoch": 3.02, + "learning_rate": 3.615745219138329e-05, + "loss": 1.2225, + "step": 13610 + }, + { + "epoch": 3.02, + "learning_rate": 3.615447577944788e-05, + "loss": 1.2228, + "step": 13620 + }, + { + "epoch": 3.03, + "learning_rate": 3.615149936751247e-05, + "loss": 1.2146, + "step": 13630 + }, + { + "epoch": 3.03, + "learning_rate": 3.6148522955577056e-05, + "loss": 1.2201, + "step": 13640 + }, + { + "epoch": 3.03, + "learning_rate": 3.614554654364164e-05, + "loss": 1.2042, + "step": 13650 + }, + { + "epoch": 3.03, + "learning_rate": 3.614257013170623e-05, + "loss": 1.2273, + "step": 13660 + }, + { + "epoch": 3.04, + "learning_rate": 3.613959371977082e-05, + "loss": 1.2099, + "step": 13670 + }, + { + "epoch": 3.04, + "learning_rate": 3.6136617307835404e-05, + "loss": 1.1863, + "step": 13680 + }, + { + "epoch": 3.04, + "learning_rate": 3.61336408959e-05, + "loss": 1.2294, + "step": 13690 + }, + { + "epoch": 3.04, + "learning_rate": 3.6130664483964585e-05, + "loss": 1.222, + "step": 13700 + }, + { + "epoch": 3.04, + "learning_rate": 3.612768807202917e-05, + "loss": 1.2148, + "step": 13710 + }, + { + "epoch": 3.05, + "learning_rate": 3.612471166009376e-05, + "loss": 1.2259, + "step": 13720 + }, + { + "epoch": 3.05, + "learning_rate": 3.612173524815835e-05, + "loss": 1.2151, + "step": 13730 + }, + { + "epoch": 3.05, + "learning_rate": 3.611875883622294e-05, + "loss": 1.2009, + "step": 13740 + }, + { + "epoch": 3.05, + "learning_rate": 3.6115782424287526e-05, + "loss": 1.2102, + "step": 13750 + }, + { + "epoch": 3.06, + "learning_rate": 3.611280601235211e-05, + "loss": 1.2143, + "step": 13760 + }, + { + "epoch": 3.06, + "learning_rate": 3.61098296004167e-05, + "loss": 1.2165, + "step": 13770 + }, + { + "epoch": 3.06, + "learning_rate": 3.6106853188481287e-05, + "loss": 1.2236, + "step": 13780 + }, + { + "epoch": 3.06, + "learning_rate": 3.6103876776545873e-05, + "loss": 1.2136, + "step": 13790 + }, + { + "epoch": 3.06, + "learning_rate": 3.610090036461047e-05, + "loss": 1.2222, + "step": 13800 + }, + { + "epoch": 3.07, + "learning_rate": 3.6097923952675054e-05, + "loss": 1.2171, + "step": 13810 + }, + { + "epoch": 3.07, + "learning_rate": 3.609494754073964e-05, + "loss": 1.2057, + "step": 13820 + }, + { + "epoch": 3.07, + "learning_rate": 3.609197112880423e-05, + "loss": 1.208, + "step": 13830 + }, + { + "epoch": 3.07, + "learning_rate": 3.608899471686882e-05, + "loss": 1.2084, + "step": 13840 + }, + { + "epoch": 3.08, + "learning_rate": 3.608601830493341e-05, + "loss": 1.2025, + "step": 13850 + }, + { + "epoch": 3.08, + "learning_rate": 3.6083041892997995e-05, + "loss": 1.2069, + "step": 13860 + }, + { + "epoch": 3.08, + "learning_rate": 3.608006548106258e-05, + "loss": 1.2022, + "step": 13870 + }, + { + "epoch": 3.08, + "learning_rate": 3.607708906912717e-05, + "loss": 1.2082, + "step": 13880 + }, + { + "epoch": 3.08, + "learning_rate": 3.6074112657191756e-05, + "loss": 1.2057, + "step": 13890 + }, + { + "epoch": 3.09, + "learning_rate": 3.607113624525635e-05, + "loss": 1.202, + "step": 13900 + }, + { + "epoch": 3.09, + "learning_rate": 3.6068159833320937e-05, + "loss": 1.2071, + "step": 13910 + }, + { + "epoch": 3.09, + "learning_rate": 3.6065183421385523e-05, + "loss": 1.2151, + "step": 13920 + }, + { + "epoch": 3.09, + "learning_rate": 3.606220700945011e-05, + "loss": 1.2176, + "step": 13930 + }, + { + "epoch": 3.1, + "learning_rate": 3.60592305975147e-05, + "loss": 1.2137, + "step": 13940 + }, + { + "epoch": 3.1, + "learning_rate": 3.605625418557929e-05, + "loss": 1.2228, + "step": 13950 + }, + { + "epoch": 3.1, + "learning_rate": 3.605327777364387e-05, + "loss": 1.2112, + "step": 13960 + }, + { + "epoch": 3.1, + "learning_rate": 3.6050301361708465e-05, + "loss": 1.2137, + "step": 13970 + }, + { + "epoch": 3.1, + "learning_rate": 3.604732494977305e-05, + "loss": 1.2075, + "step": 13980 + }, + { + "epoch": 3.11, + "learning_rate": 3.604434853783764e-05, + "loss": 1.2172, + "step": 13990 + }, + { + "epoch": 3.11, + "learning_rate": 3.6041372125902225e-05, + "loss": 1.2079, + "step": 14000 + }, + { + "epoch": 3.11, + "learning_rate": 3.603839571396682e-05, + "loss": 1.2106, + "step": 14010 + }, + { + "epoch": 3.11, + "learning_rate": 3.6035419302031406e-05, + "loss": 1.2007, + "step": 14020 + }, + { + "epoch": 3.12, + "learning_rate": 3.603244289009599e-05, + "loss": 1.2022, + "step": 14030 + }, + { + "epoch": 3.12, + "learning_rate": 3.602946647816058e-05, + "loss": 1.2003, + "step": 14040 + }, + { + "epoch": 3.12, + "learning_rate": 3.6026490066225173e-05, + "loss": 1.211, + "step": 14050 + }, + { + "epoch": 3.12, + "learning_rate": 3.6023513654289754e-05, + "loss": 1.2116, + "step": 14060 + }, + { + "epoch": 3.12, + "learning_rate": 3.602053724235434e-05, + "loss": 1.2088, + "step": 14070 + }, + { + "epoch": 3.13, + "learning_rate": 3.6017560830418934e-05, + "loss": 1.2119, + "step": 14080 + }, + { + "epoch": 3.13, + "learning_rate": 3.601458441848352e-05, + "loss": 1.2036, + "step": 14090 + }, + { + "epoch": 3.13, + "learning_rate": 3.601160800654811e-05, + "loss": 1.2048, + "step": 14100 + }, + { + "epoch": 3.13, + "learning_rate": 3.6008631594612695e-05, + "loss": 1.2111, + "step": 14110 + }, + { + "epoch": 3.14, + "learning_rate": 3.600565518267729e-05, + "loss": 1.2005, + "step": 14120 + }, + { + "epoch": 3.14, + "learning_rate": 3.6002678770741875e-05, + "loss": 1.2072, + "step": 14130 + }, + { + "epoch": 3.14, + "learning_rate": 3.599970235880646e-05, + "loss": 1.2217, + "step": 14140 + }, + { + "epoch": 3.14, + "learning_rate": 3.599672594687105e-05, + "loss": 1.2169, + "step": 14150 + }, + { + "epoch": 3.14, + "learning_rate": 3.5993749534935636e-05, + "loss": 1.2045, + "step": 14160 + }, + { + "epoch": 3.15, + "learning_rate": 3.599077312300022e-05, + "loss": 1.2103, + "step": 14170 + }, + { + "epoch": 3.15, + "learning_rate": 3.598779671106481e-05, + "loss": 1.1995, + "step": 14180 + }, + { + "epoch": 3.15, + "learning_rate": 3.5984820299129404e-05, + "loss": 1.2288, + "step": 14190 + }, + { + "epoch": 3.15, + "learning_rate": 3.598184388719399e-05, + "loss": 1.2078, + "step": 14200 + }, + { + "epoch": 3.16, + "learning_rate": 3.597886747525858e-05, + "loss": 1.2169, + "step": 14210 + }, + { + "epoch": 3.16, + "learning_rate": 3.5975891063323164e-05, + "loss": 1.2097, + "step": 14220 + }, + { + "epoch": 3.16, + "learning_rate": 3.597291465138776e-05, + "loss": 1.2151, + "step": 14230 + }, + { + "epoch": 3.16, + "learning_rate": 3.5969938239452345e-05, + "loss": 1.2018, + "step": 14240 + }, + { + "epoch": 3.16, + "learning_rate": 3.596696182751693e-05, + "loss": 1.1763, + "step": 14250 + }, + { + "epoch": 3.17, + "learning_rate": 3.596398541558152e-05, + "loss": 1.2096, + "step": 14260 + }, + { + "epoch": 3.17, + "learning_rate": 3.5961009003646106e-05, + "loss": 1.2086, + "step": 14270 + }, + { + "epoch": 3.17, + "learning_rate": 3.595803259171069e-05, + "loss": 1.222, + "step": 14280 + }, + { + "epoch": 3.17, + "learning_rate": 3.5955056179775286e-05, + "loss": 1.217, + "step": 14290 + }, + { + "epoch": 3.18, + "learning_rate": 3.595207976783987e-05, + "loss": 1.2052, + "step": 14300 + }, + { + "epoch": 3.18, + "learning_rate": 3.594910335590446e-05, + "loss": 1.209, + "step": 14310 + }, + { + "epoch": 3.18, + "learning_rate": 3.594612694396905e-05, + "loss": 1.2168, + "step": 14320 + }, + { + "epoch": 3.18, + "learning_rate": 3.594315053203364e-05, + "loss": 1.1888, + "step": 14330 + }, + { + "epoch": 3.18, + "learning_rate": 3.594017412009823e-05, + "loss": 1.22, + "step": 14340 + }, + { + "epoch": 3.19, + "learning_rate": 3.5937197708162814e-05, + "loss": 1.2319, + "step": 14350 + }, + { + "epoch": 3.19, + "learning_rate": 3.59342212962274e-05, + "loss": 1.1983, + "step": 14360 + }, + { + "epoch": 3.19, + "learning_rate": 3.593124488429199e-05, + "loss": 1.2098, + "step": 14370 + }, + { + "epoch": 3.19, + "learning_rate": 3.5928268472356575e-05, + "loss": 1.2253, + "step": 14380 + }, + { + "epoch": 3.2, + "learning_rate": 3.592529206042116e-05, + "loss": 1.2082, + "step": 14390 + }, + { + "epoch": 3.2, + "learning_rate": 3.5922315648485756e-05, + "loss": 1.2144, + "step": 14400 + }, + { + "epoch": 3.2, + "learning_rate": 3.591933923655034e-05, + "loss": 1.2106, + "step": 14410 + }, + { + "epoch": 3.2, + "learning_rate": 3.591636282461493e-05, + "loss": 1.1985, + "step": 14420 + }, + { + "epoch": 3.2, + "learning_rate": 3.5913386412679516e-05, + "loss": 1.2083, + "step": 14430 + }, + { + "epoch": 3.21, + "learning_rate": 3.591041000074411e-05, + "loss": 1.2085, + "step": 14440 + }, + { + "epoch": 3.21, + "learning_rate": 3.59074335888087e-05, + "loss": 1.2069, + "step": 14450 + }, + { + "epoch": 3.21, + "learning_rate": 3.5904457176873284e-05, + "loss": 1.2176, + "step": 14460 + }, + { + "epoch": 3.21, + "learning_rate": 3.590148076493787e-05, + "loss": 1.1939, + "step": 14470 + }, + { + "epoch": 3.22, + "learning_rate": 3.589850435300246e-05, + "loss": 1.1968, + "step": 14480 + }, + { + "epoch": 3.22, + "learning_rate": 3.5895527941067044e-05, + "loss": 1.1975, + "step": 14490 + }, + { + "epoch": 3.22, + "learning_rate": 3.589255152913163e-05, + "loss": 1.21, + "step": 14500 + }, + { + "epoch": 3.22, + "learning_rate": 3.5889575117196225e-05, + "loss": 1.2095, + "step": 14510 + }, + { + "epoch": 3.22, + "learning_rate": 3.588659870526081e-05, + "loss": 1.2162, + "step": 14520 + }, + { + "epoch": 3.23, + "learning_rate": 3.58836222933254e-05, + "loss": 1.2086, + "step": 14530 + }, + { + "epoch": 3.23, + "learning_rate": 3.5880645881389986e-05, + "loss": 1.2144, + "step": 14540 + }, + { + "epoch": 3.23, + "learning_rate": 3.587766946945458e-05, + "loss": 1.2114, + "step": 14550 + }, + { + "epoch": 3.23, + "learning_rate": 3.5874693057519166e-05, + "loss": 1.2134, + "step": 14560 + }, + { + "epoch": 3.24, + "learning_rate": 3.5871716645583746e-05, + "loss": 1.201, + "step": 14570 + }, + { + "epoch": 3.24, + "learning_rate": 3.586874023364834e-05, + "loss": 1.2046, + "step": 14580 + }, + { + "epoch": 3.24, + "learning_rate": 3.586576382171293e-05, + "loss": 1.2114, + "step": 14590 + }, + { + "epoch": 3.24, + "learning_rate": 3.5862787409777514e-05, + "loss": 1.1977, + "step": 14600 + }, + { + "epoch": 3.24, + "learning_rate": 3.58598109978421e-05, + "loss": 1.2025, + "step": 14610 + }, + { + "epoch": 3.25, + "learning_rate": 3.5856834585906694e-05, + "loss": 1.21, + "step": 14620 + }, + { + "epoch": 3.25, + "learning_rate": 3.585385817397128e-05, + "loss": 1.2088, + "step": 14630 + }, + { + "epoch": 3.25, + "learning_rate": 3.585088176203587e-05, + "loss": 1.2156, + "step": 14640 + }, + { + "epoch": 3.25, + "learning_rate": 3.5847905350100455e-05, + "loss": 1.1987, + "step": 14650 + }, + { + "epoch": 3.26, + "learning_rate": 3.584492893816505e-05, + "loss": 1.201, + "step": 14660 + }, + { + "epoch": 3.26, + "learning_rate": 3.5841952526229636e-05, + "loss": 1.2093, + "step": 14670 + }, + { + "epoch": 3.26, + "learning_rate": 3.583897611429422e-05, + "loss": 1.2011, + "step": 14680 + }, + { + "epoch": 3.26, + "learning_rate": 3.583599970235881e-05, + "loss": 1.1983, + "step": 14690 + }, + { + "epoch": 3.26, + "learning_rate": 3.5833023290423396e-05, + "loss": 1.1956, + "step": 14700 + }, + { + "epoch": 3.27, + "learning_rate": 3.583004687848798e-05, + "loss": 1.1993, + "step": 14710 + }, + { + "epoch": 3.27, + "learning_rate": 3.582707046655258e-05, + "loss": 1.2105, + "step": 14720 + }, + { + "epoch": 3.27, + "learning_rate": 3.5824094054617164e-05, + "loss": 1.2131, + "step": 14730 + }, + { + "epoch": 3.27, + "learning_rate": 3.582111764268175e-05, + "loss": 1.2085, + "step": 14740 + }, + { + "epoch": 3.28, + "learning_rate": 3.581814123074634e-05, + "loss": 1.209, + "step": 14750 + }, + { + "epoch": 3.28, + "learning_rate": 3.581516481881093e-05, + "loss": 1.2215, + "step": 14760 + }, + { + "epoch": 3.28, + "learning_rate": 3.581218840687552e-05, + "loss": 1.2089, + "step": 14770 + }, + { + "epoch": 3.28, + "learning_rate": 3.58092119949401e-05, + "loss": 1.2229, + "step": 14780 + }, + { + "epoch": 3.28, + "learning_rate": 3.580623558300469e-05, + "loss": 1.2025, + "step": 14790 + }, + { + "epoch": 3.29, + "learning_rate": 3.580325917106928e-05, + "loss": 1.1991, + "step": 14800 + }, + { + "epoch": 3.29, + "learning_rate": 3.5800282759133866e-05, + "loss": 1.2138, + "step": 14810 + }, + { + "epoch": 3.29, + "learning_rate": 3.579730634719845e-05, + "loss": 1.2137, + "step": 14820 + }, + { + "epoch": 3.29, + "learning_rate": 3.5794329935263046e-05, + "loss": 1.2151, + "step": 14830 + }, + { + "epoch": 3.3, + "learning_rate": 3.579135352332763e-05, + "loss": 1.2109, + "step": 14840 + }, + { + "epoch": 3.3, + "learning_rate": 3.578837711139222e-05, + "loss": 1.2012, + "step": 14850 + }, + { + "epoch": 3.3, + "learning_rate": 3.578540069945681e-05, + "loss": 1.2027, + "step": 14860 + }, + { + "epoch": 3.3, + "learning_rate": 3.57824242875214e-05, + "loss": 1.1981, + "step": 14870 + }, + { + "epoch": 3.3, + "learning_rate": 3.577944787558598e-05, + "loss": 1.2079, + "step": 14880 + }, + { + "epoch": 3.31, + "learning_rate": 3.577647146365057e-05, + "loss": 1.1946, + "step": 14890 + }, + { + "epoch": 3.31, + "learning_rate": 3.577349505171516e-05, + "loss": 1.2182, + "step": 14900 + }, + { + "epoch": 3.31, + "learning_rate": 3.577051863977975e-05, + "loss": 1.2133, + "step": 14910 + }, + { + "epoch": 3.31, + "learning_rate": 3.5767542227844335e-05, + "loss": 1.1944, + "step": 14920 + }, + { + "epoch": 3.32, + "learning_rate": 3.576456581590892e-05, + "loss": 1.1903, + "step": 14930 + }, + { + "epoch": 3.32, + "learning_rate": 3.5761589403973516e-05, + "loss": 1.1949, + "step": 14940 + }, + { + "epoch": 3.32, + "learning_rate": 3.57586129920381e-05, + "loss": 1.1931, + "step": 14950 + }, + { + "epoch": 3.32, + "learning_rate": 3.575563658010269e-05, + "loss": 1.2279, + "step": 14960 + }, + { + "epoch": 3.32, + "learning_rate": 3.5752660168167277e-05, + "loss": 1.218, + "step": 14970 + }, + { + "epoch": 3.33, + "learning_rate": 3.5749683756231863e-05, + "loss": 1.1998, + "step": 14980 + }, + { + "epoch": 3.33, + "learning_rate": 3.574670734429645e-05, + "loss": 1.1905, + "step": 14990 + }, + { + "epoch": 3.33, + "learning_rate": 3.574373093236104e-05, + "loss": 1.1938, + "step": 15000 + }, + { + "epoch": 3.33, + "learning_rate": 3.574075452042563e-05, + "loss": 1.2028, + "step": 15010 + }, + { + "epoch": 3.34, + "learning_rate": 3.573777810849022e-05, + "loss": 1.2004, + "step": 15020 + }, + { + "epoch": 3.34, + "learning_rate": 3.5734801696554805e-05, + "loss": 1.211, + "step": 15030 + }, + { + "epoch": 3.34, + "learning_rate": 3.573182528461939e-05, + "loss": 1.1959, + "step": 15040 + }, + { + "epoch": 3.34, + "learning_rate": 3.5728848872683985e-05, + "loss": 1.1935, + "step": 15050 + }, + { + "epoch": 3.34, + "learning_rate": 3.572587246074857e-05, + "loss": 1.1947, + "step": 15060 + }, + { + "epoch": 3.35, + "learning_rate": 3.572289604881316e-05, + "loss": 1.1991, + "step": 15070 + }, + { + "epoch": 3.35, + "learning_rate": 3.5719919636877746e-05, + "loss": 1.2058, + "step": 15080 + }, + { + "epoch": 3.35, + "learning_rate": 3.571694322494233e-05, + "loss": 1.2008, + "step": 15090 + }, + { + "epoch": 3.35, + "learning_rate": 3.571396681300692e-05, + "loss": 1.1774, + "step": 15100 + }, + { + "epoch": 3.36, + "learning_rate": 3.5710990401071513e-05, + "loss": 1.1968, + "step": 15110 + }, + { + "epoch": 3.36, + "learning_rate": 3.57080139891361e-05, + "loss": 1.2118, + "step": 15120 + }, + { + "epoch": 3.36, + "learning_rate": 3.570503757720069e-05, + "loss": 1.1906, + "step": 15130 + }, + { + "epoch": 3.36, + "learning_rate": 3.5702061165265274e-05, + "loss": 1.1935, + "step": 15140 + }, + { + "epoch": 3.36, + "learning_rate": 3.569908475332987e-05, + "loss": 1.1967, + "step": 15150 + }, + { + "epoch": 3.37, + "learning_rate": 3.5696108341394455e-05, + "loss": 1.1948, + "step": 15160 + }, + { + "epoch": 3.37, + "learning_rate": 3.569313192945904e-05, + "loss": 1.202, + "step": 15170 + }, + { + "epoch": 3.37, + "learning_rate": 3.569015551752363e-05, + "loss": 1.1985, + "step": 15180 + }, + { + "epoch": 3.37, + "learning_rate": 3.5687179105588215e-05, + "loss": 1.2098, + "step": 15190 + }, + { + "epoch": 3.38, + "learning_rate": 3.56842026936528e-05, + "loss": 1.198, + "step": 15200 + }, + { + "epoch": 3.38, + "learning_rate": 3.568122628171739e-05, + "loss": 1.2046, + "step": 15210 + }, + { + "epoch": 3.38, + "learning_rate": 3.567824986978198e-05, + "loss": 1.2095, + "step": 15220 + }, + { + "epoch": 3.38, + "learning_rate": 3.567527345784657e-05, + "loss": 1.1965, + "step": 15230 + }, + { + "epoch": 3.38, + "learning_rate": 3.567229704591116e-05, + "loss": 1.1886, + "step": 15240 + }, + { + "epoch": 3.39, + "learning_rate": 3.5669320633975744e-05, + "loss": 1.1839, + "step": 15250 + }, + { + "epoch": 3.39, + "learning_rate": 3.566634422204034e-05, + "loss": 1.1989, + "step": 15260 + }, + { + "epoch": 3.39, + "learning_rate": 3.5663367810104924e-05, + "loss": 1.1975, + "step": 15270 + }, + { + "epoch": 3.39, + "learning_rate": 3.566039139816951e-05, + "loss": 1.2085, + "step": 15280 + }, + { + "epoch": 3.4, + "learning_rate": 3.56574149862341e-05, + "loss": 1.2091, + "step": 15290 + }, + { + "epoch": 3.4, + "learning_rate": 3.5654438574298685e-05, + "loss": 1.1982, + "step": 15300 + }, + { + "epoch": 3.4, + "learning_rate": 3.565146216236327e-05, + "loss": 1.202, + "step": 15310 + }, + { + "epoch": 3.4, + "learning_rate": 3.564848575042786e-05, + "loss": 1.1854, + "step": 15320 + }, + { + "epoch": 3.4, + "learning_rate": 3.564550933849245e-05, + "loss": 1.2018, + "step": 15330 + }, + { + "epoch": 3.41, + "learning_rate": 3.564253292655704e-05, + "loss": 1.1962, + "step": 15340 + }, + { + "epoch": 3.41, + "learning_rate": 3.5639556514621626e-05, + "loss": 1.1962, + "step": 15350 + }, + { + "epoch": 3.41, + "learning_rate": 3.563658010268621e-05, + "loss": 1.1879, + "step": 15360 + }, + { + "epoch": 3.41, + "learning_rate": 3.563360369075081e-05, + "loss": 1.1959, + "step": 15370 + }, + { + "epoch": 3.42, + "learning_rate": 3.5630627278815394e-05, + "loss": 1.2094, + "step": 15380 + }, + { + "epoch": 3.42, + "learning_rate": 3.5627650866879974e-05, + "loss": 1.1879, + "step": 15390 + }, + { + "epoch": 3.42, + "learning_rate": 3.562467445494457e-05, + "loss": 1.201, + "step": 15400 + }, + { + "epoch": 3.42, + "learning_rate": 3.5621698043009154e-05, + "loss": 1.2081, + "step": 15410 + }, + { + "epoch": 3.42, + "learning_rate": 3.561872163107374e-05, + "loss": 1.1902, + "step": 15420 + }, + { + "epoch": 3.43, + "learning_rate": 3.561574521913833e-05, + "loss": 1.1996, + "step": 15430 + }, + { + "epoch": 3.43, + "learning_rate": 3.561276880720292e-05, + "loss": 1.1864, + "step": 15440 + }, + { + "epoch": 3.43, + "learning_rate": 3.560979239526751e-05, + "loss": 1.198, + "step": 15450 + }, + { + "epoch": 3.43, + "learning_rate": 3.5606815983332096e-05, + "loss": 1.2004, + "step": 15460 + }, + { + "epoch": 3.44, + "learning_rate": 3.560383957139668e-05, + "loss": 1.211, + "step": 15470 + }, + { + "epoch": 3.44, + "learning_rate": 3.5600863159461276e-05, + "loss": 1.1997, + "step": 15480 + }, + { + "epoch": 3.44, + "learning_rate": 3.559788674752586e-05, + "loss": 1.193, + "step": 15490 + }, + { + "epoch": 3.44, + "learning_rate": 3.559491033559045e-05, + "loss": 1.204, + "step": 15500 + }, + { + "epoch": 3.44, + "learning_rate": 3.559193392365504e-05, + "loss": 1.1936, + "step": 15510 + }, + { + "epoch": 3.45, + "learning_rate": 3.5588957511719624e-05, + "loss": 1.1893, + "step": 15520 + }, + { + "epoch": 3.45, + "learning_rate": 3.558598109978421e-05, + "loss": 1.2024, + "step": 15530 + }, + { + "epoch": 3.45, + "learning_rate": 3.5583004687848804e-05, + "loss": 1.2036, + "step": 15540 + }, + { + "epoch": 3.45, + "learning_rate": 3.558002827591339e-05, + "loss": 1.1928, + "step": 15550 + }, + { + "epoch": 3.46, + "learning_rate": 3.557705186397798e-05, + "loss": 1.1936, + "step": 15560 + }, + { + "epoch": 3.46, + "learning_rate": 3.5574075452042565e-05, + "loss": 1.2062, + "step": 15570 + }, + { + "epoch": 3.46, + "learning_rate": 3.557109904010716e-05, + "loss": 1.2006, + "step": 15580 + }, + { + "epoch": 3.46, + "learning_rate": 3.5568122628171746e-05, + "loss": 1.204, + "step": 15590 + }, + { + "epoch": 3.46, + "learning_rate": 3.5565146216236326e-05, + "loss": 1.1917, + "step": 15600 + }, + { + "epoch": 3.47, + "learning_rate": 3.556216980430092e-05, + "loss": 1.2094, + "step": 15610 + }, + { + "epoch": 3.47, + "learning_rate": 3.5559193392365506e-05, + "loss": 1.197, + "step": 15620 + }, + { + "epoch": 3.47, + "learning_rate": 3.555621698043009e-05, + "loss": 1.213, + "step": 15630 + }, + { + "epoch": 3.47, + "learning_rate": 3.555324056849468e-05, + "loss": 1.1888, + "step": 15640 + }, + { + "epoch": 3.48, + "learning_rate": 3.5550264156559274e-05, + "loss": 1.1919, + "step": 15650 + }, + { + "epoch": 3.48, + "learning_rate": 3.554728774462386e-05, + "loss": 1.2013, + "step": 15660 + }, + { + "epoch": 3.48, + "learning_rate": 3.554431133268845e-05, + "loss": 1.182, + "step": 15670 + }, + { + "epoch": 3.48, + "learning_rate": 3.5541334920753034e-05, + "loss": 1.1977, + "step": 15680 + }, + { + "epoch": 3.48, + "learning_rate": 3.553835850881763e-05, + "loss": 1.1892, + "step": 15690 + }, + { + "epoch": 3.49, + "learning_rate": 3.553538209688221e-05, + "loss": 1.1752, + "step": 15700 + }, + { + "epoch": 3.49, + "learning_rate": 3.5532405684946795e-05, + "loss": 1.1939, + "step": 15710 + }, + { + "epoch": 3.49, + "learning_rate": 3.552942927301139e-05, + "loss": 1.1829, + "step": 15720 + }, + { + "epoch": 3.49, + "learning_rate": 3.5526452861075976e-05, + "loss": 1.1898, + "step": 15730 + }, + { + "epoch": 3.5, + "learning_rate": 3.552347644914056e-05, + "loss": 1.1797, + "step": 15740 + }, + { + "epoch": 3.5, + "learning_rate": 3.552050003720515e-05, + "loss": 1.1879, + "step": 15750 + }, + { + "epoch": 3.5, + "learning_rate": 3.551752362526974e-05, + "loss": 1.1908, + "step": 15760 + }, + { + "epoch": 3.5, + "learning_rate": 3.551454721333433e-05, + "loss": 1.1874, + "step": 15770 + }, + { + "epoch": 3.5, + "learning_rate": 3.551157080139892e-05, + "loss": 1.1989, + "step": 15780 + }, + { + "epoch": 3.51, + "learning_rate": 3.5508594389463504e-05, + "loss": 1.1972, + "step": 15790 + }, + { + "epoch": 3.51, + "learning_rate": 3.550561797752809e-05, + "loss": 1.1991, + "step": 15800 + }, + { + "epoch": 3.51, + "learning_rate": 3.550264156559268e-05, + "loss": 1.2048, + "step": 15810 + }, + { + "epoch": 3.51, + "learning_rate": 3.5499665153657265e-05, + "loss": 1.2066, + "step": 15820 + }, + { + "epoch": 3.52, + "learning_rate": 3.549668874172186e-05, + "loss": 1.2009, + "step": 15830 + }, + { + "epoch": 3.52, + "learning_rate": 3.5493712329786445e-05, + "loss": 1.2081, + "step": 15840 + }, + { + "epoch": 3.52, + "learning_rate": 3.549073591785103e-05, + "loss": 1.1987, + "step": 15850 + }, + { + "epoch": 3.52, + "learning_rate": 3.548775950591562e-05, + "loss": 1.2055, + "step": 15860 + }, + { + "epoch": 3.52, + "learning_rate": 3.548478309398021e-05, + "loss": 1.1849, + "step": 15870 + }, + { + "epoch": 3.53, + "learning_rate": 3.54818066820448e-05, + "loss": 1.1996, + "step": 15880 + }, + { + "epoch": 3.53, + "learning_rate": 3.5478830270109386e-05, + "loss": 1.1994, + "step": 15890 + }, + { + "epoch": 3.53, + "learning_rate": 3.547585385817397e-05, + "loss": 1.2023, + "step": 15900 + }, + { + "epoch": 3.53, + "learning_rate": 3.547287744623856e-05, + "loss": 1.201, + "step": 15910 + }, + { + "epoch": 3.54, + "learning_rate": 3.546990103430315e-05, + "loss": 1.2049, + "step": 15920 + }, + { + "epoch": 3.54, + "learning_rate": 3.546692462236774e-05, + "loss": 1.1994, + "step": 15930 + }, + { + "epoch": 3.54, + "learning_rate": 3.546394821043233e-05, + "loss": 1.1945, + "step": 15940 + }, + { + "epoch": 3.54, + "learning_rate": 3.5460971798496915e-05, + "loss": 1.2, + "step": 15950 + }, + { + "epoch": 3.54, + "learning_rate": 3.54579953865615e-05, + "loss": 1.18, + "step": 15960 + }, + { + "epoch": 3.55, + "learning_rate": 3.5455018974626095e-05, + "loss": 1.2056, + "step": 15970 + }, + { + "epoch": 3.55, + "learning_rate": 3.545204256269068e-05, + "loss": 1.1988, + "step": 15980 + }, + { + "epoch": 3.55, + "learning_rate": 3.544906615075527e-05, + "loss": 1.1918, + "step": 15990 + }, + { + "epoch": 3.55, + "learning_rate": 3.5446089738819856e-05, + "loss": 1.2038, + "step": 16000 + }, + { + "epoch": 3.56, + "learning_rate": 3.544311332688444e-05, + "loss": 1.197, + "step": 16010 + }, + { + "epoch": 3.56, + "learning_rate": 3.544013691494903e-05, + "loss": 1.2034, + "step": 16020 + }, + { + "epoch": 3.56, + "learning_rate": 3.5437160503013617e-05, + "loss": 1.1903, + "step": 16030 + }, + { + "epoch": 3.56, + "learning_rate": 3.543418409107821e-05, + "loss": 1.2052, + "step": 16040 + }, + { + "epoch": 3.56, + "learning_rate": 3.54312076791428e-05, + "loss": 1.1866, + "step": 16050 + }, + { + "epoch": 3.57, + "learning_rate": 3.5428231267207384e-05, + "loss": 1.1919, + "step": 16060 + }, + { + "epoch": 3.57, + "learning_rate": 3.542525485527197e-05, + "loss": 1.1984, + "step": 16070 + }, + { + "epoch": 3.57, + "learning_rate": 3.5422278443336565e-05, + "loss": 1.1892, + "step": 16080 + }, + { + "epoch": 3.57, + "learning_rate": 3.541930203140115e-05, + "loss": 1.2101, + "step": 16090 + }, + { + "epoch": 3.58, + "learning_rate": 3.541632561946574e-05, + "loss": 1.1961, + "step": 16100 + }, + { + "epoch": 3.58, + "learning_rate": 3.5413349207530325e-05, + "loss": 1.189, + "step": 16110 + }, + { + "epoch": 3.58, + "learning_rate": 3.541037279559491e-05, + "loss": 1.2025, + "step": 16120 + }, + { + "epoch": 3.58, + "learning_rate": 3.54073963836595e-05, + "loss": 1.1957, + "step": 16130 + }, + { + "epoch": 3.58, + "learning_rate": 3.5404419971724086e-05, + "loss": 1.2099, + "step": 16140 + }, + { + "epoch": 3.59, + "learning_rate": 3.540144355978868e-05, + "loss": 1.1879, + "step": 16150 + }, + { + "epoch": 3.59, + "learning_rate": 3.5398467147853267e-05, + "loss": 1.1947, + "step": 16160 + }, + { + "epoch": 3.59, + "learning_rate": 3.5395490735917854e-05, + "loss": 1.194, + "step": 16170 + }, + { + "epoch": 3.59, + "learning_rate": 3.539251432398244e-05, + "loss": 1.1904, + "step": 16180 + }, + { + "epoch": 3.6, + "learning_rate": 3.5389537912047034e-05, + "loss": 1.1986, + "step": 16190 + }, + { + "epoch": 3.6, + "learning_rate": 3.538656150011162e-05, + "loss": 1.185, + "step": 16200 + }, + { + "epoch": 3.6, + "learning_rate": 3.53835850881762e-05, + "loss": 1.1831, + "step": 16210 + }, + { + "epoch": 3.6, + "learning_rate": 3.5380608676240795e-05, + "loss": 1.1865, + "step": 16220 + }, + { + "epoch": 3.6, + "learning_rate": 3.537763226430538e-05, + "loss": 1.1927, + "step": 16230 + }, + { + "epoch": 3.61, + "learning_rate": 3.537465585236997e-05, + "loss": 1.2009, + "step": 16240 + }, + { + "epoch": 3.61, + "learning_rate": 3.5371679440434555e-05, + "loss": 1.2114, + "step": 16250 + }, + { + "epoch": 3.61, + "learning_rate": 3.536870302849915e-05, + "loss": 1.1997, + "step": 16260 + }, + { + "epoch": 3.61, + "learning_rate": 3.5365726616563736e-05, + "loss": 1.1791, + "step": 16270 + }, + { + "epoch": 3.62, + "learning_rate": 3.536275020462832e-05, + "loss": 1.1893, + "step": 16280 + }, + { + "epoch": 3.62, + "learning_rate": 3.535977379269291e-05, + "loss": 1.1896, + "step": 16290 + }, + { + "epoch": 3.62, + "learning_rate": 3.5356797380757504e-05, + "loss": 1.1912, + "step": 16300 + }, + { + "epoch": 3.62, + "learning_rate": 3.5353820968822084e-05, + "loss": 1.1993, + "step": 16310 + }, + { + "epoch": 3.62, + "learning_rate": 3.535084455688668e-05, + "loss": 1.1918, + "step": 16320 + }, + { + "epoch": 3.63, + "learning_rate": 3.5347868144951264e-05, + "loss": 1.1911, + "step": 16330 + }, + { + "epoch": 3.63, + "learning_rate": 3.534489173301585e-05, + "loss": 1.1954, + "step": 16340 + }, + { + "epoch": 3.63, + "learning_rate": 3.534191532108044e-05, + "loss": 1.1835, + "step": 16350 + }, + { + "epoch": 3.63, + "learning_rate": 3.533893890914503e-05, + "loss": 1.1934, + "step": 16360 + }, + { + "epoch": 3.64, + "learning_rate": 3.533596249720962e-05, + "loss": 1.1888, + "step": 16370 + }, + { + "epoch": 3.64, + "learning_rate": 3.5332986085274205e-05, + "loss": 1.1911, + "step": 16380 + }, + { + "epoch": 3.64, + "learning_rate": 3.533000967333879e-05, + "loss": 1.1721, + "step": 16390 + }, + { + "epoch": 3.64, + "learning_rate": 3.5327033261403386e-05, + "loss": 1.196, + "step": 16400 + }, + { + "epoch": 3.64, + "learning_rate": 3.532405684946797e-05, + "loss": 1.1902, + "step": 16410 + }, + { + "epoch": 3.65, + "learning_rate": 3.532108043753255e-05, + "loss": 1.2061, + "step": 16420 + }, + { + "epoch": 3.65, + "learning_rate": 3.531810402559715e-05, + "loss": 1.2041, + "step": 16430 + }, + { + "epoch": 3.65, + "learning_rate": 3.5315127613661734e-05, + "loss": 1.1996, + "step": 16440 + }, + { + "epoch": 3.65, + "learning_rate": 3.531215120172632e-05, + "loss": 1.1817, + "step": 16450 + }, + { + "epoch": 3.66, + "learning_rate": 3.530917478979091e-05, + "loss": 1.1891, + "step": 16460 + }, + { + "epoch": 3.66, + "learning_rate": 3.53061983778555e-05, + "loss": 1.1897, + "step": 16470 + }, + { + "epoch": 3.66, + "learning_rate": 3.530322196592009e-05, + "loss": 1.1874, + "step": 16480 + }, + { + "epoch": 3.66, + "learning_rate": 3.5300245553984675e-05, + "loss": 1.188, + "step": 16490 + }, + { + "epoch": 3.66, + "learning_rate": 3.529726914204926e-05, + "loss": 1.1956, + "step": 16500 + }, + { + "epoch": 3.67, + "learning_rate": 3.5294292730113855e-05, + "loss": 1.2042, + "step": 16510 + }, + { + "epoch": 3.67, + "learning_rate": 3.5291316318178436e-05, + "loss": 1.1866, + "step": 16520 + }, + { + "epoch": 3.67, + "learning_rate": 3.528833990624302e-05, + "loss": 1.1906, + "step": 16530 + }, + { + "epoch": 3.67, + "learning_rate": 3.5285363494307616e-05, + "loss": 1.1885, + "step": 16540 + }, + { + "epoch": 3.68, + "learning_rate": 3.52823870823722e-05, + "loss": 1.1993, + "step": 16550 + }, + { + "epoch": 3.68, + "learning_rate": 3.527941067043679e-05, + "loss": 1.1954, + "step": 16560 + }, + { + "epoch": 3.68, + "learning_rate": 3.527643425850138e-05, + "loss": 1.1865, + "step": 16570 + }, + { + "epoch": 3.68, + "learning_rate": 3.527345784656597e-05, + "loss": 1.1832, + "step": 16580 + }, + { + "epoch": 3.68, + "learning_rate": 3.527048143463056e-05, + "loss": 1.1976, + "step": 16590 + }, + { + "epoch": 3.69, + "learning_rate": 3.5267505022695144e-05, + "loss": 1.1771, + "step": 16600 + }, + { + "epoch": 3.69, + "learning_rate": 3.526452861075973e-05, + "loss": 1.1912, + "step": 16610 + }, + { + "epoch": 3.69, + "learning_rate": 3.526155219882432e-05, + "loss": 1.1902, + "step": 16620 + }, + { + "epoch": 3.69, + "learning_rate": 3.5258575786888905e-05, + "loss": 1.1854, + "step": 16630 + }, + { + "epoch": 3.7, + "learning_rate": 3.525559937495349e-05, + "loss": 1.1737, + "step": 16640 + }, + { + "epoch": 3.7, + "learning_rate": 3.5252622963018086e-05, + "loss": 1.1943, + "step": 16650 + }, + { + "epoch": 3.7, + "learning_rate": 3.524964655108267e-05, + "loss": 1.2048, + "step": 16660 + }, + { + "epoch": 3.7, + "learning_rate": 3.524667013914726e-05, + "loss": 1.2057, + "step": 16670 + }, + { + "epoch": 3.7, + "learning_rate": 3.5243693727211846e-05, + "loss": 1.1889, + "step": 16680 + }, + { + "epoch": 3.71, + "learning_rate": 3.524071731527644e-05, + "loss": 1.1988, + "step": 16690 + }, + { + "epoch": 3.71, + "learning_rate": 3.523774090334103e-05, + "loss": 1.189, + "step": 16700 + }, + { + "epoch": 3.71, + "learning_rate": 3.5234764491405614e-05, + "loss": 1.1749, + "step": 16710 + }, + { + "epoch": 3.71, + "learning_rate": 3.52317880794702e-05, + "loss": 1.1987, + "step": 16720 + }, + { + "epoch": 3.72, + "learning_rate": 3.522881166753479e-05, + "loss": 1.1836, + "step": 16730 + }, + { + "epoch": 3.72, + "learning_rate": 3.5225835255599375e-05, + "loss": 1.1958, + "step": 16740 + }, + { + "epoch": 3.72, + "learning_rate": 3.522285884366397e-05, + "loss": 1.1768, + "step": 16750 + }, + { + "epoch": 3.72, + "learning_rate": 3.5219882431728555e-05, + "loss": 1.1856, + "step": 16760 + }, + { + "epoch": 3.72, + "learning_rate": 3.521690601979314e-05, + "loss": 1.1938, + "step": 16770 + }, + { + "epoch": 3.73, + "learning_rate": 3.521392960785773e-05, + "loss": 1.1881, + "step": 16780 + }, + { + "epoch": 3.73, + "learning_rate": 3.521095319592232e-05, + "loss": 1.2021, + "step": 16790 + }, + { + "epoch": 3.73, + "learning_rate": 3.520797678398691e-05, + "loss": 1.1891, + "step": 16800 + }, + { + "epoch": 3.73, + "learning_rate": 3.5205000372051496e-05, + "loss": 1.1827, + "step": 16810 + }, + { + "epoch": 3.74, + "learning_rate": 3.520202396011608e-05, + "loss": 1.1849, + "step": 16820 + }, + { + "epoch": 3.74, + "learning_rate": 3.519904754818067e-05, + "loss": 1.1745, + "step": 16830 + }, + { + "epoch": 3.74, + "learning_rate": 3.519607113624526e-05, + "loss": 1.1923, + "step": 16840 + }, + { + "epoch": 3.74, + "learning_rate": 3.5193094724309844e-05, + "loss": 1.1935, + "step": 16850 + }, + { + "epoch": 3.74, + "learning_rate": 3.519011831237444e-05, + "loss": 1.1806, + "step": 16860 + }, + { + "epoch": 3.75, + "learning_rate": 3.5187141900439025e-05, + "loss": 1.1826, + "step": 16870 + }, + { + "epoch": 3.75, + "learning_rate": 3.518416548850361e-05, + "loss": 1.1836, + "step": 16880 + }, + { + "epoch": 3.75, + "learning_rate": 3.51811890765682e-05, + "loss": 1.2089, + "step": 16890 + }, + { + "epoch": 3.75, + "learning_rate": 3.517821266463279e-05, + "loss": 1.1911, + "step": 16900 + }, + { + "epoch": 3.76, + "learning_rate": 3.517523625269738e-05, + "loss": 1.1937, + "step": 16910 + }, + { + "epoch": 3.76, + "learning_rate": 3.5172259840761966e-05, + "loss": 1.1812, + "step": 16920 + }, + { + "epoch": 3.76, + "learning_rate": 3.516928342882655e-05, + "loss": 1.1869, + "step": 16930 + }, + { + "epoch": 3.76, + "learning_rate": 3.516630701689114e-05, + "loss": 1.1949, + "step": 16940 + }, + { + "epoch": 3.76, + "learning_rate": 3.5163330604955726e-05, + "loss": 1.1926, + "step": 16950 + }, + { + "epoch": 3.77, + "learning_rate": 3.516035419302031e-05, + "loss": 1.1855, + "step": 16960 + }, + { + "epoch": 3.77, + "learning_rate": 3.515737778108491e-05, + "loss": 1.1648, + "step": 16970 + }, + { + "epoch": 3.77, + "learning_rate": 3.5154401369149494e-05, + "loss": 1.1806, + "step": 16980 + }, + { + "epoch": 3.77, + "learning_rate": 3.515142495721408e-05, + "loss": 1.1887, + "step": 16990 + }, + { + "epoch": 3.78, + "learning_rate": 3.514844854527867e-05, + "loss": 1.1931, + "step": 17000 + }, + { + "epoch": 3.78, + "learning_rate": 3.514547213334326e-05, + "loss": 1.1896, + "step": 17010 + }, + { + "epoch": 3.78, + "learning_rate": 3.514249572140785e-05, + "loss": 1.2086, + "step": 17020 + }, + { + "epoch": 3.78, + "learning_rate": 3.5139519309472435e-05, + "loss": 1.1942, + "step": 17030 + }, + { + "epoch": 3.78, + "learning_rate": 3.513654289753702e-05, + "loss": 1.1852, + "step": 17040 + }, + { + "epoch": 3.79, + "learning_rate": 3.513356648560161e-05, + "loss": 1.1882, + "step": 17050 + }, + { + "epoch": 3.79, + "learning_rate": 3.5130590073666196e-05, + "loss": 1.203, + "step": 17060 + }, + { + "epoch": 3.79, + "learning_rate": 3.512761366173078e-05, + "loss": 1.1857, + "step": 17070 + }, + { + "epoch": 3.79, + "learning_rate": 3.5124637249795376e-05, + "loss": 1.2064, + "step": 17080 + }, + { + "epoch": 3.8, + "learning_rate": 3.5121660837859963e-05, + "loss": 1.1951, + "step": 17090 + }, + { + "epoch": 3.8, + "learning_rate": 3.511868442592455e-05, + "loss": 1.1932, + "step": 17100 + }, + { + "epoch": 3.8, + "learning_rate": 3.511570801398914e-05, + "loss": 1.2039, + "step": 17110 + }, + { + "epoch": 3.8, + "learning_rate": 3.511273160205373e-05, + "loss": 1.1903, + "step": 17120 + }, + { + "epoch": 3.8, + "learning_rate": 3.510975519011831e-05, + "loss": 1.1813, + "step": 17130 + }, + { + "epoch": 3.81, + "learning_rate": 3.5106778778182905e-05, + "loss": 1.2101, + "step": 17140 + }, + { + "epoch": 3.81, + "learning_rate": 3.510380236624749e-05, + "loss": 1.1822, + "step": 17150 + }, + { + "epoch": 3.81, + "learning_rate": 3.510082595431208e-05, + "loss": 1.1839, + "step": 17160 + }, + { + "epoch": 3.81, + "learning_rate": 3.5097849542376665e-05, + "loss": 1.1822, + "step": 17170 + }, + { + "epoch": 3.82, + "learning_rate": 3.509487313044126e-05, + "loss": 1.2029, + "step": 17180 + }, + { + "epoch": 3.82, + "learning_rate": 3.5091896718505846e-05, + "loss": 1.2039, + "step": 17190 + }, + { + "epoch": 3.82, + "learning_rate": 3.508892030657043e-05, + "loss": 1.1957, + "step": 17200 + }, + { + "epoch": 3.82, + "learning_rate": 3.508594389463502e-05, + "loss": 1.1914, + "step": 17210 + }, + { + "epoch": 3.82, + "learning_rate": 3.5082967482699613e-05, + "loss": 1.2055, + "step": 17220 + }, + { + "epoch": 3.83, + "learning_rate": 3.50799910707642e-05, + "loss": 1.1905, + "step": 17230 + }, + { + "epoch": 3.83, + "learning_rate": 3.507701465882878e-05, + "loss": 1.1857, + "step": 17240 + }, + { + "epoch": 3.83, + "learning_rate": 3.5074038246893374e-05, + "loss": 1.1867, + "step": 17250 + }, + { + "epoch": 3.83, + "learning_rate": 3.507106183495796e-05, + "loss": 1.1882, + "step": 17260 + }, + { + "epoch": 3.84, + "learning_rate": 3.506808542302255e-05, + "loss": 1.2062, + "step": 17270 + }, + { + "epoch": 3.84, + "learning_rate": 3.5065109011087135e-05, + "loss": 1.1838, + "step": 17280 + }, + { + "epoch": 3.84, + "learning_rate": 3.506213259915173e-05, + "loss": 1.1848, + "step": 17290 + }, + { + "epoch": 3.84, + "learning_rate": 3.5059156187216315e-05, + "loss": 1.1831, + "step": 17300 + }, + { + "epoch": 3.84, + "learning_rate": 3.50561797752809e-05, + "loss": 1.1861, + "step": 17310 + }, + { + "epoch": 3.85, + "learning_rate": 3.505320336334549e-05, + "loss": 1.1731, + "step": 17320 + }, + { + "epoch": 3.85, + "learning_rate": 3.505022695141008e-05, + "loss": 1.1814, + "step": 17330 + }, + { + "epoch": 3.85, + "learning_rate": 3.504725053947466e-05, + "loss": 1.2008, + "step": 17340 + }, + { + "epoch": 3.85, + "learning_rate": 3.504427412753925e-05, + "loss": 1.2027, + "step": 17350 + }, + { + "epoch": 3.86, + "learning_rate": 3.5041297715603844e-05, + "loss": 1.1849, + "step": 17360 + }, + { + "epoch": 3.86, + "learning_rate": 3.503832130366843e-05, + "loss": 1.1857, + "step": 17370 + }, + { + "epoch": 3.86, + "learning_rate": 3.503534489173302e-05, + "loss": 1.1834, + "step": 17380 + }, + { + "epoch": 3.86, + "learning_rate": 3.5032368479797604e-05, + "loss": 1.1909, + "step": 17390 + }, + { + "epoch": 3.86, + "learning_rate": 3.50293920678622e-05, + "loss": 1.1975, + "step": 17400 + }, + { + "epoch": 3.87, + "learning_rate": 3.5026415655926785e-05, + "loss": 1.1899, + "step": 17410 + }, + { + "epoch": 3.87, + "learning_rate": 3.502343924399137e-05, + "loss": 1.2091, + "step": 17420 + }, + { + "epoch": 3.87, + "learning_rate": 3.502046283205596e-05, + "loss": 1.1957, + "step": 17430 + }, + { + "epoch": 3.87, + "learning_rate": 3.5017486420120546e-05, + "loss": 1.1933, + "step": 17440 + }, + { + "epoch": 3.88, + "learning_rate": 3.501451000818513e-05, + "loss": 1.1939, + "step": 17450 + }, + { + "epoch": 3.88, + "learning_rate": 3.501153359624972e-05, + "loss": 1.1777, + "step": 17460 + }, + { + "epoch": 3.88, + "learning_rate": 3.500855718431431e-05, + "loss": 1.1782, + "step": 17470 + }, + { + "epoch": 3.88, + "learning_rate": 3.50055807723789e-05, + "loss": 1.1857, + "step": 17480 + }, + { + "epoch": 3.88, + "learning_rate": 3.500260436044349e-05, + "loss": 1.1837, + "step": 17490 + }, + { + "epoch": 3.89, + "learning_rate": 3.4999627948508074e-05, + "loss": 1.1838, + "step": 17500 + }, + { + "epoch": 3.89, + "learning_rate": 3.499665153657267e-05, + "loss": 1.1935, + "step": 17510 + }, + { + "epoch": 3.89, + "learning_rate": 3.4993675124637254e-05, + "loss": 1.2069, + "step": 17520 + }, + { + "epoch": 3.89, + "learning_rate": 3.499069871270184e-05, + "loss": 1.185, + "step": 17530 + }, + { + "epoch": 3.9, + "learning_rate": 3.498772230076643e-05, + "loss": 1.1975, + "step": 17540 + }, + { + "epoch": 3.9, + "learning_rate": 3.4984745888831015e-05, + "loss": 1.1635, + "step": 17550 + }, + { + "epoch": 3.9, + "learning_rate": 3.49817694768956e-05, + "loss": 1.1897, + "step": 17560 + }, + { + "epoch": 3.9, + "learning_rate": 3.4978793064960196e-05, + "loss": 1.194, + "step": 17570 + }, + { + "epoch": 3.9, + "learning_rate": 3.497581665302478e-05, + "loss": 1.1841, + "step": 17580 + }, + { + "epoch": 3.91, + "learning_rate": 3.497284024108937e-05, + "loss": 1.1692, + "step": 17590 + }, + { + "epoch": 3.91, + "learning_rate": 3.4969863829153956e-05, + "loss": 1.1642, + "step": 17600 + }, + { + "epoch": 3.91, + "learning_rate": 3.496688741721855e-05, + "loss": 1.18, + "step": 17610 + }, + { + "epoch": 3.91, + "learning_rate": 3.496391100528314e-05, + "loss": 1.1878, + "step": 17620 + }, + { + "epoch": 3.92, + "learning_rate": 3.4960934593347724e-05, + "loss": 1.1778, + "step": 17630 + }, + { + "epoch": 3.92, + "learning_rate": 3.495795818141231e-05, + "loss": 1.173, + "step": 17640 + }, + { + "epoch": 3.92, + "learning_rate": 3.49549817694769e-05, + "loss": 1.1848, + "step": 17650 + }, + { + "epoch": 3.92, + "learning_rate": 3.4952005357541484e-05, + "loss": 1.1738, + "step": 17660 + }, + { + "epoch": 3.92, + "learning_rate": 3.494902894560607e-05, + "loss": 1.1857, + "step": 17670 + }, + { + "epoch": 3.93, + "learning_rate": 3.4946052533670665e-05, + "loss": 1.1846, + "step": 17680 + }, + { + "epoch": 3.93, + "learning_rate": 3.494307612173525e-05, + "loss": 1.1998, + "step": 17690 + }, + { + "epoch": 3.93, + "learning_rate": 3.494009970979984e-05, + "loss": 1.1795, + "step": 17700 + }, + { + "epoch": 3.93, + "learning_rate": 3.4937123297864426e-05, + "loss": 1.1759, + "step": 17710 + }, + { + "epoch": 3.94, + "learning_rate": 3.493414688592902e-05, + "loss": 1.1922, + "step": 17720 + }, + { + "epoch": 3.94, + "learning_rate": 3.4931170473993606e-05, + "loss": 1.1809, + "step": 17730 + }, + { + "epoch": 3.94, + "learning_rate": 3.492819406205819e-05, + "loss": 1.1781, + "step": 17740 + }, + { + "epoch": 3.94, + "learning_rate": 3.492521765012278e-05, + "loss": 1.1809, + "step": 17750 + }, + { + "epoch": 3.94, + "learning_rate": 3.492224123818737e-05, + "loss": 1.1704, + "step": 17760 + }, + { + "epoch": 3.95, + "learning_rate": 3.4919264826251954e-05, + "loss": 1.1768, + "step": 17770 + }, + { + "epoch": 3.95, + "learning_rate": 3.491628841431654e-05, + "loss": 1.1972, + "step": 17780 + }, + { + "epoch": 3.95, + "learning_rate": 3.4913312002381134e-05, + "loss": 1.1821, + "step": 17790 + }, + { + "epoch": 3.95, + "learning_rate": 3.491033559044572e-05, + "loss": 1.1947, + "step": 17800 + }, + { + "epoch": 3.96, + "learning_rate": 3.490735917851031e-05, + "loss": 1.1816, + "step": 17810 + }, + { + "epoch": 3.96, + "learning_rate": 3.4904382766574895e-05, + "loss": 1.1794, + "step": 17820 + }, + { + "epoch": 3.96, + "learning_rate": 3.490140635463949e-05, + "loss": 1.188, + "step": 17830 + }, + { + "epoch": 3.96, + "learning_rate": 3.4898429942704076e-05, + "loss": 1.1855, + "step": 17840 + }, + { + "epoch": 3.96, + "learning_rate": 3.489545353076866e-05, + "loss": 1.1694, + "step": 17850 + }, + { + "epoch": 3.97, + "learning_rate": 3.489247711883325e-05, + "loss": 1.188, + "step": 17860 + }, + { + "epoch": 3.97, + "learning_rate": 3.4889500706897836e-05, + "loss": 1.175, + "step": 17870 + }, + { + "epoch": 3.97, + "learning_rate": 3.488652429496242e-05, + "loss": 1.1805, + "step": 17880 + }, + { + "epoch": 3.97, + "learning_rate": 3.488354788302701e-05, + "loss": 1.1933, + "step": 17890 + }, + { + "epoch": 3.98, + "learning_rate": 3.4880571471091604e-05, + "loss": 1.1737, + "step": 17900 + }, + { + "epoch": 3.98, + "learning_rate": 3.487759505915619e-05, + "loss": 1.1799, + "step": 17910 + }, + { + "epoch": 3.98, + "learning_rate": 3.487461864722078e-05, + "loss": 1.1846, + "step": 17920 + }, + { + "epoch": 3.98, + "learning_rate": 3.4871642235285365e-05, + "loss": 1.1953, + "step": 17930 + }, + { + "epoch": 3.98, + "learning_rate": 3.486866582334996e-05, + "loss": 1.1752, + "step": 17940 + }, + { + "epoch": 3.99, + "learning_rate": 3.486568941141454e-05, + "loss": 1.1877, + "step": 17950 + }, + { + "epoch": 3.99, + "learning_rate": 3.486271299947913e-05, + "loss": 1.1805, + "step": 17960 + }, + { + "epoch": 3.99, + "learning_rate": 3.485973658754372e-05, + "loss": 1.1886, + "step": 17970 + }, + { + "epoch": 3.99, + "learning_rate": 3.4856760175608306e-05, + "loss": 1.1831, + "step": 17980 + }, + { + "epoch": 4.0, + "learning_rate": 3.485378376367289e-05, + "loss": 1.1868, + "step": 17990 + }, + { + "epoch": 4.0, + "learning_rate": 3.4850807351737486e-05, + "loss": 1.1897, + "step": 18000 + }, + { + "epoch": 4.0, + "learning_rate": 3.484783093980207e-05, + "loss": 1.1917, + "step": 18010 + }, + { + "epoch": 4.0, + "eval_cer": 4.771367521367521, + "eval_loss": 1.1306734085083008, + "eval_runtime": 5.7447, + "eval_samples_per_second": 1.741, + "eval_steps_per_second": 0.174, + "eval_wer": 1.0389610389610389, + "step": 18012 + }, + { + "epoch": 4.0, + "learning_rate": 3.484485452786666e-05, + "loss": 1.1876, + "step": 18020 + }, + { + "epoch": 4.0, + "learning_rate": 3.484187811593125e-05, + "loss": 1.1865, + "step": 18030 + }, + { + "epoch": 4.01, + "learning_rate": 3.483890170399584e-05, + "loss": 1.1602, + "step": 18040 + }, + { + "epoch": 4.01, + "learning_rate": 3.483592529206042e-05, + "loss": 1.188, + "step": 18050 + }, + { + "epoch": 4.01, + "learning_rate": 3.483294888012501e-05, + "loss": 1.1749, + "step": 18060 + }, + { + "epoch": 4.01, + "learning_rate": 3.48299724681896e-05, + "loss": 1.1685, + "step": 18070 + }, + { + "epoch": 4.02, + "learning_rate": 3.482699605625419e-05, + "loss": 1.1732, + "step": 18080 + }, + { + "epoch": 4.02, + "learning_rate": 3.4824019644318775e-05, + "loss": 1.1896, + "step": 18090 + }, + { + "epoch": 4.02, + "learning_rate": 3.482104323238336e-05, + "loss": 1.201, + "step": 18100 + }, + { + "epoch": 4.02, + "learning_rate": 3.4818066820447956e-05, + "loss": 1.1737, + "step": 18110 + }, + { + "epoch": 4.02, + "learning_rate": 3.481509040851254e-05, + "loss": 1.1684, + "step": 18120 + }, + { + "epoch": 4.03, + "learning_rate": 3.481211399657713e-05, + "loss": 1.181, + "step": 18130 + }, + { + "epoch": 4.03, + "learning_rate": 3.4809137584641717e-05, + "loss": 1.1805, + "step": 18140 + }, + { + "epoch": 4.03, + "learning_rate": 3.480616117270631e-05, + "loss": 1.1936, + "step": 18150 + }, + { + "epoch": 4.03, + "learning_rate": 3.480318476077089e-05, + "loss": 1.1701, + "step": 18160 + }, + { + "epoch": 4.04, + "learning_rate": 3.480020834883548e-05, + "loss": 1.1658, + "step": 18170 + }, + { + "epoch": 4.04, + "learning_rate": 3.479723193690007e-05, + "loss": 1.1796, + "step": 18180 + }, + { + "epoch": 4.04, + "learning_rate": 3.479425552496466e-05, + "loss": 1.1673, + "step": 18190 + }, + { + "epoch": 4.04, + "learning_rate": 3.4791279113029245e-05, + "loss": 1.1847, + "step": 18200 + }, + { + "epoch": 4.04, + "learning_rate": 3.478830270109383e-05, + "loss": 1.1691, + "step": 18210 + }, + { + "epoch": 4.05, + "learning_rate": 3.4785326289158425e-05, + "loss": 1.1889, + "step": 18220 + }, + { + "epoch": 4.05, + "learning_rate": 3.478234987722301e-05, + "loss": 1.1862, + "step": 18230 + }, + { + "epoch": 4.05, + "learning_rate": 3.47793734652876e-05, + "loss": 1.1714, + "step": 18240 + }, + { + "epoch": 4.05, + "learning_rate": 3.4776397053352186e-05, + "loss": 1.1812, + "step": 18250 + }, + { + "epoch": 4.06, + "learning_rate": 3.477342064141677e-05, + "loss": 1.1725, + "step": 18260 + }, + { + "epoch": 4.06, + "learning_rate": 3.477044422948136e-05, + "loss": 1.1828, + "step": 18270 + }, + { + "epoch": 4.06, + "learning_rate": 3.4767467817545953e-05, + "loss": 1.1789, + "step": 18280 + }, + { + "epoch": 4.06, + "learning_rate": 3.476449140561054e-05, + "loss": 1.1832, + "step": 18290 + }, + { + "epoch": 4.06, + "learning_rate": 3.476151499367513e-05, + "loss": 1.1984, + "step": 18300 + }, + { + "epoch": 4.07, + "learning_rate": 3.4758538581739714e-05, + "loss": 1.191, + "step": 18310 + }, + { + "epoch": 4.07, + "learning_rate": 3.47555621698043e-05, + "loss": 1.196, + "step": 18320 + }, + { + "epoch": 4.07, + "learning_rate": 3.4752585757868895e-05, + "loss": 1.1766, + "step": 18330 + }, + { + "epoch": 4.07, + "learning_rate": 3.474960934593348e-05, + "loss": 1.1789, + "step": 18340 + }, + { + "epoch": 4.08, + "learning_rate": 3.474663293399807e-05, + "loss": 1.1829, + "step": 18350 + }, + { + "epoch": 4.08, + "learning_rate": 3.4743656522062655e-05, + "loss": 1.1991, + "step": 18360 + }, + { + "epoch": 4.08, + "learning_rate": 3.474068011012724e-05, + "loss": 1.1808, + "step": 18370 + }, + { + "epoch": 4.08, + "learning_rate": 3.473770369819183e-05, + "loss": 1.1953, + "step": 18380 + }, + { + "epoch": 4.08, + "learning_rate": 3.473472728625642e-05, + "loss": 1.1929, + "step": 18390 + }, + { + "epoch": 4.09, + "learning_rate": 3.473175087432101e-05, + "loss": 1.1723, + "step": 18400 + }, + { + "epoch": 4.09, + "learning_rate": 3.47287744623856e-05, + "loss": 1.1731, + "step": 18410 + }, + { + "epoch": 4.09, + "learning_rate": 3.4725798050450184e-05, + "loss": 1.1768, + "step": 18420 + }, + { + "epoch": 4.09, + "learning_rate": 3.472282163851478e-05, + "loss": 1.186, + "step": 18430 + }, + { + "epoch": 4.1, + "learning_rate": 3.4719845226579364e-05, + "loss": 1.1789, + "step": 18440 + }, + { + "epoch": 4.1, + "learning_rate": 3.471686881464395e-05, + "loss": 1.1774, + "step": 18450 + }, + { + "epoch": 4.1, + "learning_rate": 3.471389240270854e-05, + "loss": 1.167, + "step": 18460 + }, + { + "epoch": 4.1, + "learning_rate": 3.4710915990773125e-05, + "loss": 1.1831, + "step": 18470 + }, + { + "epoch": 4.1, + "learning_rate": 3.470793957883771e-05, + "loss": 1.1712, + "step": 18480 + }, + { + "epoch": 4.11, + "learning_rate": 3.47049631669023e-05, + "loss": 1.1895, + "step": 18490 + }, + { + "epoch": 4.11, + "learning_rate": 3.470198675496689e-05, + "loss": 1.1795, + "step": 18500 + }, + { + "epoch": 4.11, + "learning_rate": 3.469901034303148e-05, + "loss": 1.1727, + "step": 18510 + }, + { + "epoch": 4.11, + "learning_rate": 3.4696033931096066e-05, + "loss": 1.1629, + "step": 18520 + }, + { + "epoch": 4.12, + "learning_rate": 3.469305751916065e-05, + "loss": 1.1801, + "step": 18530 + }, + { + "epoch": 4.12, + "learning_rate": 3.469008110722525e-05, + "loss": 1.1808, + "step": 18540 + }, + { + "epoch": 4.12, + "learning_rate": 3.4687104695289834e-05, + "loss": 1.1783, + "step": 18550 + }, + { + "epoch": 4.12, + "learning_rate": 3.468412828335442e-05, + "loss": 1.1733, + "step": 18560 + }, + { + "epoch": 4.12, + "learning_rate": 3.468115187141901e-05, + "loss": 1.184, + "step": 18570 + }, + { + "epoch": 4.13, + "learning_rate": 3.4678175459483594e-05, + "loss": 1.1799, + "step": 18580 + }, + { + "epoch": 4.13, + "learning_rate": 3.467519904754818e-05, + "loss": 1.1948, + "step": 18590 + }, + { + "epoch": 4.13, + "learning_rate": 3.467222263561277e-05, + "loss": 1.1727, + "step": 18600 + }, + { + "epoch": 4.13, + "learning_rate": 3.466924622367736e-05, + "loss": 1.1636, + "step": 18610 + }, + { + "epoch": 4.14, + "learning_rate": 3.466626981174195e-05, + "loss": 1.1751, + "step": 18620 + }, + { + "epoch": 4.14, + "learning_rate": 3.4663293399806536e-05, + "loss": 1.1761, + "step": 18630 + }, + { + "epoch": 4.14, + "learning_rate": 3.466031698787112e-05, + "loss": 1.1644, + "step": 18640 + }, + { + "epoch": 4.14, + "learning_rate": 3.4657340575935716e-05, + "loss": 1.1815, + "step": 18650 + }, + { + "epoch": 4.14, + "learning_rate": 3.46543641640003e-05, + "loss": 1.1808, + "step": 18660 + }, + { + "epoch": 4.15, + "learning_rate": 3.465138775206489e-05, + "loss": 1.181, + "step": 18670 + }, + { + "epoch": 4.15, + "learning_rate": 3.464841134012948e-05, + "loss": 1.1871, + "step": 18680 + }, + { + "epoch": 4.15, + "learning_rate": 3.4645434928194064e-05, + "loss": 1.1819, + "step": 18690 + }, + { + "epoch": 4.15, + "learning_rate": 3.464245851625865e-05, + "loss": 1.1756, + "step": 18700 + }, + { + "epoch": 4.16, + "learning_rate": 3.4639482104323244e-05, + "loss": 1.1766, + "step": 18710 + }, + { + "epoch": 4.16, + "learning_rate": 3.463650569238783e-05, + "loss": 1.1692, + "step": 18720 + }, + { + "epoch": 4.16, + "learning_rate": 3.463352928045242e-05, + "loss": 1.1795, + "step": 18730 + }, + { + "epoch": 4.16, + "learning_rate": 3.4630552868517005e-05, + "loss": 1.1775, + "step": 18740 + }, + { + "epoch": 4.16, + "learning_rate": 3.462757645658159e-05, + "loss": 1.1786, + "step": 18750 + }, + { + "epoch": 4.17, + "learning_rate": 3.4624600044646186e-05, + "loss": 1.1699, + "step": 18760 + }, + { + "epoch": 4.17, + "learning_rate": 3.4621623632710766e-05, + "loss": 1.1837, + "step": 18770 + }, + { + "epoch": 4.17, + "learning_rate": 3.461864722077536e-05, + "loss": 1.1697, + "step": 18780 + }, + { + "epoch": 4.17, + "learning_rate": 3.4615670808839946e-05, + "loss": 1.1645, + "step": 18790 + }, + { + "epoch": 4.17, + "learning_rate": 3.461269439690453e-05, + "loss": 1.1632, + "step": 18800 + }, + { + "epoch": 4.18, + "learning_rate": 3.460971798496912e-05, + "loss": 1.1807, + "step": 18810 + }, + { + "epoch": 4.18, + "learning_rate": 3.4606741573033714e-05, + "loss": 1.1851, + "step": 18820 + }, + { + "epoch": 4.18, + "learning_rate": 3.46037651610983e-05, + "loss": 1.1804, + "step": 18830 + }, + { + "epoch": 4.18, + "learning_rate": 3.460078874916289e-05, + "loss": 1.1865, + "step": 18840 + }, + { + "epoch": 4.19, + "learning_rate": 3.4597812337227474e-05, + "loss": 1.1751, + "step": 18850 + }, + { + "epoch": 4.19, + "learning_rate": 3.459483592529207e-05, + "loss": 1.1726, + "step": 18860 + }, + { + "epoch": 4.19, + "learning_rate": 3.459185951335665e-05, + "loss": 1.1711, + "step": 18870 + }, + { + "epoch": 4.19, + "learning_rate": 3.4588883101421235e-05, + "loss": 1.1968, + "step": 18880 + }, + { + "epoch": 4.19, + "learning_rate": 3.458590668948583e-05, + "loss": 1.1674, + "step": 18890 + }, + { + "epoch": 4.2, + "learning_rate": 3.4582930277550416e-05, + "loss": 1.1705, + "step": 18900 + }, + { + "epoch": 4.2, + "learning_rate": 3.4579953865615e-05, + "loss": 1.1946, + "step": 18910 + }, + { + "epoch": 4.2, + "learning_rate": 3.457697745367959e-05, + "loss": 1.1768, + "step": 18920 + }, + { + "epoch": 4.2, + "learning_rate": 3.457400104174418e-05, + "loss": 1.1726, + "step": 18930 + }, + { + "epoch": 4.21, + "learning_rate": 3.457102462980877e-05, + "loss": 1.1732, + "step": 18940 + }, + { + "epoch": 4.21, + "learning_rate": 3.456804821787336e-05, + "loss": 1.1964, + "step": 18950 + }, + { + "epoch": 4.21, + "learning_rate": 3.4565071805937944e-05, + "loss": 1.1706, + "step": 18960 + }, + { + "epoch": 4.21, + "learning_rate": 3.456209539400254e-05, + "loss": 1.1793, + "step": 18970 + }, + { + "epoch": 4.21, + "learning_rate": 3.455911898206712e-05, + "loss": 1.1698, + "step": 18980 + }, + { + "epoch": 4.22, + "learning_rate": 3.4556142570131705e-05, + "loss": 1.1704, + "step": 18990 + }, + { + "epoch": 4.22, + "learning_rate": 3.45531661581963e-05, + "loss": 1.1647, + "step": 19000 + }, + { + "epoch": 4.22, + "learning_rate": 3.4550189746260885e-05, + "loss": 1.1889, + "step": 19010 + }, + { + "epoch": 4.22, + "learning_rate": 3.454721333432547e-05, + "loss": 1.1707, + "step": 19020 + }, + { + "epoch": 4.23, + "learning_rate": 3.454423692239006e-05, + "loss": 1.17, + "step": 19030 + }, + { + "epoch": 4.23, + "learning_rate": 3.454126051045465e-05, + "loss": 1.1674, + "step": 19040 + }, + { + "epoch": 4.23, + "learning_rate": 3.453828409851924e-05, + "loss": 1.1845, + "step": 19050 + }, + { + "epoch": 4.23, + "learning_rate": 3.4535307686583826e-05, + "loss": 1.1788, + "step": 19060 + }, + { + "epoch": 4.23, + "learning_rate": 3.453233127464841e-05, + "loss": 1.1638, + "step": 19070 + }, + { + "epoch": 4.24, + "learning_rate": 3.4529354862713e-05, + "loss": 1.1909, + "step": 19080 + }, + { + "epoch": 4.24, + "learning_rate": 3.452637845077759e-05, + "loss": 1.1632, + "step": 19090 + }, + { + "epoch": 4.24, + "learning_rate": 3.452340203884218e-05, + "loss": 1.1759, + "step": 19100 + }, + { + "epoch": 4.24, + "learning_rate": 3.452042562690677e-05, + "loss": 1.1771, + "step": 19110 + }, + { + "epoch": 4.25, + "learning_rate": 3.4517449214971355e-05, + "loss": 1.1818, + "step": 19120 + }, + { + "epoch": 4.25, + "learning_rate": 3.451447280303594e-05, + "loss": 1.1624, + "step": 19130 + }, + { + "epoch": 4.25, + "learning_rate": 3.4511496391100535e-05, + "loss": 1.1744, + "step": 19140 + }, + { + "epoch": 4.25, + "learning_rate": 3.450851997916512e-05, + "loss": 1.1707, + "step": 19150 + }, + { + "epoch": 4.25, + "learning_rate": 3.450554356722971e-05, + "loss": 1.1754, + "step": 19160 + }, + { + "epoch": 4.26, + "learning_rate": 3.4502567155294296e-05, + "loss": 1.1679, + "step": 19170 + }, + { + "epoch": 4.26, + "learning_rate": 3.449959074335888e-05, + "loss": 1.1775, + "step": 19180 + }, + { + "epoch": 4.26, + "learning_rate": 3.449661433142347e-05, + "loss": 1.1674, + "step": 19190 + }, + { + "epoch": 4.26, + "learning_rate": 3.4493637919488057e-05, + "loss": 1.1838, + "step": 19200 + }, + { + "epoch": 4.27, + "learning_rate": 3.449066150755265e-05, + "loss": 1.169, + "step": 19210 + }, + { + "epoch": 4.27, + "learning_rate": 3.448768509561724e-05, + "loss": 1.1851, + "step": 19220 + }, + { + "epoch": 4.27, + "learning_rate": 3.4484708683681824e-05, + "loss": 1.1688, + "step": 19230 + }, + { + "epoch": 4.27, + "learning_rate": 3.448173227174641e-05, + "loss": 1.1615, + "step": 19240 + }, + { + "epoch": 4.27, + "learning_rate": 3.4478755859811005e-05, + "loss": 1.1908, + "step": 19250 + }, + { + "epoch": 4.28, + "learning_rate": 3.447577944787559e-05, + "loss": 1.1756, + "step": 19260 + }, + { + "epoch": 4.28, + "learning_rate": 3.447280303594018e-05, + "loss": 1.1748, + "step": 19270 + }, + { + "epoch": 4.28, + "learning_rate": 3.4469826624004765e-05, + "loss": 1.1755, + "step": 19280 + }, + { + "epoch": 4.28, + "learning_rate": 3.446685021206935e-05, + "loss": 1.1608, + "step": 19290 + }, + { + "epoch": 4.29, + "learning_rate": 3.446387380013394e-05, + "loss": 1.1552, + "step": 19300 + }, + { + "epoch": 4.29, + "learning_rate": 3.4460897388198526e-05, + "loss": 1.1696, + "step": 19310 + }, + { + "epoch": 4.29, + "learning_rate": 3.445792097626312e-05, + "loss": 1.1824, + "step": 19320 + }, + { + "epoch": 4.29, + "learning_rate": 3.4454944564327707e-05, + "loss": 1.1642, + "step": 19330 + }, + { + "epoch": 4.29, + "learning_rate": 3.4451968152392293e-05, + "loss": 1.1704, + "step": 19340 + }, + { + "epoch": 4.3, + "learning_rate": 3.444899174045688e-05, + "loss": 1.1752, + "step": 19350 + }, + { + "epoch": 4.3, + "learning_rate": 3.4446015328521474e-05, + "loss": 1.1658, + "step": 19360 + }, + { + "epoch": 4.3, + "learning_rate": 3.444303891658606e-05, + "loss": 1.1767, + "step": 19370 + }, + { + "epoch": 4.3, + "learning_rate": 3.444006250465064e-05, + "loss": 1.1934, + "step": 19380 + }, + { + "epoch": 4.31, + "learning_rate": 3.4437086092715235e-05, + "loss": 1.1613, + "step": 19390 + }, + { + "epoch": 4.31, + "learning_rate": 3.443410968077982e-05, + "loss": 1.1673, + "step": 19400 + }, + { + "epoch": 4.31, + "learning_rate": 3.443113326884441e-05, + "loss": 1.1737, + "step": 19410 + }, + { + "epoch": 4.31, + "learning_rate": 3.4428156856908995e-05, + "loss": 1.1716, + "step": 19420 + }, + { + "epoch": 4.31, + "learning_rate": 3.442518044497359e-05, + "loss": 1.1662, + "step": 19430 + }, + { + "epoch": 4.32, + "learning_rate": 3.4422204033038176e-05, + "loss": 1.1708, + "step": 19440 + }, + { + "epoch": 4.32, + "learning_rate": 3.441922762110276e-05, + "loss": 1.1701, + "step": 19450 + }, + { + "epoch": 4.32, + "learning_rate": 3.441625120916735e-05, + "loss": 1.1694, + "step": 19460 + }, + { + "epoch": 4.32, + "learning_rate": 3.4413274797231943e-05, + "loss": 1.1693, + "step": 19470 + }, + { + "epoch": 4.33, + "learning_rate": 3.441029838529653e-05, + "loss": 1.1683, + "step": 19480 + }, + { + "epoch": 4.33, + "learning_rate": 3.440732197336112e-05, + "loss": 1.1715, + "step": 19490 + }, + { + "epoch": 4.33, + "learning_rate": 3.4404345561425704e-05, + "loss": 1.1773, + "step": 19500 + }, + { + "epoch": 4.33, + "learning_rate": 3.440136914949029e-05, + "loss": 1.1767, + "step": 19510 + }, + { + "epoch": 4.33, + "learning_rate": 3.439839273755488e-05, + "loss": 1.1708, + "step": 19520 + }, + { + "epoch": 4.34, + "learning_rate": 3.439541632561947e-05, + "loss": 1.1694, + "step": 19530 + }, + { + "epoch": 4.34, + "learning_rate": 3.439243991368406e-05, + "loss": 1.1663, + "step": 19540 + }, + { + "epoch": 4.34, + "learning_rate": 3.4389463501748645e-05, + "loss": 1.1723, + "step": 19550 + }, + { + "epoch": 4.34, + "learning_rate": 3.438648708981323e-05, + "loss": 1.1858, + "step": 19560 + }, + { + "epoch": 4.35, + "learning_rate": 3.4383510677877826e-05, + "loss": 1.1757, + "step": 19570 + }, + { + "epoch": 4.35, + "learning_rate": 3.438053426594241e-05, + "loss": 1.1613, + "step": 19580 + }, + { + "epoch": 4.35, + "learning_rate": 3.437755785400699e-05, + "loss": 1.1707, + "step": 19590 + }, + { + "epoch": 4.35, + "learning_rate": 3.437458144207159e-05, + "loss": 1.1518, + "step": 19600 + }, + { + "epoch": 4.35, + "learning_rate": 3.4371605030136174e-05, + "loss": 1.1791, + "step": 19610 + }, + { + "epoch": 4.36, + "learning_rate": 3.436862861820076e-05, + "loss": 1.1797, + "step": 19620 + }, + { + "epoch": 4.36, + "learning_rate": 3.436565220626535e-05, + "loss": 1.1757, + "step": 19630 + }, + { + "epoch": 4.36, + "learning_rate": 3.436267579432994e-05, + "loss": 1.1853, + "step": 19640 + }, + { + "epoch": 4.36, + "learning_rate": 3.435969938239453e-05, + "loss": 1.1791, + "step": 19650 + }, + { + "epoch": 4.37, + "learning_rate": 3.4356722970459115e-05, + "loss": 1.1723, + "step": 19660 + }, + { + "epoch": 4.37, + "learning_rate": 3.43537465585237e-05, + "loss": 1.1691, + "step": 19670 + }, + { + "epoch": 4.37, + "learning_rate": 3.4350770146588295e-05, + "loss": 1.1587, + "step": 19680 + }, + { + "epoch": 4.37, + "learning_rate": 3.4347793734652876e-05, + "loss": 1.1735, + "step": 19690 + }, + { + "epoch": 4.37, + "learning_rate": 3.434481732271746e-05, + "loss": 1.1756, + "step": 19700 + }, + { + "epoch": 4.38, + "learning_rate": 3.4341840910782056e-05, + "loss": 1.1721, + "step": 19710 + }, + { + "epoch": 4.38, + "learning_rate": 3.433886449884664e-05, + "loss": 1.1581, + "step": 19720 + }, + { + "epoch": 4.38, + "learning_rate": 3.433588808691123e-05, + "loss": 1.167, + "step": 19730 + }, + { + "epoch": 4.38, + "learning_rate": 3.433291167497582e-05, + "loss": 1.1686, + "step": 19740 + }, + { + "epoch": 4.39, + "learning_rate": 3.432993526304041e-05, + "loss": 1.1586, + "step": 19750 + }, + { + "epoch": 4.39, + "learning_rate": 3.4326958851105e-05, + "loss": 1.1686, + "step": 19760 + }, + { + "epoch": 4.39, + "learning_rate": 3.4323982439169584e-05, + "loss": 1.1681, + "step": 19770 + }, + { + "epoch": 4.39, + "learning_rate": 3.432100602723417e-05, + "loss": 1.1702, + "step": 19780 + }, + { + "epoch": 4.39, + "learning_rate": 3.431802961529876e-05, + "loss": 1.156, + "step": 19790 + }, + { + "epoch": 4.4, + "learning_rate": 3.4315053203363345e-05, + "loss": 1.1697, + "step": 19800 + }, + { + "epoch": 4.4, + "learning_rate": 3.431207679142793e-05, + "loss": 1.1781, + "step": 19810 + }, + { + "epoch": 4.4, + "learning_rate": 3.4309100379492526e-05, + "loss": 1.1698, + "step": 19820 + }, + { + "epoch": 4.4, + "learning_rate": 3.430612396755711e-05, + "loss": 1.1773, + "step": 19830 + }, + { + "epoch": 4.41, + "learning_rate": 3.43031475556217e-05, + "loss": 1.17, + "step": 19840 + }, + { + "epoch": 4.41, + "learning_rate": 3.4300171143686286e-05, + "loss": 1.1767, + "step": 19850 + }, + { + "epoch": 4.41, + "learning_rate": 3.429719473175088e-05, + "loss": 1.1653, + "step": 19860 + }, + { + "epoch": 4.41, + "learning_rate": 3.429421831981547e-05, + "loss": 1.1655, + "step": 19870 + }, + { + "epoch": 4.41, + "learning_rate": 3.4291241907880054e-05, + "loss": 1.1661, + "step": 19880 + }, + { + "epoch": 4.42, + "learning_rate": 3.428826549594464e-05, + "loss": 1.1701, + "step": 19890 + }, + { + "epoch": 4.42, + "learning_rate": 3.428528908400923e-05, + "loss": 1.1626, + "step": 19900 + }, + { + "epoch": 4.42, + "learning_rate": 3.4282312672073814e-05, + "loss": 1.1675, + "step": 19910 + }, + { + "epoch": 4.42, + "learning_rate": 3.427933626013841e-05, + "loss": 1.1736, + "step": 19920 + }, + { + "epoch": 4.43, + "learning_rate": 3.4276359848202995e-05, + "loss": 1.1588, + "step": 19930 + }, + { + "epoch": 4.43, + "learning_rate": 3.427338343626758e-05, + "loss": 1.1791, + "step": 19940 + }, + { + "epoch": 4.43, + "learning_rate": 3.427040702433217e-05, + "loss": 1.1579, + "step": 19950 + }, + { + "epoch": 4.43, + "learning_rate": 3.426743061239676e-05, + "loss": 1.1736, + "step": 19960 + }, + { + "epoch": 4.43, + "learning_rate": 3.426445420046135e-05, + "loss": 1.1538, + "step": 19970 + }, + { + "epoch": 4.44, + "learning_rate": 3.4261477788525936e-05, + "loss": 1.1696, + "step": 19980 + }, + { + "epoch": 4.44, + "learning_rate": 3.425850137659052e-05, + "loss": 1.1579, + "step": 19990 + }, + { + "epoch": 4.44, + "learning_rate": 3.425552496465511e-05, + "loss": 1.1591, + "step": 20000 + }, + { + "epoch": 4.44, + "learning_rate": 3.42525485527197e-05, + "loss": 1.1802, + "step": 20010 + }, + { + "epoch": 4.45, + "learning_rate": 3.4249572140784284e-05, + "loss": 1.1778, + "step": 20020 + }, + { + "epoch": 4.45, + "learning_rate": 3.424659572884888e-05, + "loss": 1.1851, + "step": 20030 + }, + { + "epoch": 4.45, + "learning_rate": 3.4243619316913464e-05, + "loss": 1.1631, + "step": 20040 + }, + { + "epoch": 4.45, + "learning_rate": 3.424064290497805e-05, + "loss": 1.1571, + "step": 20050 + }, + { + "epoch": 4.45, + "learning_rate": 3.423766649304264e-05, + "loss": 1.1596, + "step": 20060 + }, + { + "epoch": 4.46, + "learning_rate": 3.423469008110723e-05, + "loss": 1.1552, + "step": 20070 + }, + { + "epoch": 4.46, + "learning_rate": 3.423171366917182e-05, + "loss": 1.1715, + "step": 20080 + }, + { + "epoch": 4.46, + "learning_rate": 3.4228737257236406e-05, + "loss": 1.1604, + "step": 20090 + }, + { + "epoch": 4.46, + "learning_rate": 3.422576084530099e-05, + "loss": 1.1717, + "step": 20100 + }, + { + "epoch": 4.47, + "learning_rate": 3.422278443336558e-05, + "loss": 1.1684, + "step": 20110 + }, + { + "epoch": 4.47, + "learning_rate": 3.4219808021430166e-05, + "loss": 1.1566, + "step": 20120 + }, + { + "epoch": 4.47, + "learning_rate": 3.421683160949475e-05, + "loss": 1.1836, + "step": 20130 + }, + { + "epoch": 4.47, + "learning_rate": 3.421385519755935e-05, + "loss": 1.1687, + "step": 20140 + }, + { + "epoch": 4.47, + "learning_rate": 3.4210878785623934e-05, + "loss": 1.1715, + "step": 20150 + }, + { + "epoch": 4.48, + "learning_rate": 3.420790237368852e-05, + "loss": 1.1726, + "step": 20160 + }, + { + "epoch": 4.48, + "learning_rate": 3.420492596175311e-05, + "loss": 1.1541, + "step": 20170 + }, + { + "epoch": 4.48, + "learning_rate": 3.42019495498177e-05, + "loss": 1.1746, + "step": 20180 + }, + { + "epoch": 4.48, + "learning_rate": 3.419897313788229e-05, + "loss": 1.1821, + "step": 20190 + }, + { + "epoch": 4.49, + "learning_rate": 3.419599672594687e-05, + "loss": 1.1599, + "step": 20200 + }, + { + "epoch": 4.49, + "learning_rate": 3.419302031401146e-05, + "loss": 1.1647, + "step": 20210 + }, + { + "epoch": 4.49, + "learning_rate": 3.419004390207605e-05, + "loss": 1.1622, + "step": 20220 + }, + { + "epoch": 4.49, + "learning_rate": 3.4187067490140636e-05, + "loss": 1.175, + "step": 20230 + }, + { + "epoch": 4.49, + "learning_rate": 3.418409107820522e-05, + "loss": 1.1709, + "step": 20240 + }, + { + "epoch": 4.5, + "learning_rate": 3.4181114666269816e-05, + "loss": 1.1749, + "step": 20250 + }, + { + "epoch": 4.5, + "learning_rate": 3.41781382543344e-05, + "loss": 1.1683, + "step": 20260 + }, + { + "epoch": 4.5, + "learning_rate": 3.417516184239899e-05, + "loss": 1.1703, + "step": 20270 + }, + { + "epoch": 4.5, + "learning_rate": 3.417218543046358e-05, + "loss": 1.1715, + "step": 20280 + }, + { + "epoch": 4.51, + "learning_rate": 3.416920901852817e-05, + "loss": 1.1696, + "step": 20290 + }, + { + "epoch": 4.51, + "learning_rate": 3.416623260659276e-05, + "loss": 1.1645, + "step": 20300 + }, + { + "epoch": 4.51, + "learning_rate": 3.4163256194657345e-05, + "loss": 1.1626, + "step": 20310 + }, + { + "epoch": 4.51, + "learning_rate": 3.416027978272193e-05, + "loss": 1.1765, + "step": 20320 + }, + { + "epoch": 4.51, + "learning_rate": 3.415730337078652e-05, + "loss": 1.1737, + "step": 20330 + }, + { + "epoch": 4.52, + "learning_rate": 3.4154326958851105e-05, + "loss": 1.1885, + "step": 20340 + }, + { + "epoch": 4.52, + "learning_rate": 3.41513505469157e-05, + "loss": 1.1606, + "step": 20350 + }, + { + "epoch": 4.52, + "learning_rate": 3.4148374134980286e-05, + "loss": 1.1574, + "step": 20360 + }, + { + "epoch": 4.52, + "learning_rate": 3.414539772304487e-05, + "loss": 1.1705, + "step": 20370 + }, + { + "epoch": 4.53, + "learning_rate": 3.414242131110946e-05, + "loss": 1.1627, + "step": 20380 + }, + { + "epoch": 4.53, + "learning_rate": 3.413944489917405e-05, + "loss": 1.1736, + "step": 20390 + }, + { + "epoch": 4.53, + "learning_rate": 3.413646848723864e-05, + "loss": 1.1619, + "step": 20400 + }, + { + "epoch": 4.53, + "learning_rate": 3.413349207530322e-05, + "loss": 1.1779, + "step": 20410 + }, + { + "epoch": 4.53, + "learning_rate": 3.4130515663367814e-05, + "loss": 1.1516, + "step": 20420 + }, + { + "epoch": 4.54, + "learning_rate": 3.41275392514324e-05, + "loss": 1.1565, + "step": 20430 + }, + { + "epoch": 4.54, + "learning_rate": 3.412456283949699e-05, + "loss": 1.1675, + "step": 20440 + }, + { + "epoch": 4.54, + "learning_rate": 3.4121586427561575e-05, + "loss": 1.1688, + "step": 20450 + }, + { + "epoch": 4.54, + "learning_rate": 3.411861001562617e-05, + "loss": 1.1661, + "step": 20460 + }, + { + "epoch": 4.55, + "learning_rate": 3.4115633603690755e-05, + "loss": 1.1707, + "step": 20470 + }, + { + "epoch": 4.55, + "learning_rate": 3.411265719175534e-05, + "loss": 1.1739, + "step": 20480 + }, + { + "epoch": 4.55, + "learning_rate": 3.410968077981993e-05, + "loss": 1.154, + "step": 20490 + }, + { + "epoch": 4.55, + "learning_rate": 3.410670436788452e-05, + "loss": 1.1518, + "step": 20500 + }, + { + "epoch": 4.55, + "learning_rate": 3.41037279559491e-05, + "loss": 1.1657, + "step": 20510 + }, + { + "epoch": 4.56, + "learning_rate": 3.410075154401369e-05, + "loss": 1.1515, + "step": 20520 + }, + { + "epoch": 4.56, + "learning_rate": 3.4097775132078284e-05, + "loss": 1.1588, + "step": 20530 + }, + { + "epoch": 4.56, + "learning_rate": 3.409479872014287e-05, + "loss": 1.1693, + "step": 20540 + }, + { + "epoch": 4.56, + "learning_rate": 3.409182230820746e-05, + "loss": 1.1618, + "step": 20550 + }, + { + "epoch": 4.57, + "learning_rate": 3.4088845896272044e-05, + "loss": 1.1539, + "step": 20560 + }, + { + "epoch": 4.57, + "learning_rate": 3.408586948433664e-05, + "loss": 1.1478, + "step": 20570 + }, + { + "epoch": 4.57, + "learning_rate": 3.4082893072401225e-05, + "loss": 1.165, + "step": 20580 + }, + { + "epoch": 4.57, + "learning_rate": 3.407991666046581e-05, + "loss": 1.1615, + "step": 20590 + }, + { + "epoch": 4.57, + "learning_rate": 3.40769402485304e-05, + "loss": 1.1758, + "step": 20600 + }, + { + "epoch": 4.58, + "learning_rate": 3.4073963836594985e-05, + "loss": 1.145, + "step": 20610 + }, + { + "epoch": 4.58, + "learning_rate": 3.407098742465957e-05, + "loss": 1.1712, + "step": 20620 + }, + { + "epoch": 4.58, + "learning_rate": 3.406801101272416e-05, + "loss": 1.156, + "step": 20630 + }, + { + "epoch": 4.58, + "learning_rate": 3.406503460078875e-05, + "loss": 1.1626, + "step": 20640 + }, + { + "epoch": 4.59, + "learning_rate": 3.406205818885334e-05, + "loss": 1.1486, + "step": 20650 + }, + { + "epoch": 4.59, + "learning_rate": 3.405908177691793e-05, + "loss": 1.1625, + "step": 20660 + }, + { + "epoch": 4.59, + "learning_rate": 3.4056105364982514e-05, + "loss": 1.1765, + "step": 20670 + }, + { + "epoch": 4.59, + "learning_rate": 3.405312895304711e-05, + "loss": 1.1678, + "step": 20680 + }, + { + "epoch": 4.59, + "learning_rate": 3.4050152541111694e-05, + "loss": 1.1692, + "step": 20690 + }, + { + "epoch": 4.6, + "learning_rate": 3.404717612917628e-05, + "loss": 1.1639, + "step": 20700 + }, + { + "epoch": 4.6, + "learning_rate": 3.404419971724087e-05, + "loss": 1.1538, + "step": 20710 + }, + { + "epoch": 4.6, + "learning_rate": 3.4041223305305455e-05, + "loss": 1.1631, + "step": 20720 + }, + { + "epoch": 4.6, + "learning_rate": 3.403824689337004e-05, + "loss": 1.1629, + "step": 20730 + }, + { + "epoch": 4.61, + "learning_rate": 3.4035270481434635e-05, + "loss": 1.1688, + "step": 20740 + }, + { + "epoch": 4.61, + "learning_rate": 3.403229406949922e-05, + "loss": 1.1798, + "step": 20750 + }, + { + "epoch": 4.61, + "learning_rate": 3.402931765756381e-05, + "loss": 1.1651, + "step": 20760 + }, + { + "epoch": 4.61, + "learning_rate": 3.4026341245628396e-05, + "loss": 1.1632, + "step": 20770 + }, + { + "epoch": 4.61, + "learning_rate": 3.402336483369299e-05, + "loss": 1.1786, + "step": 20780 + }, + { + "epoch": 4.62, + "learning_rate": 3.402038842175758e-05, + "loss": 1.1687, + "step": 20790 + }, + { + "epoch": 4.62, + "learning_rate": 3.4017412009822164e-05, + "loss": 1.1762, + "step": 20800 + }, + { + "epoch": 4.62, + "learning_rate": 3.401443559788675e-05, + "loss": 1.1486, + "step": 20810 + }, + { + "epoch": 4.62, + "learning_rate": 3.401145918595134e-05, + "loss": 1.1645, + "step": 20820 + }, + { + "epoch": 4.63, + "learning_rate": 3.4008482774015924e-05, + "loss": 1.17, + "step": 20830 + }, + { + "epoch": 4.63, + "learning_rate": 3.400550636208051e-05, + "loss": 1.1564, + "step": 20840 + }, + { + "epoch": 4.63, + "learning_rate": 3.4002529950145105e-05, + "loss": 1.1594, + "step": 20850 + }, + { + "epoch": 4.63, + "learning_rate": 3.399955353820969e-05, + "loss": 1.1767, + "step": 20860 + }, + { + "epoch": 4.63, + "learning_rate": 3.399657712627428e-05, + "loss": 1.1704, + "step": 20870 + }, + { + "epoch": 4.64, + "learning_rate": 3.3993600714338866e-05, + "loss": 1.1507, + "step": 20880 + }, + { + "epoch": 4.64, + "learning_rate": 3.399062430240346e-05, + "loss": 1.1579, + "step": 20890 + }, + { + "epoch": 4.64, + "learning_rate": 3.3987647890468046e-05, + "loss": 1.1656, + "step": 20900 + }, + { + "epoch": 4.64, + "learning_rate": 3.398467147853263e-05, + "loss": 1.1598, + "step": 20910 + }, + { + "epoch": 4.65, + "learning_rate": 3.398169506659722e-05, + "loss": 1.1595, + "step": 20920 + }, + { + "epoch": 4.65, + "learning_rate": 3.397871865466181e-05, + "loss": 1.1641, + "step": 20930 + }, + { + "epoch": 4.65, + "learning_rate": 3.3975742242726394e-05, + "loss": 1.161, + "step": 20940 + }, + { + "epoch": 4.65, + "learning_rate": 3.397276583079098e-05, + "loss": 1.1653, + "step": 20950 + }, + { + "epoch": 4.65, + "learning_rate": 3.3969789418855574e-05, + "loss": 1.1663, + "step": 20960 + }, + { + "epoch": 4.66, + "learning_rate": 3.396681300692016e-05, + "loss": 1.163, + "step": 20970 + }, + { + "epoch": 4.66, + "learning_rate": 3.396383659498475e-05, + "loss": 1.1634, + "step": 20980 + }, + { + "epoch": 4.66, + "learning_rate": 3.3960860183049335e-05, + "loss": 1.1775, + "step": 20990 + }, + { + "epoch": 4.66, + "learning_rate": 3.395788377111393e-05, + "loss": 1.1675, + "step": 21000 + }, + { + "epoch": 4.67, + "learning_rate": 3.3954907359178516e-05, + "loss": 1.1597, + "step": 21010 + }, + { + "epoch": 4.67, + "learning_rate": 3.3951930947243096e-05, + "loss": 1.167, + "step": 21020 + }, + { + "epoch": 4.67, + "learning_rate": 3.394895453530769e-05, + "loss": 1.1652, + "step": 21030 + }, + { + "epoch": 4.67, + "learning_rate": 3.3945978123372276e-05, + "loss": 1.1672, + "step": 21040 + }, + { + "epoch": 4.67, + "learning_rate": 3.394300171143686e-05, + "loss": 1.1776, + "step": 21050 + }, + { + "epoch": 4.68, + "learning_rate": 3.394002529950145e-05, + "loss": 1.1585, + "step": 21060 + }, + { + "epoch": 4.68, + "learning_rate": 3.3937048887566044e-05, + "loss": 1.1763, + "step": 21070 + }, + { + "epoch": 4.68, + "learning_rate": 3.393407247563063e-05, + "loss": 1.1698, + "step": 21080 + }, + { + "epoch": 4.68, + "learning_rate": 3.393109606369522e-05, + "loss": 1.175, + "step": 21090 + }, + { + "epoch": 4.69, + "learning_rate": 3.3928119651759805e-05, + "loss": 1.1681, + "step": 21100 + }, + { + "epoch": 4.69, + "learning_rate": 3.39251432398244e-05, + "loss": 1.1733, + "step": 21110 + }, + { + "epoch": 4.69, + "learning_rate": 3.392216682788898e-05, + "loss": 1.1633, + "step": 21120 + }, + { + "epoch": 4.69, + "learning_rate": 3.391919041595357e-05, + "loss": 1.1499, + "step": 21130 + }, + { + "epoch": 4.69, + "learning_rate": 3.391621400401816e-05, + "loss": 1.1702, + "step": 21140 + }, + { + "epoch": 4.7, + "learning_rate": 3.3913237592082746e-05, + "loss": 1.1688, + "step": 21150 + }, + { + "epoch": 4.7, + "learning_rate": 3.391026118014733e-05, + "loss": 1.1714, + "step": 21160 + }, + { + "epoch": 4.7, + "learning_rate": 3.3907284768211926e-05, + "loss": 1.1471, + "step": 21170 + }, + { + "epoch": 4.7, + "learning_rate": 3.390430835627651e-05, + "loss": 1.1679, + "step": 21180 + }, + { + "epoch": 4.71, + "learning_rate": 3.39013319443411e-05, + "loss": 1.1579, + "step": 21190 + }, + { + "epoch": 4.71, + "learning_rate": 3.389835553240569e-05, + "loss": 1.1676, + "step": 21200 + }, + { + "epoch": 4.71, + "learning_rate": 3.389537912047028e-05, + "loss": 1.156, + "step": 21210 + }, + { + "epoch": 4.71, + "learning_rate": 3.389240270853487e-05, + "loss": 1.1729, + "step": 21220 + }, + { + "epoch": 4.71, + "learning_rate": 3.388942629659945e-05, + "loss": 1.1583, + "step": 21230 + }, + { + "epoch": 4.72, + "learning_rate": 3.388644988466404e-05, + "loss": 1.1617, + "step": 21240 + }, + { + "epoch": 4.72, + "learning_rate": 3.388347347272863e-05, + "loss": 1.1568, + "step": 21250 + }, + { + "epoch": 4.72, + "learning_rate": 3.3880497060793215e-05, + "loss": 1.1668, + "step": 21260 + }, + { + "epoch": 4.72, + "learning_rate": 3.38775206488578e-05, + "loss": 1.1518, + "step": 21270 + }, + { + "epoch": 4.73, + "learning_rate": 3.3874544236922396e-05, + "loss": 1.1707, + "step": 21280 + }, + { + "epoch": 4.73, + "learning_rate": 3.387156782498698e-05, + "loss": 1.1532, + "step": 21290 + }, + { + "epoch": 4.73, + "learning_rate": 3.386859141305157e-05, + "loss": 1.154, + "step": 21300 + }, + { + "epoch": 4.73, + "learning_rate": 3.3865615001116156e-05, + "loss": 1.1614, + "step": 21310 + }, + { + "epoch": 4.73, + "learning_rate": 3.386263858918075e-05, + "loss": 1.1709, + "step": 21320 + }, + { + "epoch": 4.74, + "learning_rate": 3.385966217724533e-05, + "loss": 1.1752, + "step": 21330 + }, + { + "epoch": 4.74, + "learning_rate": 3.385668576530992e-05, + "loss": 1.1704, + "step": 21340 + }, + { + "epoch": 4.74, + "learning_rate": 3.385370935337451e-05, + "loss": 1.1442, + "step": 21350 + }, + { + "epoch": 4.74, + "learning_rate": 3.38507329414391e-05, + "loss": 1.1447, + "step": 21360 + }, + { + "epoch": 4.75, + "learning_rate": 3.3847756529503685e-05, + "loss": 1.165, + "step": 21370 + }, + { + "epoch": 4.75, + "learning_rate": 3.384478011756827e-05, + "loss": 1.1536, + "step": 21380 + }, + { + "epoch": 4.75, + "learning_rate": 3.3841803705632865e-05, + "loss": 1.1533, + "step": 21390 + }, + { + "epoch": 4.75, + "learning_rate": 3.383882729369745e-05, + "loss": 1.1403, + "step": 21400 + }, + { + "epoch": 4.75, + "learning_rate": 3.383585088176204e-05, + "loss": 1.1657, + "step": 21410 + }, + { + "epoch": 4.76, + "learning_rate": 3.3832874469826626e-05, + "loss": 1.1614, + "step": 21420 + }, + { + "epoch": 4.76, + "learning_rate": 3.382989805789121e-05, + "loss": 1.1558, + "step": 21430 + }, + { + "epoch": 4.76, + "learning_rate": 3.38269216459558e-05, + "loss": 1.1504, + "step": 21440 + }, + { + "epoch": 4.76, + "learning_rate": 3.382394523402039e-05, + "loss": 1.165, + "step": 21450 + }, + { + "epoch": 4.77, + "learning_rate": 3.382096882208498e-05, + "loss": 1.1629, + "step": 21460 + }, + { + "epoch": 4.77, + "learning_rate": 3.381799241014957e-05, + "loss": 1.1593, + "step": 21470 + }, + { + "epoch": 4.77, + "learning_rate": 3.3815015998214154e-05, + "loss": 1.1652, + "step": 21480 + }, + { + "epoch": 4.77, + "learning_rate": 3.381203958627874e-05, + "loss": 1.1665, + "step": 21490 + }, + { + "epoch": 4.77, + "learning_rate": 3.3809063174343335e-05, + "loss": 1.1583, + "step": 21500 + }, + { + "epoch": 4.78, + "learning_rate": 3.380608676240792e-05, + "loss": 1.1689, + "step": 21510 + }, + { + "epoch": 4.78, + "learning_rate": 3.380311035047251e-05, + "loss": 1.1625, + "step": 21520 + }, + { + "epoch": 4.78, + "learning_rate": 3.3800133938537095e-05, + "loss": 1.1563, + "step": 21530 + }, + { + "epoch": 4.78, + "learning_rate": 3.379715752660168e-05, + "loss": 1.1666, + "step": 21540 + }, + { + "epoch": 4.79, + "learning_rate": 3.379418111466627e-05, + "loss": 1.1712, + "step": 21550 + }, + { + "epoch": 4.79, + "learning_rate": 3.379120470273086e-05, + "loss": 1.1518, + "step": 21560 + }, + { + "epoch": 4.79, + "learning_rate": 3.378822829079545e-05, + "loss": 1.1574, + "step": 21570 + }, + { + "epoch": 4.79, + "learning_rate": 3.378525187886004e-05, + "loss": 1.1527, + "step": 21580 + }, + { + "epoch": 4.79, + "learning_rate": 3.3782275466924624e-05, + "loss": 1.1538, + "step": 21590 + }, + { + "epoch": 4.8, + "learning_rate": 3.377929905498922e-05, + "loss": 1.166, + "step": 21600 + }, + { + "epoch": 4.8, + "learning_rate": 3.3776322643053804e-05, + "loss": 1.1724, + "step": 21610 + }, + { + "epoch": 4.8, + "learning_rate": 3.377334623111839e-05, + "loss": 1.16, + "step": 21620 + }, + { + "epoch": 4.8, + "learning_rate": 3.377036981918298e-05, + "loss": 1.1779, + "step": 21630 + }, + { + "epoch": 4.81, + "learning_rate": 3.3767393407247565e-05, + "loss": 1.1547, + "step": 21640 + }, + { + "epoch": 4.81, + "learning_rate": 3.376441699531215e-05, + "loss": 1.1609, + "step": 21650 + }, + { + "epoch": 4.81, + "learning_rate": 3.376144058337674e-05, + "loss": 1.1595, + "step": 21660 + }, + { + "epoch": 4.81, + "learning_rate": 3.375846417144133e-05, + "loss": 1.1668, + "step": 21670 + }, + { + "epoch": 4.81, + "learning_rate": 3.375548775950592e-05, + "loss": 1.1654, + "step": 21680 + }, + { + "epoch": 4.82, + "learning_rate": 3.3752511347570506e-05, + "loss": 1.1546, + "step": 21690 + }, + { + "epoch": 4.82, + "learning_rate": 3.374953493563509e-05, + "loss": 1.1769, + "step": 21700 + }, + { + "epoch": 4.82, + "learning_rate": 3.374655852369969e-05, + "loss": 1.1642, + "step": 21710 + }, + { + "epoch": 4.82, + "learning_rate": 3.3743582111764274e-05, + "loss": 1.181, + "step": 21720 + }, + { + "epoch": 4.83, + "learning_rate": 3.374060569982886e-05, + "loss": 1.1638, + "step": 21730 + }, + { + "epoch": 4.83, + "learning_rate": 3.373762928789345e-05, + "loss": 1.1493, + "step": 21740 + }, + { + "epoch": 4.83, + "learning_rate": 3.3734652875958034e-05, + "loss": 1.1642, + "step": 21750 + }, + { + "epoch": 4.83, + "learning_rate": 3.373167646402262e-05, + "loss": 1.1626, + "step": 21760 + }, + { + "epoch": 4.83, + "learning_rate": 3.372870005208721e-05, + "loss": 1.1516, + "step": 21770 + }, + { + "epoch": 4.84, + "learning_rate": 3.37257236401518e-05, + "loss": 1.1547, + "step": 21780 + }, + { + "epoch": 4.84, + "learning_rate": 3.372274722821639e-05, + "loss": 1.1502, + "step": 21790 + }, + { + "epoch": 4.84, + "learning_rate": 3.3719770816280976e-05, + "loss": 1.1574, + "step": 21800 + }, + { + "epoch": 4.84, + "learning_rate": 3.371679440434556e-05, + "loss": 1.1575, + "step": 21810 + }, + { + "epoch": 4.85, + "learning_rate": 3.3713817992410156e-05, + "loss": 1.1446, + "step": 21820 + }, + { + "epoch": 4.85, + "learning_rate": 3.371084158047474e-05, + "loss": 1.1827, + "step": 21830 + }, + { + "epoch": 4.85, + "learning_rate": 3.370786516853933e-05, + "loss": 1.1777, + "step": 21840 + }, + { + "epoch": 4.85, + "learning_rate": 3.370488875660392e-05, + "loss": 1.1527, + "step": 21850 + }, + { + "epoch": 4.85, + "learning_rate": 3.3701912344668504e-05, + "loss": 1.15, + "step": 21860 + }, + { + "epoch": 4.86, + "learning_rate": 3.369893593273309e-05, + "loss": 1.1619, + "step": 21870 + }, + { + "epoch": 4.86, + "learning_rate": 3.369595952079768e-05, + "loss": 1.1691, + "step": 21880 + }, + { + "epoch": 4.86, + "learning_rate": 3.369298310886227e-05, + "loss": 1.1766, + "step": 21890 + }, + { + "epoch": 4.86, + "learning_rate": 3.369000669692686e-05, + "loss": 1.1617, + "step": 21900 + }, + { + "epoch": 4.87, + "learning_rate": 3.3687030284991445e-05, + "loss": 1.1659, + "step": 21910 + }, + { + "epoch": 4.87, + "learning_rate": 3.368405387305603e-05, + "loss": 1.157, + "step": 21920 + }, + { + "epoch": 4.87, + "learning_rate": 3.3681077461120626e-05, + "loss": 1.1624, + "step": 21930 + }, + { + "epoch": 4.87, + "learning_rate": 3.3678101049185206e-05, + "loss": 1.1662, + "step": 21940 + }, + { + "epoch": 4.87, + "learning_rate": 3.36751246372498e-05, + "loss": 1.1439, + "step": 21950 + }, + { + "epoch": 4.88, + "learning_rate": 3.3672148225314386e-05, + "loss": 1.1533, + "step": 21960 + }, + { + "epoch": 4.88, + "learning_rate": 3.366917181337897e-05, + "loss": 1.1605, + "step": 21970 + }, + { + "epoch": 4.88, + "learning_rate": 3.366619540144356e-05, + "loss": 1.1663, + "step": 21980 + }, + { + "epoch": 4.88, + "learning_rate": 3.3663218989508154e-05, + "loss": 1.1563, + "step": 21990 + }, + { + "epoch": 4.89, + "learning_rate": 3.366024257757274e-05, + "loss": 1.1674, + "step": 22000 + }, + { + "epoch": 4.89, + "learning_rate": 3.365726616563733e-05, + "loss": 1.1707, + "step": 22010 + }, + { + "epoch": 4.89, + "learning_rate": 3.3654289753701914e-05, + "loss": 1.168, + "step": 22020 + }, + { + "epoch": 4.89, + "learning_rate": 3.365131334176651e-05, + "loss": 1.1596, + "step": 22030 + }, + { + "epoch": 4.89, + "learning_rate": 3.3648336929831095e-05, + "loss": 1.158, + "step": 22040 + }, + { + "epoch": 4.9, + "learning_rate": 3.3645360517895675e-05, + "loss": 1.1365, + "step": 22050 + }, + { + "epoch": 4.9, + "learning_rate": 3.364238410596027e-05, + "loss": 1.155, + "step": 22060 + }, + { + "epoch": 4.9, + "learning_rate": 3.3639407694024856e-05, + "loss": 1.1576, + "step": 22070 + }, + { + "epoch": 4.9, + "learning_rate": 3.363643128208944e-05, + "loss": 1.1541, + "step": 22080 + }, + { + "epoch": 4.91, + "learning_rate": 3.363345487015403e-05, + "loss": 1.1624, + "step": 22090 + }, + { + "epoch": 4.91, + "learning_rate": 3.363047845821862e-05, + "loss": 1.1842, + "step": 22100 + }, + { + "epoch": 4.91, + "learning_rate": 3.362750204628321e-05, + "loss": 1.1597, + "step": 22110 + }, + { + "epoch": 4.91, + "learning_rate": 3.36245256343478e-05, + "loss": 1.1577, + "step": 22120 + }, + { + "epoch": 4.91, + "learning_rate": 3.3621549222412384e-05, + "loss": 1.158, + "step": 22130 + }, + { + "epoch": 4.92, + "learning_rate": 3.361857281047698e-05, + "loss": 1.1535, + "step": 22140 + }, + { + "epoch": 4.92, + "learning_rate": 3.361559639854156e-05, + "loss": 1.1613, + "step": 22150 + }, + { + "epoch": 4.92, + "learning_rate": 3.3612619986606145e-05, + "loss": 1.1599, + "step": 22160 + }, + { + "epoch": 4.92, + "learning_rate": 3.360964357467074e-05, + "loss": 1.1409, + "step": 22170 + }, + { + "epoch": 4.93, + "learning_rate": 3.3606667162735325e-05, + "loss": 1.166, + "step": 22180 + }, + { + "epoch": 4.93, + "learning_rate": 3.360369075079991e-05, + "loss": 1.1574, + "step": 22190 + }, + { + "epoch": 4.93, + "learning_rate": 3.36007143388645e-05, + "loss": 1.1538, + "step": 22200 + }, + { + "epoch": 4.93, + "learning_rate": 3.359773792692909e-05, + "loss": 1.1596, + "step": 22210 + }, + { + "epoch": 4.93, + "learning_rate": 3.359476151499368e-05, + "loss": 1.1556, + "step": 22220 + }, + { + "epoch": 4.94, + "learning_rate": 3.3591785103058266e-05, + "loss": 1.142, + "step": 22230 + }, + { + "epoch": 4.94, + "learning_rate": 3.358880869112285e-05, + "loss": 1.1517, + "step": 22240 + }, + { + "epoch": 4.94, + "learning_rate": 3.358583227918744e-05, + "loss": 1.1535, + "step": 22250 + }, + { + "epoch": 4.94, + "learning_rate": 3.358285586725203e-05, + "loss": 1.1592, + "step": 22260 + }, + { + "epoch": 4.95, + "learning_rate": 3.357987945531662e-05, + "loss": 1.1488, + "step": 22270 + }, + { + "epoch": 4.95, + "learning_rate": 3.357690304338121e-05, + "loss": 1.16, + "step": 22280 + }, + { + "epoch": 4.95, + "learning_rate": 3.3573926631445795e-05, + "loss": 1.158, + "step": 22290 + }, + { + "epoch": 4.95, + "learning_rate": 3.357095021951038e-05, + "loss": 1.1436, + "step": 22300 + }, + { + "epoch": 4.95, + "learning_rate": 3.356797380757497e-05, + "loss": 1.1768, + "step": 22310 + }, + { + "epoch": 4.96, + "learning_rate": 3.356499739563956e-05, + "loss": 1.1643, + "step": 22320 + }, + { + "epoch": 4.96, + "learning_rate": 3.356202098370415e-05, + "loss": 1.1461, + "step": 22330 + }, + { + "epoch": 4.96, + "learning_rate": 3.3559044571768736e-05, + "loss": 1.166, + "step": 22340 + }, + { + "epoch": 4.96, + "learning_rate": 3.355606815983332e-05, + "loss": 1.1469, + "step": 22350 + }, + { + "epoch": 4.97, + "learning_rate": 3.355309174789791e-05, + "loss": 1.159, + "step": 22360 + }, + { + "epoch": 4.97, + "learning_rate": 3.3550115335962497e-05, + "loss": 1.1619, + "step": 22370 + }, + { + "epoch": 4.97, + "learning_rate": 3.354713892402709e-05, + "loss": 1.1669, + "step": 22380 + }, + { + "epoch": 4.97, + "learning_rate": 3.354416251209168e-05, + "loss": 1.1526, + "step": 22390 + }, + { + "epoch": 4.97, + "learning_rate": 3.3541186100156264e-05, + "loss": 1.1698, + "step": 22400 + }, + { + "epoch": 4.98, + "learning_rate": 3.353820968822085e-05, + "loss": 1.153, + "step": 22410 + }, + { + "epoch": 4.98, + "learning_rate": 3.3535233276285445e-05, + "loss": 1.1518, + "step": 22420 + }, + { + "epoch": 4.98, + "learning_rate": 3.353225686435003e-05, + "loss": 1.1611, + "step": 22430 + }, + { + "epoch": 4.98, + "learning_rate": 3.352928045241462e-05, + "loss": 1.1491, + "step": 22440 + }, + { + "epoch": 4.99, + "learning_rate": 3.3526304040479205e-05, + "loss": 1.1524, + "step": 22450 + }, + { + "epoch": 4.99, + "learning_rate": 3.352332762854379e-05, + "loss": 1.166, + "step": 22460 + }, + { + "epoch": 4.99, + "learning_rate": 3.352035121660838e-05, + "loss": 1.1317, + "step": 22470 + }, + { + "epoch": 4.99, + "learning_rate": 3.3517374804672966e-05, + "loss": 1.1352, + "step": 22480 + }, + { + "epoch": 4.99, + "learning_rate": 3.351439839273756e-05, + "loss": 1.1552, + "step": 22490 + }, + { + "epoch": 5.0, + "learning_rate": 3.3511421980802147e-05, + "loss": 1.1499, + "step": 22500 + }, + { + "epoch": 5.0, + "learning_rate": 3.3508445568866733e-05, + "loss": 1.162, + "step": 22510 + }, + { + "epoch": 5.0, + "eval_cer": 4.760683760683761, + "eval_loss": 1.123809576034546, + "eval_runtime": 5.6237, + "eval_samples_per_second": 1.778, + "eval_steps_per_second": 0.178, + "eval_wer": 0.987012987012987, + "step": 22515 + }, + { + "epoch": 5.0, + "learning_rate": 3.350546915693132e-05, + "loss": 1.1466, + "step": 22520 + }, + { + "epoch": 5.0, + "learning_rate": 3.3502492744995914e-05, + "loss": 1.1569, + "step": 22530 + }, + { + "epoch": 5.01, + "learning_rate": 3.34995163330605e-05, + "loss": 1.158, + "step": 22540 + }, + { + "epoch": 5.01, + "learning_rate": 3.349653992112509e-05, + "loss": 1.1414, + "step": 22550 + }, + { + "epoch": 5.01, + "learning_rate": 3.3493563509189675e-05, + "loss": 1.1489, + "step": 22560 + }, + { + "epoch": 5.01, + "learning_rate": 3.349058709725426e-05, + "loss": 1.1484, + "step": 22570 + }, + { + "epoch": 5.01, + "learning_rate": 3.348761068531885e-05, + "loss": 1.1474, + "step": 22580 + }, + { + "epoch": 5.02, + "learning_rate": 3.3484634273383435e-05, + "loss": 1.1479, + "step": 22590 + }, + { + "epoch": 5.02, + "learning_rate": 3.348165786144803e-05, + "loss": 1.1599, + "step": 22600 + }, + { + "epoch": 5.02, + "learning_rate": 3.3478681449512616e-05, + "loss": 1.158, + "step": 22610 + }, + { + "epoch": 5.02, + "learning_rate": 3.34757050375772e-05, + "loss": 1.1658, + "step": 22620 + }, + { + "epoch": 5.03, + "learning_rate": 3.347272862564179e-05, + "loss": 1.1425, + "step": 22630 + }, + { + "epoch": 5.03, + "learning_rate": 3.3469752213706383e-05, + "loss": 1.1537, + "step": 22640 + }, + { + "epoch": 5.03, + "learning_rate": 3.346677580177097e-05, + "loss": 1.1634, + "step": 22650 + }, + { + "epoch": 5.03, + "learning_rate": 3.346379938983556e-05, + "loss": 1.1563, + "step": 22660 + }, + { + "epoch": 5.03, + "learning_rate": 3.3460822977900144e-05, + "loss": 1.1562, + "step": 22670 + }, + { + "epoch": 5.04, + "learning_rate": 3.345784656596473e-05, + "loss": 1.1524, + "step": 22680 + }, + { + "epoch": 5.04, + "learning_rate": 3.345487015402932e-05, + "loss": 1.155, + "step": 22690 + }, + { + "epoch": 5.04, + "learning_rate": 3.345189374209391e-05, + "loss": 1.1502, + "step": 22700 + }, + { + "epoch": 5.04, + "learning_rate": 3.34489173301585e-05, + "loss": 1.1406, + "step": 22710 + }, + { + "epoch": 5.05, + "learning_rate": 3.3445940918223085e-05, + "loss": 1.1537, + "step": 22720 + }, + { + "epoch": 5.05, + "learning_rate": 3.344296450628767e-05, + "loss": 1.1635, + "step": 22730 + }, + { + "epoch": 5.05, + "learning_rate": 3.343998809435226e-05, + "loss": 1.1565, + "step": 22740 + }, + { + "epoch": 5.05, + "learning_rate": 3.343701168241685e-05, + "loss": 1.1613, + "step": 22750 + }, + { + "epoch": 5.05, + "learning_rate": 3.343403527048143e-05, + "loss": 1.1417, + "step": 22760 + }, + { + "epoch": 5.06, + "learning_rate": 3.343105885854603e-05, + "loss": 1.1454, + "step": 22770 + }, + { + "epoch": 5.06, + "learning_rate": 3.3428082446610614e-05, + "loss": 1.1471, + "step": 22780 + }, + { + "epoch": 5.06, + "learning_rate": 3.34251060346752e-05, + "loss": 1.1529, + "step": 22790 + }, + { + "epoch": 5.06, + "learning_rate": 3.342212962273979e-05, + "loss": 1.1473, + "step": 22800 + }, + { + "epoch": 5.07, + "learning_rate": 3.341915321080438e-05, + "loss": 1.1526, + "step": 22810 + }, + { + "epoch": 5.07, + "learning_rate": 3.341617679886897e-05, + "loss": 1.1469, + "step": 22820 + }, + { + "epoch": 5.07, + "learning_rate": 3.3413200386933555e-05, + "loss": 1.1453, + "step": 22830 + }, + { + "epoch": 5.07, + "learning_rate": 3.341022397499814e-05, + "loss": 1.1496, + "step": 22840 + }, + { + "epoch": 5.07, + "learning_rate": 3.3407247563062735e-05, + "loss": 1.1428, + "step": 22850 + }, + { + "epoch": 5.08, + "learning_rate": 3.3404271151127316e-05, + "loss": 1.1599, + "step": 22860 + }, + { + "epoch": 5.08, + "learning_rate": 3.34012947391919e-05, + "loss": 1.146, + "step": 22870 + }, + { + "epoch": 5.08, + "learning_rate": 3.3398318327256496e-05, + "loss": 1.162, + "step": 22880 + }, + { + "epoch": 5.08, + "learning_rate": 3.339534191532108e-05, + "loss": 1.1636, + "step": 22890 + }, + { + "epoch": 5.09, + "learning_rate": 3.339236550338567e-05, + "loss": 1.1418, + "step": 22900 + }, + { + "epoch": 5.09, + "learning_rate": 3.338938909145026e-05, + "loss": 1.1663, + "step": 22910 + }, + { + "epoch": 5.09, + "learning_rate": 3.338641267951485e-05, + "loss": 1.1332, + "step": 22920 + }, + { + "epoch": 5.09, + "learning_rate": 3.338343626757944e-05, + "loss": 1.1467, + "step": 22930 + }, + { + "epoch": 5.09, + "learning_rate": 3.3380459855644024e-05, + "loss": 1.1534, + "step": 22940 + }, + { + "epoch": 5.1, + "learning_rate": 3.337748344370861e-05, + "loss": 1.1384, + "step": 22950 + }, + { + "epoch": 5.1, + "learning_rate": 3.3374507031773205e-05, + "loss": 1.1498, + "step": 22960 + }, + { + "epoch": 5.1, + "learning_rate": 3.3371530619837785e-05, + "loss": 1.1464, + "step": 22970 + }, + { + "epoch": 5.1, + "learning_rate": 3.336855420790237e-05, + "loss": 1.1468, + "step": 22980 + }, + { + "epoch": 5.11, + "learning_rate": 3.3365577795966966e-05, + "loss": 1.1502, + "step": 22990 + }, + { + "epoch": 5.11, + "learning_rate": 3.336260138403155e-05, + "loss": 1.1702, + "step": 23000 + }, + { + "epoch": 5.11, + "learning_rate": 3.335962497209614e-05, + "loss": 1.1491, + "step": 23010 + }, + { + "epoch": 5.11, + "learning_rate": 3.3356648560160726e-05, + "loss": 1.1614, + "step": 23020 + }, + { + "epoch": 5.11, + "learning_rate": 3.335367214822532e-05, + "loss": 1.1513, + "step": 23030 + }, + { + "epoch": 5.12, + "learning_rate": 3.335069573628991e-05, + "loss": 1.1492, + "step": 23040 + }, + { + "epoch": 5.12, + "learning_rate": 3.3347719324354494e-05, + "loss": 1.1478, + "step": 23050 + }, + { + "epoch": 5.12, + "learning_rate": 3.334474291241908e-05, + "loss": 1.1497, + "step": 23060 + }, + { + "epoch": 5.12, + "learning_rate": 3.334176650048367e-05, + "loss": 1.1429, + "step": 23070 + }, + { + "epoch": 5.13, + "learning_rate": 3.3338790088548254e-05, + "loss": 1.1526, + "step": 23080 + }, + { + "epoch": 5.13, + "learning_rate": 3.333581367661285e-05, + "loss": 1.1459, + "step": 23090 + }, + { + "epoch": 5.13, + "learning_rate": 3.3332837264677435e-05, + "loss": 1.1522, + "step": 23100 + }, + { + "epoch": 5.13, + "learning_rate": 3.332986085274202e-05, + "loss": 1.1507, + "step": 23110 + }, + { + "epoch": 5.13, + "learning_rate": 3.332688444080661e-05, + "loss": 1.1683, + "step": 23120 + }, + { + "epoch": 5.14, + "learning_rate": 3.33239080288712e-05, + "loss": 1.1394, + "step": 23130 + }, + { + "epoch": 5.14, + "learning_rate": 3.332093161693579e-05, + "loss": 1.1531, + "step": 23140 + }, + { + "epoch": 5.14, + "learning_rate": 3.3317955205000376e-05, + "loss": 1.1453, + "step": 23150 + }, + { + "epoch": 5.14, + "learning_rate": 3.331497879306496e-05, + "loss": 1.1517, + "step": 23160 + }, + { + "epoch": 5.15, + "learning_rate": 3.331200238112955e-05, + "loss": 1.1467, + "step": 23170 + }, + { + "epoch": 5.15, + "learning_rate": 3.330902596919414e-05, + "loss": 1.1421, + "step": 23180 + }, + { + "epoch": 5.15, + "learning_rate": 3.3306049557258724e-05, + "loss": 1.1574, + "step": 23190 + }, + { + "epoch": 5.15, + "learning_rate": 3.330307314532332e-05, + "loss": 1.1487, + "step": 23200 + }, + { + "epoch": 5.15, + "learning_rate": 3.3300096733387904e-05, + "loss": 1.1495, + "step": 23210 + }, + { + "epoch": 5.16, + "learning_rate": 3.329712032145249e-05, + "loss": 1.145, + "step": 23220 + }, + { + "epoch": 5.16, + "learning_rate": 3.329414390951708e-05, + "loss": 1.1373, + "step": 23230 + }, + { + "epoch": 5.16, + "learning_rate": 3.329116749758167e-05, + "loss": 1.1467, + "step": 23240 + }, + { + "epoch": 5.16, + "learning_rate": 3.328819108564626e-05, + "loss": 1.1649, + "step": 23250 + }, + { + "epoch": 5.17, + "learning_rate": 3.3285214673710846e-05, + "loss": 1.1664, + "step": 23260 + }, + { + "epoch": 5.17, + "learning_rate": 3.328223826177543e-05, + "loss": 1.1659, + "step": 23270 + }, + { + "epoch": 5.17, + "learning_rate": 3.327926184984002e-05, + "loss": 1.1564, + "step": 23280 + }, + { + "epoch": 5.17, + "learning_rate": 3.3276285437904606e-05, + "loss": 1.1481, + "step": 23290 + }, + { + "epoch": 5.17, + "learning_rate": 3.327330902596919e-05, + "loss": 1.1443, + "step": 23300 + }, + { + "epoch": 5.18, + "learning_rate": 3.327033261403379e-05, + "loss": 1.1386, + "step": 23310 + }, + { + "epoch": 5.18, + "learning_rate": 3.3267356202098374e-05, + "loss": 1.1568, + "step": 23320 + }, + { + "epoch": 5.18, + "learning_rate": 3.326437979016296e-05, + "loss": 1.165, + "step": 23330 + }, + { + "epoch": 5.18, + "learning_rate": 3.326140337822755e-05, + "loss": 1.1387, + "step": 23340 + }, + { + "epoch": 5.19, + "learning_rate": 3.325842696629214e-05, + "loss": 1.1544, + "step": 23350 + }, + { + "epoch": 5.19, + "learning_rate": 3.325545055435673e-05, + "loss": 1.1365, + "step": 23360 + }, + { + "epoch": 5.19, + "learning_rate": 3.3252474142421315e-05, + "loss": 1.1515, + "step": 23370 + }, + { + "epoch": 5.19, + "learning_rate": 3.32494977304859e-05, + "loss": 1.1353, + "step": 23380 + }, + { + "epoch": 5.19, + "learning_rate": 3.324652131855049e-05, + "loss": 1.1478, + "step": 23390 + }, + { + "epoch": 5.2, + "learning_rate": 3.3243544906615076e-05, + "loss": 1.1512, + "step": 23400 + }, + { + "epoch": 5.2, + "learning_rate": 3.324056849467966e-05, + "loss": 1.1315, + "step": 23410 + }, + { + "epoch": 5.2, + "learning_rate": 3.3237592082744256e-05, + "loss": 1.1498, + "step": 23420 + }, + { + "epoch": 5.2, + "learning_rate": 3.323461567080884e-05, + "loss": 1.1529, + "step": 23430 + }, + { + "epoch": 5.21, + "learning_rate": 3.323163925887343e-05, + "loss": 1.1572, + "step": 23440 + }, + { + "epoch": 5.21, + "learning_rate": 3.322866284693802e-05, + "loss": 1.1334, + "step": 23450 + }, + { + "epoch": 5.21, + "learning_rate": 3.322568643500261e-05, + "loss": 1.1499, + "step": 23460 + }, + { + "epoch": 5.21, + "learning_rate": 3.32227100230672e-05, + "loss": 1.1518, + "step": 23470 + }, + { + "epoch": 5.21, + "learning_rate": 3.3219733611131785e-05, + "loss": 1.1435, + "step": 23480 + }, + { + "epoch": 5.22, + "learning_rate": 3.321675719919637e-05, + "loss": 1.1403, + "step": 23490 + }, + { + "epoch": 5.22, + "learning_rate": 3.321378078726096e-05, + "loss": 1.1495, + "step": 23500 + }, + { + "epoch": 5.22, + "learning_rate": 3.3210804375325545e-05, + "loss": 1.1517, + "step": 23510 + }, + { + "epoch": 5.22, + "learning_rate": 3.320782796339014e-05, + "loss": 1.1477, + "step": 23520 + }, + { + "epoch": 5.23, + "learning_rate": 3.3204851551454726e-05, + "loss": 1.1585, + "step": 23530 + }, + { + "epoch": 5.23, + "learning_rate": 3.320187513951931e-05, + "loss": 1.1474, + "step": 23540 + }, + { + "epoch": 5.23, + "learning_rate": 3.31988987275839e-05, + "loss": 1.1525, + "step": 23550 + }, + { + "epoch": 5.23, + "learning_rate": 3.319592231564849e-05, + "loss": 1.1645, + "step": 23560 + }, + { + "epoch": 5.23, + "learning_rate": 3.319294590371308e-05, + "loss": 1.1541, + "step": 23570 + }, + { + "epoch": 5.24, + "learning_rate": 3.318996949177766e-05, + "loss": 1.1635, + "step": 23580 + }, + { + "epoch": 5.24, + "learning_rate": 3.3186993079842254e-05, + "loss": 1.1409, + "step": 23590 + }, + { + "epoch": 5.24, + "learning_rate": 3.318401666790684e-05, + "loss": 1.1324, + "step": 23600 + }, + { + "epoch": 5.24, + "learning_rate": 3.318104025597143e-05, + "loss": 1.1481, + "step": 23610 + }, + { + "epoch": 5.25, + "learning_rate": 3.3178063844036015e-05, + "loss": 1.1433, + "step": 23620 + }, + { + "epoch": 5.25, + "learning_rate": 3.317508743210061e-05, + "loss": 1.1332, + "step": 23630 + }, + { + "epoch": 5.25, + "learning_rate": 3.3172111020165195e-05, + "loss": 1.1467, + "step": 23640 + }, + { + "epoch": 5.25, + "learning_rate": 3.316913460822978e-05, + "loss": 1.1349, + "step": 23650 + }, + { + "epoch": 5.25, + "learning_rate": 3.316615819629437e-05, + "loss": 1.1424, + "step": 23660 + }, + { + "epoch": 5.26, + "learning_rate": 3.316318178435896e-05, + "loss": 1.1449, + "step": 23670 + }, + { + "epoch": 5.26, + "learning_rate": 3.316020537242354e-05, + "loss": 1.1366, + "step": 23680 + }, + { + "epoch": 5.26, + "learning_rate": 3.315722896048813e-05, + "loss": 1.1534, + "step": 23690 + }, + { + "epoch": 5.26, + "learning_rate": 3.3154252548552723e-05, + "loss": 1.1406, + "step": 23700 + }, + { + "epoch": 5.27, + "learning_rate": 3.315127613661731e-05, + "loss": 1.1421, + "step": 23710 + }, + { + "epoch": 5.27, + "learning_rate": 3.31482997246819e-05, + "loss": 1.1393, + "step": 23720 + }, + { + "epoch": 5.27, + "learning_rate": 3.3145323312746484e-05, + "loss": 1.1589, + "step": 23730 + }, + { + "epoch": 5.27, + "learning_rate": 3.314234690081108e-05, + "loss": 1.1349, + "step": 23740 + }, + { + "epoch": 5.27, + "learning_rate": 3.3139370488875665e-05, + "loss": 1.144, + "step": 23750 + }, + { + "epoch": 5.28, + "learning_rate": 3.313639407694025e-05, + "loss": 1.1503, + "step": 23760 + }, + { + "epoch": 5.28, + "learning_rate": 3.313341766500484e-05, + "loss": 1.1512, + "step": 23770 + }, + { + "epoch": 5.28, + "learning_rate": 3.313044125306943e-05, + "loss": 1.1453, + "step": 23780 + }, + { + "epoch": 5.28, + "learning_rate": 3.312746484113401e-05, + "loss": 1.1563, + "step": 23790 + }, + { + "epoch": 5.29, + "learning_rate": 3.31244884291986e-05, + "loss": 1.1443, + "step": 23800 + }, + { + "epoch": 5.29, + "learning_rate": 3.312151201726319e-05, + "loss": 1.1469, + "step": 23810 + }, + { + "epoch": 5.29, + "learning_rate": 3.311853560532778e-05, + "loss": 1.1503, + "step": 23820 + }, + { + "epoch": 5.29, + "learning_rate": 3.311555919339237e-05, + "loss": 1.1593, + "step": 23830 + }, + { + "epoch": 5.29, + "learning_rate": 3.3112582781456954e-05, + "loss": 1.1611, + "step": 23840 + }, + { + "epoch": 5.3, + "learning_rate": 3.310960636952155e-05, + "loss": 1.1406, + "step": 23850 + }, + { + "epoch": 5.3, + "learning_rate": 3.3106629957586134e-05, + "loss": 1.1429, + "step": 23860 + }, + { + "epoch": 5.3, + "learning_rate": 3.310365354565072e-05, + "loss": 1.1507, + "step": 23870 + }, + { + "epoch": 5.3, + "learning_rate": 3.310067713371531e-05, + "loss": 1.1395, + "step": 23880 + }, + { + "epoch": 5.31, + "learning_rate": 3.3097700721779895e-05, + "loss": 1.1448, + "step": 23890 + }, + { + "epoch": 5.31, + "learning_rate": 3.309472430984448e-05, + "loss": 1.1411, + "step": 23900 + }, + { + "epoch": 5.31, + "learning_rate": 3.3091747897909075e-05, + "loss": 1.1485, + "step": 23910 + }, + { + "epoch": 5.31, + "learning_rate": 3.308877148597366e-05, + "loss": 1.1451, + "step": 23920 + }, + { + "epoch": 5.31, + "learning_rate": 3.308579507403825e-05, + "loss": 1.1598, + "step": 23930 + }, + { + "epoch": 5.32, + "learning_rate": 3.3082818662102836e-05, + "loss": 1.1606, + "step": 23940 + }, + { + "epoch": 5.32, + "learning_rate": 3.307984225016743e-05, + "loss": 1.1415, + "step": 23950 + }, + { + "epoch": 5.32, + "learning_rate": 3.307686583823202e-05, + "loss": 1.1454, + "step": 23960 + }, + { + "epoch": 5.32, + "learning_rate": 3.3073889426296604e-05, + "loss": 1.1499, + "step": 23970 + }, + { + "epoch": 5.33, + "learning_rate": 3.307091301436119e-05, + "loss": 1.1466, + "step": 23980 + }, + { + "epoch": 5.33, + "learning_rate": 3.306793660242578e-05, + "loss": 1.1426, + "step": 23990 + }, + { + "epoch": 5.33, + "learning_rate": 3.3064960190490364e-05, + "loss": 1.1378, + "step": 24000 + }, + { + "epoch": 5.33, + "learning_rate": 3.306198377855495e-05, + "loss": 1.1533, + "step": 24010 + }, + { + "epoch": 5.33, + "learning_rate": 3.3059007366619545e-05, + "loss": 1.1346, + "step": 24020 + }, + { + "epoch": 5.34, + "learning_rate": 3.305603095468413e-05, + "loss": 1.1448, + "step": 24030 + }, + { + "epoch": 5.34, + "learning_rate": 3.305305454274872e-05, + "loss": 1.1493, + "step": 24040 + }, + { + "epoch": 5.34, + "learning_rate": 3.3050078130813306e-05, + "loss": 1.1428, + "step": 24050 + }, + { + "epoch": 5.34, + "learning_rate": 3.30471017188779e-05, + "loss": 1.1474, + "step": 24060 + }, + { + "epoch": 5.35, + "learning_rate": 3.3044125306942486e-05, + "loss": 1.1419, + "step": 24070 + }, + { + "epoch": 5.35, + "learning_rate": 3.304114889500707e-05, + "loss": 1.1325, + "step": 24080 + }, + { + "epoch": 5.35, + "learning_rate": 3.303817248307166e-05, + "loss": 1.1349, + "step": 24090 + }, + { + "epoch": 5.35, + "learning_rate": 3.303519607113625e-05, + "loss": 1.1408, + "step": 24100 + }, + { + "epoch": 5.35, + "learning_rate": 3.3032219659200834e-05, + "loss": 1.1342, + "step": 24110 + }, + { + "epoch": 5.36, + "learning_rate": 3.302924324726542e-05, + "loss": 1.1291, + "step": 24120 + }, + { + "epoch": 5.36, + "learning_rate": 3.3026266835330014e-05, + "loss": 1.1342, + "step": 24130 + }, + { + "epoch": 5.36, + "learning_rate": 3.30232904233946e-05, + "loss": 1.1467, + "step": 24140 + }, + { + "epoch": 5.36, + "learning_rate": 3.302031401145919e-05, + "loss": 1.1495, + "step": 24150 + }, + { + "epoch": 5.37, + "learning_rate": 3.3017337599523775e-05, + "loss": 1.1502, + "step": 24160 + }, + { + "epoch": 5.37, + "learning_rate": 3.301436118758837e-05, + "loss": 1.1529, + "step": 24170 + }, + { + "epoch": 5.37, + "learning_rate": 3.3011384775652956e-05, + "loss": 1.1453, + "step": 24180 + }, + { + "epoch": 5.37, + "learning_rate": 3.300840836371754e-05, + "loss": 1.1261, + "step": 24190 + }, + { + "epoch": 5.37, + "learning_rate": 3.300543195178213e-05, + "loss": 1.1442, + "step": 24200 + }, + { + "epoch": 5.38, + "learning_rate": 3.3002455539846716e-05, + "loss": 1.1393, + "step": 24210 + }, + { + "epoch": 5.38, + "learning_rate": 3.29994791279113e-05, + "loss": 1.1455, + "step": 24220 + }, + { + "epoch": 5.38, + "learning_rate": 3.299650271597589e-05, + "loss": 1.135, + "step": 24230 + }, + { + "epoch": 5.38, + "learning_rate": 3.2993526304040484e-05, + "loss": 1.145, + "step": 24240 + }, + { + "epoch": 5.39, + "learning_rate": 3.299054989210507e-05, + "loss": 1.1435, + "step": 24250 + }, + { + "epoch": 5.39, + "learning_rate": 3.298757348016966e-05, + "loss": 1.1418, + "step": 24260 + }, + { + "epoch": 5.39, + "learning_rate": 3.2984597068234244e-05, + "loss": 1.1444, + "step": 24270 + }, + { + "epoch": 5.39, + "learning_rate": 3.298162065629884e-05, + "loss": 1.1418, + "step": 24280 + }, + { + "epoch": 5.39, + "learning_rate": 3.2978644244363425e-05, + "loss": 1.14, + "step": 24290 + }, + { + "epoch": 5.4, + "learning_rate": 3.297566783242801e-05, + "loss": 1.1295, + "step": 24300 + }, + { + "epoch": 5.4, + "learning_rate": 3.29726914204926e-05, + "loss": 1.1561, + "step": 24310 + }, + { + "epoch": 5.4, + "learning_rate": 3.2969715008557186e-05, + "loss": 1.1518, + "step": 24320 + }, + { + "epoch": 5.4, + "learning_rate": 3.296673859662177e-05, + "loss": 1.1416, + "step": 24330 + }, + { + "epoch": 5.41, + "learning_rate": 3.2963762184686366e-05, + "loss": 1.1267, + "step": 24340 + }, + { + "epoch": 5.41, + "learning_rate": 3.296078577275095e-05, + "loss": 1.1377, + "step": 24350 + }, + { + "epoch": 5.41, + "learning_rate": 3.295780936081554e-05, + "loss": 1.1279, + "step": 24360 + }, + { + "epoch": 5.41, + "learning_rate": 3.295483294888013e-05, + "loss": 1.1476, + "step": 24370 + }, + { + "epoch": 5.41, + "learning_rate": 3.295185653694472e-05, + "loss": 1.1429, + "step": 24380 + }, + { + "epoch": 5.42, + "learning_rate": 3.294888012500931e-05, + "loss": 1.1525, + "step": 24390 + }, + { + "epoch": 5.42, + "learning_rate": 3.294590371307389e-05, + "loss": 1.1424, + "step": 24400 + }, + { + "epoch": 5.42, + "learning_rate": 3.294292730113848e-05, + "loss": 1.1388, + "step": 24410 + }, + { + "epoch": 5.42, + "learning_rate": 3.293995088920307e-05, + "loss": 1.137, + "step": 24420 + }, + { + "epoch": 5.43, + "learning_rate": 3.2936974477267655e-05, + "loss": 1.1454, + "step": 24430 + }, + { + "epoch": 5.43, + "learning_rate": 3.293399806533224e-05, + "loss": 1.1497, + "step": 24440 + }, + { + "epoch": 5.43, + "learning_rate": 3.2931021653396836e-05, + "loss": 1.1455, + "step": 24450 + }, + { + "epoch": 5.43, + "learning_rate": 3.292804524146142e-05, + "loss": 1.1531, + "step": 24460 + }, + { + "epoch": 5.43, + "learning_rate": 3.292506882952601e-05, + "loss": 1.1431, + "step": 24470 + }, + { + "epoch": 5.44, + "learning_rate": 3.2922092417590596e-05, + "loss": 1.1339, + "step": 24480 + }, + { + "epoch": 5.44, + "learning_rate": 3.291911600565519e-05, + "loss": 1.1541, + "step": 24490 + }, + { + "epoch": 5.44, + "learning_rate": 3.291613959371977e-05, + "loss": 1.1429, + "step": 24500 + }, + { + "epoch": 5.44, + "learning_rate": 3.291316318178436e-05, + "loss": 1.1486, + "step": 24510 + }, + { + "epoch": 5.45, + "learning_rate": 3.291018676984895e-05, + "loss": 1.1346, + "step": 24520 + }, + { + "epoch": 5.45, + "learning_rate": 3.290721035791354e-05, + "loss": 1.1436, + "step": 24530 + }, + { + "epoch": 5.45, + "learning_rate": 3.2904233945978125e-05, + "loss": 1.1375, + "step": 24540 + }, + { + "epoch": 5.45, + "learning_rate": 3.290125753404271e-05, + "loss": 1.15, + "step": 24550 + }, + { + "epoch": 5.45, + "learning_rate": 3.2898281122107305e-05, + "loss": 1.1295, + "step": 24560 + }, + { + "epoch": 5.46, + "learning_rate": 3.289530471017189e-05, + "loss": 1.1475, + "step": 24570 + }, + { + "epoch": 5.46, + "learning_rate": 3.289232829823648e-05, + "loss": 1.1519, + "step": 24580 + }, + { + "epoch": 5.46, + "learning_rate": 3.2889351886301066e-05, + "loss": 1.1467, + "step": 24590 + }, + { + "epoch": 5.46, + "learning_rate": 3.288637547436565e-05, + "loss": 1.1412, + "step": 24600 + }, + { + "epoch": 5.47, + "learning_rate": 3.288339906243024e-05, + "loss": 1.1405, + "step": 24610 + }, + { + "epoch": 5.47, + "learning_rate": 3.2880422650494827e-05, + "loss": 1.1493, + "step": 24620 + }, + { + "epoch": 5.47, + "learning_rate": 3.287744623855942e-05, + "loss": 1.1379, + "step": 24630 + }, + { + "epoch": 5.47, + "learning_rate": 3.287446982662401e-05, + "loss": 1.1475, + "step": 24640 + }, + { + "epoch": 5.47, + "learning_rate": 3.2871493414688594e-05, + "loss": 1.1465, + "step": 24650 + }, + { + "epoch": 5.48, + "learning_rate": 3.286851700275318e-05, + "loss": 1.1303, + "step": 24660 + }, + { + "epoch": 5.48, + "learning_rate": 3.2865540590817775e-05, + "loss": 1.1598, + "step": 24670 + }, + { + "epoch": 5.48, + "learning_rate": 3.286256417888236e-05, + "loss": 1.147, + "step": 24680 + }, + { + "epoch": 5.48, + "learning_rate": 3.285958776694695e-05, + "loss": 1.1545, + "step": 24690 + }, + { + "epoch": 5.49, + "learning_rate": 3.2856611355011535e-05, + "loss": 1.1434, + "step": 24700 + }, + { + "epoch": 5.49, + "learning_rate": 3.285363494307612e-05, + "loss": 1.1378, + "step": 24710 + }, + { + "epoch": 5.49, + "learning_rate": 3.285065853114071e-05, + "loss": 1.1525, + "step": 24720 + }, + { + "epoch": 5.49, + "learning_rate": 3.28476821192053e-05, + "loss": 1.1447, + "step": 24730 + }, + { + "epoch": 5.49, + "learning_rate": 3.284470570726989e-05, + "loss": 1.1455, + "step": 24740 + }, + { + "epoch": 5.5, + "learning_rate": 3.2841729295334477e-05, + "loss": 1.1493, + "step": 24750 + }, + { + "epoch": 5.5, + "learning_rate": 3.2838752883399064e-05, + "loss": 1.1325, + "step": 24760 + }, + { + "epoch": 5.5, + "learning_rate": 3.283577647146366e-05, + "loss": 1.1465, + "step": 24770 + }, + { + "epoch": 5.5, + "learning_rate": 3.2832800059528244e-05, + "loss": 1.1466, + "step": 24780 + }, + { + "epoch": 5.51, + "learning_rate": 3.282982364759283e-05, + "loss": 1.1436, + "step": 24790 + }, + { + "epoch": 5.51, + "learning_rate": 3.282684723565742e-05, + "loss": 1.1375, + "step": 24800 + }, + { + "epoch": 5.51, + "learning_rate": 3.2823870823722005e-05, + "loss": 1.13, + "step": 24810 + }, + { + "epoch": 5.51, + "learning_rate": 3.282089441178659e-05, + "loss": 1.1535, + "step": 24820 + }, + { + "epoch": 5.51, + "learning_rate": 3.281791799985118e-05, + "loss": 1.1394, + "step": 24830 + }, + { + "epoch": 5.52, + "learning_rate": 3.281494158791577e-05, + "loss": 1.139, + "step": 24840 + }, + { + "epoch": 5.52, + "learning_rate": 3.281196517598036e-05, + "loss": 1.1451, + "step": 24850 + }, + { + "epoch": 5.52, + "learning_rate": 3.2808988764044946e-05, + "loss": 1.1322, + "step": 24860 + }, + { + "epoch": 5.52, + "learning_rate": 3.280601235210953e-05, + "loss": 1.148, + "step": 24870 + }, + { + "epoch": 5.53, + "learning_rate": 3.280303594017413e-05, + "loss": 1.1346, + "step": 24880 + }, + { + "epoch": 5.53, + "learning_rate": 3.2800059528238714e-05, + "loss": 1.1374, + "step": 24890 + }, + { + "epoch": 5.53, + "learning_rate": 3.27970831163033e-05, + "loss": 1.1467, + "step": 24900 + }, + { + "epoch": 5.53, + "learning_rate": 3.279410670436789e-05, + "loss": 1.1378, + "step": 24910 + }, + { + "epoch": 5.53, + "learning_rate": 3.2791130292432474e-05, + "loss": 1.1526, + "step": 24920 + }, + { + "epoch": 5.54, + "learning_rate": 3.278815388049706e-05, + "loss": 1.1319, + "step": 24930 + }, + { + "epoch": 5.54, + "learning_rate": 3.278517746856165e-05, + "loss": 1.1481, + "step": 24940 + }, + { + "epoch": 5.54, + "learning_rate": 3.278220105662624e-05, + "loss": 1.1386, + "step": 24950 + }, + { + "epoch": 5.54, + "learning_rate": 3.277922464469083e-05, + "loss": 1.1466, + "step": 24960 + }, + { + "epoch": 5.55, + "learning_rate": 3.2776248232755415e-05, + "loss": 1.1442, + "step": 24970 + }, + { + "epoch": 5.55, + "learning_rate": 3.277327182082e-05, + "loss": 1.1437, + "step": 24980 + }, + { + "epoch": 5.55, + "learning_rate": 3.2770295408884596e-05, + "loss": 1.1311, + "step": 24990 + }, + { + "epoch": 5.55, + "learning_rate": 3.276731899694918e-05, + "loss": 1.152, + "step": 25000 + }, + { + "epoch": 5.55, + "learning_rate": 3.276434258501376e-05, + "loss": 1.1618, + "step": 25010 + }, + { + "epoch": 5.56, + "learning_rate": 3.276136617307836e-05, + "loss": 1.1416, + "step": 25020 + }, + { + "epoch": 5.56, + "learning_rate": 3.2758389761142944e-05, + "loss": 1.1492, + "step": 25030 + }, + { + "epoch": 5.56, + "learning_rate": 3.275541334920753e-05, + "loss": 1.1258, + "step": 25040 + }, + { + "epoch": 5.56, + "learning_rate": 3.275243693727212e-05, + "loss": 1.1429, + "step": 25050 + }, + { + "epoch": 5.57, + "learning_rate": 3.274946052533671e-05, + "loss": 1.148, + "step": 25060 + }, + { + "epoch": 5.57, + "learning_rate": 3.27464841134013e-05, + "loss": 1.1222, + "step": 25070 + }, + { + "epoch": 5.57, + "learning_rate": 3.2743507701465885e-05, + "loss": 1.1405, + "step": 25080 + }, + { + "epoch": 5.57, + "learning_rate": 3.274053128953047e-05, + "loss": 1.1338, + "step": 25090 + }, + { + "epoch": 5.57, + "learning_rate": 3.2737554877595066e-05, + "loss": 1.1301, + "step": 25100 + }, + { + "epoch": 5.58, + "learning_rate": 3.273457846565965e-05, + "loss": 1.142, + "step": 25110 + }, + { + "epoch": 5.58, + "learning_rate": 3.273160205372424e-05, + "loss": 1.1309, + "step": 25120 + }, + { + "epoch": 5.58, + "learning_rate": 3.2728625641788826e-05, + "loss": 1.1475, + "step": 25130 + }, + { + "epoch": 5.58, + "learning_rate": 3.272564922985341e-05, + "loss": 1.1472, + "step": 25140 + }, + { + "epoch": 5.59, + "learning_rate": 3.2722672817918e-05, + "loss": 1.1492, + "step": 25150 + }, + { + "epoch": 5.59, + "learning_rate": 3.2719696405982594e-05, + "loss": 1.149, + "step": 25160 + }, + { + "epoch": 5.59, + "learning_rate": 3.271671999404718e-05, + "loss": 1.1451, + "step": 25170 + }, + { + "epoch": 5.59, + "learning_rate": 3.271374358211177e-05, + "loss": 1.1492, + "step": 25180 + }, + { + "epoch": 5.59, + "learning_rate": 3.2710767170176354e-05, + "loss": 1.1458, + "step": 25190 + }, + { + "epoch": 5.6, + "learning_rate": 3.270779075824095e-05, + "loss": 1.1466, + "step": 25200 + }, + { + "epoch": 5.6, + "learning_rate": 3.2704814346305535e-05, + "loss": 1.1472, + "step": 25210 + }, + { + "epoch": 5.6, + "learning_rate": 3.2701837934370115e-05, + "loss": 1.1394, + "step": 25220 + }, + { + "epoch": 5.6, + "learning_rate": 3.269886152243471e-05, + "loss": 1.1519, + "step": 25230 + }, + { + "epoch": 5.61, + "learning_rate": 3.2695885110499296e-05, + "loss": 1.1419, + "step": 25240 + }, + { + "epoch": 5.61, + "learning_rate": 3.269290869856388e-05, + "loss": 1.1484, + "step": 25250 + }, + { + "epoch": 5.61, + "learning_rate": 3.268993228662847e-05, + "loss": 1.1515, + "step": 25260 + }, + { + "epoch": 5.61, + "learning_rate": 3.268695587469306e-05, + "loss": 1.1321, + "step": 25270 + }, + { + "epoch": 5.61, + "learning_rate": 3.268397946275765e-05, + "loss": 1.1344, + "step": 25280 + }, + { + "epoch": 5.62, + "learning_rate": 3.268100305082224e-05, + "loss": 1.1419, + "step": 25290 + }, + { + "epoch": 5.62, + "learning_rate": 3.2678026638886824e-05, + "loss": 1.1533, + "step": 25300 + }, + { + "epoch": 5.62, + "learning_rate": 3.267505022695142e-05, + "loss": 1.1442, + "step": 25310 + }, + { + "epoch": 5.62, + "learning_rate": 3.2672073815016e-05, + "loss": 1.1375, + "step": 25320 + }, + { + "epoch": 5.63, + "learning_rate": 3.2669097403080585e-05, + "loss": 1.1324, + "step": 25330 + }, + { + "epoch": 5.63, + "learning_rate": 3.266612099114518e-05, + "loss": 1.1409, + "step": 25340 + }, + { + "epoch": 5.63, + "learning_rate": 3.2663144579209765e-05, + "loss": 1.1286, + "step": 25350 + }, + { + "epoch": 5.63, + "learning_rate": 3.266016816727435e-05, + "loss": 1.1402, + "step": 25360 + }, + { + "epoch": 5.63, + "learning_rate": 3.265719175533894e-05, + "loss": 1.1396, + "step": 25370 + }, + { + "epoch": 5.64, + "learning_rate": 3.265421534340353e-05, + "loss": 1.1485, + "step": 25380 + }, + { + "epoch": 5.64, + "learning_rate": 3.265123893146812e-05, + "loss": 1.1469, + "step": 25390 + }, + { + "epoch": 5.64, + "learning_rate": 3.2648262519532706e-05, + "loss": 1.1315, + "step": 25400 + }, + { + "epoch": 5.64, + "learning_rate": 3.264528610759729e-05, + "loss": 1.1512, + "step": 25410 + }, + { + "epoch": 5.65, + "learning_rate": 3.264230969566188e-05, + "loss": 1.1417, + "step": 25420 + }, + { + "epoch": 5.65, + "learning_rate": 3.263933328372647e-05, + "loss": 1.1385, + "step": 25430 + }, + { + "epoch": 5.65, + "learning_rate": 3.2636356871791054e-05, + "loss": 1.1272, + "step": 25440 + }, + { + "epoch": 5.65, + "learning_rate": 3.263338045985565e-05, + "loss": 1.1459, + "step": 25450 + }, + { + "epoch": 5.65, + "learning_rate": 3.2630404047920235e-05, + "loss": 1.1447, + "step": 25460 + }, + { + "epoch": 5.66, + "learning_rate": 3.262742763598482e-05, + "loss": 1.1436, + "step": 25470 + }, + { + "epoch": 5.66, + "learning_rate": 3.262445122404941e-05, + "loss": 1.1187, + "step": 25480 + }, + { + "epoch": 5.66, + "learning_rate": 3.2621474812114e-05, + "loss": 1.1383, + "step": 25490 + }, + { + "epoch": 5.66, + "learning_rate": 3.261849840017859e-05, + "loss": 1.141, + "step": 25500 + }, + { + "epoch": 5.67, + "learning_rate": 3.2615521988243176e-05, + "loss": 1.1364, + "step": 25510 + }, + { + "epoch": 5.67, + "learning_rate": 3.261254557630776e-05, + "loss": 1.1502, + "step": 25520 + }, + { + "epoch": 5.67, + "learning_rate": 3.260956916437235e-05, + "loss": 1.1421, + "step": 25530 + }, + { + "epoch": 5.67, + "learning_rate": 3.2606592752436936e-05, + "loss": 1.1454, + "step": 25540 + }, + { + "epoch": 5.67, + "learning_rate": 3.260361634050153e-05, + "loss": 1.1566, + "step": 25550 + }, + { + "epoch": 5.68, + "learning_rate": 3.260063992856612e-05, + "loss": 1.1455, + "step": 25560 + }, + { + "epoch": 5.68, + "learning_rate": 3.2597663516630704e-05, + "loss": 1.1538, + "step": 25570 + }, + { + "epoch": 5.68, + "learning_rate": 3.259468710469529e-05, + "loss": 1.1433, + "step": 25580 + }, + { + "epoch": 5.68, + "learning_rate": 3.2591710692759885e-05, + "loss": 1.1392, + "step": 25590 + }, + { + "epoch": 5.69, + "learning_rate": 3.258873428082447e-05, + "loss": 1.134, + "step": 25600 + }, + { + "epoch": 5.69, + "learning_rate": 3.258575786888906e-05, + "loss": 1.1431, + "step": 25610 + }, + { + "epoch": 5.69, + "learning_rate": 3.2582781456953645e-05, + "loss": 1.1279, + "step": 25620 + }, + { + "epoch": 5.69, + "learning_rate": 3.257980504501823e-05, + "loss": 1.1406, + "step": 25630 + }, + { + "epoch": 5.69, + "learning_rate": 3.257682863308282e-05, + "loss": 1.152, + "step": 25640 + }, + { + "epoch": 5.7, + "learning_rate": 3.2573852221147406e-05, + "loss": 1.1403, + "step": 25650 + }, + { + "epoch": 5.7, + "learning_rate": 3.2570875809212e-05, + "loss": 1.1275, + "step": 25660 + }, + { + "epoch": 5.7, + "learning_rate": 3.2567899397276586e-05, + "loss": 1.1439, + "step": 25670 + }, + { + "epoch": 5.7, + "learning_rate": 3.2564922985341173e-05, + "loss": 1.1347, + "step": 25680 + }, + { + "epoch": 5.71, + "learning_rate": 3.256194657340576e-05, + "loss": 1.1463, + "step": 25690 + }, + { + "epoch": 5.71, + "learning_rate": 3.2558970161470354e-05, + "loss": 1.1596, + "step": 25700 + }, + { + "epoch": 5.71, + "learning_rate": 3.255599374953494e-05, + "loss": 1.1264, + "step": 25710 + }, + { + "epoch": 5.71, + "learning_rate": 3.255301733759953e-05, + "loss": 1.1388, + "step": 25720 + }, + { + "epoch": 5.71, + "learning_rate": 3.2550040925664115e-05, + "loss": 1.1313, + "step": 25730 + }, + { + "epoch": 5.72, + "learning_rate": 3.25470645137287e-05, + "loss": 1.1268, + "step": 25740 + }, + { + "epoch": 5.72, + "learning_rate": 3.254408810179329e-05, + "loss": 1.1379, + "step": 25750 + }, + { + "epoch": 5.72, + "learning_rate": 3.2541111689857875e-05, + "loss": 1.1401, + "step": 25760 + }, + { + "epoch": 5.72, + "learning_rate": 3.253813527792247e-05, + "loss": 1.1391, + "step": 25770 + }, + { + "epoch": 5.73, + "learning_rate": 3.2535158865987056e-05, + "loss": 1.134, + "step": 25780 + }, + { + "epoch": 5.73, + "learning_rate": 3.253218245405164e-05, + "loss": 1.1424, + "step": 25790 + }, + { + "epoch": 5.73, + "learning_rate": 3.252920604211623e-05, + "loss": 1.1363, + "step": 25800 + }, + { + "epoch": 5.73, + "learning_rate": 3.2526229630180823e-05, + "loss": 1.1428, + "step": 25810 + }, + { + "epoch": 5.73, + "learning_rate": 3.252325321824541e-05, + "loss": 1.1312, + "step": 25820 + }, + { + "epoch": 5.74, + "learning_rate": 3.252027680631e-05, + "loss": 1.1397, + "step": 25830 + }, + { + "epoch": 5.74, + "learning_rate": 3.2517300394374584e-05, + "loss": 1.125, + "step": 25840 + }, + { + "epoch": 5.74, + "learning_rate": 3.251432398243917e-05, + "loss": 1.1336, + "step": 25850 + }, + { + "epoch": 5.74, + "learning_rate": 3.251134757050376e-05, + "loss": 1.1512, + "step": 25860 + }, + { + "epoch": 5.75, + "learning_rate": 3.2508371158568345e-05, + "loss": 1.1278, + "step": 25870 + }, + { + "epoch": 5.75, + "learning_rate": 3.250539474663294e-05, + "loss": 1.1375, + "step": 25880 + }, + { + "epoch": 5.75, + "learning_rate": 3.2502418334697525e-05, + "loss": 1.1283, + "step": 25890 + }, + { + "epoch": 5.75, + "learning_rate": 3.249944192276211e-05, + "loss": 1.1205, + "step": 25900 + }, + { + "epoch": 5.75, + "learning_rate": 3.24964655108267e-05, + "loss": 1.1407, + "step": 25910 + }, + { + "epoch": 5.76, + "learning_rate": 3.249348909889129e-05, + "loss": 1.1447, + "step": 25920 + }, + { + "epoch": 5.76, + "learning_rate": 3.249051268695588e-05, + "loss": 1.1346, + "step": 25930 + }, + { + "epoch": 5.76, + "learning_rate": 3.248753627502047e-05, + "loss": 1.1344, + "step": 25940 + }, + { + "epoch": 5.76, + "learning_rate": 3.2484559863085054e-05, + "loss": 1.1448, + "step": 25950 + }, + { + "epoch": 5.77, + "learning_rate": 3.248158345114964e-05, + "loss": 1.141, + "step": 25960 + }, + { + "epoch": 5.77, + "learning_rate": 3.247860703921423e-05, + "loss": 1.1292, + "step": 25970 + }, + { + "epoch": 5.77, + "learning_rate": 3.247563062727882e-05, + "loss": 1.1615, + "step": 25980 + }, + { + "epoch": 5.77, + "learning_rate": 3.247265421534341e-05, + "loss": 1.1543, + "step": 25990 + }, + { + "epoch": 5.77, + "learning_rate": 3.2469677803407995e-05, + "loss": 1.1468, + "step": 26000 + }, + { + "epoch": 5.78, + "learning_rate": 3.246670139147258e-05, + "loss": 1.1542, + "step": 26010 + }, + { + "epoch": 5.78, + "learning_rate": 3.2463724979537175e-05, + "loss": 1.145, + "step": 26020 + }, + { + "epoch": 5.78, + "learning_rate": 3.246074856760176e-05, + "loss": 1.1433, + "step": 26030 + }, + { + "epoch": 5.78, + "learning_rate": 3.245777215566634e-05, + "loss": 1.1616, + "step": 26040 + }, + { + "epoch": 5.79, + "learning_rate": 3.2454795743730936e-05, + "loss": 1.1212, + "step": 26050 + }, + { + "epoch": 5.79, + "learning_rate": 3.245181933179552e-05, + "loss": 1.141, + "step": 26060 + }, + { + "epoch": 5.79, + "learning_rate": 3.244884291986011e-05, + "loss": 1.1478, + "step": 26070 + }, + { + "epoch": 5.79, + "learning_rate": 3.24458665079247e-05, + "loss": 1.1525, + "step": 26080 + }, + { + "epoch": 5.79, + "learning_rate": 3.244289009598929e-05, + "loss": 1.1381, + "step": 26090 + }, + { + "epoch": 5.8, + "learning_rate": 3.243991368405388e-05, + "loss": 1.1238, + "step": 26100 + }, + { + "epoch": 5.8, + "learning_rate": 3.2436937272118464e-05, + "loss": 1.1579, + "step": 26110 + }, + { + "epoch": 5.8, + "learning_rate": 3.243396086018305e-05, + "loss": 1.1415, + "step": 26120 + }, + { + "epoch": 5.8, + "learning_rate": 3.2430984448247645e-05, + "loss": 1.1498, + "step": 26130 + }, + { + "epoch": 5.81, + "learning_rate": 3.2428008036312225e-05, + "loss": 1.1348, + "step": 26140 + }, + { + "epoch": 5.81, + "learning_rate": 3.242503162437681e-05, + "loss": 1.1541, + "step": 26150 + }, + { + "epoch": 5.81, + "learning_rate": 3.2422055212441406e-05, + "loss": 1.1497, + "step": 26160 + }, + { + "epoch": 5.81, + "learning_rate": 3.241907880050599e-05, + "loss": 1.1275, + "step": 26170 + }, + { + "epoch": 5.81, + "learning_rate": 3.241610238857058e-05, + "loss": 1.1324, + "step": 26180 + }, + { + "epoch": 5.82, + "learning_rate": 3.2413125976635166e-05, + "loss": 1.1374, + "step": 26190 + }, + { + "epoch": 5.82, + "learning_rate": 3.241014956469976e-05, + "loss": 1.1427, + "step": 26200 + }, + { + "epoch": 5.82, + "learning_rate": 3.240717315276435e-05, + "loss": 1.1306, + "step": 26210 + }, + { + "epoch": 5.82, + "learning_rate": 3.2404196740828934e-05, + "loss": 1.1316, + "step": 26220 + }, + { + "epoch": 5.83, + "learning_rate": 3.240122032889352e-05, + "loss": 1.1281, + "step": 26230 + }, + { + "epoch": 5.83, + "learning_rate": 3.239824391695811e-05, + "loss": 1.1327, + "step": 26240 + }, + { + "epoch": 5.83, + "learning_rate": 3.2395267505022694e-05, + "loss": 1.1512, + "step": 26250 + }, + { + "epoch": 5.83, + "learning_rate": 3.239229109308729e-05, + "loss": 1.1423, + "step": 26260 + }, + { + "epoch": 5.83, + "learning_rate": 3.2389314681151875e-05, + "loss": 1.1442, + "step": 26270 + }, + { + "epoch": 5.84, + "learning_rate": 3.238633826921646e-05, + "loss": 1.1433, + "step": 26280 + }, + { + "epoch": 5.84, + "learning_rate": 3.238336185728105e-05, + "loss": 1.1488, + "step": 26290 + }, + { + "epoch": 5.84, + "learning_rate": 3.2380385445345636e-05, + "loss": 1.1273, + "step": 26300 + }, + { + "epoch": 5.84, + "learning_rate": 3.237740903341023e-05, + "loss": 1.1462, + "step": 26310 + }, + { + "epoch": 5.84, + "learning_rate": 3.2374432621474816e-05, + "loss": 1.1524, + "step": 26320 + }, + { + "epoch": 5.85, + "learning_rate": 3.23714562095394e-05, + "loss": 1.1447, + "step": 26330 + }, + { + "epoch": 5.85, + "learning_rate": 3.236847979760399e-05, + "loss": 1.1559, + "step": 26340 + }, + { + "epoch": 5.85, + "learning_rate": 3.236550338566858e-05, + "loss": 1.1414, + "step": 26350 + }, + { + "epoch": 5.85, + "learning_rate": 3.2362526973733164e-05, + "loss": 1.1408, + "step": 26360 + }, + { + "epoch": 5.86, + "learning_rate": 3.235955056179776e-05, + "loss": 1.1624, + "step": 26370 + }, + { + "epoch": 5.86, + "learning_rate": 3.2356574149862344e-05, + "loss": 1.1263, + "step": 26380 + }, + { + "epoch": 5.86, + "learning_rate": 3.235359773792693e-05, + "loss": 1.1386, + "step": 26390 + }, + { + "epoch": 5.86, + "learning_rate": 3.235062132599152e-05, + "loss": 1.1305, + "step": 26400 + }, + { + "epoch": 5.86, + "learning_rate": 3.234764491405611e-05, + "loss": 1.1382, + "step": 26410 + }, + { + "epoch": 5.87, + "learning_rate": 3.23446685021207e-05, + "loss": 1.1182, + "step": 26420 + }, + { + "epoch": 5.87, + "learning_rate": 3.2341692090185286e-05, + "loss": 1.1304, + "step": 26430 + }, + { + "epoch": 5.87, + "learning_rate": 3.233871567824987e-05, + "loss": 1.1304, + "step": 26440 + }, + { + "epoch": 5.87, + "learning_rate": 3.233573926631446e-05, + "loss": 1.1422, + "step": 26450 + }, + { + "epoch": 5.88, + "learning_rate": 3.2332762854379046e-05, + "loss": 1.144, + "step": 26460 + }, + { + "epoch": 5.88, + "learning_rate": 3.232978644244363e-05, + "loss": 1.1401, + "step": 26470 + }, + { + "epoch": 5.88, + "learning_rate": 3.232681003050823e-05, + "loss": 1.1505, + "step": 26480 + }, + { + "epoch": 5.88, + "learning_rate": 3.2323833618572814e-05, + "loss": 1.1402, + "step": 26490 + }, + { + "epoch": 5.88, + "learning_rate": 3.23208572066374e-05, + "loss": 1.148, + "step": 26500 + }, + { + "epoch": 5.89, + "learning_rate": 3.231788079470199e-05, + "loss": 1.1439, + "step": 26510 + }, + { + "epoch": 5.89, + "learning_rate": 3.231490438276658e-05, + "loss": 1.1443, + "step": 26520 + }, + { + "epoch": 5.89, + "learning_rate": 3.231192797083117e-05, + "loss": 1.1403, + "step": 26530 + }, + { + "epoch": 5.89, + "learning_rate": 3.2308951558895755e-05, + "loss": 1.1186, + "step": 26540 + }, + { + "epoch": 5.9, + "learning_rate": 3.230597514696034e-05, + "loss": 1.1331, + "step": 26550 + }, + { + "epoch": 5.9, + "learning_rate": 3.230299873502493e-05, + "loss": 1.144, + "step": 26560 + }, + { + "epoch": 5.9, + "learning_rate": 3.2300022323089516e-05, + "loss": 1.1367, + "step": 26570 + }, + { + "epoch": 5.9, + "learning_rate": 3.22970459111541e-05, + "loss": 1.1398, + "step": 26580 + }, + { + "epoch": 5.9, + "learning_rate": 3.2294069499218696e-05, + "loss": 1.1399, + "step": 26590 + }, + { + "epoch": 5.91, + "learning_rate": 3.229109308728328e-05, + "loss": 1.1486, + "step": 26600 + }, + { + "epoch": 5.91, + "learning_rate": 3.228811667534787e-05, + "loss": 1.1332, + "step": 26610 + }, + { + "epoch": 5.91, + "learning_rate": 3.228514026341246e-05, + "loss": 1.1355, + "step": 26620 + }, + { + "epoch": 5.91, + "learning_rate": 3.228216385147705e-05, + "loss": 1.1292, + "step": 26630 + }, + { + "epoch": 5.92, + "learning_rate": 3.227918743954164e-05, + "loss": 1.1338, + "step": 26640 + }, + { + "epoch": 5.92, + "learning_rate": 3.2276211027606225e-05, + "loss": 1.14, + "step": 26650 + }, + { + "epoch": 5.92, + "learning_rate": 3.227323461567081e-05, + "loss": 1.1515, + "step": 26660 + }, + { + "epoch": 5.92, + "learning_rate": 3.22702582037354e-05, + "loss": 1.133, + "step": 26670 + }, + { + "epoch": 5.92, + "learning_rate": 3.2267281791799985e-05, + "loss": 1.1358, + "step": 26680 + }, + { + "epoch": 5.93, + "learning_rate": 3.226430537986458e-05, + "loss": 1.1397, + "step": 26690 + }, + { + "epoch": 5.93, + "learning_rate": 3.2261328967929166e-05, + "loss": 1.1383, + "step": 26700 + }, + { + "epoch": 5.93, + "learning_rate": 3.225835255599375e-05, + "loss": 1.1395, + "step": 26710 + }, + { + "epoch": 5.93, + "learning_rate": 3.225537614405834e-05, + "loss": 1.141, + "step": 26720 + }, + { + "epoch": 5.94, + "learning_rate": 3.2252399732122927e-05, + "loss": 1.1373, + "step": 26730 + }, + { + "epoch": 5.94, + "learning_rate": 3.224942332018752e-05, + "loss": 1.1307, + "step": 26740 + }, + { + "epoch": 5.94, + "learning_rate": 3.22464469082521e-05, + "loss": 1.1406, + "step": 26750 + }, + { + "epoch": 5.94, + "learning_rate": 3.2243470496316694e-05, + "loss": 1.1412, + "step": 26760 + }, + { + "epoch": 5.94, + "learning_rate": 3.224049408438128e-05, + "loss": 1.1403, + "step": 26770 + }, + { + "epoch": 5.95, + "learning_rate": 3.223751767244587e-05, + "loss": 1.1446, + "step": 26780 + }, + { + "epoch": 5.95, + "learning_rate": 3.2234541260510455e-05, + "loss": 1.1498, + "step": 26790 + }, + { + "epoch": 5.95, + "learning_rate": 3.223156484857505e-05, + "loss": 1.1336, + "step": 26800 + }, + { + "epoch": 5.95, + "learning_rate": 3.2228588436639635e-05, + "loss": 1.1526, + "step": 26810 + }, + { + "epoch": 5.96, + "learning_rate": 3.222561202470422e-05, + "loss": 1.1347, + "step": 26820 + }, + { + "epoch": 5.96, + "learning_rate": 3.222263561276881e-05, + "loss": 1.1406, + "step": 26830 + }, + { + "epoch": 5.96, + "learning_rate": 3.22196592008334e-05, + "loss": 1.1319, + "step": 26840 + }, + { + "epoch": 5.96, + "learning_rate": 3.221668278889799e-05, + "loss": 1.1241, + "step": 26850 + }, + { + "epoch": 5.96, + "learning_rate": 3.221370637696257e-05, + "loss": 1.1377, + "step": 26860 + }, + { + "epoch": 5.97, + "learning_rate": 3.2210729965027163e-05, + "loss": 1.1366, + "step": 26870 + }, + { + "epoch": 5.97, + "learning_rate": 3.220775355309175e-05, + "loss": 1.127, + "step": 26880 + }, + { + "epoch": 5.97, + "learning_rate": 3.220477714115634e-05, + "loss": 1.1366, + "step": 26890 + }, + { + "epoch": 5.97, + "learning_rate": 3.2201800729220924e-05, + "loss": 1.1177, + "step": 26900 + }, + { + "epoch": 5.98, + "learning_rate": 3.219882431728552e-05, + "loss": 1.1317, + "step": 26910 + }, + { + "epoch": 5.98, + "learning_rate": 3.2195847905350105e-05, + "loss": 1.1244, + "step": 26920 + }, + { + "epoch": 5.98, + "learning_rate": 3.219287149341469e-05, + "loss": 1.1481, + "step": 26930 + }, + { + "epoch": 5.98, + "learning_rate": 3.218989508147928e-05, + "loss": 1.1319, + "step": 26940 + }, + { + "epoch": 5.98, + "learning_rate": 3.218691866954387e-05, + "loss": 1.1439, + "step": 26950 + }, + { + "epoch": 5.99, + "learning_rate": 3.218394225760845e-05, + "loss": 1.1291, + "step": 26960 + }, + { + "epoch": 5.99, + "learning_rate": 3.218096584567304e-05, + "loss": 1.1307, + "step": 26970 + }, + { + "epoch": 5.99, + "learning_rate": 3.217798943373763e-05, + "loss": 1.1189, + "step": 26980 + }, + { + "epoch": 5.99, + "learning_rate": 3.217501302180222e-05, + "loss": 1.1555, + "step": 26990 + }, + { + "epoch": 6.0, + "learning_rate": 3.217203660986681e-05, + "loss": 1.1393, + "step": 27000 + }, + { + "epoch": 6.0, + "learning_rate": 3.2169060197931394e-05, + "loss": 1.1371, + "step": 27010 + }, + { + "epoch": 6.0, + "eval_cer": 4.760683760683761, + "eval_loss": 1.125946044921875, + "eval_runtime": 5.4649, + "eval_samples_per_second": 1.83, + "eval_steps_per_second": 0.183, + "eval_wer": 1.0129870129870129, + "step": 27018 + }, + { + "epoch": 6.0, + "learning_rate": 3.216608378599599e-05, + "loss": 1.1301, + "step": 27020 + }, + { + "epoch": 6.0, + "learning_rate": 3.2163107374060574e-05, + "loss": 1.1482, + "step": 27030 + }, + { + "epoch": 6.0, + "learning_rate": 3.216013096212516e-05, + "loss": 1.1311, + "step": 27040 + }, + { + "epoch": 6.01, + "learning_rate": 3.215715455018975e-05, + "loss": 1.1424, + "step": 27050 + }, + { + "epoch": 6.01, + "learning_rate": 3.2154178138254335e-05, + "loss": 1.1393, + "step": 27060 + }, + { + "epoch": 6.01, + "learning_rate": 3.215120172631892e-05, + "loss": 1.1356, + "step": 27070 + }, + { + "epoch": 6.01, + "learning_rate": 3.2148225314383515e-05, + "loss": 1.1368, + "step": 27080 + }, + { + "epoch": 6.02, + "learning_rate": 3.21452489024481e-05, + "loss": 1.1262, + "step": 27090 + }, + { + "epoch": 6.02, + "learning_rate": 3.214227249051269e-05, + "loss": 1.143, + "step": 27100 + }, + { + "epoch": 6.02, + "learning_rate": 3.2139296078577276e-05, + "loss": 1.1437, + "step": 27110 + }, + { + "epoch": 6.02, + "learning_rate": 3.213631966664187e-05, + "loss": 1.1547, + "step": 27120 + }, + { + "epoch": 6.02, + "learning_rate": 3.213334325470646e-05, + "loss": 1.1247, + "step": 27130 + }, + { + "epoch": 6.03, + "learning_rate": 3.2130366842771044e-05, + "loss": 1.1319, + "step": 27140 + }, + { + "epoch": 6.03, + "learning_rate": 3.212739043083563e-05, + "loss": 1.1334, + "step": 27150 + }, + { + "epoch": 6.03, + "learning_rate": 3.212441401890022e-05, + "loss": 1.1383, + "step": 27160 + }, + { + "epoch": 6.03, + "learning_rate": 3.2121437606964804e-05, + "loss": 1.146, + "step": 27170 + }, + { + "epoch": 6.04, + "learning_rate": 3.211846119502939e-05, + "loss": 1.1315, + "step": 27180 + }, + { + "epoch": 6.04, + "learning_rate": 3.2115484783093985e-05, + "loss": 1.1288, + "step": 27190 + }, + { + "epoch": 6.04, + "learning_rate": 3.211250837115857e-05, + "loss": 1.1278, + "step": 27200 + }, + { + "epoch": 6.04, + "learning_rate": 3.210953195922316e-05, + "loss": 1.1257, + "step": 27210 + }, + { + "epoch": 6.04, + "learning_rate": 3.2106555547287746e-05, + "loss": 1.1387, + "step": 27220 + }, + { + "epoch": 6.05, + "learning_rate": 3.210357913535234e-05, + "loss": 1.1211, + "step": 27230 + }, + { + "epoch": 6.05, + "learning_rate": 3.2100602723416926e-05, + "loss": 1.13, + "step": 27240 + }, + { + "epoch": 6.05, + "learning_rate": 3.209762631148151e-05, + "loss": 1.1415, + "step": 27250 + }, + { + "epoch": 6.05, + "learning_rate": 3.20946498995461e-05, + "loss": 1.1381, + "step": 27260 + }, + { + "epoch": 6.06, + "learning_rate": 3.209167348761069e-05, + "loss": 1.1333, + "step": 27270 + }, + { + "epoch": 6.06, + "learning_rate": 3.2088697075675274e-05, + "loss": 1.1298, + "step": 27280 + }, + { + "epoch": 6.06, + "learning_rate": 3.208572066373986e-05, + "loss": 1.1334, + "step": 27290 + }, + { + "epoch": 6.06, + "learning_rate": 3.2082744251804454e-05, + "loss": 1.1482, + "step": 27300 + }, + { + "epoch": 6.06, + "learning_rate": 3.207976783986904e-05, + "loss": 1.1305, + "step": 27310 + }, + { + "epoch": 6.07, + "learning_rate": 3.207679142793363e-05, + "loss": 1.124, + "step": 27320 + }, + { + "epoch": 6.07, + "learning_rate": 3.2073815015998215e-05, + "loss": 1.1369, + "step": 27330 + }, + { + "epoch": 6.07, + "learning_rate": 3.207083860406281e-05, + "loss": 1.1349, + "step": 27340 + }, + { + "epoch": 6.07, + "learning_rate": 3.2067862192127396e-05, + "loss": 1.1315, + "step": 27350 + }, + { + "epoch": 6.08, + "learning_rate": 3.206488578019198e-05, + "loss": 1.1432, + "step": 27360 + }, + { + "epoch": 6.08, + "learning_rate": 3.206190936825657e-05, + "loss": 1.1341, + "step": 27370 + }, + { + "epoch": 6.08, + "learning_rate": 3.2058932956321156e-05, + "loss": 1.1281, + "step": 27380 + }, + { + "epoch": 6.08, + "learning_rate": 3.205595654438574e-05, + "loss": 1.1307, + "step": 27390 + }, + { + "epoch": 6.08, + "learning_rate": 3.205298013245033e-05, + "loss": 1.1433, + "step": 27400 + }, + { + "epoch": 6.09, + "learning_rate": 3.2050003720514924e-05, + "loss": 1.1423, + "step": 27410 + }, + { + "epoch": 6.09, + "learning_rate": 3.204702730857951e-05, + "loss": 1.1303, + "step": 27420 + }, + { + "epoch": 6.09, + "learning_rate": 3.20440508966441e-05, + "loss": 1.1338, + "step": 27430 + }, + { + "epoch": 6.09, + "learning_rate": 3.2041074484708684e-05, + "loss": 1.1475, + "step": 27440 + }, + { + "epoch": 6.1, + "learning_rate": 3.203809807277328e-05, + "loss": 1.1318, + "step": 27450 + }, + { + "epoch": 6.1, + "learning_rate": 3.2035121660837865e-05, + "loss": 1.1147, + "step": 27460 + }, + { + "epoch": 6.1, + "learning_rate": 3.203214524890245e-05, + "loss": 1.1318, + "step": 27470 + }, + { + "epoch": 6.1, + "learning_rate": 3.202916883696704e-05, + "loss": 1.1222, + "step": 27480 + }, + { + "epoch": 6.1, + "learning_rate": 3.2026192425031626e-05, + "loss": 1.1345, + "step": 27490 + }, + { + "epoch": 6.11, + "learning_rate": 3.202321601309621e-05, + "loss": 1.1214, + "step": 27500 + }, + { + "epoch": 6.11, + "learning_rate": 3.2020239601160806e-05, + "loss": 1.1415, + "step": 27510 + }, + { + "epoch": 6.11, + "learning_rate": 3.201726318922539e-05, + "loss": 1.158, + "step": 27520 + }, + { + "epoch": 6.11, + "learning_rate": 3.201428677728998e-05, + "loss": 1.1188, + "step": 27530 + }, + { + "epoch": 6.12, + "learning_rate": 3.201131036535457e-05, + "loss": 1.1458, + "step": 27540 + }, + { + "epoch": 6.12, + "learning_rate": 3.200833395341916e-05, + "loss": 1.1312, + "step": 27550 + }, + { + "epoch": 6.12, + "learning_rate": 3.200535754148375e-05, + "loss": 1.1247, + "step": 27560 + }, + { + "epoch": 6.12, + "learning_rate": 3.200238112954833e-05, + "loss": 1.139, + "step": 27570 + }, + { + "epoch": 6.12, + "learning_rate": 3.199940471761292e-05, + "loss": 1.1163, + "step": 27580 + }, + { + "epoch": 6.13, + "learning_rate": 3.199642830567751e-05, + "loss": 1.1281, + "step": 27590 + }, + { + "epoch": 6.13, + "learning_rate": 3.1993451893742095e-05, + "loss": 1.1336, + "step": 27600 + }, + { + "epoch": 6.13, + "learning_rate": 3.199047548180668e-05, + "loss": 1.1264, + "step": 27610 + }, + { + "epoch": 6.13, + "learning_rate": 3.1987499069871276e-05, + "loss": 1.1366, + "step": 27620 + }, + { + "epoch": 6.14, + "learning_rate": 3.198452265793586e-05, + "loss": 1.135, + "step": 27630 + }, + { + "epoch": 6.14, + "learning_rate": 3.198154624600045e-05, + "loss": 1.1409, + "step": 27640 + }, + { + "epoch": 6.14, + "learning_rate": 3.1978569834065036e-05, + "loss": 1.1304, + "step": 27650 + }, + { + "epoch": 6.14, + "learning_rate": 3.197559342212963e-05, + "loss": 1.1209, + "step": 27660 + }, + { + "epoch": 6.14, + "learning_rate": 3.197261701019422e-05, + "loss": 1.1341, + "step": 27670 + }, + { + "epoch": 6.15, + "learning_rate": 3.19696405982588e-05, + "loss": 1.1289, + "step": 27680 + }, + { + "epoch": 6.15, + "learning_rate": 3.196666418632339e-05, + "loss": 1.1226, + "step": 27690 + }, + { + "epoch": 6.15, + "learning_rate": 3.196368777438798e-05, + "loss": 1.1317, + "step": 27700 + }, + { + "epoch": 6.15, + "learning_rate": 3.1960711362452565e-05, + "loss": 1.1239, + "step": 27710 + }, + { + "epoch": 6.16, + "learning_rate": 3.195773495051715e-05, + "loss": 1.133, + "step": 27720 + }, + { + "epoch": 6.16, + "learning_rate": 3.1954758538581745e-05, + "loss": 1.1149, + "step": 27730 + }, + { + "epoch": 6.16, + "learning_rate": 3.195178212664633e-05, + "loss": 1.1384, + "step": 27740 + }, + { + "epoch": 6.16, + "learning_rate": 3.194880571471092e-05, + "loss": 1.1254, + "step": 27750 + }, + { + "epoch": 6.16, + "learning_rate": 3.1945829302775506e-05, + "loss": 1.1139, + "step": 27760 + }, + { + "epoch": 6.17, + "learning_rate": 3.19428528908401e-05, + "loss": 1.1411, + "step": 27770 + }, + { + "epoch": 6.17, + "learning_rate": 3.193987647890468e-05, + "loss": 1.12, + "step": 27780 + }, + { + "epoch": 6.17, + "learning_rate": 3.1936900066969267e-05, + "loss": 1.1392, + "step": 27790 + }, + { + "epoch": 6.17, + "learning_rate": 3.193392365503386e-05, + "loss": 1.1244, + "step": 27800 + }, + { + "epoch": 6.18, + "learning_rate": 3.193094724309845e-05, + "loss": 1.1241, + "step": 27810 + }, + { + "epoch": 6.18, + "learning_rate": 3.1927970831163034e-05, + "loss": 1.1403, + "step": 27820 + }, + { + "epoch": 6.18, + "learning_rate": 3.192499441922762e-05, + "loss": 1.1135, + "step": 27830 + }, + { + "epoch": 6.18, + "learning_rate": 3.1922018007292215e-05, + "loss": 1.1227, + "step": 27840 + }, + { + "epoch": 6.18, + "learning_rate": 3.19190415953568e-05, + "loss": 1.1214, + "step": 27850 + }, + { + "epoch": 6.19, + "learning_rate": 3.191606518342139e-05, + "loss": 1.1382, + "step": 27860 + }, + { + "epoch": 6.19, + "learning_rate": 3.1913088771485975e-05, + "loss": 1.1408, + "step": 27870 + }, + { + "epoch": 6.19, + "learning_rate": 3.191011235955056e-05, + "loss": 1.1362, + "step": 27880 + }, + { + "epoch": 6.19, + "learning_rate": 3.190713594761515e-05, + "loss": 1.1234, + "step": 27890 + }, + { + "epoch": 6.2, + "learning_rate": 3.190415953567974e-05, + "loss": 1.1354, + "step": 27900 + }, + { + "epoch": 6.2, + "learning_rate": 3.190118312374433e-05, + "loss": 1.139, + "step": 27910 + }, + { + "epoch": 6.2, + "learning_rate": 3.1898206711808917e-05, + "loss": 1.13, + "step": 27920 + }, + { + "epoch": 6.2, + "learning_rate": 3.1895230299873503e-05, + "loss": 1.1231, + "step": 27930 + }, + { + "epoch": 6.2, + "learning_rate": 3.18922538879381e-05, + "loss": 1.1323, + "step": 27940 + }, + { + "epoch": 6.21, + "learning_rate": 3.1889277476002684e-05, + "loss": 1.1189, + "step": 27950 + }, + { + "epoch": 6.21, + "learning_rate": 3.188630106406727e-05, + "loss": 1.1538, + "step": 27960 + }, + { + "epoch": 6.21, + "learning_rate": 3.188332465213186e-05, + "loss": 1.1212, + "step": 27970 + }, + { + "epoch": 6.21, + "learning_rate": 3.1880348240196445e-05, + "loss": 1.123, + "step": 27980 + }, + { + "epoch": 6.22, + "learning_rate": 3.187737182826103e-05, + "loss": 1.1369, + "step": 27990 + }, + { + "epoch": 6.22, + "learning_rate": 3.187439541632562e-05, + "loss": 1.1362, + "step": 28000 + }, + { + "epoch": 6.22, + "learning_rate": 3.187141900439021e-05, + "loss": 1.1132, + "step": 28010 + }, + { + "epoch": 6.22, + "learning_rate": 3.18684425924548e-05, + "loss": 1.1345, + "step": 28020 + }, + { + "epoch": 6.22, + "learning_rate": 3.1865466180519386e-05, + "loss": 1.1413, + "step": 28030 + }, + { + "epoch": 6.23, + "learning_rate": 3.186248976858397e-05, + "loss": 1.1457, + "step": 28040 + }, + { + "epoch": 6.23, + "learning_rate": 3.1859513356648567e-05, + "loss": 1.1291, + "step": 28050 + }, + { + "epoch": 6.23, + "learning_rate": 3.1856536944713153e-05, + "loss": 1.1315, + "step": 28060 + }, + { + "epoch": 6.23, + "learning_rate": 3.185356053277774e-05, + "loss": 1.1286, + "step": 28070 + }, + { + "epoch": 6.24, + "learning_rate": 3.185058412084233e-05, + "loss": 1.1374, + "step": 28080 + }, + { + "epoch": 6.24, + "learning_rate": 3.1847607708906914e-05, + "loss": 1.1317, + "step": 28090 + }, + { + "epoch": 6.24, + "learning_rate": 3.18446312969715e-05, + "loss": 1.119, + "step": 28100 + }, + { + "epoch": 6.24, + "learning_rate": 3.184165488503609e-05, + "loss": 1.1209, + "step": 28110 + }, + { + "epoch": 6.24, + "learning_rate": 3.183867847310068e-05, + "loss": 1.1202, + "step": 28120 + }, + { + "epoch": 6.25, + "learning_rate": 3.183570206116527e-05, + "loss": 1.1234, + "step": 28130 + }, + { + "epoch": 6.25, + "learning_rate": 3.1832725649229855e-05, + "loss": 1.1192, + "step": 28140 + }, + { + "epoch": 6.25, + "learning_rate": 3.182974923729444e-05, + "loss": 1.1176, + "step": 28150 + }, + { + "epoch": 6.25, + "learning_rate": 3.1826772825359036e-05, + "loss": 1.112, + "step": 28160 + }, + { + "epoch": 6.26, + "learning_rate": 3.182379641342362e-05, + "loss": 1.101, + "step": 28170 + }, + { + "epoch": 6.26, + "learning_rate": 3.182082000148821e-05, + "loss": 1.1227, + "step": 28180 + }, + { + "epoch": 6.26, + "learning_rate": 3.18178435895528e-05, + "loss": 1.1211, + "step": 28190 + }, + { + "epoch": 6.26, + "learning_rate": 3.1814867177617384e-05, + "loss": 1.1215, + "step": 28200 + }, + { + "epoch": 6.26, + "learning_rate": 3.181189076568197e-05, + "loss": 1.118, + "step": 28210 + }, + { + "epoch": 6.27, + "learning_rate": 3.180891435374656e-05, + "loss": 1.1197, + "step": 28220 + }, + { + "epoch": 6.27, + "learning_rate": 3.180593794181115e-05, + "loss": 1.1226, + "step": 28230 + }, + { + "epoch": 6.27, + "learning_rate": 3.180296152987574e-05, + "loss": 1.1229, + "step": 28240 + }, + { + "epoch": 6.27, + "learning_rate": 3.1799985117940325e-05, + "loss": 1.1402, + "step": 28250 + }, + { + "epoch": 6.28, + "learning_rate": 3.179700870600491e-05, + "loss": 1.1353, + "step": 28260 + }, + { + "epoch": 6.28, + "learning_rate": 3.1794032294069505e-05, + "loss": 1.138, + "step": 28270 + }, + { + "epoch": 6.28, + "learning_rate": 3.179105588213409e-05, + "loss": 1.141, + "step": 28280 + }, + { + "epoch": 6.28, + "learning_rate": 3.178807947019868e-05, + "loss": 1.1174, + "step": 28290 + }, + { + "epoch": 6.28, + "learning_rate": 3.1785103058263266e-05, + "loss": 1.1143, + "step": 28300 + }, + { + "epoch": 6.29, + "learning_rate": 3.178212664632785e-05, + "loss": 1.1352, + "step": 28310 + }, + { + "epoch": 6.29, + "learning_rate": 3.177915023439244e-05, + "loss": 1.0983, + "step": 28320 + }, + { + "epoch": 6.29, + "learning_rate": 3.1776173822457034e-05, + "loss": 1.13, + "step": 28330 + }, + { + "epoch": 6.29, + "learning_rate": 3.177319741052162e-05, + "loss": 1.1258, + "step": 28340 + }, + { + "epoch": 6.3, + "learning_rate": 3.177022099858621e-05, + "loss": 1.1249, + "step": 28350 + }, + { + "epoch": 6.3, + "learning_rate": 3.1767244586650794e-05, + "loss": 1.1021, + "step": 28360 + }, + { + "epoch": 6.3, + "learning_rate": 3.176426817471539e-05, + "loss": 1.1352, + "step": 28370 + }, + { + "epoch": 6.3, + "learning_rate": 3.1761291762779975e-05, + "loss": 1.1189, + "step": 28380 + }, + { + "epoch": 6.3, + "learning_rate": 3.1758315350844555e-05, + "loss": 1.1306, + "step": 28390 + }, + { + "epoch": 6.31, + "learning_rate": 3.175533893890915e-05, + "loss": 1.135, + "step": 28400 + }, + { + "epoch": 6.31, + "learning_rate": 3.1752362526973736e-05, + "loss": 1.135, + "step": 28410 + }, + { + "epoch": 6.31, + "learning_rate": 3.174938611503832e-05, + "loss": 1.1367, + "step": 28420 + }, + { + "epoch": 6.31, + "learning_rate": 3.174640970310291e-05, + "loss": 1.1263, + "step": 28430 + }, + { + "epoch": 6.32, + "learning_rate": 3.17434332911675e-05, + "loss": 1.1311, + "step": 28440 + }, + { + "epoch": 6.32, + "learning_rate": 3.174045687923209e-05, + "loss": 1.1259, + "step": 28450 + }, + { + "epoch": 6.32, + "learning_rate": 3.173748046729668e-05, + "loss": 1.1396, + "step": 28460 + }, + { + "epoch": 6.32, + "learning_rate": 3.1734504055361264e-05, + "loss": 1.1214, + "step": 28470 + }, + { + "epoch": 6.32, + "learning_rate": 3.173152764342586e-05, + "loss": 1.1353, + "step": 28480 + }, + { + "epoch": 6.33, + "learning_rate": 3.172855123149044e-05, + "loss": 1.1185, + "step": 28490 + }, + { + "epoch": 6.33, + "learning_rate": 3.1725574819555024e-05, + "loss": 1.1193, + "step": 28500 + }, + { + "epoch": 6.33, + "learning_rate": 3.172259840761962e-05, + "loss": 1.1177, + "step": 28510 + }, + { + "epoch": 6.33, + "learning_rate": 3.1719621995684205e-05, + "loss": 1.1321, + "step": 28520 + }, + { + "epoch": 6.34, + "learning_rate": 3.171664558374879e-05, + "loss": 1.1201, + "step": 28530 + }, + { + "epoch": 6.34, + "learning_rate": 3.171366917181338e-05, + "loss": 1.1253, + "step": 28540 + }, + { + "epoch": 6.34, + "learning_rate": 3.171069275987797e-05, + "loss": 1.1103, + "step": 28550 + }, + { + "epoch": 6.34, + "learning_rate": 3.170771634794256e-05, + "loss": 1.1142, + "step": 28560 + }, + { + "epoch": 6.34, + "learning_rate": 3.1704739936007146e-05, + "loss": 1.1084, + "step": 28570 + }, + { + "epoch": 6.35, + "learning_rate": 3.170176352407173e-05, + "loss": 1.1133, + "step": 28580 + }, + { + "epoch": 6.35, + "learning_rate": 3.169878711213633e-05, + "loss": 1.1138, + "step": 28590 + }, + { + "epoch": 6.35, + "learning_rate": 3.169581070020091e-05, + "loss": 1.1133, + "step": 28600 + }, + { + "epoch": 6.35, + "learning_rate": 3.1692834288265494e-05, + "loss": 1.1275, + "step": 28610 + }, + { + "epoch": 6.36, + "learning_rate": 3.168985787633009e-05, + "loss": 1.1252, + "step": 28620 + }, + { + "epoch": 6.36, + "learning_rate": 3.1686881464394674e-05, + "loss": 1.1296, + "step": 28630 + }, + { + "epoch": 6.36, + "learning_rate": 3.168390505245926e-05, + "loss": 1.1111, + "step": 28640 + }, + { + "epoch": 6.36, + "learning_rate": 3.168092864052385e-05, + "loss": 1.1226, + "step": 28650 + }, + { + "epoch": 6.36, + "learning_rate": 3.167795222858844e-05, + "loss": 1.1345, + "step": 28660 + }, + { + "epoch": 6.37, + "learning_rate": 3.167497581665303e-05, + "loss": 1.126, + "step": 28670 + }, + { + "epoch": 6.37, + "learning_rate": 3.1671999404717616e-05, + "loss": 1.1305, + "step": 28680 + }, + { + "epoch": 6.37, + "learning_rate": 3.16690229927822e-05, + "loss": 1.1345, + "step": 28690 + }, + { + "epoch": 6.37, + "learning_rate": 3.166604658084679e-05, + "loss": 1.1291, + "step": 28700 + }, + { + "epoch": 6.38, + "learning_rate": 3.1663070168911376e-05, + "loss": 1.1217, + "step": 28710 + }, + { + "epoch": 6.38, + "learning_rate": 3.166009375697597e-05, + "loss": 1.1175, + "step": 28720 + }, + { + "epoch": 6.38, + "learning_rate": 3.165711734504056e-05, + "loss": 1.1303, + "step": 28730 + }, + { + "epoch": 6.38, + "learning_rate": 3.1654140933105144e-05, + "loss": 1.1199, + "step": 28740 + }, + { + "epoch": 6.38, + "learning_rate": 3.165116452116973e-05, + "loss": 1.1226, + "step": 28750 + }, + { + "epoch": 6.39, + "learning_rate": 3.1648188109234324e-05, + "loss": 1.1394, + "step": 28760 + }, + { + "epoch": 6.39, + "learning_rate": 3.164521169729891e-05, + "loss": 1.1399, + "step": 28770 + }, + { + "epoch": 6.39, + "learning_rate": 3.16422352853635e-05, + "loss": 1.1155, + "step": 28780 + }, + { + "epoch": 6.39, + "learning_rate": 3.1639258873428085e-05, + "loss": 1.1284, + "step": 28790 + }, + { + "epoch": 6.4, + "learning_rate": 3.163628246149267e-05, + "loss": 1.1177, + "step": 28800 + }, + { + "epoch": 6.4, + "learning_rate": 3.163330604955726e-05, + "loss": 1.1489, + "step": 28810 + }, + { + "epoch": 6.4, + "learning_rate": 3.1630329637621846e-05, + "loss": 1.1236, + "step": 28820 + }, + { + "epoch": 6.4, + "learning_rate": 3.162735322568644e-05, + "loss": 1.1226, + "step": 28830 + }, + { + "epoch": 6.4, + "learning_rate": 3.1624376813751026e-05, + "loss": 1.1155, + "step": 28840 + }, + { + "epoch": 6.41, + "learning_rate": 3.162140040181561e-05, + "loss": 1.1312, + "step": 28850 + }, + { + "epoch": 6.41, + "learning_rate": 3.16184239898802e-05, + "loss": 1.1307, + "step": 28860 + }, + { + "epoch": 6.41, + "learning_rate": 3.1615447577944794e-05, + "loss": 1.1328, + "step": 28870 + }, + { + "epoch": 6.41, + "learning_rate": 3.161247116600938e-05, + "loss": 1.1212, + "step": 28880 + }, + { + "epoch": 6.42, + "learning_rate": 3.160949475407397e-05, + "loss": 1.1313, + "step": 28890 + }, + { + "epoch": 6.42, + "learning_rate": 3.1606518342138555e-05, + "loss": 1.1246, + "step": 28900 + }, + { + "epoch": 6.42, + "learning_rate": 3.160354193020314e-05, + "loss": 1.1133, + "step": 28910 + }, + { + "epoch": 6.42, + "learning_rate": 3.160056551826773e-05, + "loss": 1.134, + "step": 28920 + }, + { + "epoch": 6.42, + "learning_rate": 3.1597589106332315e-05, + "loss": 1.1238, + "step": 28930 + }, + { + "epoch": 6.43, + "learning_rate": 3.159461269439691e-05, + "loss": 1.141, + "step": 28940 + }, + { + "epoch": 6.43, + "learning_rate": 3.1591636282461496e-05, + "loss": 1.13, + "step": 28950 + }, + { + "epoch": 6.43, + "learning_rate": 3.158865987052608e-05, + "loss": 1.1321, + "step": 28960 + }, + { + "epoch": 6.43, + "learning_rate": 3.158568345859067e-05, + "loss": 1.1343, + "step": 28970 + }, + { + "epoch": 6.44, + "learning_rate": 3.158270704665526e-05, + "loss": 1.1032, + "step": 28980 + }, + { + "epoch": 6.44, + "learning_rate": 3.157973063471985e-05, + "loss": 1.1169, + "step": 28990 + }, + { + "epoch": 6.44, + "learning_rate": 3.157675422278444e-05, + "loss": 1.1137, + "step": 29000 + }, + { + "epoch": 6.44, + "learning_rate": 3.1573777810849024e-05, + "loss": 1.1316, + "step": 29010 + }, + { + "epoch": 6.44, + "learning_rate": 3.157080139891361e-05, + "loss": 1.1171, + "step": 29020 + }, + { + "epoch": 6.45, + "learning_rate": 3.15678249869782e-05, + "loss": 1.1375, + "step": 29030 + }, + { + "epoch": 6.45, + "learning_rate": 3.1564848575042785e-05, + "loss": 1.1128, + "step": 29040 + }, + { + "epoch": 6.45, + "learning_rate": 3.156187216310738e-05, + "loss": 1.1189, + "step": 29050 + }, + { + "epoch": 6.45, + "learning_rate": 3.1558895751171965e-05, + "loss": 1.1294, + "step": 29060 + }, + { + "epoch": 6.46, + "learning_rate": 3.155591933923655e-05, + "loss": 1.1471, + "step": 29070 + }, + { + "epoch": 6.46, + "learning_rate": 3.155294292730114e-05, + "loss": 1.1112, + "step": 29080 + }, + { + "epoch": 6.46, + "learning_rate": 3.154996651536573e-05, + "loss": 1.1197, + "step": 29090 + }, + { + "epoch": 6.46, + "learning_rate": 3.154699010343032e-05, + "loss": 1.1393, + "step": 29100 + }, + { + "epoch": 6.46, + "learning_rate": 3.154401369149491e-05, + "loss": 1.1133, + "step": 29110 + }, + { + "epoch": 6.47, + "learning_rate": 3.1541037279559494e-05, + "loss": 1.1222, + "step": 29120 + }, + { + "epoch": 6.47, + "learning_rate": 3.153806086762408e-05, + "loss": 1.1113, + "step": 29130 + }, + { + "epoch": 6.47, + "learning_rate": 3.153508445568867e-05, + "loss": 1.1236, + "step": 29140 + }, + { + "epoch": 6.47, + "learning_rate": 3.153210804375326e-05, + "loss": 1.1384, + "step": 29150 + }, + { + "epoch": 6.48, + "learning_rate": 3.152913163181785e-05, + "loss": 1.1239, + "step": 29160 + }, + { + "epoch": 6.48, + "learning_rate": 3.1526155219882435e-05, + "loss": 1.1304, + "step": 29170 + }, + { + "epoch": 6.48, + "learning_rate": 3.152317880794702e-05, + "loss": 1.1289, + "step": 29180 + }, + { + "epoch": 6.48, + "learning_rate": 3.1520202396011615e-05, + "loss": 1.121, + "step": 29190 + }, + { + "epoch": 6.48, + "learning_rate": 3.15172259840762e-05, + "loss": 1.1353, + "step": 29200 + }, + { + "epoch": 6.49, + "learning_rate": 3.151424957214078e-05, + "loss": 1.1376, + "step": 29210 + }, + { + "epoch": 6.49, + "learning_rate": 3.1511273160205376e-05, + "loss": 1.1226, + "step": 29220 + }, + { + "epoch": 6.49, + "learning_rate": 3.150829674826996e-05, + "loss": 1.1338, + "step": 29230 + }, + { + "epoch": 6.49, + "learning_rate": 3.150532033633455e-05, + "loss": 1.1265, + "step": 29240 + }, + { + "epoch": 6.5, + "learning_rate": 3.150234392439914e-05, + "loss": 1.1243, + "step": 29250 + }, + { + "epoch": 6.5, + "learning_rate": 3.149936751246373e-05, + "loss": 1.1345, + "step": 29260 + }, + { + "epoch": 6.5, + "learning_rate": 3.149639110052832e-05, + "loss": 1.1246, + "step": 29270 + }, + { + "epoch": 6.5, + "learning_rate": 3.1493414688592904e-05, + "loss": 1.1379, + "step": 29280 + }, + { + "epoch": 6.5, + "learning_rate": 3.149043827665749e-05, + "loss": 1.1341, + "step": 29290 + }, + { + "epoch": 6.51, + "learning_rate": 3.1487461864722085e-05, + "loss": 1.1309, + "step": 29300 + }, + { + "epoch": 6.51, + "learning_rate": 3.1484485452786665e-05, + "loss": 1.1342, + "step": 29310 + }, + { + "epoch": 6.51, + "learning_rate": 3.148150904085125e-05, + "loss": 1.1222, + "step": 29320 + }, + { + "epoch": 6.51, + "learning_rate": 3.1478532628915845e-05, + "loss": 1.1231, + "step": 29330 + }, + { + "epoch": 6.52, + "learning_rate": 3.147555621698043e-05, + "loss": 1.1441, + "step": 29340 + }, + { + "epoch": 6.52, + "learning_rate": 3.147257980504502e-05, + "loss": 1.1206, + "step": 29350 + }, + { + "epoch": 6.52, + "learning_rate": 3.1469603393109606e-05, + "loss": 1.1161, + "step": 29360 + }, + { + "epoch": 6.52, + "learning_rate": 3.14666269811742e-05, + "loss": 1.1315, + "step": 29370 + }, + { + "epoch": 6.52, + "learning_rate": 3.146365056923879e-05, + "loss": 1.1078, + "step": 29380 + }, + { + "epoch": 6.53, + "learning_rate": 3.1460674157303374e-05, + "loss": 1.118, + "step": 29390 + }, + { + "epoch": 6.53, + "learning_rate": 3.145769774536796e-05, + "loss": 1.1187, + "step": 29400 + }, + { + "epoch": 6.53, + "learning_rate": 3.1454721333432554e-05, + "loss": 1.1127, + "step": 29410 + }, + { + "epoch": 6.53, + "learning_rate": 3.1451744921497134e-05, + "loss": 1.1383, + "step": 29420 + }, + { + "epoch": 6.54, + "learning_rate": 3.144876850956172e-05, + "loss": 1.1189, + "step": 29430 + }, + { + "epoch": 6.54, + "learning_rate": 3.1445792097626315e-05, + "loss": 1.134, + "step": 29440 + }, + { + "epoch": 6.54, + "learning_rate": 3.14428156856909e-05, + "loss": 1.1223, + "step": 29450 + }, + { + "epoch": 6.54, + "learning_rate": 3.143983927375549e-05, + "loss": 1.1308, + "step": 29460 + }, + { + "epoch": 6.54, + "learning_rate": 3.1436862861820076e-05, + "loss": 1.119, + "step": 29470 + }, + { + "epoch": 6.55, + "learning_rate": 3.143388644988467e-05, + "loss": 1.1153, + "step": 29480 + }, + { + "epoch": 6.55, + "learning_rate": 3.1430910037949256e-05, + "loss": 1.1558, + "step": 29490 + }, + { + "epoch": 6.55, + "learning_rate": 3.142793362601384e-05, + "loss": 1.1243, + "step": 29500 + }, + { + "epoch": 6.55, + "learning_rate": 3.142495721407843e-05, + "loss": 1.1338, + "step": 29510 + }, + { + "epoch": 6.56, + "learning_rate": 3.142198080214302e-05, + "loss": 1.118, + "step": 29520 + }, + { + "epoch": 6.56, + "learning_rate": 3.1419004390207604e-05, + "loss": 1.1217, + "step": 29530 + }, + { + "epoch": 6.56, + "learning_rate": 3.14160279782722e-05, + "loss": 1.1374, + "step": 29540 + }, + { + "epoch": 6.56, + "learning_rate": 3.1413051566336784e-05, + "loss": 1.1226, + "step": 29550 + }, + { + "epoch": 6.56, + "learning_rate": 3.141007515440137e-05, + "loss": 1.1346, + "step": 29560 + }, + { + "epoch": 6.57, + "learning_rate": 3.140709874246596e-05, + "loss": 1.1196, + "step": 29570 + }, + { + "epoch": 6.57, + "learning_rate": 3.140412233053055e-05, + "loss": 1.1292, + "step": 29580 + }, + { + "epoch": 6.57, + "learning_rate": 3.140114591859514e-05, + "loss": 1.1318, + "step": 29590 + }, + { + "epoch": 6.57, + "learning_rate": 3.1398169506659726e-05, + "loss": 1.1225, + "step": 29600 + }, + { + "epoch": 6.58, + "learning_rate": 3.139519309472431e-05, + "loss": 1.122, + "step": 29610 + }, + { + "epoch": 6.58, + "learning_rate": 3.13922166827889e-05, + "loss": 1.1412, + "step": 29620 + }, + { + "epoch": 6.58, + "learning_rate": 3.1389240270853486e-05, + "loss": 1.1219, + "step": 29630 + }, + { + "epoch": 6.58, + "learning_rate": 3.138626385891807e-05, + "loss": 1.1357, + "step": 29640 + }, + { + "epoch": 6.58, + "learning_rate": 3.138328744698267e-05, + "loss": 1.1313, + "step": 29650 + }, + { + "epoch": 6.59, + "learning_rate": 3.1380311035047254e-05, + "loss": 1.1126, + "step": 29660 + }, + { + "epoch": 6.59, + "learning_rate": 3.137733462311184e-05, + "loss": 1.1343, + "step": 29670 + }, + { + "epoch": 6.59, + "learning_rate": 3.137435821117643e-05, + "loss": 1.1277, + "step": 29680 + }, + { + "epoch": 6.59, + "learning_rate": 3.137138179924102e-05, + "loss": 1.1217, + "step": 29690 + }, + { + "epoch": 6.6, + "learning_rate": 3.136840538730561e-05, + "loss": 1.1416, + "step": 29700 + }, + { + "epoch": 6.6, + "learning_rate": 3.1365428975370195e-05, + "loss": 1.117, + "step": 29710 + }, + { + "epoch": 6.6, + "learning_rate": 3.136245256343478e-05, + "loss": 1.1167, + "step": 29720 + }, + { + "epoch": 6.6, + "learning_rate": 3.135947615149937e-05, + "loss": 1.1483, + "step": 29730 + }, + { + "epoch": 6.6, + "learning_rate": 3.1356499739563956e-05, + "loss": 1.1145, + "step": 29740 + }, + { + "epoch": 6.61, + "learning_rate": 3.135352332762854e-05, + "loss": 1.1219, + "step": 29750 + }, + { + "epoch": 6.61, + "learning_rate": 3.1350546915693136e-05, + "loss": 1.1155, + "step": 29760 + }, + { + "epoch": 6.61, + "learning_rate": 3.134757050375772e-05, + "loss": 1.1266, + "step": 29770 + }, + { + "epoch": 6.61, + "learning_rate": 3.134459409182231e-05, + "loss": 1.1166, + "step": 29780 + }, + { + "epoch": 6.62, + "learning_rate": 3.13416176798869e-05, + "loss": 1.1224, + "step": 29790 + }, + { + "epoch": 6.62, + "learning_rate": 3.133864126795149e-05, + "loss": 1.1386, + "step": 29800 + }, + { + "epoch": 6.62, + "learning_rate": 3.133566485601608e-05, + "loss": 1.1232, + "step": 29810 + }, + { + "epoch": 6.62, + "learning_rate": 3.133268844408066e-05, + "loss": 1.126, + "step": 29820 + }, + { + "epoch": 6.62, + "learning_rate": 3.132971203214525e-05, + "loss": 1.1311, + "step": 29830 + }, + { + "epoch": 6.63, + "learning_rate": 3.132673562020984e-05, + "loss": 1.1323, + "step": 29840 + }, + { + "epoch": 6.63, + "learning_rate": 3.1323759208274425e-05, + "loss": 1.1349, + "step": 29850 + }, + { + "epoch": 6.63, + "learning_rate": 3.132078279633901e-05, + "loss": 1.1225, + "step": 29860 + }, + { + "epoch": 6.63, + "learning_rate": 3.1317806384403606e-05, + "loss": 1.1259, + "step": 29870 + }, + { + "epoch": 6.64, + "learning_rate": 3.131482997246819e-05, + "loss": 1.1296, + "step": 29880 + }, + { + "epoch": 6.64, + "learning_rate": 3.131185356053278e-05, + "loss": 1.1408, + "step": 29890 + }, + { + "epoch": 6.64, + "learning_rate": 3.1308877148597366e-05, + "loss": 1.1273, + "step": 29900 + }, + { + "epoch": 6.64, + "learning_rate": 3.130590073666196e-05, + "loss": 1.1156, + "step": 29910 + }, + { + "epoch": 6.64, + "learning_rate": 3.130292432472655e-05, + "loss": 1.131, + "step": 29920 + }, + { + "epoch": 6.65, + "learning_rate": 3.1299947912791134e-05, + "loss": 1.1199, + "step": 29930 + }, + { + "epoch": 6.65, + "learning_rate": 3.129697150085572e-05, + "loss": 1.1149, + "step": 29940 + }, + { + "epoch": 6.65, + "learning_rate": 3.129399508892031e-05, + "loss": 1.1407, + "step": 29950 + }, + { + "epoch": 6.65, + "learning_rate": 3.1291018676984895e-05, + "loss": 1.1223, + "step": 29960 + }, + { + "epoch": 6.66, + "learning_rate": 3.128804226504949e-05, + "loss": 1.1176, + "step": 29970 + }, + { + "epoch": 6.66, + "learning_rate": 3.1285065853114075e-05, + "loss": 1.1468, + "step": 29980 + }, + { + "epoch": 6.66, + "learning_rate": 3.128208944117866e-05, + "loss": 1.1408, + "step": 29990 + }, + { + "epoch": 6.66, + "learning_rate": 3.127911302924325e-05, + "loss": 1.1168, + "step": 30000 + }, + { + "epoch": 6.66, + "learning_rate": 3.127613661730784e-05, + "loss": 1.1361, + "step": 30010 + }, + { + "epoch": 6.67, + "learning_rate": 3.127316020537243e-05, + "loss": 1.1485, + "step": 30020 + }, + { + "epoch": 6.67, + "learning_rate": 3.127018379343701e-05, + "loss": 1.1246, + "step": 30030 + }, + { + "epoch": 6.67, + "learning_rate": 3.1267207381501603e-05, + "loss": 1.1108, + "step": 30040 + }, + { + "epoch": 6.67, + "learning_rate": 3.126423096956619e-05, + "loss": 1.1206, + "step": 30050 + }, + { + "epoch": 6.68, + "learning_rate": 3.126125455763078e-05, + "loss": 1.1317, + "step": 30060 + }, + { + "epoch": 6.68, + "learning_rate": 3.1258278145695364e-05, + "loss": 1.1298, + "step": 30070 + }, + { + "epoch": 6.68, + "learning_rate": 3.125530173375996e-05, + "loss": 1.1227, + "step": 30080 + }, + { + "epoch": 6.68, + "learning_rate": 3.1252325321824545e-05, + "loss": 1.1315, + "step": 30090 + }, + { + "epoch": 6.68, + "learning_rate": 3.124934890988913e-05, + "loss": 1.1173, + "step": 30100 + }, + { + "epoch": 6.69, + "learning_rate": 3.124637249795372e-05, + "loss": 1.1185, + "step": 30110 + }, + { + "epoch": 6.69, + "learning_rate": 3.124339608601831e-05, + "loss": 1.1294, + "step": 30120 + }, + { + "epoch": 6.69, + "learning_rate": 3.124041967408289e-05, + "loss": 1.1248, + "step": 30130 + }, + { + "epoch": 6.69, + "learning_rate": 3.123744326214748e-05, + "loss": 1.1358, + "step": 30140 + }, + { + "epoch": 6.7, + "learning_rate": 3.123446685021207e-05, + "loss": 1.1086, + "step": 30150 + }, + { + "epoch": 6.7, + "learning_rate": 3.123149043827666e-05, + "loss": 1.1317, + "step": 30160 + }, + { + "epoch": 6.7, + "learning_rate": 3.122851402634125e-05, + "loss": 1.1362, + "step": 30170 + }, + { + "epoch": 6.7, + "learning_rate": 3.1225537614405834e-05, + "loss": 1.1352, + "step": 30180 + }, + { + "epoch": 6.7, + "learning_rate": 3.122256120247043e-05, + "loss": 1.1316, + "step": 30190 + }, + { + "epoch": 6.71, + "learning_rate": 3.1219584790535014e-05, + "loss": 1.137, + "step": 30200 + }, + { + "epoch": 6.71, + "learning_rate": 3.12166083785996e-05, + "loss": 1.1266, + "step": 30210 + }, + { + "epoch": 6.71, + "learning_rate": 3.121363196666419e-05, + "loss": 1.1364, + "step": 30220 + }, + { + "epoch": 6.71, + "learning_rate": 3.1210655554728775e-05, + "loss": 1.1388, + "step": 30230 + }, + { + "epoch": 6.72, + "learning_rate": 3.120767914279336e-05, + "loss": 1.1174, + "step": 30240 + }, + { + "epoch": 6.72, + "learning_rate": 3.120470273085795e-05, + "loss": 1.13, + "step": 30250 + }, + { + "epoch": 6.72, + "learning_rate": 3.120172631892254e-05, + "loss": 1.1159, + "step": 30260 + }, + { + "epoch": 6.72, + "learning_rate": 3.119874990698713e-05, + "loss": 1.1228, + "step": 30270 + }, + { + "epoch": 6.72, + "learning_rate": 3.1195773495051716e-05, + "loss": 1.1107, + "step": 30280 + }, + { + "epoch": 6.73, + "learning_rate": 3.11927970831163e-05, + "loss": 1.1429, + "step": 30290 + }, + { + "epoch": 6.73, + "learning_rate": 3.11898206711809e-05, + "loss": 1.1291, + "step": 30300 + }, + { + "epoch": 6.73, + "learning_rate": 3.1186844259245484e-05, + "loss": 1.1276, + "step": 30310 + }, + { + "epoch": 6.73, + "learning_rate": 3.118386784731007e-05, + "loss": 1.1422, + "step": 30320 + }, + { + "epoch": 6.74, + "learning_rate": 3.118089143537466e-05, + "loss": 1.1201, + "step": 30330 + }, + { + "epoch": 6.74, + "learning_rate": 3.1177915023439244e-05, + "loss": 1.1284, + "step": 30340 + }, + { + "epoch": 6.74, + "learning_rate": 3.117493861150383e-05, + "loss": 1.1268, + "step": 30350 + }, + { + "epoch": 6.74, + "learning_rate": 3.1171962199568425e-05, + "loss": 1.1335, + "step": 30360 + }, + { + "epoch": 6.74, + "learning_rate": 3.116898578763301e-05, + "loss": 1.1319, + "step": 30370 + }, + { + "epoch": 6.75, + "learning_rate": 3.11660093756976e-05, + "loss": 1.1098, + "step": 30380 + }, + { + "epoch": 6.75, + "learning_rate": 3.1163032963762186e-05, + "loss": 1.1225, + "step": 30390 + }, + { + "epoch": 6.75, + "learning_rate": 3.116005655182678e-05, + "loss": 1.1228, + "step": 30400 + }, + { + "epoch": 6.75, + "learning_rate": 3.1157080139891366e-05, + "loss": 1.1335, + "step": 30410 + }, + { + "epoch": 6.76, + "learning_rate": 3.115410372795595e-05, + "loss": 1.1154, + "step": 30420 + }, + { + "epoch": 6.76, + "learning_rate": 3.115112731602054e-05, + "loss": 1.1265, + "step": 30430 + }, + { + "epoch": 6.76, + "learning_rate": 3.114815090408513e-05, + "loss": 1.1424, + "step": 30440 + }, + { + "epoch": 6.76, + "learning_rate": 3.1145174492149714e-05, + "loss": 1.1253, + "step": 30450 + }, + { + "epoch": 6.76, + "learning_rate": 3.11421980802143e-05, + "loss": 1.121, + "step": 30460 + }, + { + "epoch": 6.77, + "learning_rate": 3.1139221668278894e-05, + "loss": 1.1141, + "step": 30470 + }, + { + "epoch": 6.77, + "learning_rate": 3.113624525634348e-05, + "loss": 1.1222, + "step": 30480 + }, + { + "epoch": 6.77, + "learning_rate": 3.113326884440807e-05, + "loss": 1.123, + "step": 30490 + }, + { + "epoch": 6.77, + "learning_rate": 3.1130292432472655e-05, + "loss": 1.1235, + "step": 30500 + }, + { + "epoch": 6.78, + "learning_rate": 3.112731602053725e-05, + "loss": 1.1258, + "step": 30510 + }, + { + "epoch": 6.78, + "learning_rate": 3.1124339608601836e-05, + "loss": 1.1138, + "step": 30520 + }, + { + "epoch": 6.78, + "learning_rate": 3.112136319666642e-05, + "loss": 1.1194, + "step": 30530 + }, + { + "epoch": 6.78, + "learning_rate": 3.111838678473101e-05, + "loss": 1.1051, + "step": 30540 + }, + { + "epoch": 6.78, + "learning_rate": 3.1115410372795596e-05, + "loss": 1.1307, + "step": 30550 + }, + { + "epoch": 6.79, + "learning_rate": 3.111243396086018e-05, + "loss": 1.131, + "step": 30560 + }, + { + "epoch": 6.79, + "learning_rate": 3.110945754892477e-05, + "loss": 1.1211, + "step": 30570 + }, + { + "epoch": 6.79, + "learning_rate": 3.1106481136989364e-05, + "loss": 1.1225, + "step": 30580 + }, + { + "epoch": 6.79, + "learning_rate": 3.110350472505395e-05, + "loss": 1.1164, + "step": 30590 + }, + { + "epoch": 6.8, + "learning_rate": 3.110052831311854e-05, + "loss": 1.1258, + "step": 30600 + }, + { + "epoch": 6.8, + "learning_rate": 3.1097551901183124e-05, + "loss": 1.1271, + "step": 30610 + }, + { + "epoch": 6.8, + "learning_rate": 3.109457548924772e-05, + "loss": 1.1307, + "step": 30620 + }, + { + "epoch": 6.8, + "learning_rate": 3.1091599077312305e-05, + "loss": 1.1249, + "step": 30630 + }, + { + "epoch": 6.8, + "learning_rate": 3.108862266537689e-05, + "loss": 1.1296, + "step": 30640 + }, + { + "epoch": 6.81, + "learning_rate": 3.108564625344148e-05, + "loss": 1.143, + "step": 30650 + }, + { + "epoch": 6.81, + "learning_rate": 3.1082669841506066e-05, + "loss": 1.1116, + "step": 30660 + }, + { + "epoch": 6.81, + "learning_rate": 3.107969342957065e-05, + "loss": 1.1189, + "step": 30670 + }, + { + "epoch": 6.81, + "learning_rate": 3.107671701763524e-05, + "loss": 1.1131, + "step": 30680 + }, + { + "epoch": 6.82, + "learning_rate": 3.107374060569983e-05, + "loss": 1.124, + "step": 30690 + }, + { + "epoch": 6.82, + "learning_rate": 3.107076419376442e-05, + "loss": 1.1301, + "step": 30700 + }, + { + "epoch": 6.82, + "learning_rate": 3.106778778182901e-05, + "loss": 1.1325, + "step": 30710 + }, + { + "epoch": 6.82, + "learning_rate": 3.1064811369893594e-05, + "loss": 1.1236, + "step": 30720 + }, + { + "epoch": 6.82, + "learning_rate": 3.106183495795819e-05, + "loss": 1.1327, + "step": 30730 + }, + { + "epoch": 6.83, + "learning_rate": 3.1058858546022774e-05, + "loss": 1.1263, + "step": 30740 + }, + { + "epoch": 6.83, + "learning_rate": 3.105588213408736e-05, + "loss": 1.1412, + "step": 30750 + }, + { + "epoch": 6.83, + "learning_rate": 3.105290572215195e-05, + "loss": 1.1339, + "step": 30760 + }, + { + "epoch": 6.83, + "learning_rate": 3.1049929310216535e-05, + "loss": 1.1069, + "step": 30770 + }, + { + "epoch": 6.84, + "learning_rate": 3.104695289828112e-05, + "loss": 1.1239, + "step": 30780 + }, + { + "epoch": 6.84, + "learning_rate": 3.1043976486345716e-05, + "loss": 1.1165, + "step": 30790 + }, + { + "epoch": 6.84, + "learning_rate": 3.10410000744103e-05, + "loss": 1.1249, + "step": 30800 + }, + { + "epoch": 6.84, + "learning_rate": 3.103802366247489e-05, + "loss": 1.1207, + "step": 30810 + }, + { + "epoch": 6.84, + "learning_rate": 3.1035047250539476e-05, + "loss": 1.1202, + "step": 30820 + }, + { + "epoch": 6.85, + "learning_rate": 3.103207083860407e-05, + "loss": 1.1311, + "step": 30830 + }, + { + "epoch": 6.85, + "learning_rate": 3.102909442666866e-05, + "loss": 1.1298, + "step": 30840 + }, + { + "epoch": 6.85, + "learning_rate": 3.102611801473324e-05, + "loss": 1.1296, + "step": 30850 + }, + { + "epoch": 6.85, + "learning_rate": 3.102314160279783e-05, + "loss": 1.1194, + "step": 30860 + }, + { + "epoch": 6.86, + "learning_rate": 3.102016519086242e-05, + "loss": 1.0913, + "step": 30870 + }, + { + "epoch": 6.86, + "learning_rate": 3.1017188778927005e-05, + "loss": 1.1, + "step": 30880 + }, + { + "epoch": 6.86, + "learning_rate": 3.101421236699159e-05, + "loss": 1.1321, + "step": 30890 + }, + { + "epoch": 6.86, + "learning_rate": 3.1011235955056185e-05, + "loss": 1.1238, + "step": 30900 + }, + { + "epoch": 6.86, + "learning_rate": 3.100825954312077e-05, + "loss": 1.107, + "step": 30910 + }, + { + "epoch": 6.87, + "learning_rate": 3.100528313118536e-05, + "loss": 1.1029, + "step": 30920 + }, + { + "epoch": 6.87, + "learning_rate": 3.1002306719249946e-05, + "loss": 1.1388, + "step": 30930 + }, + { + "epoch": 6.87, + "learning_rate": 3.099933030731454e-05, + "loss": 1.1106, + "step": 30940 + }, + { + "epoch": 6.87, + "learning_rate": 3.099635389537912e-05, + "loss": 1.1046, + "step": 30950 + }, + { + "epoch": 6.88, + "learning_rate": 3.0993377483443707e-05, + "loss": 1.1213, + "step": 30960 + }, + { + "epoch": 6.88, + "learning_rate": 3.09904010715083e-05, + "loss": 1.1162, + "step": 30970 + }, + { + "epoch": 6.88, + "learning_rate": 3.098742465957289e-05, + "loss": 1.1194, + "step": 30980 + }, + { + "epoch": 6.88, + "learning_rate": 3.0984448247637474e-05, + "loss": 1.1286, + "step": 30990 + }, + { + "epoch": 6.88, + "learning_rate": 3.098147183570206e-05, + "loss": 1.1218, + "step": 31000 + }, + { + "epoch": 6.89, + "learning_rate": 3.0978495423766655e-05, + "loss": 1.1222, + "step": 31010 + }, + { + "epoch": 6.89, + "learning_rate": 3.097551901183124e-05, + "loss": 1.1201, + "step": 31020 + }, + { + "epoch": 6.89, + "learning_rate": 3.097254259989583e-05, + "loss": 1.1404, + "step": 31030 + }, + { + "epoch": 6.89, + "learning_rate": 3.0969566187960415e-05, + "loss": 1.1062, + "step": 31040 + }, + { + "epoch": 6.9, + "learning_rate": 3.0966589776025e-05, + "loss": 1.1231, + "step": 31050 + }, + { + "epoch": 6.9, + "learning_rate": 3.096361336408959e-05, + "loss": 1.1253, + "step": 31060 + }, + { + "epoch": 6.9, + "learning_rate": 3.096063695215418e-05, + "loss": 1.1078, + "step": 31070 + }, + { + "epoch": 6.9, + "learning_rate": 3.095766054021877e-05, + "loss": 1.12, + "step": 31080 + }, + { + "epoch": 6.9, + "learning_rate": 3.0954684128283357e-05, + "loss": 1.1208, + "step": 31090 + }, + { + "epoch": 6.91, + "learning_rate": 3.0951707716347943e-05, + "loss": 1.1257, + "step": 31100 + }, + { + "epoch": 6.91, + "learning_rate": 3.094873130441253e-05, + "loss": 1.1172, + "step": 31110 + }, + { + "epoch": 6.91, + "learning_rate": 3.0945754892477124e-05, + "loss": 1.1225, + "step": 31120 + }, + { + "epoch": 6.91, + "learning_rate": 3.094277848054171e-05, + "loss": 1.1278, + "step": 31130 + }, + { + "epoch": 6.92, + "learning_rate": 3.09398020686063e-05, + "loss": 1.113, + "step": 31140 + }, + { + "epoch": 6.92, + "learning_rate": 3.0936825656670885e-05, + "loss": 1.1259, + "step": 31150 + }, + { + "epoch": 6.92, + "learning_rate": 3.093384924473547e-05, + "loss": 1.1201, + "step": 31160 + }, + { + "epoch": 6.92, + "learning_rate": 3.093087283280006e-05, + "loss": 1.1239, + "step": 31170 + }, + { + "epoch": 6.92, + "learning_rate": 3.092789642086465e-05, + "loss": 1.1115, + "step": 31180 + }, + { + "epoch": 6.93, + "learning_rate": 3.092492000892924e-05, + "loss": 1.1187, + "step": 31190 + }, + { + "epoch": 6.93, + "learning_rate": 3.0921943596993826e-05, + "loss": 1.1173, + "step": 31200 + }, + { + "epoch": 6.93, + "learning_rate": 3.091896718505841e-05, + "loss": 1.1202, + "step": 31210 + }, + { + "epoch": 6.93, + "learning_rate": 3.0915990773123007e-05, + "loss": 1.1244, + "step": 31220 + }, + { + "epoch": 6.94, + "learning_rate": 3.0913014361187593e-05, + "loss": 1.1265, + "step": 31230 + }, + { + "epoch": 6.94, + "learning_rate": 3.091003794925218e-05, + "loss": 1.1279, + "step": 31240 + }, + { + "epoch": 6.94, + "learning_rate": 3.090706153731677e-05, + "loss": 1.1164, + "step": 31250 + }, + { + "epoch": 6.94, + "learning_rate": 3.0904085125381354e-05, + "loss": 1.1329, + "step": 31260 + }, + { + "epoch": 6.94, + "learning_rate": 3.090110871344594e-05, + "loss": 1.1204, + "step": 31270 + }, + { + "epoch": 6.95, + "learning_rate": 3.089813230151053e-05, + "loss": 1.0962, + "step": 31280 + }, + { + "epoch": 6.95, + "learning_rate": 3.089515588957512e-05, + "loss": 1.1207, + "step": 31290 + }, + { + "epoch": 6.95, + "learning_rate": 3.089217947763971e-05, + "loss": 1.1346, + "step": 31300 + }, + { + "epoch": 6.95, + "learning_rate": 3.0889203065704295e-05, + "loss": 1.1132, + "step": 31310 + }, + { + "epoch": 6.96, + "learning_rate": 3.088622665376888e-05, + "loss": 1.1248, + "step": 31320 + }, + { + "epoch": 6.96, + "learning_rate": 3.0883250241833476e-05, + "loss": 1.1148, + "step": 31330 + }, + { + "epoch": 6.96, + "learning_rate": 3.088027382989806e-05, + "loss": 1.1305, + "step": 31340 + }, + { + "epoch": 6.96, + "learning_rate": 3.087729741796265e-05, + "loss": 1.133, + "step": 31350 + }, + { + "epoch": 6.96, + "learning_rate": 3.087432100602724e-05, + "loss": 1.1272, + "step": 31360 + }, + { + "epoch": 6.97, + "learning_rate": 3.0871344594091824e-05, + "loss": 1.1, + "step": 31370 + }, + { + "epoch": 6.97, + "learning_rate": 3.086836818215641e-05, + "loss": 1.1141, + "step": 31380 + }, + { + "epoch": 6.97, + "learning_rate": 3.0865391770221e-05, + "loss": 1.1087, + "step": 31390 + }, + { + "epoch": 6.97, + "learning_rate": 3.086241535828559e-05, + "loss": 1.0915, + "step": 31400 + }, + { + "epoch": 6.98, + "learning_rate": 3.085943894635018e-05, + "loss": 1.1273, + "step": 31410 + }, + { + "epoch": 6.98, + "learning_rate": 3.0856462534414765e-05, + "loss": 1.1311, + "step": 31420 + }, + { + "epoch": 6.98, + "learning_rate": 3.085348612247935e-05, + "loss": 1.1239, + "step": 31430 + }, + { + "epoch": 6.98, + "learning_rate": 3.0850509710543945e-05, + "loss": 1.0923, + "step": 31440 + }, + { + "epoch": 6.98, + "learning_rate": 3.084753329860853e-05, + "loss": 1.1282, + "step": 31450 + }, + { + "epoch": 6.99, + "learning_rate": 3.084455688667312e-05, + "loss": 1.1237, + "step": 31460 + }, + { + "epoch": 6.99, + "learning_rate": 3.0841580474737706e-05, + "loss": 1.1136, + "step": 31470 + }, + { + "epoch": 6.99, + "learning_rate": 3.083860406280229e-05, + "loss": 1.1216, + "step": 31480 + }, + { + "epoch": 6.99, + "learning_rate": 3.083562765086688e-05, + "loss": 1.1267, + "step": 31490 + }, + { + "epoch": 7.0, + "learning_rate": 3.0832651238931474e-05, + "loss": 1.1189, + "step": 31500 + }, + { + "epoch": 7.0, + "learning_rate": 3.082967482699606e-05, + "loss": 1.1188, + "step": 31510 + }, + { + "epoch": 7.0, + "learning_rate": 3.082669841506065e-05, + "loss": 1.1155, + "step": 31520 + }, + { + "epoch": 7.0, + "eval_cer": 4.764957264957265, + "eval_loss": 1.1188170909881592, + "eval_runtime": 5.4262, + "eval_samples_per_second": 1.843, + "eval_steps_per_second": 0.184, + "eval_wer": 1.0129870129870129, + "step": 31521 + } + ], + "max_steps": 135090, + "num_train_epochs": 30, + "total_flos": 1.1077976360516271e+21, + "trial_name": null, + "trial_params": null +}