{ "best_metric": 0.03287291918282044, "best_model_checkpoint": "00_medieval/v2_medieval_escriptorium_masks/models/checkpoint-87000", "epoch": 19.9244622037331, "eval_steps": 500, "global_step": 91000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.6e-06, "loss": 8.5577, "step": 50 }, { "epoch": 0.02, "learning_rate": 9.600000000000001e-06, "loss": 5.1307, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.4599999999999999e-05, "loss": 3.9933, "step": 150 }, { "epoch": 0.04, "learning_rate": 1.9600000000000002e-05, "loss": 3.5669, "step": 200 }, { "epoch": 0.05, "learning_rate": 2.46e-05, "loss": 3.3447, "step": 250 }, { "epoch": 0.07, "learning_rate": 2.96e-05, "loss": 3.1734, "step": 300 }, { "epoch": 0.08, "learning_rate": 3.45e-05, "loss": 3.1923, "step": 350 }, { "epoch": 0.09, "learning_rate": 3.9500000000000005e-05, "loss": 3.094, "step": 400 }, { "epoch": 0.1, "learning_rate": 4.4500000000000004e-05, "loss": 3.1098, "step": 450 }, { "epoch": 0.11, "learning_rate": 4.9500000000000004e-05, "loss": 3.077, "step": 500 }, { "epoch": 0.11, "eval_acc": 0.020410816430003795, "eval_cer": 0.5912892888206028, "eval_loss": 2.8952395915985107, "eval_runtime": 4854.9711, "eval_samples_per_second": 1.584, "eval_steps_per_second": 0.198, "step": 500 }, { "epoch": 0.12, "learning_rate": 4.997523117569353e-05, "loss": 3.0275, "step": 550 }, { "epoch": 0.13, "learning_rate": 4.994771025979745e-05, "loss": 2.8246, "step": 600 }, { "epoch": 0.14, "learning_rate": 4.992018934390137e-05, "loss": 2.6547, "step": 650 }, { "epoch": 0.15, "learning_rate": 4.9892668428005284e-05, "loss": 2.562, "step": 700 }, { "epoch": 0.16, "learning_rate": 4.986514751210921e-05, "loss": 2.4166, "step": 750 }, { "epoch": 0.18, "learning_rate": 4.983762659621312e-05, "loss": 2.3276, "step": 800 }, { "epoch": 0.19, "learning_rate": 4.981010568031704e-05, "loss": 2.2942, "step": 850 }, { "epoch": 0.2, "learning_rate": 4.9782584764420966e-05, "loss": 2.2679, "step": 900 }, { "epoch": 0.21, "learning_rate": 4.975506384852488e-05, "loss": 2.2626, "step": 950 }, { "epoch": 0.22, "learning_rate": 4.97275429326288e-05, "loss": 2.1657, "step": 1000 }, { "epoch": 0.22, "eval_acc": 0.025871034838030285, "eval_cer": 0.4587138894788984, "eval_loss": 1.9457383155822754, "eval_runtime": 3670.9235, "eval_samples_per_second": 2.095, "eval_steps_per_second": 0.262, "step": 1000 }, { "epoch": 0.23, "learning_rate": 4.970002201673272e-05, "loss": 2.0625, "step": 1050 }, { "epoch": 0.24, "learning_rate": 4.9672501100836635e-05, "loss": 2.0504, "step": 1100 }, { "epoch": 0.25, "learning_rate": 4.964498018494056e-05, "loss": 1.9837, "step": 1150 }, { "epoch": 0.26, "learning_rate": 4.9617459269044476e-05, "loss": 1.9131, "step": 1200 }, { "epoch": 0.27, "learning_rate": 4.958993835314839e-05, "loss": 1.8233, "step": 1250 }, { "epoch": 0.28, "learning_rate": 4.956241743725231e-05, "loss": 1.8375, "step": 1300 }, { "epoch": 0.3, "learning_rate": 4.9534896521356234e-05, "loss": 1.8702, "step": 1350 }, { "epoch": 0.31, "learning_rate": 4.950737560546015e-05, "loss": 1.7618, "step": 1400 }, { "epoch": 0.32, "learning_rate": 4.947985468956407e-05, "loss": 1.7752, "step": 1450 }, { "epoch": 0.33, "learning_rate": 4.945233377366799e-05, "loss": 1.7533, "step": 1500 }, { "epoch": 0.33, "eval_acc": 0.04017160685905205, "eval_cer": 0.311746219761581, "eval_loss": 1.5871385335922241, "eval_runtime": 2919.4107, "eval_samples_per_second": 2.635, "eval_steps_per_second": 0.33, "step": 1500 }, { "epoch": 0.34, "learning_rate": 4.942481285777191e-05, "loss": 1.6499, "step": 1550 }, { "epoch": 0.35, "learning_rate": 4.939729194187583e-05, "loss": 1.646, "step": 1600 }, { "epoch": 0.36, "learning_rate": 4.936977102597975e-05, "loss": 1.5978, "step": 1650 }, { "epoch": 0.37, "learning_rate": 4.934225011008367e-05, "loss": 1.6438, "step": 1700 }, { "epoch": 0.38, "learning_rate": 4.9314729194187585e-05, "loss": 1.6361, "step": 1750 }, { "epoch": 0.39, "learning_rate": 4.92872082782915e-05, "loss": 1.5909, "step": 1800 }, { "epoch": 0.41, "learning_rate": 4.9259687362395426e-05, "loss": 1.5129, "step": 1850 }, { "epoch": 0.42, "learning_rate": 4.923216644649934e-05, "loss": 1.5617, "step": 1900 }, { "epoch": 0.43, "learning_rate": 4.920464553060326e-05, "loss": 1.4992, "step": 1950 }, { "epoch": 0.44, "learning_rate": 4.9177124614707185e-05, "loss": 1.4751, "step": 2000 }, { "epoch": 0.44, "eval_acc": 0.05200208007644278, "eval_cer": 0.26249498061237303, "eval_loss": 1.336438536643982, "eval_runtime": 2734.4386, "eval_samples_per_second": 2.813, "eval_steps_per_second": 0.352, "step": 2000 }, { "epoch": 0.45, "learning_rate": 4.9149603698811095e-05, "loss": 1.4507, "step": 2050 }, { "epoch": 0.46, "learning_rate": 4.912208278291502e-05, "loss": 1.4969, "step": 2100 }, { "epoch": 0.47, "learning_rate": 4.9094561867018936e-05, "loss": 1.424, "step": 2150 }, { "epoch": 0.48, "learning_rate": 4.906704095112285e-05, "loss": 1.4178, "step": 2200 }, { "epoch": 0.49, "learning_rate": 4.903952003522678e-05, "loss": 1.4282, "step": 2250 }, { "epoch": 0.5, "learning_rate": 4.9011999119330694e-05, "loss": 1.331, "step": 2300 }, { "epoch": 0.51, "learning_rate": 4.898447820343461e-05, "loss": 1.3595, "step": 2350 }, { "epoch": 0.53, "learning_rate": 4.895695728753853e-05, "loss": 1.3561, "step": 2400 }, { "epoch": 0.54, "learning_rate": 4.892943637164245e-05, "loss": 1.368, "step": 2450 }, { "epoch": 0.55, "learning_rate": 4.890191545574637e-05, "loss": 1.3775, "step": 2500 }, { "epoch": 0.55, "eval_acc": 0.05564222568179378, "eval_cer": 0.3026113218780184, "eval_loss": 1.2641615867614746, "eval_runtime": 3245.8116, "eval_samples_per_second": 2.37, "eval_steps_per_second": 0.296, "step": 2500 }, { "epoch": 0.56, "learning_rate": 4.887439453985029e-05, "loss": 1.3244, "step": 2550 }, { "epoch": 0.57, "learning_rate": 4.884687362395421e-05, "loss": 1.3514, "step": 2600 }, { "epoch": 0.58, "learning_rate": 4.881935270805813e-05, "loss": 1.3349, "step": 2650 }, { "epoch": 0.59, "learning_rate": 4.8791831792162045e-05, "loss": 1.2717, "step": 2700 }, { "epoch": 0.6, "learning_rate": 4.876431087626596e-05, "loss": 1.2243, "step": 2750 }, { "epoch": 0.61, "learning_rate": 4.873678996036988e-05, "loss": 1.2679, "step": 2800 }, { "epoch": 0.62, "learning_rate": 4.8709269044473804e-05, "loss": 1.2447, "step": 2850 }, { "epoch": 0.63, "learning_rate": 4.868174812857772e-05, "loss": 1.2269, "step": 2900 }, { "epoch": 0.65, "learning_rate": 4.865422721268164e-05, "loss": 1.2036, "step": 2950 }, { "epoch": 0.66, "learning_rate": 4.8626706296785555e-05, "loss": 1.2241, "step": 3000 }, { "epoch": 0.66, "eval_acc": 0.06604264169708234, "eval_cer": 0.23580191597196276, "eval_loss": 1.1367168426513672, "eval_runtime": 2884.4123, "eval_samples_per_second": 2.667, "eval_steps_per_second": 0.334, "step": 3000 }, { "epoch": 0.67, "learning_rate": 4.859918538088948e-05, "loss": 1.2466, "step": 3050 }, { "epoch": 0.68, "learning_rate": 4.8571664464993396e-05, "loss": 1.2071, "step": 3100 }, { "epoch": 0.69, "learning_rate": 4.854414354909731e-05, "loss": 1.2059, "step": 3150 }, { "epoch": 0.7, "learning_rate": 4.851662263320124e-05, "loss": 1.1822, "step": 3200 }, { "epoch": 0.71, "learning_rate": 4.8489101717305154e-05, "loss": 1.0968, "step": 3250 }, { "epoch": 0.72, "learning_rate": 4.846158080140907e-05, "loss": 1.1966, "step": 3300 }, { "epoch": 0.73, "learning_rate": 4.8434059885512996e-05, "loss": 1.1477, "step": 3350 }, { "epoch": 0.74, "learning_rate": 4.840653896961691e-05, "loss": 1.1433, "step": 3400 }, { "epoch": 0.76, "learning_rate": 4.837901805372083e-05, "loss": 1.138, "step": 3450 }, { "epoch": 0.77, "learning_rate": 4.835149713782475e-05, "loss": 1.0963, "step": 3500 }, { "epoch": 0.77, "eval_acc": 0.07384295370854875, "eval_cer": 0.195494876736243, "eval_loss": 1.0342167615890503, "eval_runtime": 2491.4875, "eval_samples_per_second": 3.087, "eval_steps_per_second": 0.386, "step": 3500 }, { "epoch": 0.78, "learning_rate": 4.832397622192867e-05, "loss": 1.0988, "step": 3550 }, { "epoch": 0.79, "learning_rate": 4.829645530603259e-05, "loss": 1.1284, "step": 3600 }, { "epoch": 0.8, "learning_rate": 4.8268934390136505e-05, "loss": 1.1235, "step": 3650 }, { "epoch": 0.81, "learning_rate": 4.824196389255835e-05, "loss": 1.1225, "step": 3700 }, { "epoch": 0.82, "learning_rate": 4.8214442976662265e-05, "loss": 1.0763, "step": 3750 }, { "epoch": 0.83, "learning_rate": 4.818692206076618e-05, "loss": 1.0841, "step": 3800 }, { "epoch": 0.84, "learning_rate": 4.8159401144870106e-05, "loss": 1.0994, "step": 3850 }, { "epoch": 0.85, "learning_rate": 4.813188022897402e-05, "loss": 1.0723, "step": 3900 }, { "epoch": 0.86, "learning_rate": 4.810435931307794e-05, "loss": 1.0384, "step": 3950 }, { "epoch": 0.88, "learning_rate": 4.8076838397181864e-05, "loss": 1.0655, "step": 4000 }, { "epoch": 0.88, "eval_acc": 0.07514300571045983, "eval_cer": 0.24283758579470666, "eval_loss": 1.1094621419906616, "eval_runtime": 3232.3014, "eval_samples_per_second": 2.38, "eval_steps_per_second": 0.298, "step": 4000 }, { "epoch": 0.89, "learning_rate": 4.8049317481285775e-05, "loss": 1.0818, "step": 4050 }, { "epoch": 0.9, "learning_rate": 4.80217965653897e-05, "loss": 1.0594, "step": 4100 }, { "epoch": 0.91, "learning_rate": 4.799427564949362e-05, "loss": 1.0087, "step": 4150 }, { "epoch": 0.92, "learning_rate": 4.796675473359753e-05, "loss": 1.0247, "step": 4200 }, { "epoch": 0.93, "learning_rate": 4.793923381770146e-05, "loss": 1.0896, "step": 4250 }, { "epoch": 0.94, "learning_rate": 4.7911712901805374e-05, "loss": 1.0826, "step": 4300 }, { "epoch": 0.95, "learning_rate": 4.788419198590929e-05, "loss": 0.996, "step": 4350 }, { "epoch": 0.96, "learning_rate": 4.7856671070013215e-05, "loss": 0.9957, "step": 4400 }, { "epoch": 0.97, "learning_rate": 4.782915015411713e-05, "loss": 1.0103, "step": 4450 }, { "epoch": 0.99, "learning_rate": 4.780162923822105e-05, "loss": 0.9841, "step": 4500 }, { "epoch": 0.99, "eval_acc": 0.08931357253129048, "eval_cer": 0.17093824951666592, "eval_loss": 0.9081698656082153, "eval_runtime": 2397.9448, "eval_samples_per_second": 3.208, "eval_steps_per_second": 0.401, "step": 4500 }, { "epoch": 1.0, "learning_rate": 4.777410832232497e-05, "loss": 1.014, "step": 4550 }, { "epoch": 1.01, "learning_rate": 4.774658740642889e-05, "loss": 0.9617, "step": 4600 }, { "epoch": 1.02, "learning_rate": 4.771906649053281e-05, "loss": 0.9232, "step": 4650 }, { "epoch": 1.03, "learning_rate": 4.7691545574636725e-05, "loss": 0.96, "step": 4700 }, { "epoch": 1.04, "learning_rate": 4.766402465874065e-05, "loss": 0.9087, "step": 4750 }, { "epoch": 1.05, "learning_rate": 4.763650374284456e-05, "loss": 0.9226, "step": 4800 }, { "epoch": 1.06, "learning_rate": 4.760898282694848e-05, "loss": 0.9249, "step": 4850 }, { "epoch": 1.07, "learning_rate": 4.75814619110524e-05, "loss": 0.8718, "step": 4900 }, { "epoch": 1.08, "learning_rate": 4.755394099515632e-05, "loss": 0.8789, "step": 4950 }, { "epoch": 1.09, "learning_rate": 4.752642007926024e-05, "loss": 0.8746, "step": 5000 }, { "epoch": 1.09, "eval_acc": 0.09100364013377488, "eval_cer": 0.16938940990606188, "eval_loss": 0.8924765586853027, "eval_runtime": 2509.8917, "eval_samples_per_second": 3.065, "eval_steps_per_second": 0.383, "step": 5000 }, { "epoch": 1.11, "learning_rate": 4.749889916336416e-05, "loss": 0.9212, "step": 5050 }, { "epoch": 1.12, "learning_rate": 4.7471378247468076e-05, "loss": 0.8748, "step": 5100 }, { "epoch": 1.13, "learning_rate": 4.744385733157199e-05, "loss": 0.8864, "step": 5150 }, { "epoch": 1.14, "learning_rate": 4.741633641567592e-05, "loss": 0.8509, "step": 5200 }, { "epoch": 1.15, "learning_rate": 4.7388815499779834e-05, "loss": 0.8594, "step": 5250 }, { "epoch": 1.16, "learning_rate": 4.736129458388375e-05, "loss": 0.8814, "step": 5300 }, { "epoch": 1.17, "learning_rate": 4.7333773667987675e-05, "loss": 0.8689, "step": 5350 }, { "epoch": 1.18, "learning_rate": 4.730625275209159e-05, "loss": 0.8678, "step": 5400 }, { "epoch": 1.19, "learning_rate": 4.727873183619551e-05, "loss": 0.8475, "step": 5450 }, { "epoch": 1.2, "learning_rate": 4.7251210920299434e-05, "loss": 0.8549, "step": 5500 }, { "epoch": 1.2, "eval_acc": 0.08385335412326399, "eval_cer": 0.17893981230978653, "eval_loss": 0.8790249824523926, "eval_runtime": 2637.8013, "eval_samples_per_second": 2.916, "eval_steps_per_second": 0.365, "step": 5500 }, { "epoch": 1.22, "learning_rate": 4.722369000440335e-05, "loss": 0.8873, "step": 5550 }, { "epoch": 1.23, "learning_rate": 4.719616908850727e-05, "loss": 0.8834, "step": 5600 }, { "epoch": 1.24, "learning_rate": 4.7168648172611185e-05, "loss": 0.855, "step": 5650 }, { "epoch": 1.25, "learning_rate": 4.714112725671511e-05, "loss": 0.8473, "step": 5700 }, { "epoch": 1.26, "learning_rate": 4.711360634081902e-05, "loss": 0.8253, "step": 5750 }, { "epoch": 1.27, "learning_rate": 4.708608542492294e-05, "loss": 0.8476, "step": 5800 }, { "epoch": 1.28, "learning_rate": 4.705856450902687e-05, "loss": 0.8522, "step": 5850 }, { "epoch": 1.29, "learning_rate": 4.703104359313078e-05, "loss": 0.8159, "step": 5900 }, { "epoch": 1.3, "learning_rate": 4.70035226772347e-05, "loss": 0.8073, "step": 5950 }, { "epoch": 1.31, "learning_rate": 4.697600176133862e-05, "loss": 0.8467, "step": 6000 }, { "epoch": 1.31, "eval_acc": 0.09620384814141915, "eval_cer": 0.16569328014462037, "eval_loss": 0.8687126040458679, "eval_runtime": 2433.3407, "eval_samples_per_second": 3.161, "eval_steps_per_second": 0.395, "step": 6000 }, { "epoch": 1.32, "learning_rate": 4.6948480845442536e-05, "loss": 0.837, "step": 6050 }, { "epoch": 1.34, "learning_rate": 4.692095992954646e-05, "loss": 0.8188, "step": 6100 }, { "epoch": 1.35, "learning_rate": 4.689343901365038e-05, "loss": 0.8061, "step": 6150 }, { "epoch": 1.36, "learning_rate": 4.6865918097754294e-05, "loss": 0.8355, "step": 6200 }, { "epoch": 1.37, "learning_rate": 4.683839718185821e-05, "loss": 0.7924, "step": 6250 }, { "epoch": 1.38, "learning_rate": 4.6810876265962135e-05, "loss": 0.7915, "step": 6300 }, { "epoch": 1.39, "learning_rate": 4.678335535006605e-05, "loss": 0.7906, "step": 6350 }, { "epoch": 1.4, "learning_rate": 4.675583443416997e-05, "loss": 0.8021, "step": 6400 }, { "epoch": 1.41, "learning_rate": 4.6728313518273894e-05, "loss": 0.8202, "step": 6450 }, { "epoch": 1.42, "learning_rate": 4.670079260237781e-05, "loss": 0.7687, "step": 6500 }, { "epoch": 1.42, "eval_acc": 0.10348413935212114, "eval_cer": 0.15004829464851885, "eval_loss": 0.7853290438652039, "eval_runtime": 2338.8646, "eval_samples_per_second": 3.289, "eval_steps_per_second": 0.411, "step": 6500 }, { "epoch": 1.43, "learning_rate": 4.667327168648173e-05, "loss": 0.7909, "step": 6550 }, { "epoch": 1.45, "learning_rate": 4.664575077058565e-05, "loss": 0.7908, "step": 6600 }, { "epoch": 1.46, "learning_rate": 4.661822985468957e-05, "loss": 0.7847, "step": 6650 }, { "epoch": 1.47, "learning_rate": 4.6590708938793486e-05, "loss": 0.7905, "step": 6700 }, { "epoch": 1.48, "learning_rate": 4.6563188022897403e-05, "loss": 0.778, "step": 6750 }, { "epoch": 1.49, "learning_rate": 4.653566710700132e-05, "loss": 0.7986, "step": 6800 }, { "epoch": 1.5, "learning_rate": 4.650814619110524e-05, "loss": 0.7472, "step": 6850 }, { "epoch": 1.51, "learning_rate": 4.648062527520916e-05, "loss": 0.8086, "step": 6900 }, { "epoch": 1.52, "learning_rate": 4.645310435931308e-05, "loss": 0.7977, "step": 6950 }, { "epoch": 1.53, "learning_rate": 4.6425583443416996e-05, "loss": 0.7824, "step": 7000 }, { "epoch": 1.53, "eval_acc": 0.11687467497180516, "eval_cer": 0.15055527217871656, "eval_loss": 0.7585737705230713, "eval_runtime": 2451.3177, "eval_samples_per_second": 3.138, "eval_steps_per_second": 0.392, "step": 7000 }, { "epoch": 1.54, "learning_rate": 4.639806252752092e-05, "loss": 0.7635, "step": 7050 }, { "epoch": 1.55, "learning_rate": 4.637054161162484e-05, "loss": 0.7356, "step": 7100 }, { "epoch": 1.57, "learning_rate": 4.6343020695728754e-05, "loss": 0.7346, "step": 7150 }, { "epoch": 1.58, "learning_rate": 4.631549977983268e-05, "loss": 0.7301, "step": 7200 }, { "epoch": 1.59, "learning_rate": 4.6287978863936595e-05, "loss": 0.7068, "step": 7250 }, { "epoch": 1.6, "learning_rate": 4.626045794804051e-05, "loss": 0.7685, "step": 7300 }, { "epoch": 1.61, "learning_rate": 4.623293703214443e-05, "loss": 0.8037, "step": 7350 }, { "epoch": 1.62, "learning_rate": 4.6205416116248354e-05, "loss": 0.7372, "step": 7400 }, { "epoch": 1.63, "learning_rate": 4.617789520035227e-05, "loss": 0.8084, "step": 7450 }, { "epoch": 1.64, "learning_rate": 4.615037428445619e-05, "loss": 0.7914, "step": 7500 }, { "epoch": 1.64, "eval_acc": 0.11440457616817412, "eval_cer": 0.14829325316783437, "eval_loss": 0.7524902820587158, "eval_runtime": 2409.0581, "eval_samples_per_second": 3.193, "eval_steps_per_second": 0.399, "step": 7500 }, { "epoch": 1.65, "learning_rate": 4.612285336856011e-05, "loss": 0.729, "step": 7550 }, { "epoch": 1.66, "learning_rate": 4.609533245266402e-05, "loss": 0.7403, "step": 7600 }, { "epoch": 1.67, "learning_rate": 4.6067811536767946e-05, "loss": 0.7689, "step": 7650 }, { "epoch": 1.69, "learning_rate": 4.6040290620871863e-05, "loss": 0.7634, "step": 7700 }, { "epoch": 1.7, "learning_rate": 4.601276970497578e-05, "loss": 0.7226, "step": 7750 }, { "epoch": 1.71, "learning_rate": 4.5985248789079705e-05, "loss": 0.7229, "step": 7800 }, { "epoch": 1.72, "learning_rate": 4.595772787318362e-05, "loss": 0.7214, "step": 7850 }, { "epoch": 1.73, "learning_rate": 4.593020695728754e-05, "loss": 0.7426, "step": 7900 }, { "epoch": 1.74, "learning_rate": 4.5902686041391456e-05, "loss": 0.7308, "step": 7950 }, { "epoch": 1.75, "learning_rate": 4.587516512549538e-05, "loss": 0.7028, "step": 8000 }, { "epoch": 1.75, "eval_acc": 0.11960478417581841, "eval_cer": 0.14120952125507172, "eval_loss": 0.7158035039901733, "eval_runtime": 2504.3586, "eval_samples_per_second": 3.071, "eval_steps_per_second": 0.384, "step": 8000 }, { "epoch": 1.76, "learning_rate": 4.58476442095993e-05, "loss": 0.7227, "step": 8050 }, { "epoch": 1.77, "learning_rate": 4.5820123293703214e-05, "loss": 0.7226, "step": 8100 }, { "epoch": 1.78, "learning_rate": 4.579260237780714e-05, "loss": 0.7356, "step": 8150 }, { "epoch": 1.8, "learning_rate": 4.5765081461911055e-05, "loss": 0.7623, "step": 8200 }, { "epoch": 1.81, "learning_rate": 4.573756054601497e-05, "loss": 0.7358, "step": 8250 }, { "epoch": 1.82, "learning_rate": 4.5710039630118897e-05, "loss": 0.7187, "step": 8300 }, { "epoch": 1.83, "learning_rate": 4.5682518714222814e-05, "loss": 0.699, "step": 8350 }, { "epoch": 1.84, "learning_rate": 4.565499779832673e-05, "loss": 0.7306, "step": 8400 }, { "epoch": 1.85, "learning_rate": 4.562747688243065e-05, "loss": 0.6923, "step": 8450 }, { "epoch": 1.86, "learning_rate": 4.559995596653457e-05, "loss": 0.695, "step": 8500 }, { "epoch": 1.86, "eval_acc": 0.12077483097753837, "eval_cer": 0.13451183645245962, "eval_loss": 0.6999772787094116, "eval_runtime": 2428.0435, "eval_samples_per_second": 3.168, "eval_steps_per_second": 0.396, "step": 8500 }, { "epoch": 1.87, "learning_rate": 4.557243505063849e-05, "loss": 0.7093, "step": 8550 }, { "epoch": 1.88, "learning_rate": 4.5544914134742406e-05, "loss": 0.7235, "step": 8600 }, { "epoch": 1.89, "learning_rate": 4.551739321884633e-05, "loss": 0.7058, "step": 8650 }, { "epoch": 1.9, "learning_rate": 4.548987230295024e-05, "loss": 0.6814, "step": 8700 }, { "epoch": 1.92, "learning_rate": 4.5462351387054165e-05, "loss": 0.6825, "step": 8750 }, { "epoch": 1.93, "learning_rate": 4.543483047115808e-05, "loss": 0.6913, "step": 8800 }, { "epoch": 1.94, "learning_rate": 4.5407309555262e-05, "loss": 0.7448, "step": 8850 }, { "epoch": 1.95, "learning_rate": 4.537978863936592e-05, "loss": 0.6972, "step": 8900 }, { "epoch": 1.96, "learning_rate": 4.535226772346984e-05, "loss": 0.6655, "step": 8950 }, { "epoch": 1.97, "learning_rate": 4.532474680757376e-05, "loss": 0.7233, "step": 9000 }, { "epoch": 1.97, "eval_acc": 0.12688507538652039, "eval_cer": 0.1338389671921972, "eval_loss": 0.6706005930900574, "eval_runtime": 2472.9865, "eval_samples_per_second": 3.11, "eval_steps_per_second": 0.389, "step": 9000 }, { "epoch": 1.98, "learning_rate": 4.5297225891677674e-05, "loss": 0.6875, "step": 9050 }, { "epoch": 1.99, "learning_rate": 4.52697049757816e-05, "loss": 0.6878, "step": 9100 }, { "epoch": 2.0, "learning_rate": 4.5242184059885516e-05, "loss": 0.6486, "step": 9150 }, { "epoch": 2.01, "learning_rate": 4.521466314398943e-05, "loss": 0.5747, "step": 9200 }, { "epoch": 2.03, "learning_rate": 4.518714222809336e-05, "loss": 0.5716, "step": 9250 }, { "epoch": 2.04, "learning_rate": 4.515962131219727e-05, "loss": 0.5689, "step": 9300 }, { "epoch": 2.05, "learning_rate": 4.513210039630119e-05, "loss": 0.5613, "step": 9350 }, { "epoch": 2.06, "learning_rate": 4.5104579480405115e-05, "loss": 0.5692, "step": 9400 }, { "epoch": 2.07, "learning_rate": 4.5077058564509025e-05, "loss": 0.5767, "step": 9450 }, { "epoch": 2.08, "learning_rate": 4.504953764861295e-05, "loss": 0.5931, "step": 9500 }, { "epoch": 2.08, "eval_acc": 0.1327353093951202, "eval_cer": 0.1271722901895972, "eval_loss": 0.657380223274231, "eval_runtime": 2374.7935, "eval_samples_per_second": 3.239, "eval_steps_per_second": 0.405, "step": 9500 }, { "epoch": 2.09, "learning_rate": 4.5022016732716866e-05, "loss": 0.5764, "step": 9550 }, { "epoch": 2.1, "learning_rate": 4.4994495816820784e-05, "loss": 0.6077, "step": 9600 }, { "epoch": 2.11, "learning_rate": 4.496697490092471e-05, "loss": 0.5916, "step": 9650 }, { "epoch": 2.12, "learning_rate": 4.4939453985028625e-05, "loss": 0.5912, "step": 9700 }, { "epoch": 2.13, "learning_rate": 4.491193306913254e-05, "loss": 0.5922, "step": 9750 }, { "epoch": 2.15, "learning_rate": 4.488441215323646e-05, "loss": 0.5762, "step": 9800 }, { "epoch": 2.16, "learning_rate": 4.485689123734038e-05, "loss": 0.5326, "step": 9850 }, { "epoch": 2.17, "learning_rate": 4.48293703214443e-05, "loss": 0.559, "step": 9900 }, { "epoch": 2.18, "learning_rate": 4.480184940554822e-05, "loss": 0.5893, "step": 9950 }, { "epoch": 2.19, "learning_rate": 4.477432848965214e-05, "loss": 0.6099, "step": 10000 }, { "epoch": 2.19, "eval_acc": 0.13598543939989788, "eval_cer": 0.12414437851841631, "eval_loss": 0.6273950934410095, "eval_runtime": 2477.2194, "eval_samples_per_second": 3.105, "eval_steps_per_second": 0.388, "step": 10000 }, { "epoch": 2.2, "learning_rate": 4.474680757375606e-05, "loss": 0.5248, "step": 10050 }, { "epoch": 2.21, "learning_rate": 4.4719286657859976e-05, "loss": 0.5711, "step": 10100 }, { "epoch": 2.22, "learning_rate": 4.469176574196389e-05, "loss": 0.5764, "step": 10150 }, { "epoch": 2.23, "learning_rate": 4.466424482606782e-05, "loss": 0.5665, "step": 10200 }, { "epoch": 2.24, "learning_rate": 4.4636723910171734e-05, "loss": 0.559, "step": 10250 }, { "epoch": 2.26, "learning_rate": 4.460920299427565e-05, "loss": 0.5561, "step": 10300 }, { "epoch": 2.27, "learning_rate": 4.4581682078379575e-05, "loss": 0.6077, "step": 10350 }, { "epoch": 2.28, "learning_rate": 4.4554161162483485e-05, "loss": 0.5665, "step": 10400 }, { "epoch": 2.29, "learning_rate": 4.452664024658741e-05, "loss": 0.5709, "step": 10450 }, { "epoch": 2.3, "learning_rate": 4.449911933069133e-05, "loss": 0.5675, "step": 10500 }, { "epoch": 2.3, "eval_acc": 0.13624544980028008, "eval_cer": 0.11983739509673659, "eval_loss": 0.6147823333740234, "eval_runtime": 2420.4162, "eval_samples_per_second": 3.178, "eval_steps_per_second": 0.397, "step": 10500 }, { "epoch": 2.31, "learning_rate": 4.4471598414795244e-05, "loss": 0.5561, "step": 10550 }, { "epoch": 2.32, "learning_rate": 4.444407749889917e-05, "loss": 0.5691, "step": 10600 }, { "epoch": 2.33, "learning_rate": 4.4416556583003085e-05, "loss": 0.5794, "step": 10650 }, { "epoch": 2.34, "learning_rate": 4.4389035667107e-05, "loss": 0.585, "step": 10700 }, { "epoch": 2.35, "learning_rate": 4.436151475121092e-05, "loss": 0.5594, "step": 10750 }, { "epoch": 2.36, "learning_rate": 4.433399383531484e-05, "loss": 0.5559, "step": 10800 }, { "epoch": 2.38, "learning_rate": 4.430647291941876e-05, "loss": 0.5723, "step": 10850 }, { "epoch": 2.39, "learning_rate": 4.427895200352268e-05, "loss": 0.5518, "step": 10900 }, { "epoch": 2.4, "learning_rate": 4.42514310876266e-05, "loss": 0.5658, "step": 10950 }, { "epoch": 2.41, "learning_rate": 4.422446059004844e-05, "loss": 0.5909, "step": 11000 }, { "epoch": 2.41, "eval_acc": 0.14911596461919968, "eval_cer": 0.1165412659554511, "eval_loss": 0.6066173911094666, "eval_runtime": 2395.5518, "eval_samples_per_second": 3.211, "eval_steps_per_second": 0.402, "step": 11000 }, { "epoch": 2.42, "learning_rate": 4.419693967415236e-05, "loss": 0.5511, "step": 11050 }, { "epoch": 2.43, "learning_rate": 4.416941875825628e-05, "loss": 0.5715, "step": 11100 }, { "epoch": 2.44, "learning_rate": 4.4141897842360195e-05, "loss": 0.5424, "step": 11150 }, { "epoch": 2.45, "learning_rate": 4.411437692646411e-05, "loss": 0.5902, "step": 11200 }, { "epoch": 2.46, "learning_rate": 4.4086856010568036e-05, "loss": 0.5642, "step": 11250 }, { "epoch": 2.47, "learning_rate": 4.4059335094671954e-05, "loss": 0.5261, "step": 11300 }, { "epoch": 2.49, "learning_rate": 4.403181417877587e-05, "loss": 0.548, "step": 11350 }, { "epoch": 2.5, "learning_rate": 4.4004293262879795e-05, "loss": 0.5623, "step": 11400 }, { "epoch": 2.51, "learning_rate": 4.3976772346983705e-05, "loss": 0.5736, "step": 11450 }, { "epoch": 2.52, "learning_rate": 4.394925143108763e-05, "loss": 0.5088, "step": 11500 }, { "epoch": 2.52, "eval_acc": 0.15275611022455068, "eval_cer": 0.11419397549453565, "eval_loss": 0.5838978886604309, "eval_runtime": 2490.473, "eval_samples_per_second": 3.089, "eval_steps_per_second": 0.386, "step": 11500 }, { "epoch": 2.53, "learning_rate": 4.392173051519155e-05, "loss": 0.5502, "step": 11550 }, { "epoch": 2.54, "learning_rate": 4.389420959929546e-05, "loss": 0.5694, "step": 11600 }, { "epoch": 2.55, "learning_rate": 4.386668868339939e-05, "loss": 0.5612, "step": 11650 }, { "epoch": 2.56, "learning_rate": 4.3839167767503304e-05, "loss": 0.5259, "step": 11700 }, { "epoch": 2.57, "learning_rate": 4.381164685160722e-05, "loss": 0.5491, "step": 11750 }, { "epoch": 2.58, "learning_rate": 4.378412593571114e-05, "loss": 0.5464, "step": 11800 }, { "epoch": 2.59, "learning_rate": 4.375660501981506e-05, "loss": 0.5407, "step": 11850 }, { "epoch": 2.61, "learning_rate": 4.372908410391898e-05, "loss": 0.511, "step": 11900 }, { "epoch": 2.62, "learning_rate": 4.37015631880229e-05, "loss": 0.5804, "step": 11950 }, { "epoch": 2.63, "learning_rate": 4.367404227212682e-05, "loss": 0.5597, "step": 12000 }, { "epoch": 2.63, "eval_acc": 0.15236609462397735, "eval_cer": 0.11861258699625891, "eval_loss": 0.5860298275947571, "eval_runtime": 2486.6893, "eval_samples_per_second": 3.093, "eval_steps_per_second": 0.387, "step": 12000 }, { "epoch": 2.64, "learning_rate": 4.364652135623074e-05, "loss": 0.5672, "step": 12050 }, { "epoch": 2.65, "learning_rate": 4.3619000440334655e-05, "loss": 0.535, "step": 12100 }, { "epoch": 2.66, "learning_rate": 4.359147952443858e-05, "loss": 0.527, "step": 12150 }, { "epoch": 2.67, "learning_rate": 4.3563958608542496e-05, "loss": 0.5377, "step": 12200 }, { "epoch": 2.68, "learning_rate": 4.3536437692646414e-05, "loss": 0.5611, "step": 12250 }, { "epoch": 2.69, "learning_rate": 4.350891677675033e-05, "loss": 0.5487, "step": 12300 }, { "epoch": 2.7, "learning_rate": 4.3481395860854255e-05, "loss": 0.5627, "step": 12350 }, { "epoch": 2.71, "learning_rate": 4.345387494495817e-05, "loss": 0.5527, "step": 12400 }, { "epoch": 2.73, "learning_rate": 4.342635402906209e-05, "loss": 0.527, "step": 12450 }, { "epoch": 2.74, "learning_rate": 4.339883311316601e-05, "loss": 0.5305, "step": 12500 }, { "epoch": 2.74, "eval_acc": 0.1570462818308572, "eval_cer": 0.11371335459434821, "eval_loss": 0.5711297392845154, "eval_runtime": 2415.9809, "eval_samples_per_second": 3.184, "eval_steps_per_second": 0.398, "step": 12500 }, { "epoch": 2.75, "learning_rate": 4.337131219726992e-05, "loss": 0.531, "step": 12550 }, { "epoch": 2.76, "learning_rate": 4.334379128137385e-05, "loss": 0.5056, "step": 12600 }, { "epoch": 2.77, "learning_rate": 4.3316270365477764e-05, "loss": 0.5423, "step": 12650 }, { "epoch": 2.78, "learning_rate": 4.328874944958168e-05, "loss": 0.5293, "step": 12700 }, { "epoch": 2.79, "learning_rate": 4.3261228533685606e-05, "loss": 0.5174, "step": 12750 }, { "epoch": 2.8, "learning_rate": 4.323370761778952e-05, "loss": 0.5212, "step": 12800 }, { "epoch": 2.81, "learning_rate": 4.320618670189344e-05, "loss": 0.5368, "step": 12850 }, { "epoch": 2.82, "learning_rate": 4.317866578599736e-05, "loss": 0.526, "step": 12900 }, { "epoch": 2.84, "learning_rate": 4.315114487010128e-05, "loss": 0.5288, "step": 12950 }, { "epoch": 2.85, "learning_rate": 4.31236239542052e-05, "loss": 0.5453, "step": 13000 }, { "epoch": 2.85, "eval_acc": 0.1608164326363993, "eval_cer": 0.11214901108373812, "eval_loss": 0.5519789457321167, "eval_runtime": 2476.1549, "eval_samples_per_second": 3.106, "eval_steps_per_second": 0.389, "step": 13000 }, { "epoch": 2.86, "learning_rate": 4.3096103038309115e-05, "loss": 0.5121, "step": 13050 }, { "epoch": 2.87, "learning_rate": 4.306858212241304e-05, "loss": 0.5292, "step": 13100 }, { "epoch": 2.88, "learning_rate": 4.304106120651695e-05, "loss": 0.5381, "step": 13150 }, { "epoch": 2.89, "learning_rate": 4.3013540290620874e-05, "loss": 0.4871, "step": 13200 }, { "epoch": 2.9, "learning_rate": 4.29860193747248e-05, "loss": 0.4917, "step": 13250 }, { "epoch": 2.91, "learning_rate": 4.295849845882871e-05, "loss": 0.5253, "step": 13300 }, { "epoch": 2.92, "learning_rate": 4.293097754293263e-05, "loss": 0.5127, "step": 13350 }, { "epoch": 2.93, "learning_rate": 4.290345662703655e-05, "loss": 0.4916, "step": 13400 }, { "epoch": 2.94, "learning_rate": 4.2875935711140466e-05, "loss": 0.5053, "step": 13450 }, { "epoch": 2.96, "learning_rate": 4.284841479524439e-05, "loss": 0.4931, "step": 13500 }, { "epoch": 2.96, "eval_acc": 0.16666666664499913, "eval_cer": 0.10775055465202271, "eval_loss": 0.5448443293571472, "eval_runtime": 2437.1677, "eval_samples_per_second": 3.156, "eval_steps_per_second": 0.395, "step": 13500 }, { "epoch": 2.97, "learning_rate": 4.282089387934831e-05, "loss": 0.5284, "step": 13550 }, { "epoch": 2.98, "learning_rate": 4.2793372963452225e-05, "loss": 0.4843, "step": 13600 }, { "epoch": 2.99, "learning_rate": 4.276585204755614e-05, "loss": 0.5259, "step": 13650 }, { "epoch": 3.0, "learning_rate": 4.2738331131660066e-05, "loss": 0.5194, "step": 13700 }, { "epoch": 3.01, "learning_rate": 4.271081021576398e-05, "loss": 0.4109, "step": 13750 }, { "epoch": 3.02, "learning_rate": 4.26832892998679e-05, "loss": 0.412, "step": 13800 }, { "epoch": 3.03, "learning_rate": 4.2655768383971824e-05, "loss": 0.4071, "step": 13850 }, { "epoch": 3.04, "learning_rate": 4.262824746807574e-05, "loss": 0.434, "step": 13900 }, { "epoch": 3.05, "learning_rate": 4.260072655217966e-05, "loss": 0.4159, "step": 13950 }, { "epoch": 3.07, "learning_rate": 4.2573205636283575e-05, "loss": 0.4154, "step": 14000 }, { "epoch": 3.07, "eval_acc": 0.17095683825130564, "eval_cer": 0.10412419244060843, "eval_loss": 0.5264742970466614, "eval_runtime": 2375.6119, "eval_samples_per_second": 3.238, "eval_steps_per_second": 0.405, "step": 14000 }, { "epoch": 3.08, "learning_rate": 4.25456847203875e-05, "loss": 0.4043, "step": 14050 }, { "epoch": 3.09, "learning_rate": 4.2518163804491417e-05, "loss": 0.4101, "step": 14100 }, { "epoch": 3.1, "learning_rate": 4.2490642888595334e-05, "loss": 0.4034, "step": 14150 }, { "epoch": 3.11, "learning_rate": 4.246312197269926e-05, "loss": 0.4307, "step": 14200 }, { "epoch": 3.12, "learning_rate": 4.243560105680317e-05, "loss": 0.408, "step": 14250 }, { "epoch": 3.13, "learning_rate": 4.240808014090709e-05, "loss": 0.42, "step": 14300 }, { "epoch": 3.14, "learning_rate": 4.2380559225011016e-05, "loss": 0.4213, "step": 14350 }, { "epoch": 3.15, "learning_rate": 4.2353038309114926e-05, "loss": 0.3993, "step": 14400 }, { "epoch": 3.16, "learning_rate": 4.232551739321885e-05, "loss": 0.4172, "step": 14450 }, { "epoch": 3.17, "learning_rate": 4.229799647732277e-05, "loss": 0.4151, "step": 14500 }, { "epoch": 3.17, "eval_acc": 0.181097243866212, "eval_cer": 0.10292264019013983, "eval_loss": 0.5197580456733704, "eval_runtime": 2415.2798, "eval_samples_per_second": 3.185, "eval_steps_per_second": 0.398, "step": 14500 }, { "epoch": 3.19, "learning_rate": 4.2270475561426685e-05, "loss": 0.4562, "step": 14550 }, { "epoch": 3.2, "learning_rate": 4.224295464553061e-05, "loss": 0.4017, "step": 14600 }, { "epoch": 3.21, "learning_rate": 4.2215433729634526e-05, "loss": 0.4453, "step": 14650 }, { "epoch": 3.22, "learning_rate": 4.218791281373844e-05, "loss": 0.4333, "step": 14700 }, { "epoch": 3.23, "learning_rate": 4.216039189784236e-05, "loss": 0.431, "step": 14750 }, { "epoch": 3.24, "learning_rate": 4.2132870981946284e-05, "loss": 0.4047, "step": 14800 }, { "epoch": 3.25, "learning_rate": 4.2105350066050194e-05, "loss": 0.4046, "step": 14850 }, { "epoch": 3.26, "learning_rate": 4.207782915015412e-05, "loss": 0.4192, "step": 14900 }, { "epoch": 3.27, "learning_rate": 4.205030823425804e-05, "loss": 0.4344, "step": 14950 }, { "epoch": 3.28, "learning_rate": 4.202278731836195e-05, "loss": 0.3972, "step": 15000 }, { "epoch": 3.28, "eval_acc": 0.17641705665933216, "eval_cer": 0.10194434409975829, "eval_loss": 0.5112528800964355, "eval_runtime": 2422.1094, "eval_samples_per_second": 3.176, "eval_steps_per_second": 0.397, "step": 15000 }, { "epoch": 3.3, "learning_rate": 4.1995266402465877e-05, "loss": 0.4183, "step": 15050 }, { "epoch": 3.31, "learning_rate": 4.1967745486569794e-05, "loss": 0.4075, "step": 15100 }, { "epoch": 3.32, "learning_rate": 4.194022457067371e-05, "loss": 0.4087, "step": 15150 }, { "epoch": 3.33, "learning_rate": 4.1912703654777635e-05, "loss": 0.4282, "step": 15200 }, { "epoch": 3.34, "learning_rate": 4.188518273888155e-05, "loss": 0.3998, "step": 15250 }, { "epoch": 3.35, "learning_rate": 4.185766182298547e-05, "loss": 0.4356, "step": 15300 }, { "epoch": 3.36, "learning_rate": 4.1830140907089386e-05, "loss": 0.4222, "step": 15350 }, { "epoch": 3.37, "learning_rate": 4.180261999119331e-05, "loss": 0.4528, "step": 15400 }, { "epoch": 3.38, "learning_rate": 4.177509907529723e-05, "loss": 0.3943, "step": 15450 }, { "epoch": 3.39, "learning_rate": 4.1747578159401145e-05, "loss": 0.4126, "step": 15500 }, { "epoch": 3.39, "eval_acc": 0.1777171086612432, "eval_cer": 0.10119860650946745, "eval_loss": 0.5097116231918335, "eval_runtime": 2416.1113, "eval_samples_per_second": 3.184, "eval_steps_per_second": 0.398, "step": 15500 }, { "epoch": 3.4, "learning_rate": 4.172005724350507e-05, "loss": 0.4223, "step": 15550 }, { "epoch": 3.42, "learning_rate": 4.1692536327608986e-05, "loss": 0.4272, "step": 15600 }, { "epoch": 3.43, "learning_rate": 4.16650154117129e-05, "loss": 0.4322, "step": 15650 }, { "epoch": 3.44, "learning_rate": 4.163749449581682e-05, "loss": 0.4199, "step": 15700 }, { "epoch": 3.45, "learning_rate": 4.1609973579920744e-05, "loss": 0.4032, "step": 15750 }, { "epoch": 3.46, "learning_rate": 4.158245266402466e-05, "loss": 0.415, "step": 15800 }, { "epoch": 3.47, "learning_rate": 4.155493174812858e-05, "loss": 0.417, "step": 15850 }, { "epoch": 3.48, "learning_rate": 4.15274108322325e-05, "loss": 0.4071, "step": 15900 }, { "epoch": 3.49, "learning_rate": 4.149988991633641e-05, "loss": 0.4119, "step": 15950 }, { "epoch": 3.5, "learning_rate": 4.1472369000440337e-05, "loss": 0.4082, "step": 16000 }, { "epoch": 3.5, "eval_acc": 0.18616744667366517, "eval_cer": 0.09961720870885071, "eval_loss": 0.49047818779945374, "eval_runtime": 2463.5838, "eval_samples_per_second": 3.122, "eval_steps_per_second": 0.39, "step": 16000 }, { "epoch": 3.51, "learning_rate": 4.144484808454426e-05, "loss": 0.4502, "step": 16050 }, { "epoch": 3.53, "learning_rate": 4.141732716864817e-05, "loss": 0.4244, "step": 16100 }, { "epoch": 3.54, "learning_rate": 4.1389806252752095e-05, "loss": 0.3983, "step": 16150 }, { "epoch": 3.55, "learning_rate": 4.136228533685601e-05, "loss": 0.394, "step": 16200 }, { "epoch": 3.56, "learning_rate": 4.133476442095993e-05, "loss": 0.4326, "step": 16250 }, { "epoch": 3.57, "learning_rate": 4.130779392338177e-05, "loss": 0.4019, "step": 16300 }, { "epoch": 3.58, "learning_rate": 4.1280273007485696e-05, "loss": 0.402, "step": 16350 }, { "epoch": 3.59, "learning_rate": 4.1252752091589606e-05, "loss": 0.4258, "step": 16400 }, { "epoch": 3.6, "learning_rate": 4.122523117569353e-05, "loss": 0.4178, "step": 16450 }, { "epoch": 3.61, "learning_rate": 4.1197710259797454e-05, "loss": 0.4013, "step": 16500 }, { "epoch": 3.61, "eval_acc": 0.19175767028188276, "eval_cer": 0.09475518566695453, "eval_loss": 0.4757140874862671, "eval_runtime": 2404.3281, "eval_samples_per_second": 3.199, "eval_steps_per_second": 0.4, "step": 16500 }, { "epoch": 3.62, "learning_rate": 4.1170189343901364e-05, "loss": 0.3808, "step": 16550 }, { "epoch": 3.63, "learning_rate": 4.114266842800529e-05, "loss": 0.4034, "step": 16600 }, { "epoch": 3.65, "learning_rate": 4.1115147512109205e-05, "loss": 0.4042, "step": 16650 }, { "epoch": 3.66, "learning_rate": 4.108762659621312e-05, "loss": 0.4304, "step": 16700 }, { "epoch": 3.67, "learning_rate": 4.106010568031704e-05, "loss": 0.427, "step": 16750 }, { "epoch": 3.68, "learning_rate": 4.1032584764420964e-05, "loss": 0.3849, "step": 16800 }, { "epoch": 3.69, "learning_rate": 4.100506384852488e-05, "loss": 0.412, "step": 16850 }, { "epoch": 3.7, "learning_rate": 4.09775429326288e-05, "loss": 0.4099, "step": 16900 }, { "epoch": 3.71, "learning_rate": 4.095002201673272e-05, "loss": 0.4052, "step": 16950 }, { "epoch": 3.72, "learning_rate": 4.092250110083663e-05, "loss": 0.4014, "step": 17000 }, { "epoch": 3.72, "eval_acc": 0.19825793029143812, "eval_cer": 0.09623580811753196, "eval_loss": 0.47613686323165894, "eval_runtime": 2485.0631, "eval_samples_per_second": 3.095, "eval_steps_per_second": 0.387, "step": 17000 }, { "epoch": 3.73, "learning_rate": 4.0894980184940556e-05, "loss": 0.4268, "step": 17050 }, { "epoch": 3.74, "learning_rate": 4.086745926904448e-05, "loss": 0.384, "step": 17100 }, { "epoch": 3.75, "learning_rate": 4.083993835314839e-05, "loss": 0.3994, "step": 17150 }, { "epoch": 3.77, "learning_rate": 4.0812417437252315e-05, "loss": 0.4024, "step": 17200 }, { "epoch": 3.78, "learning_rate": 4.078489652135623e-05, "loss": 0.3895, "step": 17250 }, { "epoch": 3.79, "learning_rate": 4.075737560546015e-05, "loss": 0.387, "step": 17300 }, { "epoch": 3.8, "learning_rate": 4.072985468956407e-05, "loss": 0.4229, "step": 17350 }, { "epoch": 3.81, "learning_rate": 4.070233377366799e-05, "loss": 0.4237, "step": 17400 }, { "epoch": 3.82, "learning_rate": 4.0675363276089826e-05, "loss": 0.3938, "step": 17450 }, { "epoch": 3.83, "learning_rate": 4.064784236019375e-05, "loss": 0.3766, "step": 17500 }, { "epoch": 3.83, "eval_acc": 0.19656786268895374, "eval_cer": 0.09395828520664373, "eval_loss": 0.468285471200943, "eval_runtime": 2433.7209, "eval_samples_per_second": 3.161, "eval_steps_per_second": 0.395, "step": 17500 }, { "epoch": 3.84, "learning_rate": 4.0620321444297674e-05, "loss": 0.4031, "step": 17550 }, { "epoch": 3.85, "learning_rate": 4.0592800528401584e-05, "loss": 0.4033, "step": 17600 }, { "epoch": 3.86, "learning_rate": 4.056527961250551e-05, "loss": 0.4201, "step": 17650 }, { "epoch": 3.88, "learning_rate": 4.0537758696609425e-05, "loss": 0.3757, "step": 17700 }, { "epoch": 3.89, "learning_rate": 4.051023778071334e-05, "loss": 0.3922, "step": 17750 }, { "epoch": 3.9, "learning_rate": 4.048271686481726e-05, "loss": 0.3964, "step": 17800 }, { "epoch": 3.91, "learning_rate": 4.045519594892118e-05, "loss": 0.3917, "step": 17850 }, { "epoch": 3.92, "learning_rate": 4.04276750330251e-05, "loss": 0.3899, "step": 17900 }, { "epoch": 3.93, "learning_rate": 4.040015411712902e-05, "loss": 0.3806, "step": 17950 }, { "epoch": 3.94, "learning_rate": 4.037263320123294e-05, "loss": 0.3757, "step": 18000 }, { "epoch": 3.94, "eval_acc": 0.20319812789870018, "eval_cer": 0.09292107429623922, "eval_loss": 0.46000081300735474, "eval_runtime": 2445.2781, "eval_samples_per_second": 3.146, "eval_steps_per_second": 0.393, "step": 18000 }, { "epoch": 3.95, "learning_rate": 4.034511228533686e-05, "loss": 0.4001, "step": 18050 }, { "epoch": 3.96, "learning_rate": 4.0317591369440776e-05, "loss": 0.3737, "step": 18100 }, { "epoch": 3.97, "learning_rate": 4.02900704535447e-05, "loss": 0.4042, "step": 18150 }, { "epoch": 3.98, "learning_rate": 4.026254953764862e-05, "loss": 0.3745, "step": 18200 }, { "epoch": 4.0, "learning_rate": 4.0235028621752534e-05, "loss": 0.3906, "step": 18250 }, { "epoch": 4.01, "learning_rate": 4.020750770585645e-05, "loss": 0.312, "step": 18300 }, { "epoch": 4.02, "learning_rate": 4.0179986789960375e-05, "loss": 0.3038, "step": 18350 }, { "epoch": 4.03, "learning_rate": 4.015246587406429e-05, "loss": 0.3159, "step": 18400 }, { "epoch": 4.04, "learning_rate": 4.012494495816821e-05, "loss": 0.2978, "step": 18450 }, { "epoch": 4.05, "learning_rate": 4.0097424042272134e-05, "loss": 0.2936, "step": 18500 }, { "epoch": 4.05, "eval_acc": 0.20800832030577113, "eval_cer": 0.09400944807666368, "eval_loss": 0.4694964587688446, "eval_runtime": 2466.2638, "eval_samples_per_second": 3.119, "eval_steps_per_second": 0.39, "step": 18500 }, { "epoch": 4.06, "learning_rate": 4.0069903126376044e-05, "loss": 0.3163, "step": 18550 }, { "epoch": 4.07, "learning_rate": 4.004238221047997e-05, "loss": 0.2903, "step": 18600 }, { "epoch": 4.08, "learning_rate": 4.0014861294583885e-05, "loss": 0.3251, "step": 18650 }, { "epoch": 4.09, "learning_rate": 3.99873403786878e-05, "loss": 0.3194, "step": 18700 }, { "epoch": 4.11, "learning_rate": 3.9959819462791726e-05, "loss": 0.3029, "step": 18750 }, { "epoch": 4.12, "learning_rate": 3.9932298546895643e-05, "loss": 0.3113, "step": 18800 }, { "epoch": 4.13, "learning_rate": 3.990477763099956e-05, "loss": 0.3039, "step": 18850 }, { "epoch": 4.14, "learning_rate": 3.987725671510348e-05, "loss": 0.298, "step": 18900 }, { "epoch": 4.15, "learning_rate": 3.98497357992074e-05, "loss": 0.3092, "step": 18950 }, { "epoch": 4.16, "learning_rate": 3.982221488331132e-05, "loss": 0.3142, "step": 19000 }, { "epoch": 4.16, "eval_acc": 0.21970878832297078, "eval_cer": 0.08872261817460182, "eval_loss": 0.44163626432418823, "eval_runtime": 2477.8186, "eval_samples_per_second": 3.104, "eval_steps_per_second": 0.388, "step": 19000 }, { "epoch": 4.17, "learning_rate": 3.9794693967415236e-05, "loss": 0.3072, "step": 19050 }, { "epoch": 4.18, "learning_rate": 3.976717305151916e-05, "loss": 0.3123, "step": 19100 }, { "epoch": 4.19, "learning_rate": 3.973965213562307e-05, "loss": 0.2902, "step": 19150 }, { "epoch": 4.2, "learning_rate": 3.9712131219726994e-05, "loss": 0.3212, "step": 19200 }, { "epoch": 4.21, "learning_rate": 3.968461030383092e-05, "loss": 0.2916, "step": 19250 }, { "epoch": 4.23, "learning_rate": 3.965708938793483e-05, "loss": 0.3308, "step": 19300 }, { "epoch": 4.24, "learning_rate": 3.962956847203875e-05, "loss": 0.3016, "step": 19350 }, { "epoch": 4.25, "learning_rate": 3.960204755614267e-05, "loss": 0.3116, "step": 19400 }, { "epoch": 4.26, "learning_rate": 3.957452664024659e-05, "loss": 0.3083, "step": 19450 }, { "epoch": 4.27, "learning_rate": 3.954700572435051e-05, "loss": 0.3144, "step": 19500 }, { "epoch": 4.27, "eval_acc": 0.2143785751151354, "eval_cer": 0.0860528465935606, "eval_loss": 0.437505841255188, "eval_runtime": 2403.934, "eval_samples_per_second": 3.2, "eval_steps_per_second": 0.4, "step": 19500 }, { "epoch": 4.28, "learning_rate": 3.951948480845443e-05, "loss": 0.2962, "step": 19550 }, { "epoch": 4.29, "learning_rate": 3.9491963892558345e-05, "loss": 0.3088, "step": 19600 }, { "epoch": 4.3, "learning_rate": 3.946444297666226e-05, "loss": 0.3045, "step": 19650 }, { "epoch": 4.31, "learning_rate": 3.9436922060766186e-05, "loss": 0.3207, "step": 19700 }, { "epoch": 4.32, "learning_rate": 3.9409401144870103e-05, "loss": 0.3149, "step": 19750 }, { "epoch": 4.34, "learning_rate": 3.938188022897402e-05, "loss": 0.3031, "step": 19800 }, { "epoch": 4.35, "learning_rate": 3.9354359313077945e-05, "loss": 0.3168, "step": 19850 }, { "epoch": 4.36, "learning_rate": 3.932683839718186e-05, "loss": 0.3027, "step": 19900 }, { "epoch": 4.37, "learning_rate": 3.929931748128578e-05, "loss": 0.3243, "step": 19950 }, { "epoch": 4.38, "learning_rate": 3.9271796565389696e-05, "loss": 0.3092, "step": 20000 }, { "epoch": 4.38, "eval_acc": 0.21697867911895752, "eval_cer": 0.08637222693368517, "eval_loss": 0.4346335232257843, "eval_runtime": 2403.4504, "eval_samples_per_second": 3.2, "eval_steps_per_second": 0.4, "step": 20000 }, { "epoch": 4.39, "learning_rate": 3.924427564949362e-05, "loss": 0.3074, "step": 20050 }, { "epoch": 4.4, "learning_rate": 3.921675473359754e-05, "loss": 0.3082, "step": 20100 }, { "epoch": 4.41, "learning_rate": 3.9189233817701454e-05, "loss": 0.3088, "step": 20150 }, { "epoch": 4.42, "learning_rate": 3.916171290180538e-05, "loss": 0.3118, "step": 20200 }, { "epoch": 4.43, "learning_rate": 3.913419198590929e-05, "loss": 0.3229, "step": 20250 }, { "epoch": 4.44, "learning_rate": 3.910667107001321e-05, "loss": 0.3273, "step": 20300 }, { "epoch": 4.46, "learning_rate": 3.9079150154117137e-05, "loss": 0.3068, "step": 20350 }, { "epoch": 4.47, "learning_rate": 3.905162923822105e-05, "loss": 0.3177, "step": 20400 }, { "epoch": 4.48, "learning_rate": 3.902410832232497e-05, "loss": 0.3011, "step": 20450 }, { "epoch": 4.49, "learning_rate": 3.899658740642889e-05, "loss": 0.3275, "step": 20500 }, { "epoch": 4.49, "eval_acc": 0.2238689547290862, "eval_cer": 0.08576757483344935, "eval_loss": 0.42684033513069153, "eval_runtime": 2437.8724, "eval_samples_per_second": 3.155, "eval_steps_per_second": 0.395, "step": 20500 }, { "epoch": 4.5, "learning_rate": 3.8969066490532805e-05, "loss": 0.3103, "step": 20550 }, { "epoch": 4.51, "learning_rate": 3.894154557463672e-05, "loss": 0.3184, "step": 20600 }, { "epoch": 4.52, "learning_rate": 3.8914024658740646e-05, "loss": 0.3149, "step": 20650 }, { "epoch": 4.53, "learning_rate": 3.8886503742844564e-05, "loss": 0.3312, "step": 20700 }, { "epoch": 4.54, "learning_rate": 3.885898282694848e-05, "loss": 0.3176, "step": 20750 }, { "epoch": 4.55, "learning_rate": 3.8831461911052405e-05, "loss": 0.3085, "step": 20800 }, { "epoch": 4.57, "learning_rate": 3.8803940995156315e-05, "loss": 0.3118, "step": 20850 }, { "epoch": 4.58, "learning_rate": 3.877642007926024e-05, "loss": 0.3103, "step": 20900 }, { "epoch": 4.59, "learning_rate": 3.874889916336416e-05, "loss": 0.3097, "step": 20950 }, { "epoch": 4.6, "learning_rate": 3.872137824746807e-05, "loss": 0.3132, "step": 21000 }, { "epoch": 4.6, "eval_acc": 0.23218928754131704, "eval_cer": 0.08339392774252363, "eval_loss": 0.42323964834213257, "eval_runtime": 2399.2661, "eval_samples_per_second": 3.206, "eval_steps_per_second": 0.401, "step": 21000 }, { "epoch": 4.61, "learning_rate": 3.8693857331572e-05, "loss": 0.3174, "step": 21050 }, { "epoch": 4.62, "learning_rate": 3.8666336415675914e-05, "loss": 0.2985, "step": 21100 }, { "epoch": 4.63, "learning_rate": 3.863881549977983e-05, "loss": 0.3264, "step": 21150 }, { "epoch": 4.64, "learning_rate": 3.8611294583883756e-05, "loss": 0.2915, "step": 21200 }, { "epoch": 4.65, "learning_rate": 3.858377366798767e-05, "loss": 0.2875, "step": 21250 }, { "epoch": 4.66, "learning_rate": 3.855625275209159e-05, "loss": 0.3061, "step": 21300 }, { "epoch": 4.67, "learning_rate": 3.852873183619551e-05, "loss": 0.311, "step": 21350 }, { "epoch": 4.69, "learning_rate": 3.850121092029943e-05, "loss": 0.3053, "step": 21400 }, { "epoch": 4.7, "learning_rate": 3.847369000440335e-05, "loss": 0.3197, "step": 21450 }, { "epoch": 4.71, "learning_rate": 3.8446169088507265e-05, "loss": 0.3039, "step": 21500 }, { "epoch": 4.71, "eval_acc": 0.22971918873768601, "eval_cer": 0.08402803725277093, "eval_loss": 0.4141163229942322, "eval_runtime": 2448.8052, "eval_samples_per_second": 3.141, "eval_steps_per_second": 0.393, "step": 21500 }, { "epoch": 4.72, "learning_rate": 3.841864817261119e-05, "loss": 0.273, "step": 21550 }, { "epoch": 4.73, "learning_rate": 3.8391127256715106e-05, "loss": 0.3525, "step": 21600 }, { "epoch": 4.74, "learning_rate": 3.8363606340819024e-05, "loss": 0.3004, "step": 21650 }, { "epoch": 4.75, "learning_rate": 3.833608542492294e-05, "loss": 0.3112, "step": 21700 }, { "epoch": 4.76, "learning_rate": 3.8308564509026865e-05, "loss": 0.3008, "step": 21750 }, { "epoch": 4.77, "learning_rate": 3.828104359313078e-05, "loss": 0.2954, "step": 21800 }, { "epoch": 4.78, "learning_rate": 3.82535226772347e-05, "loss": 0.2975, "step": 21850 }, { "epoch": 4.8, "learning_rate": 3.822600176133862e-05, "loss": 0.2923, "step": 21900 }, { "epoch": 4.81, "learning_rate": 3.819848084544253e-05, "loss": 0.2934, "step": 21950 }, { "epoch": 4.82, "learning_rate": 3.817095992954646e-05, "loss": 0.3063, "step": 22000 }, { "epoch": 4.82, "eval_acc": 0.236999479948388, "eval_cer": 0.08368695145263791, "eval_loss": 0.40729042887687683, "eval_runtime": 2434.9032, "eval_samples_per_second": 3.159, "eval_steps_per_second": 0.395, "step": 22000 }, { "epoch": 4.83, "learning_rate": 3.814343901365038e-05, "loss": 0.3022, "step": 22050 }, { "epoch": 4.84, "learning_rate": 3.811591809775429e-05, "loss": 0.2964, "step": 22100 }, { "epoch": 4.85, "learning_rate": 3.8088397181858216e-05, "loss": 0.3109, "step": 22150 }, { "epoch": 4.86, "learning_rate": 3.806087626596213e-05, "loss": 0.3044, "step": 22200 }, { "epoch": 4.87, "learning_rate": 3.803335535006605e-05, "loss": 0.3082, "step": 22250 }, { "epoch": 4.88, "learning_rate": 3.8005834434169974e-05, "loss": 0.3162, "step": 22300 }, { "epoch": 4.89, "learning_rate": 3.797831351827389e-05, "loss": 0.3013, "step": 22350 }, { "epoch": 4.9, "learning_rate": 3.795079260237781e-05, "loss": 0.306, "step": 22400 }, { "epoch": 4.92, "learning_rate": 3.7923271686481725e-05, "loss": 0.299, "step": 22450 }, { "epoch": 4.93, "learning_rate": 3.789575077058565e-05, "loss": 0.3085, "step": 22500 }, { "epoch": 4.93, "eval_acc": 0.24193967755565005, "eval_cer": 0.07995981389118433, "eval_loss": 0.3981165289878845, "eval_runtime": 2412.3309, "eval_samples_per_second": 3.189, "eval_steps_per_second": 0.399, "step": 22500 }, { "epoch": 4.94, "learning_rate": 3.7868229854689566e-05, "loss": 0.2896, "step": 22550 }, { "epoch": 4.95, "learning_rate": 3.7840708938793484e-05, "loss": 0.3118, "step": 22600 }, { "epoch": 4.96, "learning_rate": 3.781318802289741e-05, "loss": 0.2927, "step": 22650 }, { "epoch": 4.97, "learning_rate": 3.778566710700132e-05, "loss": 0.3019, "step": 22700 }, { "epoch": 4.98, "learning_rate": 3.775814619110524e-05, "loss": 0.2849, "step": 22750 }, { "epoch": 4.99, "learning_rate": 3.773062527520916e-05, "loss": 0.3009, "step": 22800 }, { "epoch": 5.0, "learning_rate": 3.7703104359313076e-05, "loss": 0.2766, "step": 22850 }, { "epoch": 5.01, "learning_rate": 3.7675583443417e-05, "loss": 0.2198, "step": 22900 }, { "epoch": 5.02, "learning_rate": 3.764806252752092e-05, "loss": 0.2204, "step": 22950 }, { "epoch": 5.04, "learning_rate": 3.7620541611624835e-05, "loss": 0.2059, "step": 23000 }, { "epoch": 5.04, "eval_acc": 0.2506500259684542, "eval_cer": 0.0784187262305833, "eval_loss": 0.3935554623603821, "eval_runtime": 2451.7215, "eval_samples_per_second": 3.137, "eval_steps_per_second": 0.392, "step": 23000 }, { "epoch": 5.05, "learning_rate": 3.759302069572875e-05, "loss": 0.2265, "step": 23050 }, { "epoch": 5.06, "learning_rate": 3.7565499779832676e-05, "loss": 0.2116, "step": 23100 }, { "epoch": 5.07, "learning_rate": 3.753797886393659e-05, "loss": 0.2365, "step": 23150 }, { "epoch": 5.08, "learning_rate": 3.751045794804051e-05, "loss": 0.2479, "step": 23200 }, { "epoch": 5.09, "learning_rate": 3.7482937032144434e-05, "loss": 0.2294, "step": 23250 }, { "epoch": 5.1, "learning_rate": 3.745541611624835e-05, "loss": 0.2348, "step": 23300 }, { "epoch": 5.11, "learning_rate": 3.742789520035227e-05, "loss": 0.2182, "step": 23350 }, { "epoch": 5.12, "learning_rate": 3.740037428445619e-05, "loss": 0.2341, "step": 23400 }, { "epoch": 5.13, "learning_rate": 3.737285336856011e-05, "loss": 0.2202, "step": 23450 }, { "epoch": 5.15, "learning_rate": 3.7345332452664027e-05, "loss": 0.2505, "step": 23500 }, { "epoch": 5.15, "eval_acc": 0.2485699427653965, "eval_cer": 0.07783422920035535, "eval_loss": 0.3888697326183319, "eval_runtime": 2447.0114, "eval_samples_per_second": 3.143, "eval_steps_per_second": 0.393, "step": 23500 }, { "epoch": 5.16, "learning_rate": 3.731836195508587e-05, "loss": 0.2226, "step": 23550 }, { "epoch": 5.17, "learning_rate": 3.7290841039189786e-05, "loss": 0.2489, "step": 23600 }, { "epoch": 5.18, "learning_rate": 3.72633201232937e-05, "loss": 0.2134, "step": 23650 }, { "epoch": 5.19, "learning_rate": 3.723579920739763e-05, "loss": 0.2285, "step": 23700 }, { "epoch": 5.2, "learning_rate": 3.7208278291501544e-05, "loss": 0.2436, "step": 23750 }, { "epoch": 5.21, "learning_rate": 3.718075737560546e-05, "loss": 0.234, "step": 23800 }, { "epoch": 5.22, "learning_rate": 3.715323645970938e-05, "loss": 0.2278, "step": 23850 }, { "epoch": 5.23, "learning_rate": 3.71257155438133e-05, "loss": 0.2226, "step": 23900 }, { "epoch": 5.24, "learning_rate": 3.709819462791722e-05, "loss": 0.2259, "step": 23950 }, { "epoch": 5.25, "learning_rate": 3.707067371202114e-05, "loss": 0.2251, "step": 24000 }, { "epoch": 5.25, "eval_acc": 0.26183047318488945, "eval_cer": 0.07873345540070605, "eval_loss": 0.38558822870254517, "eval_runtime": 2441.3865, "eval_samples_per_second": 3.151, "eval_steps_per_second": 0.394, "step": 24000 }, { "epoch": 5.27, "learning_rate": 3.704315279612506e-05, "loss": 0.2283, "step": 24050 }, { "epoch": 5.28, "learning_rate": 3.701563188022897e-05, "loss": 0.2267, "step": 24100 }, { "epoch": 5.29, "learning_rate": 3.6988110964332895e-05, "loss": 0.2295, "step": 24150 }, { "epoch": 5.3, "learning_rate": 3.696059004843682e-05, "loss": 0.2367, "step": 24200 }, { "epoch": 5.31, "learning_rate": 3.693306913254073e-05, "loss": 0.2212, "step": 24250 }, { "epoch": 5.32, "learning_rate": 3.6905548216644654e-05, "loss": 0.2417, "step": 24300 }, { "epoch": 5.33, "learning_rate": 3.687802730074857e-05, "loss": 0.2277, "step": 24350 }, { "epoch": 5.34, "learning_rate": 3.685050638485249e-05, "loss": 0.2199, "step": 24400 }, { "epoch": 5.35, "learning_rate": 3.682298546895641e-05, "loss": 0.2416, "step": 24450 }, { "epoch": 5.36, "learning_rate": 3.679546455306033e-05, "loss": 0.2337, "step": 24500 }, { "epoch": 5.36, "eval_acc": 0.2687207487950181, "eval_cer": 0.07631484699976279, "eval_loss": 0.38175275921821594, "eval_runtime": 2401.0005, "eval_samples_per_second": 3.204, "eval_steps_per_second": 0.401, "step": 24500 }, { "epoch": 5.38, "learning_rate": 3.6767943637164246e-05, "loss": 0.2184, "step": 24550 }, { "epoch": 5.39, "learning_rate": 3.674042272126816e-05, "loss": 0.2285, "step": 24600 }, { "epoch": 5.4, "learning_rate": 3.671290180537209e-05, "loss": 0.2263, "step": 24650 }, { "epoch": 5.41, "learning_rate": 3.6685380889476e-05, "loss": 0.2257, "step": 24700 }, { "epoch": 5.42, "learning_rate": 3.665785997357992e-05, "loss": 0.2422, "step": 24750 }, { "epoch": 5.43, "learning_rate": 3.6630339057683846e-05, "loss": 0.2271, "step": 24800 }, { "epoch": 5.44, "learning_rate": 3.6602818141787756e-05, "loss": 0.2312, "step": 24850 }, { "epoch": 5.45, "learning_rate": 3.657529722589168e-05, "loss": 0.2221, "step": 24900 }, { "epoch": 5.46, "learning_rate": 3.65477763099956e-05, "loss": 0.2382, "step": 24950 }, { "epoch": 5.47, "learning_rate": 3.6520255394099514e-05, "loss": 0.2173, "step": 25000 }, { "epoch": 5.47, "eval_acc": 0.27171086839941355, "eval_cer": 0.07626523451974344, "eval_loss": 0.38215985894203186, "eval_runtime": 2452.1174, "eval_samples_per_second": 3.137, "eval_steps_per_second": 0.392, "step": 25000 }, { "epoch": 5.48, "learning_rate": 3.649273447820344e-05, "loss": 0.2393, "step": 25050 }, { "epoch": 5.5, "learning_rate": 3.6465213562307355e-05, "loss": 0.2445, "step": 25100 }, { "epoch": 5.51, "learning_rate": 3.643769264641127e-05, "loss": 0.2258, "step": 25150 }, { "epoch": 5.52, "learning_rate": 3.641017173051519e-05, "loss": 0.2308, "step": 25200 }, { "epoch": 5.53, "learning_rate": 3.6382650814619114e-05, "loss": 0.2456, "step": 25250 }, { "epoch": 5.54, "learning_rate": 3.635512989872303e-05, "loss": 0.2332, "step": 25300 }, { "epoch": 5.55, "learning_rate": 3.632760898282695e-05, "loss": 0.2377, "step": 25350 }, { "epoch": 5.56, "learning_rate": 3.630008806693087e-05, "loss": 0.2393, "step": 25400 }, { "epoch": 5.57, "learning_rate": 3.627256715103479e-05, "loss": 0.2172, "step": 25450 }, { "epoch": 5.58, "learning_rate": 3.6245046235138706e-05, "loss": 0.2316, "step": 25500 }, { "epoch": 5.58, "eval_acc": 0.27626105040610227, "eval_cer": 0.07521251970933289, "eval_loss": 0.37909185886383057, "eval_runtime": 2402.1196, "eval_samples_per_second": 3.202, "eval_steps_per_second": 0.4, "step": 25500 }, { "epoch": 5.59, "learning_rate": 3.6217525319242623e-05, "loss": 0.2604, "step": 25550 }, { "epoch": 5.61, "learning_rate": 3.619000440334655e-05, "loss": 0.2378, "step": 25600 }, { "epoch": 5.62, "learning_rate": 3.6162483487450465e-05, "loss": 0.2424, "step": 25650 }, { "epoch": 5.63, "learning_rate": 3.613551298987231e-05, "loss": 0.2439, "step": 25700 }, { "epoch": 5.64, "learning_rate": 3.6107992073976224e-05, "loss": 0.2377, "step": 25750 }, { "epoch": 5.65, "learning_rate": 3.608047115808014e-05, "loss": 0.2263, "step": 25800 }, { "epoch": 5.66, "learning_rate": 3.6052950242184065e-05, "loss": 0.2228, "step": 25850 }, { "epoch": 5.67, "learning_rate": 3.602542932628798e-05, "loss": 0.2231, "step": 25900 }, { "epoch": 5.68, "learning_rate": 3.59979084103919e-05, "loss": 0.2247, "step": 25950 }, { "epoch": 5.69, "learning_rate": 3.597038749449582e-05, "loss": 0.2348, "step": 26000 }, { "epoch": 5.69, "eval_acc": 0.2852314092192887, "eval_cer": 0.07336600521861274, "eval_loss": 0.36554059386253357, "eval_runtime": 2406.905, "eval_samples_per_second": 3.196, "eval_steps_per_second": 0.4, "step": 26000 }, { "epoch": 5.7, "learning_rate": 3.594286657859974e-05, "loss": 0.2381, "step": 26050 }, { "epoch": 5.71, "learning_rate": 3.591534566270366e-05, "loss": 0.2189, "step": 26100 }, { "epoch": 5.73, "learning_rate": 3.5887824746807575e-05, "loss": 0.2467, "step": 26150 }, { "epoch": 5.74, "learning_rate": 3.58603038309115e-05, "loss": 0.2192, "step": 26200 }, { "epoch": 5.75, "learning_rate": 3.583278291501541e-05, "loss": 0.2484, "step": 26250 }, { "epoch": 5.76, "learning_rate": 3.580526199911933e-05, "loss": 0.2236, "step": 26300 }, { "epoch": 5.77, "learning_rate": 3.577774108322326e-05, "loss": 0.2344, "step": 26350 }, { "epoch": 5.78, "learning_rate": 3.575022016732717e-05, "loss": 0.2343, "step": 26400 }, { "epoch": 5.79, "learning_rate": 3.572269925143109e-05, "loss": 0.2252, "step": 26450 }, { "epoch": 5.8, "learning_rate": 3.569517833553501e-05, "loss": 0.2553, "step": 26500 }, { "epoch": 5.8, "eval_acc": 0.2951118044338128, "eval_cer": 0.07258925982830981, "eval_loss": 0.3526802659034729, "eval_runtime": 2459.0427, "eval_samples_per_second": 3.128, "eval_steps_per_second": 0.391, "step": 26500 }, { "epoch": 5.81, "learning_rate": 3.5667657419638926e-05, "loss": 0.2235, "step": 26550 }, { "epoch": 5.82, "learning_rate": 3.564013650374284e-05, "loss": 0.2257, "step": 26600 }, { "epoch": 5.84, "learning_rate": 3.561261558784677e-05, "loss": 0.2309, "step": 26650 }, { "epoch": 5.85, "learning_rate": 3.5585094671950684e-05, "loss": 0.2291, "step": 26700 }, { "epoch": 5.86, "learning_rate": 3.55575737560546e-05, "loss": 0.2064, "step": 26750 }, { "epoch": 5.87, "learning_rate": 3.5530052840158525e-05, "loss": 0.237, "step": 26800 }, { "epoch": 5.88, "learning_rate": 3.5502531924262436e-05, "loss": 0.2322, "step": 26850 }, { "epoch": 5.89, "learning_rate": 3.547501100836636e-05, "loss": 0.2466, "step": 26900 }, { "epoch": 5.9, "learning_rate": 3.5447490092470284e-05, "loss": 0.2296, "step": 26950 }, { "epoch": 5.91, "learning_rate": 3.5419969176574194e-05, "loss": 0.2319, "step": 27000 }, { "epoch": 5.91, "eval_acc": 0.30330213204585255, "eval_cer": 0.07118305609776139, "eval_loss": 0.35267382860183716, "eval_runtime": 2453.5416, "eval_samples_per_second": 3.135, "eval_steps_per_second": 0.392, "step": 27000 }, { "epoch": 5.92, "learning_rate": 3.539244826067812e-05, "loss": 0.2354, "step": 27050 }, { "epoch": 5.93, "learning_rate": 3.5364927344782035e-05, "loss": 0.2217, "step": 27100 }, { "epoch": 5.94, "learning_rate": 3.533740642888595e-05, "loss": 0.2285, "step": 27150 }, { "epoch": 5.96, "learning_rate": 3.5309885512989876e-05, "loss": 0.2071, "step": 27200 }, { "epoch": 5.97, "learning_rate": 3.528236459709379e-05, "loss": 0.2207, "step": 27250 }, { "epoch": 5.98, "learning_rate": 3.525484368119771e-05, "loss": 0.2173, "step": 27300 }, { "epoch": 5.99, "learning_rate": 3.522732276530163e-05, "loss": 0.2414, "step": 27350 }, { "epoch": 6.0, "learning_rate": 3.519980184940555e-05, "loss": 0.231, "step": 27400 }, { "epoch": 6.01, "learning_rate": 3.517228093350947e-05, "loss": 0.1624, "step": 27450 }, { "epoch": 6.02, "learning_rate": 3.5144760017613386e-05, "loss": 0.1641, "step": 27500 }, { "epoch": 6.02, "eval_acc": 0.3092823712546435, "eval_cer": 0.06900475814691186, "eval_loss": 0.3480505049228668, "eval_runtime": 2442.26, "eval_samples_per_second": 3.15, "eval_steps_per_second": 0.394, "step": 27500 }, { "epoch": 6.03, "learning_rate": 3.511723910171731e-05, "loss": 0.1622, "step": 27550 }, { "epoch": 6.04, "learning_rate": 3.508971818582123e-05, "loss": 0.1549, "step": 27600 }, { "epoch": 6.05, "learning_rate": 3.5062197269925144e-05, "loss": 0.1649, "step": 27650 }, { "epoch": 6.06, "learning_rate": 3.503467635402906e-05, "loss": 0.1519, "step": 27700 }, { "epoch": 6.08, "learning_rate": 3.5007155438132985e-05, "loss": 0.1632, "step": 27750 }, { "epoch": 6.09, "learning_rate": 3.49796345222369e-05, "loss": 0.173, "step": 27800 }, { "epoch": 6.1, "learning_rate": 3.495211360634082e-05, "loss": 0.1684, "step": 27850 }, { "epoch": 6.11, "learning_rate": 3.4924592690444744e-05, "loss": 0.1682, "step": 27900 }, { "epoch": 6.12, "learning_rate": 3.4897071774548654e-05, "loss": 0.1652, "step": 27950 }, { "epoch": 6.13, "learning_rate": 3.486955085865258e-05, "loss": 0.1691, "step": 28000 }, { "epoch": 6.13, "eval_acc": 0.3095423816550257, "eval_cer": 0.07106367606771484, "eval_loss": 0.34748131036758423, "eval_runtime": 2438.156, "eval_samples_per_second": 3.155, "eval_steps_per_second": 0.395, "step": 28000 }, { "epoch": 6.14, "learning_rate": 3.48420299427565e-05, "loss": 0.1665, "step": 28050 }, { "epoch": 6.15, "learning_rate": 3.481450902686041e-05, "loss": 0.1652, "step": 28100 }, { "epoch": 6.16, "learning_rate": 3.4786988110964336e-05, "loss": 0.1716, "step": 28150 }, { "epoch": 6.17, "learning_rate": 3.4759467195068253e-05, "loss": 0.1717, "step": 28200 }, { "epoch": 6.19, "learning_rate": 3.473194627917217e-05, "loss": 0.1633, "step": 28250 }, { "epoch": 6.2, "learning_rate": 3.4704425363276095e-05, "loss": 0.1724, "step": 28300 }, { "epoch": 6.21, "learning_rate": 3.467690444738001e-05, "loss": 0.1676, "step": 28350 }, { "epoch": 6.22, "learning_rate": 3.464938353148393e-05, "loss": 0.1761, "step": 28400 }, { "epoch": 6.23, "learning_rate": 3.4621862615587846e-05, "loss": 0.1665, "step": 28450 }, { "epoch": 6.24, "learning_rate": 3.459434169969177e-05, "loss": 0.1777, "step": 28500 }, { "epoch": 6.24, "eval_acc": 0.3095423816550257, "eval_cer": 0.0700838295873327, "eval_loss": 0.3448556661605835, "eval_runtime": 2427.8966, "eval_samples_per_second": 3.168, "eval_steps_per_second": 0.396, "step": 28500 }, { "epoch": 6.25, "learning_rate": 3.456682078379568e-05, "loss": 0.1831, "step": 28550 }, { "epoch": 6.26, "learning_rate": 3.4539299867899604e-05, "loss": 0.1543, "step": 28600 }, { "epoch": 6.27, "learning_rate": 3.451177895200353e-05, "loss": 0.1754, "step": 28650 }, { "epoch": 6.28, "learning_rate": 3.448425803610744e-05, "loss": 0.185, "step": 28700 }, { "epoch": 6.29, "learning_rate": 3.445673712021136e-05, "loss": 0.1825, "step": 28750 }, { "epoch": 6.31, "learning_rate": 3.442921620431528e-05, "loss": 0.1781, "step": 28800 }, { "epoch": 6.32, "learning_rate": 3.44016952884192e-05, "loss": 0.183, "step": 28850 }, { "epoch": 6.33, "learning_rate": 3.437417437252312e-05, "loss": 0.1726, "step": 28900 }, { "epoch": 6.34, "learning_rate": 3.434720387494496e-05, "loss": 0.1857, "step": 28950 }, { "epoch": 6.35, "learning_rate": 3.4319682959048874e-05, "loss": 0.1812, "step": 29000 }, { "epoch": 6.35, "eval_acc": 0.3226729068743275, "eval_cer": 0.06831483459664278, "eval_loss": 0.3413793742656708, "eval_runtime": 2402.3605, "eval_samples_per_second": 3.202, "eval_steps_per_second": 0.4, "step": 29000 }, { "epoch": 6.36, "learning_rate": 3.42921620431528e-05, "loss": 0.1814, "step": 29050 }, { "epoch": 6.37, "learning_rate": 3.426464112725672e-05, "loss": 0.172, "step": 29100 }, { "epoch": 6.38, "learning_rate": 3.423712021136063e-05, "loss": 0.1774, "step": 29150 }, { "epoch": 6.39, "learning_rate": 3.4209599295464556e-05, "loss": 0.165, "step": 29200 }, { "epoch": 6.4, "learning_rate": 3.418207837956847e-05, "loss": 0.1818, "step": 29250 }, { "epoch": 6.42, "learning_rate": 3.415455746367239e-05, "loss": 0.1792, "step": 29300 }, { "epoch": 6.43, "learning_rate": 3.4127036547776314e-05, "loss": 0.1761, "step": 29350 }, { "epoch": 6.44, "learning_rate": 3.409951563188023e-05, "loss": 0.1896, "step": 29400 }, { "epoch": 6.45, "learning_rate": 3.407199471598415e-05, "loss": 0.17, "step": 29450 }, { "epoch": 6.46, "learning_rate": 3.4044473800088066e-05, "loss": 0.176, "step": 29500 }, { "epoch": 6.46, "eval_acc": 0.31448257926228773, "eval_cer": 0.06894274254688768, "eval_loss": 0.34407129883766174, "eval_runtime": 2409.0404, "eval_samples_per_second": 3.193, "eval_steps_per_second": 0.399, "step": 29500 }, { "epoch": 6.47, "learning_rate": 3.401695288419199e-05, "loss": 0.1697, "step": 29550 }, { "epoch": 6.48, "learning_rate": 3.398943196829591e-05, "loss": 0.1635, "step": 29600 }, { "epoch": 6.49, "learning_rate": 3.3961911052399824e-05, "loss": 0.1768, "step": 29650 }, { "epoch": 6.5, "learning_rate": 3.393439013650375e-05, "loss": 0.1658, "step": 29700 }, { "epoch": 6.51, "learning_rate": 3.3906869220607665e-05, "loss": 0.1718, "step": 29750 }, { "epoch": 6.52, "learning_rate": 3.387934830471158e-05, "loss": 0.1778, "step": 29800 }, { "epoch": 6.54, "learning_rate": 3.38518273888155e-05, "loss": 0.1834, "step": 29850 }, { "epoch": 6.55, "learning_rate": 3.3824306472919423e-05, "loss": 0.1727, "step": 29900 }, { "epoch": 6.56, "learning_rate": 3.379678555702334e-05, "loss": 0.1676, "step": 29950 }, { "epoch": 6.57, "learning_rate": 3.376926464112726e-05, "loss": 0.1851, "step": 30000 }, { "epoch": 6.57, "eval_acc": 0.341523660902038, "eval_cer": 0.06639855255589544, "eval_loss": 0.33033427596092224, "eval_runtime": 2449.8438, "eval_samples_per_second": 3.14, "eval_steps_per_second": 0.393, "step": 30000 }, { "epoch": 6.58, "learning_rate": 3.374174372523118e-05, "loss": 0.1848, "step": 30050 }, { "epoch": 6.59, "learning_rate": 3.371422280933509e-05, "loss": 0.1665, "step": 30100 }, { "epoch": 6.6, "learning_rate": 3.3686701893439016e-05, "loss": 0.1852, "step": 30150 }, { "epoch": 6.61, "learning_rate": 3.365918097754294e-05, "loss": 0.1784, "step": 30200 }, { "epoch": 6.62, "learning_rate": 3.363166006164685e-05, "loss": 0.1773, "step": 30250 }, { "epoch": 6.63, "learning_rate": 3.3604139145750774e-05, "loss": 0.1649, "step": 30300 }, { "epoch": 6.65, "learning_rate": 3.357661822985469e-05, "loss": 0.1757, "step": 30350 }, { "epoch": 6.66, "learning_rate": 3.354964773227653e-05, "loss": 0.1737, "step": 30400 }, { "epoch": 6.67, "learning_rate": 3.352212681638045e-05, "loss": 0.17, "step": 30450 }, { "epoch": 6.68, "learning_rate": 3.349460590048437e-05, "loss": 0.1821, "step": 30500 }, { "epoch": 6.68, "eval_acc": 0.3542641705207665, "eval_cer": 0.0656853731556173, "eval_loss": 0.32759806513786316, "eval_runtime": 2443.4895, "eval_samples_per_second": 3.148, "eval_steps_per_second": 0.394, "step": 30500 }, { "epoch": 6.69, "learning_rate": 3.3467084984588285e-05, "loss": 0.184, "step": 30550 }, { "epoch": 6.7, "learning_rate": 3.343956406869221e-05, "loss": 0.1599, "step": 30600 }, { "epoch": 6.71, "learning_rate": 3.3412043152796127e-05, "loss": 0.1702, "step": 30650 }, { "epoch": 6.72, "learning_rate": 3.3384522236900044e-05, "loss": 0.1789, "step": 30700 }, { "epoch": 6.73, "learning_rate": 3.335700132100397e-05, "loss": 0.1894, "step": 30750 }, { "epoch": 6.74, "learning_rate": 3.3329480405107885e-05, "loss": 0.1738, "step": 30800 }, { "epoch": 6.75, "learning_rate": 3.33019594892118e-05, "loss": 0.1634, "step": 30850 }, { "epoch": 6.77, "learning_rate": 3.327443857331572e-05, "loss": 0.1645, "step": 30900 }, { "epoch": 6.78, "learning_rate": 3.324691765741964e-05, "loss": 0.1718, "step": 30950 }, { "epoch": 6.79, "learning_rate": 3.321939674152356e-05, "loss": 0.1841, "step": 31000 }, { "epoch": 6.79, "eval_acc": 0.3491939677133133, "eval_cer": 0.06529002370546311, "eval_loss": 0.32412809133529663, "eval_runtime": 2425.8573, "eval_samples_per_second": 3.171, "eval_steps_per_second": 0.397, "step": 31000 }, { "epoch": 6.8, "learning_rate": 3.319187582562748e-05, "loss": 0.1749, "step": 31050 }, { "epoch": 6.81, "learning_rate": 3.31643549097314e-05, "loss": 0.1653, "step": 31100 }, { "epoch": 6.82, "learning_rate": 3.313683399383531e-05, "loss": 0.1583, "step": 31150 }, { "epoch": 6.83, "learning_rate": 3.3109313077939236e-05, "loss": 0.1632, "step": 31200 }, { "epoch": 6.84, "learning_rate": 3.308179216204316e-05, "loss": 0.1738, "step": 31250 }, { "epoch": 6.85, "learning_rate": 3.305427124614707e-05, "loss": 0.166, "step": 31300 }, { "epoch": 6.86, "learning_rate": 3.3026750330250994e-05, "loss": 0.167, "step": 31350 }, { "epoch": 6.88, "learning_rate": 3.299922941435491e-05, "loss": 0.1644, "step": 31400 }, { "epoch": 6.89, "learning_rate": 3.297170849845883e-05, "loss": 0.1716, "step": 31450 }, { "epoch": 6.9, "learning_rate": 3.2944187582562745e-05, "loss": 0.1772, "step": 31500 }, { "epoch": 6.9, "eval_acc": 0.35205408211751765, "eval_cer": 0.06414893666501809, "eval_loss": 0.321593701839447, "eval_runtime": 2418.8594, "eval_samples_per_second": 3.18, "eval_steps_per_second": 0.398, "step": 31500 }, { "epoch": 6.91, "learning_rate": 3.291666666666667e-05, "loss": 0.1648, "step": 31550 }, { "epoch": 6.92, "learning_rate": 3.2889145750770587e-05, "loss": 0.1846, "step": 31600 }, { "epoch": 6.93, "learning_rate": 3.2861624834874504e-05, "loss": 0.1678, "step": 31650 }, { "epoch": 6.94, "learning_rate": 3.283410391897843e-05, "loss": 0.1614, "step": 31700 }, { "epoch": 6.95, "learning_rate": 3.2806583003082345e-05, "loss": 0.1832, "step": 31750 }, { "epoch": 6.96, "learning_rate": 3.277906208718626e-05, "loss": 0.1755, "step": 31800 }, { "epoch": 6.97, "learning_rate": 3.2751541171290186e-05, "loss": 0.1679, "step": 31850 }, { "epoch": 6.98, "learning_rate": 3.27240202553941e-05, "loss": 0.1672, "step": 31900 }, { "epoch": 7.0, "learning_rate": 3.269649933949802e-05, "loss": 0.1728, "step": 31950 }, { "epoch": 7.01, "learning_rate": 3.266897842360194e-05, "loss": 0.1417, "step": 32000 }, { "epoch": 7.01, "eval_acc": 0.36648465933873053, "eval_cer": 0.06339854790472543, "eval_loss": 0.31661146879196167, "eval_runtime": 2439.7983, "eval_samples_per_second": 3.153, "eval_steps_per_second": 0.394, "step": 32000 }, { "epoch": 7.02, "learning_rate": 3.264145750770586e-05, "loss": 0.1188, "step": 32050 }, { "epoch": 7.03, "learning_rate": 3.261393659180978e-05, "loss": 0.1098, "step": 32100 }, { "epoch": 7.04, "learning_rate": 3.2586415675913696e-05, "loss": 0.1097, "step": 32150 }, { "epoch": 7.05, "learning_rate": 3.255889476001762e-05, "loss": 0.1268, "step": 32200 }, { "epoch": 7.06, "learning_rate": 3.253137384412153e-05, "loss": 0.1251, "step": 32250 }, { "epoch": 7.07, "learning_rate": 3.2503852928225454e-05, "loss": 0.1307, "step": 32300 }, { "epoch": 7.08, "learning_rate": 3.247633201232938e-05, "loss": 0.1111, "step": 32350 }, { "epoch": 7.09, "learning_rate": 3.244881109643329e-05, "loss": 0.11, "step": 32400 }, { "epoch": 7.1, "learning_rate": 3.242129018053721e-05, "loss": 0.1324, "step": 32450 }, { "epoch": 7.12, "learning_rate": 3.239376926464113e-05, "loss": 0.1426, "step": 32500 }, { "epoch": 7.12, "eval_acc": 0.35816432652649965, "eval_cer": 0.0637876957948772, "eval_loss": 0.32509633898735046, "eval_runtime": 2419.5538, "eval_samples_per_second": 3.179, "eval_steps_per_second": 0.398, "step": 32500 }, { "epoch": 7.13, "learning_rate": 3.236624834874505e-05, "loss": 0.1214, "step": 32550 }, { "epoch": 7.14, "learning_rate": 3.2338727432848964e-05, "loss": 0.119, "step": 32600 }, { "epoch": 7.15, "learning_rate": 3.231120651695289e-05, "loss": 0.1233, "step": 32650 }, { "epoch": 7.16, "learning_rate": 3.2283685601056805e-05, "loss": 0.1237, "step": 32700 }, { "epoch": 7.17, "learning_rate": 3.225616468516072e-05, "loss": 0.1298, "step": 32750 }, { "epoch": 7.18, "learning_rate": 3.2228643769264646e-05, "loss": 0.1168, "step": 32800 }, { "epoch": 7.19, "learning_rate": 3.2201122853368556e-05, "loss": 0.1283, "step": 32850 }, { "epoch": 7.2, "learning_rate": 3.217360193747248e-05, "loss": 0.123, "step": 32900 }, { "epoch": 7.21, "learning_rate": 3.2146081021576404e-05, "loss": 0.1198, "step": 32950 }, { "epoch": 7.23, "learning_rate": 3.2118560105680315e-05, "loss": 0.1251, "step": 33000 }, { "epoch": 7.23, "eval_acc": 0.3653146125370106, "eval_cer": 0.06344195882474236, "eval_loss": 0.3220234513282776, "eval_runtime": 2413.7335, "eval_samples_per_second": 3.187, "eval_steps_per_second": 0.399, "step": 33000 }, { "epoch": 7.24, "learning_rate": 3.209103918978424e-05, "loss": 0.1201, "step": 33050 }, { "epoch": 7.25, "learning_rate": 3.2063518273888156e-05, "loss": 0.1239, "step": 33100 }, { "epoch": 7.26, "learning_rate": 3.203599735799207e-05, "loss": 0.1272, "step": 33150 }, { "epoch": 7.27, "learning_rate": 3.2008476442096e-05, "loss": 0.1176, "step": 33200 }, { "epoch": 7.28, "learning_rate": 3.1980955526199914e-05, "loss": 0.1229, "step": 33250 }, { "epoch": 7.29, "learning_rate": 3.195398502862175e-05, "loss": 0.1251, "step": 33300 }, { "epoch": 7.3, "learning_rate": 3.1926464112725674e-05, "loss": 0.1271, "step": 33350 }, { "epoch": 7.31, "learning_rate": 3.189894319682959e-05, "loss": 0.1204, "step": 33400 }, { "epoch": 7.32, "learning_rate": 3.187142228093351e-05, "loss": 0.1339, "step": 33450 }, { "epoch": 7.33, "learning_rate": 3.184390136503743e-05, "loss": 0.131, "step": 33500 }, { "epoch": 7.33, "eval_acc": 0.3733749349488592, "eval_cer": 0.06140319597394724, "eval_loss": 0.3167717754840851, "eval_runtime": 2416.3007, "eval_samples_per_second": 3.183, "eval_steps_per_second": 0.398, "step": 33500 }, { "epoch": 7.35, "learning_rate": 3.181638044914135e-05, "loss": 0.1267, "step": 33550 }, { "epoch": 7.36, "learning_rate": 3.1788859533245266e-05, "loss": 0.1179, "step": 33600 }, { "epoch": 7.37, "learning_rate": 3.1761338617349184e-05, "loss": 0.1275, "step": 33650 }, { "epoch": 7.38, "learning_rate": 3.173381770145311e-05, "loss": 0.1249, "step": 33700 }, { "epoch": 7.39, "learning_rate": 3.1706296785557025e-05, "loss": 0.1244, "step": 33750 }, { "epoch": 7.4, "learning_rate": 3.167877586966094e-05, "loss": 0.1268, "step": 33800 }, { "epoch": 7.41, "learning_rate": 3.1651254953764866e-05, "loss": 0.1255, "step": 33850 }, { "epoch": 7.42, "learning_rate": 3.162373403786878e-05, "loss": 0.1454, "step": 33900 }, { "epoch": 7.43, "learning_rate": 3.15962131219727e-05, "loss": 0.1169, "step": 33950 }, { "epoch": 7.44, "learning_rate": 3.1568692206076624e-05, "loss": 0.1504, "step": 34000 }, { "epoch": 7.44, "eval_acc": 0.3848153925656766, "eval_cer": 0.062052809384200595, "eval_loss": 0.31372004747390747, "eval_runtime": 2434.2574, "eval_samples_per_second": 3.16, "eval_steps_per_second": 0.395, "step": 34000 }, { "epoch": 7.46, "learning_rate": 3.154117129018054e-05, "loss": 0.1325, "step": 34050 }, { "epoch": 7.47, "learning_rate": 3.151365037428446e-05, "loss": 0.1274, "step": 34100 }, { "epoch": 7.48, "learning_rate": 3.1486129458388376e-05, "loss": 0.1428, "step": 34150 }, { "epoch": 7.49, "learning_rate": 3.145860854249229e-05, "loss": 0.1386, "step": 34200 }, { "epoch": 7.5, "learning_rate": 3.143108762659622e-05, "loss": 0.1441, "step": 34250 }, { "epoch": 7.51, "learning_rate": 3.1403566710700134e-05, "loss": 0.1411, "step": 34300 }, { "epoch": 7.52, "learning_rate": 3.137604579480405e-05, "loss": 0.1215, "step": 34350 }, { "epoch": 7.53, "learning_rate": 3.134852487890797e-05, "loss": 0.1198, "step": 34400 }, { "epoch": 7.54, "learning_rate": 3.132100396301189e-05, "loss": 0.1292, "step": 34450 }, { "epoch": 7.55, "learning_rate": 3.129348304711581e-05, "loss": 0.1204, "step": 34500 }, { "epoch": 7.55, "eval_acc": 0.38715548616911655, "eval_cer": 0.05957218538323315, "eval_loss": 0.312330961227417, "eval_runtime": 2394.8652, "eval_samples_per_second": 3.212, "eval_steps_per_second": 0.402, "step": 34500 }, { "epoch": 7.56, "learning_rate": 3.1265962131219726e-05, "loss": 0.1424, "step": 34550 }, { "epoch": 7.58, "learning_rate": 3.123844121532365e-05, "loss": 0.1363, "step": 34600 }, { "epoch": 7.59, "learning_rate": 3.121092029942757e-05, "loss": 0.1412, "step": 34650 }, { "epoch": 7.6, "learning_rate": 3.1183399383531485e-05, "loss": 0.1187, "step": 34700 }, { "epoch": 7.61, "learning_rate": 3.11558784676354e-05, "loss": 0.1213, "step": 34750 }, { "epoch": 7.62, "learning_rate": 3.1128357551739326e-05, "loss": 0.1238, "step": 34800 }, { "epoch": 7.63, "learning_rate": 3.110083663584324e-05, "loss": 0.1351, "step": 34850 }, { "epoch": 7.64, "learning_rate": 3.107331571994716e-05, "loss": 0.1293, "step": 34900 }, { "epoch": 7.65, "learning_rate": 3.1045794804051084e-05, "loss": 0.1248, "step": 34950 }, { "epoch": 7.66, "learning_rate": 3.1018273888154994e-05, "loss": 0.1319, "step": 35000 }, { "epoch": 7.66, "eval_acc": 0.40795631819969363, "eval_cer": 0.059308619083130364, "eval_loss": 0.303150475025177, "eval_runtime": 2433.7241, "eval_samples_per_second": 3.161, "eval_steps_per_second": 0.395, "step": 35000 }, { "epoch": 7.67, "learning_rate": 3.099075297225892e-05, "loss": 0.1334, "step": 35050 }, { "epoch": 7.69, "learning_rate": 3.096323205636284e-05, "loss": 0.1513, "step": 35100 }, { "epoch": 7.7, "learning_rate": 3.093571114046675e-05, "loss": 0.1355, "step": 35150 }, { "epoch": 7.71, "learning_rate": 3.090819022457068e-05, "loss": 0.1327, "step": 35200 }, { "epoch": 7.72, "learning_rate": 3.0880669308674594e-05, "loss": 0.1423, "step": 35250 }, { "epoch": 7.73, "learning_rate": 3.085314839277851e-05, "loss": 0.1371, "step": 35300 }, { "epoch": 7.74, "learning_rate": 3.082562747688243e-05, "loss": 0.129, "step": 35350 }, { "epoch": 7.75, "learning_rate": 3.079810656098635e-05, "loss": 0.1376, "step": 35400 }, { "epoch": 7.76, "learning_rate": 3.077058564509027e-05, "loss": 0.1265, "step": 35450 }, { "epoch": 7.77, "learning_rate": 3.0743064729194186e-05, "loss": 0.1355, "step": 35500 }, { "epoch": 7.77, "eval_acc": 0.41419656780886677, "eval_cer": 0.058490013162811105, "eval_loss": 0.2999359667301178, "eval_runtime": 2422.3339, "eval_samples_per_second": 3.175, "eval_steps_per_second": 0.397, "step": 35500 }, { "epoch": 7.78, "learning_rate": 3.071554381329811e-05, "loss": 0.1298, "step": 35550 }, { "epoch": 7.79, "learning_rate": 3.068802289740203e-05, "loss": 0.123, "step": 35600 }, { "epoch": 7.81, "learning_rate": 3.0660501981505945e-05, "loss": 0.1333, "step": 35650 }, { "epoch": 7.82, "learning_rate": 3.063298106560987e-05, "loss": 0.1413, "step": 35700 }, { "epoch": 7.83, "learning_rate": 3.0605460149713786e-05, "loss": 0.1272, "step": 35750 }, { "epoch": 7.84, "learning_rate": 3.05779392338177e-05, "loss": 0.1461, "step": 35800 }, { "epoch": 7.85, "learning_rate": 3.055041831792162e-05, "loss": 0.124, "step": 35850 }, { "epoch": 7.86, "learning_rate": 3.0522897402025544e-05, "loss": 0.1459, "step": 35900 }, { "epoch": 7.87, "learning_rate": 3.049537648612946e-05, "loss": 0.1232, "step": 35950 }, { "epoch": 7.88, "learning_rate": 3.046785557023338e-05, "loss": 0.1235, "step": 36000 }, { "epoch": 7.88, "eval_acc": 0.4006760269889917, "eval_cer": 0.05912722345305962, "eval_loss": 0.30008625984191895, "eval_runtime": 2418.0589, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.398, "step": 36000 }, { "epoch": 7.89, "learning_rate": 3.04403346543373e-05, "loss": 0.1278, "step": 36050 }, { "epoch": 7.9, "learning_rate": 3.0412813738441216e-05, "loss": 0.1375, "step": 36100 }, { "epoch": 7.92, "learning_rate": 3.0385292822545137e-05, "loss": 0.1287, "step": 36150 }, { "epoch": 7.93, "learning_rate": 3.0357771906649057e-05, "loss": 0.1281, "step": 36200 }, { "epoch": 7.94, "learning_rate": 3.0330250990752974e-05, "loss": 0.1262, "step": 36250 }, { "epoch": 7.95, "learning_rate": 3.0302730074856895e-05, "loss": 0.1331, "step": 36300 }, { "epoch": 7.96, "learning_rate": 3.027520915896081e-05, "loss": 0.1328, "step": 36350 }, { "epoch": 7.97, "learning_rate": 3.0247688243064733e-05, "loss": 0.1299, "step": 36400 }, { "epoch": 7.98, "learning_rate": 3.0220167327168646e-05, "loss": 0.1315, "step": 36450 }, { "epoch": 7.99, "learning_rate": 3.0192646411272567e-05, "loss": 0.1359, "step": 36500 }, { "epoch": 7.99, "eval_acc": 0.42108684341899544, "eval_cer": 0.058217144522704684, "eval_loss": 0.2916148602962494, "eval_runtime": 2429.407, "eval_samples_per_second": 3.166, "eval_steps_per_second": 0.396, "step": 36500 }, { "epoch": 8.0, "learning_rate": 3.016512549537649e-05, "loss": 0.1183, "step": 36550 }, { "epoch": 8.01, "learning_rate": 3.0137604579480405e-05, "loss": 0.0797, "step": 36600 }, { "epoch": 8.02, "learning_rate": 3.0110083663584325e-05, "loss": 0.0818, "step": 36650 }, { "epoch": 8.04, "learning_rate": 3.0082562747688242e-05, "loss": 0.0873, "step": 36700 }, { "epoch": 8.05, "learning_rate": 3.0055041831792163e-05, "loss": 0.0832, "step": 36750 }, { "epoch": 8.06, "learning_rate": 3.0027520915896084e-05, "loss": 0.0891, "step": 36800 }, { "epoch": 8.07, "learning_rate": 3e-05, "loss": 0.0808, "step": 36850 }, { "epoch": 8.08, "learning_rate": 2.997247908410392e-05, "loss": 0.09, "step": 36900 }, { "epoch": 8.09, "learning_rate": 2.994495816820784e-05, "loss": 0.09, "step": 36950 }, { "epoch": 8.1, "learning_rate": 2.991743725231176e-05, "loss": 0.0861, "step": 37000 }, { "epoch": 8.1, "eval_acc": 0.42667706702721303, "eval_cer": 0.05797993485261217, "eval_loss": 0.2984052300453186, "eval_runtime": 2430.5266, "eval_samples_per_second": 3.165, "eval_steps_per_second": 0.396, "step": 37000 }, { "epoch": 8.11, "learning_rate": 2.988991633641568e-05, "loss": 0.0973, "step": 37050 }, { "epoch": 8.12, "learning_rate": 2.9862395420519597e-05, "loss": 0.0949, "step": 37100 }, { "epoch": 8.13, "learning_rate": 2.9834874504623517e-05, "loss": 0.0886, "step": 37150 }, { "epoch": 8.14, "learning_rate": 2.980735358872743e-05, "loss": 0.0875, "step": 37200 }, { "epoch": 8.16, "learning_rate": 2.9779832672831355e-05, "loss": 0.0888, "step": 37250 }, { "epoch": 8.17, "learning_rate": 2.9752311756935276e-05, "loss": 0.0913, "step": 37300 }, { "epoch": 8.18, "learning_rate": 2.972479084103919e-05, "loss": 0.0912, "step": 37350 }, { "epoch": 8.19, "learning_rate": 2.9697269925143113e-05, "loss": 0.0904, "step": 37400 }, { "epoch": 8.2, "learning_rate": 2.9669749009247027e-05, "loss": 0.0888, "step": 37450 }, { "epoch": 8.21, "learning_rate": 2.9642228093350948e-05, "loss": 0.0857, "step": 37500 }, { "epoch": 8.21, "eval_acc": 0.42407696302339093, "eval_cer": 0.05735047651236669, "eval_loss": 0.29710087180137634, "eval_runtime": 2434.9281, "eval_samples_per_second": 3.159, "eval_steps_per_second": 0.395, "step": 37500 }, { "epoch": 8.22, "learning_rate": 2.9614707177454865e-05, "loss": 0.0946, "step": 37550 }, { "epoch": 8.23, "learning_rate": 2.9587186261558785e-05, "loss": 0.0943, "step": 37600 }, { "epoch": 8.24, "learning_rate": 2.9559665345662706e-05, "loss": 0.0993, "step": 37650 }, { "epoch": 8.25, "learning_rate": 2.9532144429766623e-05, "loss": 0.0859, "step": 37700 }, { "epoch": 8.27, "learning_rate": 2.9504623513870544e-05, "loss": 0.0987, "step": 37750 }, { "epoch": 8.28, "learning_rate": 2.947710259797446e-05, "loss": 0.0885, "step": 37800 }, { "epoch": 8.29, "learning_rate": 2.944958168207838e-05, "loss": 0.0898, "step": 37850 }, { "epoch": 8.3, "learning_rate": 2.9422060766182302e-05, "loss": 0.1001, "step": 37900 }, { "epoch": 8.31, "learning_rate": 2.939453985028622e-05, "loss": 0.109, "step": 37950 }, { "epoch": 8.32, "learning_rate": 2.936701893439014e-05, "loss": 0.0947, "step": 38000 }, { "epoch": 8.32, "eval_acc": 0.4429277170511014, "eval_cer": 0.05624039727193376, "eval_loss": 0.2943662106990814, "eval_runtime": 2422.719, "eval_samples_per_second": 3.175, "eval_steps_per_second": 0.397, "step": 38000 }, { "epoch": 8.33, "learning_rate": 2.9339498018494053e-05, "loss": 0.0991, "step": 38050 }, { "epoch": 8.34, "learning_rate": 2.9311977102597977e-05, "loss": 0.0995, "step": 38100 }, { "epoch": 8.35, "learning_rate": 2.9284456186701898e-05, "loss": 0.0896, "step": 38150 }, { "epoch": 8.36, "learning_rate": 2.925693527080581e-05, "loss": 0.0948, "step": 38200 }, { "epoch": 8.37, "learning_rate": 2.9229414354909736e-05, "loss": 0.105, "step": 38250 }, { "epoch": 8.39, "learning_rate": 2.920189343901365e-05, "loss": 0.0986, "step": 38300 }, { "epoch": 8.4, "learning_rate": 2.917437252311757e-05, "loss": 0.1048, "step": 38350 }, { "epoch": 8.41, "learning_rate": 2.9146851607221487e-05, "loss": 0.1033, "step": 38400 }, { "epoch": 8.42, "learning_rate": 2.9119330691325408e-05, "loss": 0.0923, "step": 38450 }, { "epoch": 8.43, "learning_rate": 2.9091809775429328e-05, "loss": 0.0946, "step": 38500 }, { "epoch": 8.43, "eval_acc": 0.45059802386237674, "eval_cer": 0.05642644407200631, "eval_loss": 0.29671648144721985, "eval_runtime": 2404.4375, "eval_samples_per_second": 3.199, "eval_steps_per_second": 0.4, "step": 38500 }, { "epoch": 8.44, "learning_rate": 2.9064288859533245e-05, "loss": 0.1035, "step": 38550 }, { "epoch": 8.45, "learning_rate": 2.9036767943637166e-05, "loss": 0.1006, "step": 38600 }, { "epoch": 8.46, "learning_rate": 2.9009247027741083e-05, "loss": 0.1006, "step": 38650 }, { "epoch": 8.47, "learning_rate": 2.8981726111845004e-05, "loss": 0.0997, "step": 38700 }, { "epoch": 8.48, "learning_rate": 2.8954205195948924e-05, "loss": 0.0979, "step": 38750 }, { "epoch": 8.5, "learning_rate": 2.892668428005284e-05, "loss": 0.0924, "step": 38800 }, { "epoch": 8.51, "learning_rate": 2.8899163364156762e-05, "loss": 0.0983, "step": 38850 }, { "epoch": 8.52, "learning_rate": 2.8871642448260676e-05, "loss": 0.0963, "step": 38900 }, { "epoch": 8.53, "learning_rate": 2.88441215323646e-05, "loss": 0.1025, "step": 38950 }, { "epoch": 8.54, "learning_rate": 2.881660061646852e-05, "loss": 0.0925, "step": 39000 }, { "epoch": 8.54, "eval_acc": 0.4494279770606568, "eval_cer": 0.0552233414315371, "eval_loss": 0.2928614020347595, "eval_runtime": 2430.8041, "eval_samples_per_second": 3.164, "eval_steps_per_second": 0.396, "step": 39000 }, { "epoch": 8.55, "learning_rate": 2.8789079700572434e-05, "loss": 0.0912, "step": 39050 }, { "epoch": 8.56, "learning_rate": 2.8761558784676358e-05, "loss": 0.0937, "step": 39100 }, { "epoch": 8.57, "learning_rate": 2.8734037868780272e-05, "loss": 0.0989, "step": 39150 }, { "epoch": 8.58, "learning_rate": 2.8706516952884192e-05, "loss": 0.0957, "step": 39200 }, { "epoch": 8.59, "learning_rate": 2.8678996036988116e-05, "loss": 0.0942, "step": 39250 }, { "epoch": 8.6, "learning_rate": 2.865147512109203e-05, "loss": 0.0929, "step": 39300 }, { "epoch": 8.62, "learning_rate": 2.862395420519595e-05, "loss": 0.111, "step": 39350 }, { "epoch": 8.63, "learning_rate": 2.8596433289299868e-05, "loss": 0.0936, "step": 39400 }, { "epoch": 8.64, "learning_rate": 2.8568912373403788e-05, "loss": 0.0991, "step": 39450 }, { "epoch": 8.65, "learning_rate": 2.8541391457507705e-05, "loss": 0.1092, "step": 39500 }, { "epoch": 8.65, "eval_acc": 0.46203848147919413, "eval_cer": 0.05521713987153468, "eval_loss": 0.2892671227455139, "eval_runtime": 2443.2305, "eval_samples_per_second": 3.148, "eval_steps_per_second": 0.394, "step": 39500 }, { "epoch": 8.66, "learning_rate": 2.851442095992955e-05, "loss": 0.0868, "step": 39550 }, { "epoch": 8.67, "learning_rate": 2.8486900044033465e-05, "loss": 0.099, "step": 39600 }, { "epoch": 8.68, "learning_rate": 2.8459379128137386e-05, "loss": 0.095, "step": 39650 }, { "epoch": 8.69, "learning_rate": 2.8431858212241303e-05, "loss": 0.0928, "step": 39700 }, { "epoch": 8.7, "learning_rate": 2.8404337296345223e-05, "loss": 0.0954, "step": 39750 }, { "epoch": 8.71, "learning_rate": 2.8376816380449144e-05, "loss": 0.0973, "step": 39800 }, { "epoch": 8.73, "learning_rate": 2.834929546455306e-05, "loss": 0.0921, "step": 39850 }, { "epoch": 8.74, "learning_rate": 2.832177454865698e-05, "loss": 0.101, "step": 39900 }, { "epoch": 8.75, "learning_rate": 2.82942536327609e-05, "loss": 0.1072, "step": 39950 }, { "epoch": 8.76, "learning_rate": 2.826673271686482e-05, "loss": 0.0945, "step": 40000 }, { "epoch": 8.76, "eval_acc": 0.4663286530855007, "eval_cer": 0.054930317721422824, "eval_loss": 0.2871682941913605, "eval_runtime": 2419.7382, "eval_samples_per_second": 3.179, "eval_steps_per_second": 0.398, "step": 40000 }, { "epoch": 8.77, "learning_rate": 2.823921180096874e-05, "loss": 0.1034, "step": 40050 }, { "epoch": 8.78, "learning_rate": 2.8211690885072657e-05, "loss": 0.0949, "step": 40100 }, { "epoch": 8.79, "learning_rate": 2.8184169969176578e-05, "loss": 0.1056, "step": 40150 }, { "epoch": 8.8, "learning_rate": 2.815664905328049e-05, "loss": 0.1044, "step": 40200 }, { "epoch": 8.81, "learning_rate": 2.8129128137384415e-05, "loss": 0.0857, "step": 40250 }, { "epoch": 8.82, "learning_rate": 2.8101607221488336e-05, "loss": 0.105, "step": 40300 }, { "epoch": 8.83, "learning_rate": 2.807408630559225e-05, "loss": 0.1038, "step": 40350 }, { "epoch": 8.85, "learning_rate": 2.8046565389696174e-05, "loss": 0.11, "step": 40400 }, { "epoch": 8.86, "learning_rate": 2.8019044473800087e-05, "loss": 0.098, "step": 40450 }, { "epoch": 8.87, "learning_rate": 2.7991523557904008e-05, "loss": 0.1001, "step": 40500 }, { "epoch": 8.87, "eval_acc": 0.4759490378996426, "eval_cer": 0.053654346750925196, "eval_loss": 0.28374138474464417, "eval_runtime": 2405.4491, "eval_samples_per_second": 3.198, "eval_steps_per_second": 0.4, "step": 40500 }, { "epoch": 8.88, "learning_rate": 2.7964002642007925e-05, "loss": 0.0927, "step": 40550 }, { "epoch": 8.89, "learning_rate": 2.7936481726111846e-05, "loss": 0.0938, "step": 40600 }, { "epoch": 8.9, "learning_rate": 2.7908960810215766e-05, "loss": 0.1148, "step": 40650 }, { "epoch": 8.91, "learning_rate": 2.7881439894319683e-05, "loss": 0.1037, "step": 40700 }, { "epoch": 8.92, "learning_rate": 2.7853918978423604e-05, "loss": 0.0882, "step": 40750 }, { "epoch": 8.93, "learning_rate": 2.782639806252752e-05, "loss": 0.0999, "step": 40800 }, { "epoch": 8.94, "learning_rate": 2.7798877146631442e-05, "loss": 0.1035, "step": 40850 }, { "epoch": 8.96, "learning_rate": 2.7771356230735362e-05, "loss": 0.0908, "step": 40900 }, { "epoch": 8.97, "learning_rate": 2.774383531483928e-05, "loss": 0.1049, "step": 40950 }, { "epoch": 8.98, "learning_rate": 2.77163143989432e-05, "loss": 0.101, "step": 41000 }, { "epoch": 8.98, "eval_acc": 0.49232969312372205, "eval_cer": 0.05347605190085566, "eval_loss": 0.2792474329471588, "eval_runtime": 2433.842, "eval_samples_per_second": 3.16, "eval_steps_per_second": 0.395, "step": 41000 }, { "epoch": 8.99, "learning_rate": 2.7688793483047114e-05, "loss": 0.0845, "step": 41050 }, { "epoch": 9.0, "learning_rate": 2.7661272567151038e-05, "loss": 0.0994, "step": 41100 }, { "epoch": 9.01, "learning_rate": 2.7633751651254958e-05, "loss": 0.0719, "step": 41150 }, { "epoch": 9.02, "learning_rate": 2.7606230735358872e-05, "loss": 0.0663, "step": 41200 }, { "epoch": 9.03, "learning_rate": 2.7578709819462796e-05, "loss": 0.0644, "step": 41250 }, { "epoch": 9.04, "learning_rate": 2.755118890356671e-05, "loss": 0.0698, "step": 41300 }, { "epoch": 9.05, "learning_rate": 2.752366798767063e-05, "loss": 0.0657, "step": 41350 }, { "epoch": 9.06, "learning_rate": 2.7496147071774547e-05, "loss": 0.0686, "step": 41400 }, { "epoch": 9.08, "learning_rate": 2.7468626155878468e-05, "loss": 0.0643, "step": 41450 }, { "epoch": 9.09, "learning_rate": 2.744110523998239e-05, "loss": 0.072, "step": 41500 }, { "epoch": 9.09, "eval_acc": 0.4877795111170333, "eval_cer": 0.05345899761084901, "eval_loss": 0.2827744781970978, "eval_runtime": 2418.7702, "eval_samples_per_second": 3.18, "eval_steps_per_second": 0.398, "step": 41500 }, { "epoch": 9.1, "learning_rate": 2.7413584324086306e-05, "loss": 0.073, "step": 41550 }, { "epoch": 9.11, "learning_rate": 2.7386063408190226e-05, "loss": 0.064, "step": 41600 }, { "epoch": 9.12, "learning_rate": 2.7358542492294143e-05, "loss": 0.0693, "step": 41650 }, { "epoch": 9.13, "learning_rate": 2.7331021576398064e-05, "loss": 0.0669, "step": 41700 }, { "epoch": 9.14, "learning_rate": 2.7304051078819903e-05, "loss": 0.0641, "step": 41750 }, { "epoch": 9.15, "learning_rate": 2.7276530162923824e-05, "loss": 0.0681, "step": 41800 }, { "epoch": 9.16, "learning_rate": 2.724900924702774e-05, "loss": 0.0629, "step": 41850 }, { "epoch": 9.17, "learning_rate": 2.722148833113166e-05, "loss": 0.0694, "step": 41900 }, { "epoch": 9.18, "learning_rate": 2.7193967415235582e-05, "loss": 0.0798, "step": 41950 }, { "epoch": 9.2, "learning_rate": 2.71664464993395e-05, "loss": 0.0729, "step": 42000 }, { "epoch": 9.2, "eval_acc": 0.48452938111225563, "eval_cer": 0.05131170746001157, "eval_loss": 0.28362253308296204, "eval_runtime": 2420.0313, "eval_samples_per_second": 3.178, "eval_steps_per_second": 0.398, "step": 42000 }, { "epoch": 9.21, "learning_rate": 2.713892558344342e-05, "loss": 0.0652, "step": 42050 }, { "epoch": 9.22, "learning_rate": 2.7111404667547337e-05, "loss": 0.0757, "step": 42100 }, { "epoch": 9.23, "learning_rate": 2.7083883751651257e-05, "loss": 0.063, "step": 42150 }, { "epoch": 9.24, "learning_rate": 2.7056362835755178e-05, "loss": 0.0695, "step": 42200 }, { "epoch": 9.25, "learning_rate": 2.7028841919859095e-05, "loss": 0.0743, "step": 42250 }, { "epoch": 9.26, "learning_rate": 2.7001321003963016e-05, "loss": 0.0705, "step": 42300 }, { "epoch": 9.27, "learning_rate": 2.697380008806693e-05, "loss": 0.0773, "step": 42350 }, { "epoch": 9.28, "learning_rate": 2.6946279172170853e-05, "loss": 0.0663, "step": 42400 }, { "epoch": 9.29, "learning_rate": 2.6918758256274767e-05, "loss": 0.0712, "step": 42450 }, { "epoch": 9.31, "learning_rate": 2.6891237340378688e-05, "loss": 0.0681, "step": 42500 }, { "epoch": 9.31, "eval_acc": 0.48998959952028215, "eval_cer": 0.05191790995024798, "eval_loss": 0.2831544876098633, "eval_runtime": 2415.2607, "eval_samples_per_second": 3.185, "eval_steps_per_second": 0.398, "step": 42500 }, { "epoch": 9.32, "learning_rate": 2.686371642448261e-05, "loss": 0.0778, "step": 42550 }, { "epoch": 9.33, "learning_rate": 2.6836195508586525e-05, "loss": 0.0703, "step": 42600 }, { "epoch": 9.34, "learning_rate": 2.6808674592690446e-05, "loss": 0.0618, "step": 42650 }, { "epoch": 9.35, "learning_rate": 2.6781153676794363e-05, "loss": 0.0721, "step": 42700 }, { "epoch": 9.36, "learning_rate": 2.6753632760898284e-05, "loss": 0.0661, "step": 42750 }, { "epoch": 9.37, "learning_rate": 2.6726111845002204e-05, "loss": 0.0746, "step": 42800 }, { "epoch": 9.38, "learning_rate": 2.669859092910612e-05, "loss": 0.0652, "step": 42850 }, { "epoch": 9.39, "learning_rate": 2.6671070013210042e-05, "loss": 0.063, "step": 42900 }, { "epoch": 9.4, "learning_rate": 2.664354909731396e-05, "loss": 0.0797, "step": 42950 }, { "epoch": 9.41, "learning_rate": 2.661602818141788e-05, "loss": 0.0691, "step": 43000 }, { "epoch": 9.41, "eval_acc": 0.4944097763267798, "eval_cer": 0.05157682415011496, "eval_loss": 0.28549954295158386, "eval_runtime": 2402.7984, "eval_samples_per_second": 3.201, "eval_steps_per_second": 0.4, "step": 43000 }, { "epoch": 9.43, "learning_rate": 2.65885072655218e-05, "loss": 0.0733, "step": 43050 }, { "epoch": 9.44, "learning_rate": 2.6560986349625717e-05, "loss": 0.0792, "step": 43100 }, { "epoch": 9.45, "learning_rate": 2.6533465433729638e-05, "loss": 0.0673, "step": 43150 }, { "epoch": 9.46, "learning_rate": 2.6505944517833552e-05, "loss": 0.0756, "step": 43200 }, { "epoch": 9.47, "learning_rate": 2.6478423601937476e-05, "loss": 0.0727, "step": 43250 }, { "epoch": 9.48, "learning_rate": 2.645090268604139e-05, "loss": 0.0759, "step": 43300 }, { "epoch": 9.49, "learning_rate": 2.642338177014531e-05, "loss": 0.0636, "step": 43350 }, { "epoch": 9.5, "learning_rate": 2.6395860854249234e-05, "loss": 0.0728, "step": 43400 }, { "epoch": 9.51, "learning_rate": 2.6368339938353148e-05, "loss": 0.075, "step": 43450 }, { "epoch": 9.52, "learning_rate": 2.634081902245707e-05, "loss": 0.0716, "step": 43500 }, { "epoch": 9.52, "eval_acc": 0.5058502339435972, "eval_cer": 0.0509907767298864, "eval_loss": 0.2799459993839264, "eval_runtime": 2413.6447, "eval_samples_per_second": 3.187, "eval_steps_per_second": 0.399, "step": 43500 }, { "epoch": 9.54, "learning_rate": 2.6313298106560986e-05, "loss": 0.0671, "step": 43550 }, { "epoch": 9.55, "learning_rate": 2.6285777190664906e-05, "loss": 0.0664, "step": 43600 }, { "epoch": 9.56, "learning_rate": 2.6258256274768827e-05, "loss": 0.0721, "step": 43650 }, { "epoch": 9.57, "learning_rate": 2.6230735358872744e-05, "loss": 0.0649, "step": 43700 }, { "epoch": 9.58, "learning_rate": 2.6203214442976664e-05, "loss": 0.0679, "step": 43750 }, { "epoch": 9.59, "learning_rate": 2.617569352708058e-05, "loss": 0.0704, "step": 43800 }, { "epoch": 9.6, "learning_rate": 2.6148172611184502e-05, "loss": 0.0746, "step": 43850 }, { "epoch": 9.61, "learning_rate": 2.612120211360634e-05, "loss": 0.0787, "step": 43900 }, { "epoch": 9.62, "learning_rate": 2.6093681197710262e-05, "loss": 0.076, "step": 43950 }, { "epoch": 9.63, "learning_rate": 2.606616028181418e-05, "loss": 0.0805, "step": 44000 }, { "epoch": 9.63, "eval_acc": 0.5122204887529614, "eval_cer": 0.051007831019893056, "eval_loss": 0.28180253505706787, "eval_runtime": 2416.357, "eval_samples_per_second": 3.183, "eval_steps_per_second": 0.398, "step": 44000 }, { "epoch": 9.64, "learning_rate": 2.60386393659181e-05, "loss": 0.068, "step": 44050 }, { "epoch": 9.66, "learning_rate": 2.601111845002202e-05, "loss": 0.0765, "step": 44100 }, { "epoch": 9.67, "learning_rate": 2.5983597534125937e-05, "loss": 0.0748, "step": 44150 }, { "epoch": 9.68, "learning_rate": 2.5956076618229858e-05, "loss": 0.0749, "step": 44200 }, { "epoch": 9.69, "learning_rate": 2.5928555702333775e-05, "loss": 0.0757, "step": 44250 }, { "epoch": 9.7, "learning_rate": 2.5901034786437695e-05, "loss": 0.0761, "step": 44300 }, { "epoch": 9.71, "learning_rate": 2.587351387054161e-05, "loss": 0.0698, "step": 44350 }, { "epoch": 9.72, "learning_rate": 2.5845992954645533e-05, "loss": 0.0716, "step": 44400 }, { "epoch": 9.73, "learning_rate": 2.5818472038749454e-05, "loss": 0.0719, "step": 44450 }, { "epoch": 9.74, "learning_rate": 2.5790951122853367e-05, "loss": 0.0773, "step": 44500 }, { "epoch": 9.74, "eval_acc": 0.5215808631667211, "eval_cer": 0.04890550217907315, "eval_loss": 0.2750154733657837, "eval_runtime": 2420.5632, "eval_samples_per_second": 3.178, "eval_steps_per_second": 0.397, "step": 44500 }, { "epoch": 9.75, "learning_rate": 2.5763430206957288e-05, "loss": 0.0713, "step": 44550 }, { "epoch": 9.77, "learning_rate": 2.5735909291061205e-05, "loss": 0.0703, "step": 44600 }, { "epoch": 9.78, "learning_rate": 2.5708388375165126e-05, "loss": 0.0701, "step": 44650 }, { "epoch": 9.79, "learning_rate": 2.5680867459269046e-05, "loss": 0.074, "step": 44700 }, { "epoch": 9.8, "learning_rate": 2.5653346543372963e-05, "loss": 0.071, "step": 44750 }, { "epoch": 9.81, "learning_rate": 2.5625825627476884e-05, "loss": 0.0702, "step": 44800 }, { "epoch": 9.82, "learning_rate": 2.55983047115808e-05, "loss": 0.074, "step": 44850 }, { "epoch": 9.83, "learning_rate": 2.5570783795684722e-05, "loss": 0.0752, "step": 44900 }, { "epoch": 9.84, "learning_rate": 2.5543262879788642e-05, "loss": 0.0796, "step": 44950 }, { "epoch": 9.85, "learning_rate": 2.551574196389256e-05, "loss": 0.0754, "step": 45000 }, { "epoch": 9.85, "eval_acc": 0.5257410295728365, "eval_cer": 0.04867759484898426, "eval_loss": 0.27724218368530273, "eval_runtime": 2426.3329, "eval_samples_per_second": 3.17, "eval_steps_per_second": 0.396, "step": 45000 }, { "epoch": 9.86, "learning_rate": 2.548822104799648e-05, "loss": 0.0691, "step": 45050 }, { "epoch": 9.87, "learning_rate": 2.5460700132100397e-05, "loss": 0.0832, "step": 45100 }, { "epoch": 9.89, "learning_rate": 2.5433179216204318e-05, "loss": 0.0752, "step": 45150 }, { "epoch": 9.9, "learning_rate": 2.540565830030824e-05, "loss": 0.0717, "step": 45200 }, { "epoch": 9.91, "learning_rate": 2.5378137384412155e-05, "loss": 0.074, "step": 45250 }, { "epoch": 9.92, "learning_rate": 2.5350616468516076e-05, "loss": 0.0846, "step": 45300 }, { "epoch": 9.93, "learning_rate": 2.532309555261999e-05, "loss": 0.0768, "step": 45350 }, { "epoch": 9.94, "learning_rate": 2.5295574636723914e-05, "loss": 0.0718, "step": 45400 }, { "epoch": 9.95, "learning_rate": 2.5268053720827828e-05, "loss": 0.0704, "step": 45450 }, { "epoch": 9.96, "learning_rate": 2.5240532804931748e-05, "loss": 0.0681, "step": 45500 }, { "epoch": 9.96, "eval_acc": 0.530031201179143, "eval_cer": 0.04874581200901087, "eval_loss": 0.2730477452278137, "eval_runtime": 2408.063, "eval_samples_per_second": 3.194, "eval_steps_per_second": 0.399, "step": 45500 }, { "epoch": 9.97, "learning_rate": 2.5213011889035672e-05, "loss": 0.0724, "step": 45550 }, { "epoch": 9.98, "learning_rate": 2.5185490973139586e-05, "loss": 0.0707, "step": 45600 }, { "epoch": 10.0, "learning_rate": 2.5157970057243506e-05, "loss": 0.0693, "step": 45650 }, { "epoch": 10.01, "learning_rate": 2.5130449141347424e-05, "loss": 0.0596, "step": 45700 }, { "epoch": 10.02, "learning_rate": 2.5102928225451344e-05, "loss": 0.0487, "step": 45750 }, { "epoch": 10.03, "learning_rate": 2.5075407309555265e-05, "loss": 0.0455, "step": 45800 }, { "epoch": 10.04, "learning_rate": 2.5047886393659182e-05, "loss": 0.0541, "step": 45850 }, { "epoch": 10.05, "learning_rate": 2.5020365477763102e-05, "loss": 0.0526, "step": 45900 }, { "epoch": 10.06, "learning_rate": 2.499284456186702e-05, "loss": 0.0524, "step": 45950 }, { "epoch": 10.07, "learning_rate": 2.496532364597094e-05, "loss": 0.0469, "step": 46000 }, { "epoch": 10.07, "eval_acc": 0.537181487189654, "eval_cer": 0.0483907726988724, "eval_loss": 0.2742944061756134, "eval_runtime": 2441.5786, "eval_samples_per_second": 3.15, "eval_steps_per_second": 0.394, "step": 46000 }, { "epoch": 10.08, "learning_rate": 2.493835314839278e-05, "loss": 0.0515, "step": 46050 }, { "epoch": 10.09, "learning_rate": 2.4910832232496696e-05, "loss": 0.0491, "step": 46100 }, { "epoch": 10.1, "learning_rate": 2.488331131660062e-05, "loss": 0.0444, "step": 46150 }, { "epoch": 10.12, "learning_rate": 2.4855790400704537e-05, "loss": 0.0501, "step": 46200 }, { "epoch": 10.13, "learning_rate": 2.4828269484808455e-05, "loss": 0.0479, "step": 46250 }, { "epoch": 10.14, "learning_rate": 2.4800748568912375e-05, "loss": 0.0488, "step": 46300 }, { "epoch": 10.15, "learning_rate": 2.4773227653016292e-05, "loss": 0.0472, "step": 46350 }, { "epoch": 10.16, "learning_rate": 2.4745706737120213e-05, "loss": 0.0534, "step": 46400 }, { "epoch": 10.17, "learning_rate": 2.4718185821224133e-05, "loss": 0.0521, "step": 46450 }, { "epoch": 10.18, "learning_rate": 2.469066490532805e-05, "loss": 0.0475, "step": 46500 }, { "epoch": 10.18, "eval_acc": 0.5322412895823919, "eval_cer": 0.047517903128531984, "eval_loss": 0.2764817178249359, "eval_runtime": 2427.6992, "eval_samples_per_second": 3.168, "eval_steps_per_second": 0.396, "step": 46500 }, { "epoch": 10.19, "learning_rate": 2.4663143989431968e-05, "loss": 0.0658, "step": 46550 }, { "epoch": 10.2, "learning_rate": 2.463562307353589e-05, "loss": 0.0578, "step": 46600 }, { "epoch": 10.21, "learning_rate": 2.4608102157639806e-05, "loss": 0.0495, "step": 46650 }, { "epoch": 10.22, "learning_rate": 2.4580581241743726e-05, "loss": 0.0526, "step": 46700 }, { "epoch": 10.24, "learning_rate": 2.4553060325847647e-05, "loss": 0.0475, "step": 46750 }, { "epoch": 10.25, "learning_rate": 2.4525539409951564e-05, "loss": 0.0586, "step": 46800 }, { "epoch": 10.26, "learning_rate": 2.4498018494055484e-05, "loss": 0.0507, "step": 46850 }, { "epoch": 10.27, "learning_rate": 2.44704975781594e-05, "loss": 0.0495, "step": 46900 }, { "epoch": 10.28, "learning_rate": 2.444297666226332e-05, "loss": 0.0481, "step": 46950 }, { "epoch": 10.29, "learning_rate": 2.4415455746367243e-05, "loss": 0.0586, "step": 47000 }, { "epoch": 10.29, "eval_acc": 0.5439417575995915, "eval_cer": 0.04793495803869463, "eval_loss": 0.2732994556427002, "eval_runtime": 2419.184, "eval_samples_per_second": 3.18, "eval_steps_per_second": 0.398, "step": 47000 }, { "epoch": 10.3, "learning_rate": 2.438793483047116e-05, "loss": 0.0487, "step": 47050 }, { "epoch": 10.31, "learning_rate": 2.4360413914575077e-05, "loss": 0.0552, "step": 47100 }, { "epoch": 10.32, "learning_rate": 2.4332892998678997e-05, "loss": 0.051, "step": 47150 }, { "epoch": 10.33, "learning_rate": 2.430592250110084e-05, "loss": 0.0518, "step": 47200 }, { "epoch": 10.35, "learning_rate": 2.4278401585204757e-05, "loss": 0.0478, "step": 47250 }, { "epoch": 10.36, "learning_rate": 2.4250880669308674e-05, "loss": 0.0518, "step": 47300 }, { "epoch": 10.37, "learning_rate": 2.4223359753412595e-05, "loss": 0.0559, "step": 47350 }, { "epoch": 10.38, "learning_rate": 2.4195838837516512e-05, "loss": 0.0503, "step": 47400 }, { "epoch": 10.39, "learning_rate": 2.4168317921620433e-05, "loss": 0.0512, "step": 47450 }, { "epoch": 10.4, "learning_rate": 2.4140797005724353e-05, "loss": 0.0592, "step": 47500 }, { "epoch": 10.4, "eval_acc": 0.5471918876043692, "eval_cer": 0.04605433496796119, "eval_loss": 0.2718336880207062, "eval_runtime": 2441.2739, "eval_samples_per_second": 3.151, "eval_steps_per_second": 0.394, "step": 47500 }, { "epoch": 10.41, "learning_rate": 2.411327608982827e-05, "loss": 0.0501, "step": 47550 }, { "epoch": 10.42, "learning_rate": 2.408575517393219e-05, "loss": 0.058, "step": 47600 }, { "epoch": 10.43, "learning_rate": 2.4058234258036108e-05, "loss": 0.0536, "step": 47650 }, { "epoch": 10.44, "learning_rate": 2.4030713342140025e-05, "loss": 0.0479, "step": 47700 }, { "epoch": 10.45, "learning_rate": 2.400319242624395e-05, "loss": 0.0519, "step": 47750 }, { "epoch": 10.47, "learning_rate": 2.3975671510347866e-05, "loss": 0.0589, "step": 47800 }, { "epoch": 10.48, "learning_rate": 2.3948150594451783e-05, "loss": 0.0509, "step": 47850 }, { "epoch": 10.49, "learning_rate": 2.3920629678555704e-05, "loss": 0.0592, "step": 47900 }, { "epoch": 10.5, "learning_rate": 2.389310876265962e-05, "loss": 0.0604, "step": 47950 }, { "epoch": 10.51, "learning_rate": 2.3865587846763542e-05, "loss": 0.0507, "step": 48000 }, { "epoch": 10.51, "eval_acc": 0.5499219968083825, "eval_cer": 0.045651233567803984, "eval_loss": 0.27467742562294006, "eval_runtime": 2421.7457, "eval_samples_per_second": 3.176, "eval_steps_per_second": 0.397, "step": 48000 }, { "epoch": 10.52, "learning_rate": 2.3838066930867462e-05, "loss": 0.0564, "step": 48050 }, { "epoch": 10.53, "learning_rate": 2.381054601497138e-05, "loss": 0.0573, "step": 48100 }, { "epoch": 10.54, "learning_rate": 2.37830250990753e-05, "loss": 0.0565, "step": 48150 }, { "epoch": 10.55, "learning_rate": 2.3755504183179217e-05, "loss": 0.0569, "step": 48200 }, { "epoch": 10.56, "learning_rate": 2.3727983267283134e-05, "loss": 0.0567, "step": 48250 }, { "epoch": 10.58, "learning_rate": 2.370046235138706e-05, "loss": 0.0536, "step": 48300 }, { "epoch": 10.59, "learning_rate": 2.3672941435490975e-05, "loss": 0.0527, "step": 48350 }, { "epoch": 10.6, "learning_rate": 2.3645420519594893e-05, "loss": 0.0538, "step": 48400 }, { "epoch": 10.61, "learning_rate": 2.3617899603698813e-05, "loss": 0.0557, "step": 48450 }, { "epoch": 10.62, "learning_rate": 2.359037868780273e-05, "loss": 0.0506, "step": 48500 }, { "epoch": 10.62, "eval_acc": 0.5536921476139246, "eval_cer": 0.04698301857832338, "eval_loss": 0.27325424551963806, "eval_runtime": 2412.269, "eval_samples_per_second": 3.189, "eval_steps_per_second": 0.399, "step": 48500 }, { "epoch": 10.63, "learning_rate": 2.3562857771906648e-05, "loss": 0.0534, "step": 48550 }, { "epoch": 10.64, "learning_rate": 2.353533685601057e-05, "loss": 0.056, "step": 48600 }, { "epoch": 10.65, "learning_rate": 2.350781594011449e-05, "loss": 0.0527, "step": 48650 }, { "epoch": 10.66, "learning_rate": 2.3480295024218406e-05, "loss": 0.0519, "step": 48700 }, { "epoch": 10.67, "learning_rate": 2.3452774108322326e-05, "loss": 0.0509, "step": 48750 }, { "epoch": 10.68, "learning_rate": 2.3425253192426244e-05, "loss": 0.0568, "step": 48800 }, { "epoch": 10.7, "learning_rate": 2.3397732276530164e-05, "loss": 0.0508, "step": 48850 }, { "epoch": 10.71, "learning_rate": 2.3370211360634085e-05, "loss": 0.0524, "step": 48900 }, { "epoch": 10.72, "learning_rate": 2.3342690444738002e-05, "loss": 0.0527, "step": 48950 }, { "epoch": 10.73, "learning_rate": 2.3315169528841922e-05, "loss": 0.0537, "step": 49000 }, { "epoch": 10.73, "eval_acc": 0.5526521060123957, "eval_cer": 0.045635729667797935, "eval_loss": 0.27340859174728394, "eval_runtime": 2413.049, "eval_samples_per_second": 3.188, "eval_steps_per_second": 0.399, "step": 49000 }, { "epoch": 10.74, "learning_rate": 2.328764861294584e-05, "loss": 0.0585, "step": 49050 }, { "epoch": 10.75, "learning_rate": 2.3260127697049757e-05, "loss": 0.0502, "step": 49100 }, { "epoch": 10.76, "learning_rate": 2.323260678115368e-05, "loss": 0.0572, "step": 49150 }, { "epoch": 10.77, "learning_rate": 2.3205085865257598e-05, "loss": 0.0575, "step": 49200 }, { "epoch": 10.78, "learning_rate": 2.3177564949361515e-05, "loss": 0.0495, "step": 49250 }, { "epoch": 10.79, "learning_rate": 2.3150044033465436e-05, "loss": 0.0614, "step": 49300 }, { "epoch": 10.81, "learning_rate": 2.3122523117569353e-05, "loss": 0.0519, "step": 49350 }, { "epoch": 10.82, "learning_rate": 2.309500220167327e-05, "loss": 0.0605, "step": 49400 }, { "epoch": 10.83, "learning_rate": 2.3067481285777194e-05, "loss": 0.051, "step": 49450 }, { "epoch": 10.84, "learning_rate": 2.303996036988111e-05, "loss": 0.053, "step": 49500 }, { "epoch": 10.84, "eval_acc": 0.5647425896301687, "eval_cer": 0.04586363699788682, "eval_loss": 0.27086734771728516, "eval_runtime": 2433.5124, "eval_samples_per_second": 3.161, "eval_steps_per_second": 0.395, "step": 49500 }, { "epoch": 10.85, "learning_rate": 2.3012439453985028e-05, "loss": 0.0509, "step": 49550 }, { "epoch": 10.86, "learning_rate": 2.298491853808895e-05, "loss": 0.0547, "step": 49600 }, { "epoch": 10.87, "learning_rate": 2.2957397622192866e-05, "loss": 0.0511, "step": 49650 }, { "epoch": 10.88, "learning_rate": 2.2929876706296786e-05, "loss": 0.0539, "step": 49700 }, { "epoch": 10.89, "learning_rate": 2.2902355790400707e-05, "loss": 0.0499, "step": 49750 }, { "epoch": 10.9, "learning_rate": 2.2874834874504624e-05, "loss": 0.0509, "step": 49800 }, { "epoch": 10.91, "learning_rate": 2.2847313958608545e-05, "loss": 0.0557, "step": 49850 }, { "epoch": 10.93, "learning_rate": 2.2819793042712462e-05, "loss": 0.0489, "step": 49900 }, { "epoch": 10.94, "learning_rate": 2.279227212681638e-05, "loss": 0.0545, "step": 49950 }, { "epoch": 10.95, "learning_rate": 2.2764751210920303e-05, "loss": 0.0571, "step": 50000 }, { "epoch": 10.95, "eval_acc": 0.5704628184385774, "eval_cer": 0.04416130877722291, "eval_loss": 0.26752549409866333, "eval_runtime": 2423.2125, "eval_samples_per_second": 3.174, "eval_steps_per_second": 0.397, "step": 50000 }, { "epoch": 10.96, "learning_rate": 2.273723029502422e-05, "loss": 0.0535, "step": 50050 }, { "epoch": 10.97, "learning_rate": 2.2709709379128137e-05, "loss": 0.0542, "step": 50100 }, { "epoch": 10.98, "learning_rate": 2.2682188463232058e-05, "loss": 0.0502, "step": 50150 }, { "epoch": 10.99, "learning_rate": 2.2654667547335975e-05, "loss": 0.0495, "step": 50200 }, { "epoch": 11.0, "learning_rate": 2.2627146631439896e-05, "loss": 0.0475, "step": 50250 }, { "epoch": 11.01, "learning_rate": 2.2599625715543816e-05, "loss": 0.0356, "step": 50300 }, { "epoch": 11.02, "learning_rate": 2.2572104799647733e-05, "loss": 0.0355, "step": 50350 }, { "epoch": 11.04, "learning_rate": 2.254458388375165e-05, "loss": 0.0369, "step": 50400 }, { "epoch": 11.05, "learning_rate": 2.251706296785557e-05, "loss": 0.0373, "step": 50450 }, { "epoch": 11.06, "learning_rate": 2.2489542051959488e-05, "loss": 0.0351, "step": 50500 }, { "epoch": 11.06, "eval_acc": 0.577353094048706, "eval_cer": 0.043815571807088075, "eval_loss": 0.2698224186897278, "eval_runtime": 2426.9108, "eval_samples_per_second": 3.169, "eval_steps_per_second": 0.396, "step": 50500 }, { "epoch": 11.07, "learning_rate": 2.246202113606341e-05, "loss": 0.0364, "step": 50550 }, { "epoch": 11.08, "learning_rate": 2.243450022016733e-05, "loss": 0.0352, "step": 50600 }, { "epoch": 11.09, "learning_rate": 2.2406979304271246e-05, "loss": 0.0345, "step": 50650 }, { "epoch": 11.1, "learning_rate": 2.2379458388375167e-05, "loss": 0.0358, "step": 50700 }, { "epoch": 11.11, "learning_rate": 2.2351937472479084e-05, "loss": 0.0394, "step": 50750 }, { "epoch": 11.12, "learning_rate": 2.2324416556583005e-05, "loss": 0.0387, "step": 50800 }, { "epoch": 11.13, "learning_rate": 2.2296895640686925e-05, "loss": 0.0418, "step": 50850 }, { "epoch": 11.14, "learning_rate": 2.2269374724790842e-05, "loss": 0.0398, "step": 50900 }, { "epoch": 11.16, "learning_rate": 2.224185380889476e-05, "loss": 0.0382, "step": 50950 }, { "epoch": 11.17, "learning_rate": 2.221433289299868e-05, "loss": 0.0391, "step": 51000 }, { "epoch": 11.17, "eval_acc": 0.5743629744443106, "eval_cer": 0.04380937024708566, "eval_loss": 0.2706660032272339, "eval_runtime": 2428.3853, "eval_samples_per_second": 3.168, "eval_steps_per_second": 0.396, "step": 51000 }, { "epoch": 11.18, "learning_rate": 2.2186811977102597e-05, "loss": 0.0401, "step": 51050 }, { "epoch": 11.19, "learning_rate": 2.2159291061206518e-05, "loss": 0.0417, "step": 51100 }, { "epoch": 11.2, "learning_rate": 2.213177014531044e-05, "loss": 0.0388, "step": 51150 }, { "epoch": 11.21, "learning_rate": 2.2104249229414356e-05, "loss": 0.0409, "step": 51200 }, { "epoch": 11.22, "learning_rate": 2.2076728313518276e-05, "loss": 0.0389, "step": 51250 }, { "epoch": 11.23, "learning_rate": 2.2049207397622193e-05, "loss": 0.0378, "step": 51300 }, { "epoch": 11.24, "learning_rate": 2.2021686481726114e-05, "loss": 0.0389, "step": 51350 }, { "epoch": 11.25, "learning_rate": 2.199416556583003e-05, "loss": 0.0369, "step": 51400 }, { "epoch": 11.26, "learning_rate": 2.196664464993395e-05, "loss": 0.0338, "step": 51450 }, { "epoch": 11.28, "learning_rate": 2.193912373403787e-05, "loss": 0.0343, "step": 51500 }, { "epoch": 11.28, "eval_acc": 0.5776131044490882, "eval_cer": 0.044525650427365004, "eval_loss": 0.2710331678390503, "eval_runtime": 2447.563, "eval_samples_per_second": 3.143, "eval_steps_per_second": 0.393, "step": 51500 }, { "epoch": 11.29, "learning_rate": 2.191160281814179e-05, "loss": 0.0411, "step": 51550 }, { "epoch": 11.3, "learning_rate": 2.1884081902245707e-05, "loss": 0.0439, "step": 51600 }, { "epoch": 11.31, "learning_rate": 2.1856560986349627e-05, "loss": 0.0392, "step": 51650 }, { "epoch": 11.32, "learning_rate": 2.1829040070453548e-05, "loss": 0.0382, "step": 51700 }, { "epoch": 11.33, "learning_rate": 2.1801519154557465e-05, "loss": 0.0406, "step": 51750 }, { "epoch": 11.34, "learning_rate": 2.1773998238661382e-05, "loss": 0.0359, "step": 51800 }, { "epoch": 11.35, "learning_rate": 2.1746477322765303e-05, "loss": 0.0376, "step": 51850 }, { "epoch": 11.36, "learning_rate": 2.171895640686922e-05, "loss": 0.0334, "step": 51900 }, { "epoch": 11.37, "learning_rate": 2.169143549097314e-05, "loss": 0.0385, "step": 51950 }, { "epoch": 11.39, "learning_rate": 2.166391457507706e-05, "loss": 0.0399, "step": 52000 }, { "epoch": 11.39, "eval_acc": 0.5835933436578792, "eval_cer": 0.044110145907202955, "eval_loss": 0.27085205912590027, "eval_runtime": 2433.2038, "eval_samples_per_second": 3.161, "eval_steps_per_second": 0.395, "step": 52000 }, { "epoch": 11.4, "learning_rate": 2.1636393659180978e-05, "loss": 0.0355, "step": 52050 }, { "epoch": 11.41, "learning_rate": 2.16088727432849e-05, "loss": 0.0365, "step": 52100 }, { "epoch": 11.42, "learning_rate": 2.1581351827388816e-05, "loss": 0.0355, "step": 52150 }, { "epoch": 11.43, "learning_rate": 2.1553830911492736e-05, "loss": 0.0415, "step": 52200 }, { "epoch": 11.44, "learning_rate": 2.1526309995596657e-05, "loss": 0.0416, "step": 52250 }, { "epoch": 11.45, "learning_rate": 2.1498789079700574e-05, "loss": 0.043, "step": 52300 }, { "epoch": 11.46, "learning_rate": 2.147126816380449e-05, "loss": 0.0341, "step": 52350 }, { "epoch": 11.47, "learning_rate": 2.144429766622633e-05, "loss": 0.0339, "step": 52400 }, { "epoch": 11.48, "learning_rate": 2.1416776750330254e-05, "loss": 0.0419, "step": 52450 }, { "epoch": 11.49, "learning_rate": 2.138925583443417e-05, "loss": 0.0374, "step": 52500 }, { "epoch": 11.49, "eval_acc": 0.5847633904595991, "eval_cer": 0.04424502983725556, "eval_loss": 0.2762584090232849, "eval_runtime": 2430.2459, "eval_samples_per_second": 3.165, "eval_steps_per_second": 0.396, "step": 52500 }, { "epoch": 11.51, "learning_rate": 2.136173491853809e-05, "loss": 0.0375, "step": 52550 }, { "epoch": 11.52, "learning_rate": 2.133421400264201e-05, "loss": 0.0433, "step": 52600 }, { "epoch": 11.53, "learning_rate": 2.1306693086745926e-05, "loss": 0.0387, "step": 52650 }, { "epoch": 11.54, "learning_rate": 2.1279172170849847e-05, "loss": 0.0398, "step": 52700 }, { "epoch": 11.55, "learning_rate": 2.1251651254953767e-05, "loss": 0.0348, "step": 52750 }, { "epoch": 11.56, "learning_rate": 2.1224130339057684e-05, "loss": 0.0381, "step": 52800 }, { "epoch": 11.57, "learning_rate": 2.1196609423161605e-05, "loss": 0.0389, "step": 52850 }, { "epoch": 11.58, "learning_rate": 2.1169088507265522e-05, "loss": 0.0401, "step": 52900 }, { "epoch": 11.59, "learning_rate": 2.114156759136944e-05, "loss": 0.0372, "step": 52950 }, { "epoch": 11.6, "learning_rate": 2.1114046675473363e-05, "loss": 0.0418, "step": 53000 }, { "epoch": 11.6, "eval_acc": 0.5852834112603635, "eval_cer": 0.04214890255643807, "eval_loss": 0.27044418454170227, "eval_runtime": 2419.8311, "eval_samples_per_second": 3.179, "eval_steps_per_second": 0.398, "step": 53000 }, { "epoch": 11.62, "learning_rate": 2.108652575957728e-05, "loss": 0.0423, "step": 53050 }, { "epoch": 11.63, "learning_rate": 2.1059004843681198e-05, "loss": 0.0386, "step": 53100 }, { "epoch": 11.64, "learning_rate": 2.1031483927785118e-05, "loss": 0.0466, "step": 53150 }, { "epoch": 11.65, "learning_rate": 2.1003963011889035e-05, "loss": 0.0385, "step": 53200 }, { "epoch": 11.66, "learning_rate": 2.0976442095992956e-05, "loss": 0.041, "step": 53250 }, { "epoch": 11.67, "learning_rate": 2.0948921180096876e-05, "loss": 0.0397, "step": 53300 }, { "epoch": 11.68, "learning_rate": 2.0921400264200794e-05, "loss": 0.0384, "step": 53350 }, { "epoch": 11.69, "learning_rate": 2.089387934830471e-05, "loss": 0.0357, "step": 53400 }, { "epoch": 11.7, "learning_rate": 2.086635843240863e-05, "loss": 0.0361, "step": 53450 }, { "epoch": 11.71, "learning_rate": 2.083883751651255e-05, "loss": 0.0386, "step": 53500 }, { "epoch": 11.71, "eval_acc": 0.5934737388724033, "eval_cer": 0.04196130536636491, "eval_loss": 0.26918068528175354, "eval_runtime": 2422.4006, "eval_samples_per_second": 3.175, "eval_steps_per_second": 0.397, "step": 53500 }, { "epoch": 11.72, "learning_rate": 2.081131660061647e-05, "loss": 0.0345, "step": 53550 }, { "epoch": 11.74, "learning_rate": 2.078379568472039e-05, "loss": 0.039, "step": 53600 }, { "epoch": 11.75, "learning_rate": 2.0756274768824307e-05, "loss": 0.0389, "step": 53650 }, { "epoch": 11.76, "learning_rate": 2.0728753852928227e-05, "loss": 0.0425, "step": 53700 }, { "epoch": 11.77, "learning_rate": 2.0701232937032145e-05, "loss": 0.0363, "step": 53750 }, { "epoch": 11.78, "learning_rate": 2.0673712021136065e-05, "loss": 0.0411, "step": 53800 }, { "epoch": 11.79, "learning_rate": 2.0646191105239986e-05, "loss": 0.0411, "step": 53850 }, { "epoch": 11.8, "learning_rate": 2.0618670189343903e-05, "loss": 0.0419, "step": 53900 }, { "epoch": 11.81, "learning_rate": 2.059114927344782e-05, "loss": 0.0375, "step": 53950 }, { "epoch": 11.82, "learning_rate": 2.056362835755174e-05, "loss": 0.0398, "step": 54000 }, { "epoch": 11.82, "eval_acc": 0.5977639104787098, "eval_cer": 0.041258978696091, "eval_loss": 0.26716116070747375, "eval_runtime": 2416.0995, "eval_samples_per_second": 3.184, "eval_steps_per_second": 0.398, "step": 54000 }, { "epoch": 11.83, "learning_rate": 2.0536107441655658e-05, "loss": 0.0503, "step": 54050 }, { "epoch": 11.85, "learning_rate": 2.0508586525759578e-05, "loss": 0.0335, "step": 54100 }, { "epoch": 11.86, "learning_rate": 2.04810656098635e-05, "loss": 0.0428, "step": 54150 }, { "epoch": 11.87, "learning_rate": 2.0453544693967416e-05, "loss": 0.0377, "step": 54200 }, { "epoch": 11.88, "learning_rate": 2.0426023778071337e-05, "loss": 0.0444, "step": 54250 }, { "epoch": 11.89, "learning_rate": 2.0398502862175254e-05, "loss": 0.0392, "step": 54300 }, { "epoch": 11.9, "learning_rate": 2.037098194627917e-05, "loss": 0.0385, "step": 54350 }, { "epoch": 11.91, "learning_rate": 2.034346103038309e-05, "loss": 0.0413, "step": 54400 }, { "epoch": 11.92, "learning_rate": 2.0315940114487012e-05, "loss": 0.0397, "step": 54450 }, { "epoch": 11.93, "learning_rate": 2.028841919859093e-05, "loss": 0.0367, "step": 54500 }, { "epoch": 11.93, "eval_acc": 0.5965938636769899, "eval_cer": 0.04078300896590537, "eval_loss": 0.2696912884712219, "eval_runtime": 2403.6892, "eval_samples_per_second": 3.2, "eval_steps_per_second": 0.4, "step": 54500 }, { "epoch": 11.94, "learning_rate": 2.026089828269485e-05, "loss": 0.0405, "step": 54550 }, { "epoch": 11.95, "learning_rate": 2.0233377366798767e-05, "loss": 0.0317, "step": 54600 }, { "epoch": 11.97, "learning_rate": 2.0205856450902687e-05, "loss": 0.0477, "step": 54650 }, { "epoch": 11.98, "learning_rate": 2.0178335535006608e-05, "loss": 0.04, "step": 54700 }, { "epoch": 11.99, "learning_rate": 2.0150814619110525e-05, "loss": 0.0421, "step": 54750 }, { "epoch": 12.0, "learning_rate": 2.0123844121532364e-05, "loss": 0.045, "step": 54800 }, { "epoch": 12.01, "learning_rate": 2.0096323205636285e-05, "loss": 0.0279, "step": 54850 }, { "epoch": 12.02, "learning_rate": 2.0068802289740205e-05, "loss": 0.0243, "step": 54900 }, { "epoch": 12.03, "learning_rate": 2.0041281373844122e-05, "loss": 0.0224, "step": 54950 }, { "epoch": 12.04, "learning_rate": 2.0013760457948043e-05, "loss": 0.0259, "step": 55000 }, { "epoch": 12.04, "eval_acc": 0.6123244929001138, "eval_cer": 0.04014579867565686, "eval_loss": 0.2668377161026001, "eval_runtime": 2417.0943, "eval_samples_per_second": 3.182, "eval_steps_per_second": 0.398, "step": 55000 }, { "epoch": 12.05, "learning_rate": 1.998623954205196e-05, "loss": 0.0245, "step": 55050 }, { "epoch": 12.06, "learning_rate": 1.9958718626155877e-05, "loss": 0.0267, "step": 55100 }, { "epoch": 12.08, "learning_rate": 1.99311977102598e-05, "loss": 0.0247, "step": 55150 }, { "epoch": 12.09, "learning_rate": 1.990367679436372e-05, "loss": 0.0263, "step": 55200 }, { "epoch": 12.1, "learning_rate": 1.9876155878467636e-05, "loss": 0.0249, "step": 55250 }, { "epoch": 12.11, "learning_rate": 1.9848634962571556e-05, "loss": 0.0269, "step": 55300 }, { "epoch": 12.12, "learning_rate": 1.9821114046675473e-05, "loss": 0.0254, "step": 55350 }, { "epoch": 12.13, "learning_rate": 1.979359313077939e-05, "loss": 0.0288, "step": 55400 }, { "epoch": 12.14, "learning_rate": 1.9766072214883314e-05, "loss": 0.0262, "step": 55450 }, { "epoch": 12.15, "learning_rate": 1.973855129898723e-05, "loss": 0.0302, "step": 55500 }, { "epoch": 12.15, "eval_acc": 0.6038741548876918, "eval_cer": 0.04154735123620347, "eval_loss": 0.2702733278274536, "eval_runtime": 2435.2652, "eval_samples_per_second": 3.159, "eval_steps_per_second": 0.395, "step": 55500 }, { "epoch": 12.16, "learning_rate": 1.971103038309115e-05, "loss": 0.0272, "step": 55550 }, { "epoch": 12.17, "learning_rate": 1.968350946719507e-05, "loss": 0.0241, "step": 55600 }, { "epoch": 12.18, "learning_rate": 1.9655988551298987e-05, "loss": 0.0271, "step": 55650 }, { "epoch": 12.2, "learning_rate": 1.9628467635402907e-05, "loss": 0.0306, "step": 55700 }, { "epoch": 12.21, "learning_rate": 1.9600946719506828e-05, "loss": 0.0288, "step": 55750 }, { "epoch": 12.22, "learning_rate": 1.9573425803610745e-05, "loss": 0.0272, "step": 55800 }, { "epoch": 12.23, "learning_rate": 1.9545904887714665e-05, "loss": 0.0288, "step": 55850 }, { "epoch": 12.24, "learning_rate": 1.9518383971818583e-05, "loss": 0.0253, "step": 55900 }, { "epoch": 12.25, "learning_rate": 1.94908630559225e-05, "loss": 0.0293, "step": 55950 }, { "epoch": 12.26, "learning_rate": 1.9463342140026424e-05, "loss": 0.0266, "step": 56000 }, { "epoch": 12.26, "eval_acc": 0.6030941236865452, "eval_cer": 0.041094637356026906, "eval_loss": 0.2693229615688324, "eval_runtime": 2418.4882, "eval_samples_per_second": 3.18, "eval_steps_per_second": 0.398, "step": 56000 }, { "epoch": 12.27, "learning_rate": 1.943582122413034e-05, "loss": 0.028, "step": 56050 }, { "epoch": 12.28, "learning_rate": 1.9408300308234258e-05, "loss": 0.0316, "step": 56100 }, { "epoch": 12.29, "learning_rate": 1.938077939233818e-05, "loss": 0.0266, "step": 56150 }, { "epoch": 12.3, "learning_rate": 1.9353258476442096e-05, "loss": 0.0267, "step": 56200 }, { "epoch": 12.32, "learning_rate": 1.9325737560546016e-05, "loss": 0.028, "step": 56250 }, { "epoch": 12.33, "learning_rate": 1.9298216644649937e-05, "loss": 0.0286, "step": 56300 }, { "epoch": 12.34, "learning_rate": 1.9270695728753854e-05, "loss": 0.0326, "step": 56350 }, { "epoch": 12.35, "learning_rate": 1.924317481285777e-05, "loss": 0.03, "step": 56400 }, { "epoch": 12.36, "learning_rate": 1.9215653896961692e-05, "loss": 0.0289, "step": 56450 }, { "epoch": 12.37, "learning_rate": 1.918813298106561e-05, "loss": 0.0316, "step": 56500 }, { "epoch": 12.37, "eval_acc": 0.6142745709029804, "eval_cer": 0.04024192285569435, "eval_loss": 0.2692607641220093, "eval_runtime": 2418.0408, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.398, "step": 56500 }, { "epoch": 12.38, "learning_rate": 1.916061206516953e-05, "loss": 0.0285, "step": 56550 }, { "epoch": 12.39, "learning_rate": 1.913309114927345e-05, "loss": 0.0271, "step": 56600 }, { "epoch": 12.4, "learning_rate": 1.9105570233377367e-05, "loss": 0.0278, "step": 56650 }, { "epoch": 12.41, "learning_rate": 1.9078049317481288e-05, "loss": 0.0245, "step": 56700 }, { "epoch": 12.43, "learning_rate": 1.9050528401585205e-05, "loss": 0.0245, "step": 56750 }, { "epoch": 12.44, "learning_rate": 1.9023557904007047e-05, "loss": 0.0266, "step": 56800 }, { "epoch": 12.45, "learning_rate": 1.8996036988110965e-05, "loss": 0.0293, "step": 56850 }, { "epoch": 12.46, "learning_rate": 1.8968516072214885e-05, "loss": 0.026, "step": 56900 }, { "epoch": 12.47, "learning_rate": 1.8940995156318802e-05, "loss": 0.0299, "step": 56950 }, { "epoch": 12.48, "learning_rate": 1.891347424042272e-05, "loss": 0.0279, "step": 57000 }, { "epoch": 12.48, "eval_acc": 0.6073842952928518, "eval_cer": 0.04096130381597491, "eval_loss": 0.27060824632644653, "eval_runtime": 2418.1417, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.398, "step": 57000 }, { "epoch": 12.49, "learning_rate": 1.8885953324526643e-05, "loss": 0.0291, "step": 57050 }, { "epoch": 12.5, "learning_rate": 1.885843240863056e-05, "loss": 0.0284, "step": 57100 }, { "epoch": 12.51, "learning_rate": 1.8830911492734478e-05, "loss": 0.0268, "step": 57150 }, { "epoch": 12.52, "learning_rate": 1.8803390576838398e-05, "loss": 0.0304, "step": 57200 }, { "epoch": 12.53, "learning_rate": 1.8775869660942315e-05, "loss": 0.0376, "step": 57250 }, { "epoch": 12.55, "learning_rate": 1.8748348745046236e-05, "loss": 0.0262, "step": 57300 }, { "epoch": 12.56, "learning_rate": 1.8720827829150157e-05, "loss": 0.0256, "step": 57350 }, { "epoch": 12.57, "learning_rate": 1.8693306913254074e-05, "loss": 0.03, "step": 57400 }, { "epoch": 12.58, "learning_rate": 1.8665785997357994e-05, "loss": 0.0288, "step": 57450 }, { "epoch": 12.59, "learning_rate": 1.863826508146191e-05, "loss": 0.0322, "step": 57500 }, { "epoch": 12.59, "eval_acc": 0.6142745709029804, "eval_cer": 0.039677580895474254, "eval_loss": 0.27082282304763794, "eval_runtime": 2424.4199, "eval_samples_per_second": 3.173, "eval_steps_per_second": 0.397, "step": 57500 }, { "epoch": 12.6, "learning_rate": 1.861074416556583e-05, "loss": 0.0287, "step": 57550 }, { "epoch": 12.61, "learning_rate": 1.8583223249669753e-05, "loss": 0.0265, "step": 57600 }, { "epoch": 12.62, "learning_rate": 1.855570233377367e-05, "loss": 0.0256, "step": 57650 }, { "epoch": 12.63, "learning_rate": 1.8528181417877587e-05, "loss": 0.0292, "step": 57700 }, { "epoch": 12.64, "learning_rate": 1.8500660501981507e-05, "loss": 0.0252, "step": 57750 }, { "epoch": 12.66, "learning_rate": 1.8473139586085425e-05, "loss": 0.025, "step": 57800 }, { "epoch": 12.67, "learning_rate": 1.8445618670189345e-05, "loss": 0.028, "step": 57850 }, { "epoch": 12.68, "learning_rate": 1.8418097754293266e-05, "loss": 0.0261, "step": 57900 }, { "epoch": 12.69, "learning_rate": 1.8390576838397183e-05, "loss": 0.0291, "step": 57950 }, { "epoch": 12.7, "learning_rate": 1.8363055922501103e-05, "loss": 0.0305, "step": 58000 }, { "epoch": 12.7, "eval_acc": 0.6158346333052737, "eval_cer": 0.04006052722562361, "eval_loss": 0.2681460380554199, "eval_runtime": 2417.4931, "eval_samples_per_second": 3.182, "eval_steps_per_second": 0.398, "step": 58000 }, { "epoch": 12.71, "learning_rate": 1.833553500660502e-05, "loss": 0.0234, "step": 58050 }, { "epoch": 12.72, "learning_rate": 1.8308014090708938e-05, "loss": 0.0296, "step": 58100 }, { "epoch": 12.73, "learning_rate": 1.8280493174812858e-05, "loss": 0.025, "step": 58150 }, { "epoch": 12.74, "learning_rate": 1.825297225891678e-05, "loss": 0.0267, "step": 58200 }, { "epoch": 12.75, "learning_rate": 1.8225451343020696e-05, "loss": 0.0232, "step": 58250 }, { "epoch": 12.76, "learning_rate": 1.8197930427124617e-05, "loss": 0.0284, "step": 58300 }, { "epoch": 12.78, "learning_rate": 1.8170409511228534e-05, "loss": 0.0295, "step": 58350 }, { "epoch": 12.79, "learning_rate": 1.814288859533245e-05, "loss": 0.0308, "step": 58400 }, { "epoch": 12.8, "learning_rate": 1.8115367679436375e-05, "loss": 0.0295, "step": 58450 }, { "epoch": 12.81, "learning_rate": 1.8087846763540292e-05, "loss": 0.0279, "step": 58500 }, { "epoch": 12.81, "eval_acc": 0.6254550181194156, "eval_cer": 0.039162851415273515, "eval_loss": 0.26786795258522034, "eval_runtime": 2425.3747, "eval_samples_per_second": 3.171, "eval_steps_per_second": 0.397, "step": 58500 }, { "epoch": 12.82, "learning_rate": 1.806032584764421e-05, "loss": 0.0267, "step": 58550 }, { "epoch": 12.83, "learning_rate": 1.803280493174813e-05, "loss": 0.0283, "step": 58600 }, { "epoch": 12.84, "learning_rate": 1.8005284015852047e-05, "loss": 0.0296, "step": 58650 }, { "epoch": 12.85, "learning_rate": 1.7977763099955967e-05, "loss": 0.0278, "step": 58700 }, { "epoch": 12.86, "learning_rate": 1.7950242184059888e-05, "loss": 0.0284, "step": 58750 }, { "epoch": 12.87, "learning_rate": 1.7922721268163805e-05, "loss": 0.0263, "step": 58800 }, { "epoch": 12.89, "learning_rate": 1.7895200352267726e-05, "loss": 0.0246, "step": 58850 }, { "epoch": 12.9, "learning_rate": 1.7867679436371643e-05, "loss": 0.027, "step": 58900 }, { "epoch": 12.91, "learning_rate": 1.784015852047556e-05, "loss": 0.0323, "step": 58950 }, { "epoch": 12.92, "learning_rate": 1.7812637604579484e-05, "loss": 0.0273, "step": 59000 }, { "epoch": 12.92, "eval_acc": 0.6214248569134913, "eval_cer": 0.03904502177522756, "eval_loss": 0.26956528425216675, "eval_runtime": 2416.1553, "eval_samples_per_second": 3.184, "eval_steps_per_second": 0.398, "step": 59000 }, { "epoch": 12.93, "learning_rate": 1.77851166886834e-05, "loss": 0.0295, "step": 59050 }, { "epoch": 12.94, "learning_rate": 1.775759577278732e-05, "loss": 0.0284, "step": 59100 }, { "epoch": 12.95, "learning_rate": 1.773007485689124e-05, "loss": 0.0266, "step": 59150 }, { "epoch": 12.96, "learning_rate": 1.7702553940995156e-05, "loss": 0.0311, "step": 59200 }, { "epoch": 12.97, "learning_rate": 1.7675033025099073e-05, "loss": 0.0276, "step": 59250 }, { "epoch": 12.98, "learning_rate": 1.7647512109202997e-05, "loss": 0.0285, "step": 59300 }, { "epoch": 12.99, "learning_rate": 1.7619991193306914e-05, "loss": 0.0299, "step": 59350 }, { "epoch": 13.01, "learning_rate": 1.759247027741083e-05, "loss": 0.0266, "step": 59400 }, { "epoch": 13.02, "learning_rate": 1.7564949361514752e-05, "loss": 0.0196, "step": 59450 }, { "epoch": 13.03, "learning_rate": 1.753742844561867e-05, "loss": 0.0166, "step": 59500 }, { "epoch": 13.03, "eval_acc": 0.6300052001261044, "eval_cer": 0.03765122116468397, "eval_loss": 0.26843369007110596, "eval_runtime": 2419.8398, "eval_samples_per_second": 3.179, "eval_steps_per_second": 0.398, "step": 59500 }, { "epoch": 13.04, "learning_rate": 1.750990752972259e-05, "loss": 0.0165, "step": 59550 }, { "epoch": 13.05, "learning_rate": 1.748238661382651e-05, "loss": 0.0168, "step": 59600 }, { "epoch": 13.06, "learning_rate": 1.7454865697930427e-05, "loss": 0.0195, "step": 59650 }, { "epoch": 13.07, "learning_rate": 1.7427344782034348e-05, "loss": 0.019, "step": 59700 }, { "epoch": 13.08, "learning_rate": 1.7399823866138265e-05, "loss": 0.018, "step": 59750 }, { "epoch": 13.09, "learning_rate": 1.7372302950242182e-05, "loss": 0.0188, "step": 59800 }, { "epoch": 13.1, "learning_rate": 1.7344782034346106e-05, "loss": 0.0192, "step": 59850 }, { "epoch": 13.12, "learning_rate": 1.7317261118450023e-05, "loss": 0.0201, "step": 59900 }, { "epoch": 13.13, "learning_rate": 1.728974020255394e-05, "loss": 0.0193, "step": 59950 }, { "epoch": 13.14, "learning_rate": 1.726221928665786e-05, "loss": 0.0178, "step": 60000 }, { "epoch": 13.14, "eval_acc": 0.6283151325236199, "eval_cer": 0.03863571881506793, "eval_loss": 0.26756173372268677, "eval_runtime": 2429.9446, "eval_samples_per_second": 3.166, "eval_steps_per_second": 0.396, "step": 60000 }, { "epoch": 13.15, "learning_rate": 1.723469837076178e-05, "loss": 0.0204, "step": 60050 }, { "epoch": 13.16, "learning_rate": 1.72071774548657e-05, "loss": 0.0182, "step": 60100 }, { "epoch": 13.17, "learning_rate": 1.717965653896962e-05, "loss": 0.02, "step": 60150 }, { "epoch": 13.18, "learning_rate": 1.7152135623073537e-05, "loss": 0.0218, "step": 60200 }, { "epoch": 13.19, "learning_rate": 1.7124614707177454e-05, "loss": 0.0205, "step": 60250 }, { "epoch": 13.2, "learning_rate": 1.7097093791281374e-05, "loss": 0.0164, "step": 60300 }, { "epoch": 13.21, "learning_rate": 1.706957287538529e-05, "loss": 0.0191, "step": 60350 }, { "epoch": 13.22, "learning_rate": 1.7042051959489212e-05, "loss": 0.019, "step": 60400 }, { "epoch": 13.24, "learning_rate": 1.7014531043593133e-05, "loss": 0.019, "step": 60450 }, { "epoch": 13.25, "learning_rate": 1.698701012769705e-05, "loss": 0.0191, "step": 60500 }, { "epoch": 13.25, "eval_acc": 0.6319552781289709, "eval_cer": 0.03849618371501351, "eval_loss": 0.2676541805267334, "eval_runtime": 2429.477, "eval_samples_per_second": 3.166, "eval_steps_per_second": 0.396, "step": 60500 }, { "epoch": 13.26, "learning_rate": 1.695948921180097e-05, "loss": 0.0165, "step": 60550 }, { "epoch": 13.27, "learning_rate": 1.6931968295904888e-05, "loss": 0.0189, "step": 60600 }, { "epoch": 13.28, "learning_rate": 1.6904447380008808e-05, "loss": 0.0184, "step": 60650 }, { "epoch": 13.29, "learning_rate": 1.687692646411273e-05, "loss": 0.0165, "step": 60700 }, { "epoch": 13.3, "learning_rate": 1.6849405548216646e-05, "loss": 0.019, "step": 60750 }, { "epoch": 13.31, "learning_rate": 1.6821884632320563e-05, "loss": 0.0203, "step": 60800 }, { "epoch": 13.32, "learning_rate": 1.6794363716424484e-05, "loss": 0.0193, "step": 60850 }, { "epoch": 13.33, "learning_rate": 1.67668428005284e-05, "loss": 0.0191, "step": 60900 }, { "epoch": 13.35, "learning_rate": 1.673932188463232e-05, "loss": 0.0208, "step": 60950 }, { "epoch": 13.36, "learning_rate": 1.6711800968736242e-05, "loss": 0.0189, "step": 61000 }, { "epoch": 13.36, "eval_acc": 0.635595423734322, "eval_cer": 0.03698610385442458, "eval_loss": 0.268144428730011, "eval_runtime": 2411.8799, "eval_samples_per_second": 3.189, "eval_steps_per_second": 0.399, "step": 61000 }, { "epoch": 13.37, "learning_rate": 1.668428005284016e-05, "loss": 0.0202, "step": 61050 }, { "epoch": 13.38, "learning_rate": 1.665675913694408e-05, "loss": 0.0176, "step": 61100 }, { "epoch": 13.39, "learning_rate": 1.662978863936592e-05, "loss": 0.0198, "step": 61150 }, { "epoch": 13.4, "learning_rate": 1.660226772346984e-05, "loss": 0.0209, "step": 61200 }, { "epoch": 13.41, "learning_rate": 1.6574746807573756e-05, "loss": 0.0199, "step": 61250 }, { "epoch": 13.42, "learning_rate": 1.6547225891677677e-05, "loss": 0.0195, "step": 61300 }, { "epoch": 13.43, "learning_rate": 1.6519704975781594e-05, "loss": 0.0173, "step": 61350 }, { "epoch": 13.44, "learning_rate": 1.649218405988551e-05, "loss": 0.0187, "step": 61400 }, { "epoch": 13.45, "learning_rate": 1.6464663143989435e-05, "loss": 0.0234, "step": 61450 }, { "epoch": 13.47, "learning_rate": 1.6437142228093352e-05, "loss": 0.0168, "step": 61500 }, { "epoch": 13.47, "eval_acc": 0.6306552261270598, "eval_cer": 0.038207811174901046, "eval_loss": 0.2689424753189087, "eval_runtime": 2415.0879, "eval_samples_per_second": 3.185, "eval_steps_per_second": 0.398, "step": 61500 }, { "epoch": 13.48, "learning_rate": 1.640962131219727e-05, "loss": 0.0206, "step": 61550 }, { "epoch": 13.49, "learning_rate": 1.638210039630119e-05, "loss": 0.0195, "step": 61600 }, { "epoch": 13.5, "learning_rate": 1.6354579480405107e-05, "loss": 0.0177, "step": 61650 }, { "epoch": 13.51, "learning_rate": 1.6327058564509028e-05, "loss": 0.0198, "step": 61700 }, { "epoch": 13.52, "learning_rate": 1.629953764861295e-05, "loss": 0.0192, "step": 61750 }, { "epoch": 13.53, "learning_rate": 1.6272016732716866e-05, "loss": 0.0175, "step": 61800 }, { "epoch": 13.54, "learning_rate": 1.6244495816820786e-05, "loss": 0.0176, "step": 61850 }, { "epoch": 13.55, "learning_rate": 1.6216974900924703e-05, "loss": 0.0174, "step": 61900 }, { "epoch": 13.56, "learning_rate": 1.618945398502862e-05, "loss": 0.0178, "step": 61950 }, { "epoch": 13.57, "learning_rate": 1.6161933069132544e-05, "loss": 0.0193, "step": 62000 }, { "epoch": 13.57, "eval_acc": 0.6322152885293532, "eval_cer": 0.03755199620464528, "eval_loss": 0.26729756593704224, "eval_runtime": 2401.4022, "eval_samples_per_second": 3.203, "eval_steps_per_second": 0.401, "step": 62000 }, { "epoch": 13.59, "learning_rate": 1.613441215323646e-05, "loss": 0.0199, "step": 62050 }, { "epoch": 13.6, "learning_rate": 1.610689123734038e-05, "loss": 0.0223, "step": 62100 }, { "epoch": 13.61, "learning_rate": 1.60793703214443e-05, "loss": 0.0206, "step": 62150 }, { "epoch": 13.62, "learning_rate": 1.6051849405548216e-05, "loss": 0.0205, "step": 62200 }, { "epoch": 13.63, "learning_rate": 1.6024328489652134e-05, "loss": 0.0201, "step": 62250 }, { "epoch": 13.64, "learning_rate": 1.5996807573756058e-05, "loss": 0.0197, "step": 62300 }, { "epoch": 13.65, "learning_rate": 1.5969286657859975e-05, "loss": 0.0183, "step": 62350 }, { "epoch": 13.66, "learning_rate": 1.5941765741963892e-05, "loss": 0.0184, "step": 62400 }, { "epoch": 13.67, "learning_rate": 1.5914244826067812e-05, "loss": 0.0197, "step": 62450 }, { "epoch": 13.68, "learning_rate": 1.588672391017173e-05, "loss": 0.0184, "step": 62500 }, { "epoch": 13.68, "eval_acc": 0.6339053561318375, "eval_cer": 0.037992306964817, "eval_loss": 0.26707085967063904, "eval_runtime": 2418.7114, "eval_samples_per_second": 3.18, "eval_steps_per_second": 0.398, "step": 62500 }, { "epoch": 13.7, "learning_rate": 1.585920299427565e-05, "loss": 0.0202, "step": 62550 }, { "epoch": 13.71, "learning_rate": 1.583168207837957e-05, "loss": 0.0191, "step": 62600 }, { "epoch": 13.72, "learning_rate": 1.5804161162483488e-05, "loss": 0.0217, "step": 62650 }, { "epoch": 13.73, "learning_rate": 1.577664024658741e-05, "loss": 0.0214, "step": 62700 }, { "epoch": 13.74, "learning_rate": 1.5749119330691326e-05, "loss": 0.0191, "step": 62750 }, { "epoch": 13.75, "learning_rate": 1.5721598414795243e-05, "loss": 0.0167, "step": 62800 }, { "epoch": 13.76, "learning_rate": 1.5694077498899167e-05, "loss": 0.0187, "step": 62850 }, { "epoch": 13.77, "learning_rate": 1.5666556583003084e-05, "loss": 0.0185, "step": 62900 }, { "epoch": 13.78, "learning_rate": 1.5639035667107e-05, "loss": 0.0185, "step": 62950 }, { "epoch": 13.79, "learning_rate": 1.561151475121092e-05, "loss": 0.0184, "step": 63000 }, { "epoch": 13.79, "eval_acc": 0.6333853353310731, "eval_cer": 0.037341143164563044, "eval_loss": 0.266510546207428, "eval_runtime": 2421.5513, "eval_samples_per_second": 3.176, "eval_steps_per_second": 0.397, "step": 63000 }, { "epoch": 13.8, "learning_rate": 1.558399383531484e-05, "loss": 0.0197, "step": 63050 }, { "epoch": 13.82, "learning_rate": 1.555647291941876e-05, "loss": 0.0174, "step": 63100 }, { "epoch": 13.83, "learning_rate": 1.552895200352268e-05, "loss": 0.0182, "step": 63150 }, { "epoch": 13.84, "learning_rate": 1.5501431087626597e-05, "loss": 0.0191, "step": 63200 }, { "epoch": 13.85, "learning_rate": 1.5473910171730514e-05, "loss": 0.0202, "step": 63250 }, { "epoch": 13.86, "learning_rate": 1.5446389255834435e-05, "loss": 0.0194, "step": 63300 }, { "epoch": 13.87, "learning_rate": 1.5418868339938352e-05, "loss": 0.0179, "step": 63350 }, { "epoch": 13.88, "learning_rate": 1.5391347424042272e-05, "loss": 0.0167, "step": 63400 }, { "epoch": 13.89, "learning_rate": 1.5363826508146193e-05, "loss": 0.0179, "step": 63450 }, { "epoch": 13.9, "learning_rate": 1.533630559225011e-05, "loss": 0.0176, "step": 63500 }, { "epoch": 13.9, "eval_acc": 0.6387155485389086, "eval_cer": 0.0379597487748043, "eval_loss": 0.26856788992881775, "eval_runtime": 2412.2522, "eval_samples_per_second": 3.189, "eval_steps_per_second": 0.399, "step": 63500 }, { "epoch": 13.91, "learning_rate": 1.530878467635403e-05, "loss": 0.0198, "step": 63550 }, { "epoch": 13.93, "learning_rate": 1.5281263760457948e-05, "loss": 0.0187, "step": 63600 }, { "epoch": 13.94, "learning_rate": 1.5253742844561868e-05, "loss": 0.0218, "step": 63650 }, { "epoch": 13.95, "learning_rate": 1.5226221928665787e-05, "loss": 0.0207, "step": 63700 }, { "epoch": 13.96, "learning_rate": 1.5198701012769706e-05, "loss": 0.0175, "step": 63750 }, { "epoch": 13.97, "learning_rate": 1.5171180096873625e-05, "loss": 0.0203, "step": 63800 }, { "epoch": 13.98, "learning_rate": 1.5143659180977542e-05, "loss": 0.0187, "step": 63850 }, { "epoch": 13.99, "learning_rate": 1.5116138265081461e-05, "loss": 0.0182, "step": 63900 }, { "epoch": 14.0, "learning_rate": 1.5088617349185383e-05, "loss": 0.0195, "step": 63950 }, { "epoch": 14.01, "learning_rate": 1.50610964332893e-05, "loss": 0.0126, "step": 64000 }, { "epoch": 14.01, "eval_acc": 0.639235569339673, "eval_cer": 0.037051220234449976, "eval_loss": 0.2667127847671509, "eval_runtime": 2419.4475, "eval_samples_per_second": 3.179, "eval_steps_per_second": 0.398, "step": 64000 }, { "epoch": 14.02, "learning_rate": 1.503357551739322e-05, "loss": 0.0141, "step": 64050 }, { "epoch": 14.03, "learning_rate": 1.5006054601497138e-05, "loss": 0.012, "step": 64100 }, { "epoch": 14.05, "learning_rate": 1.4978533685601057e-05, "loss": 0.0117, "step": 64150 }, { "epoch": 14.06, "learning_rate": 1.4951012769704978e-05, "loss": 0.0114, "step": 64200 }, { "epoch": 14.07, "learning_rate": 1.4923491853808896e-05, "loss": 0.0118, "step": 64250 }, { "epoch": 14.08, "learning_rate": 1.4895970937912815e-05, "loss": 0.0124, "step": 64300 }, { "epoch": 14.09, "learning_rate": 1.4868450022016733e-05, "loss": 0.0124, "step": 64350 }, { "epoch": 14.1, "learning_rate": 1.4840929106120651e-05, "loss": 0.0125, "step": 64400 }, { "epoch": 14.11, "learning_rate": 1.481340819022457e-05, "loss": 0.0136, "step": 64450 }, { "epoch": 14.12, "learning_rate": 1.478588727432849e-05, "loss": 0.0139, "step": 64500 }, { "epoch": 14.12, "eval_acc": 0.6450858033482727, "eval_cer": 0.03637680058418695, "eval_loss": 0.2650182843208313, "eval_runtime": 2417.3045, "eval_samples_per_second": 3.182, "eval_steps_per_second": 0.398, "step": 64500 }, { "epoch": 14.13, "learning_rate": 1.475836635843241e-05, "loss": 0.0126, "step": 64550 }, { "epoch": 14.14, "learning_rate": 1.4730845442536328e-05, "loss": 0.0128, "step": 64600 }, { "epoch": 14.16, "learning_rate": 1.4703324526640247e-05, "loss": 0.0128, "step": 64650 }, { "epoch": 14.17, "learning_rate": 1.4675803610744166e-05, "loss": 0.012, "step": 64700 }, { "epoch": 14.18, "learning_rate": 1.4648282694848083e-05, "loss": 0.0126, "step": 64750 }, { "epoch": 14.19, "learning_rate": 1.4620761778952006e-05, "loss": 0.012, "step": 64800 }, { "epoch": 14.2, "learning_rate": 1.4593240863055923e-05, "loss": 0.0114, "step": 64850 }, { "epoch": 14.21, "learning_rate": 1.4565719947159842e-05, "loss": 0.0111, "step": 64900 }, { "epoch": 14.22, "learning_rate": 1.453819903126376e-05, "loss": 0.0123, "step": 64950 }, { "epoch": 14.23, "learning_rate": 1.451067811536768e-05, "loss": 0.0111, "step": 65000 }, { "epoch": 14.23, "eval_acc": 0.6524960997591659, "eval_cer": 0.03636594785418272, "eval_loss": 0.26658621430397034, "eval_runtime": 2418.1674, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.398, "step": 65000 }, { "epoch": 14.24, "learning_rate": 1.44831571994716e-05, "loss": 0.0126, "step": 65050 }, { "epoch": 14.25, "learning_rate": 1.4455636283575519e-05, "loss": 0.0128, "step": 65100 }, { "epoch": 14.26, "learning_rate": 1.4428115367679438e-05, "loss": 0.0121, "step": 65150 }, { "epoch": 14.28, "learning_rate": 1.4400594451783357e-05, "loss": 0.0121, "step": 65200 }, { "epoch": 14.29, "learning_rate": 1.4373073535887274e-05, "loss": 0.0126, "step": 65250 }, { "epoch": 14.3, "learning_rate": 1.4345552619991193e-05, "loss": 0.0113, "step": 65300 }, { "epoch": 14.31, "learning_rate": 1.4318031704095115e-05, "loss": 0.0148, "step": 65350 }, { "epoch": 14.32, "learning_rate": 1.4290510788199032e-05, "loss": 0.0126, "step": 65400 }, { "epoch": 14.33, "learning_rate": 1.426298987230295e-05, "loss": 0.0122, "step": 65450 }, { "epoch": 14.34, "learning_rate": 1.423546895640687e-05, "loss": 0.0113, "step": 65500 }, { "epoch": 14.34, "eval_acc": 0.6461258449498016, "eval_cer": 0.03683881680436714, "eval_loss": 0.2688175439834595, "eval_runtime": 2406.3904, "eval_samples_per_second": 3.196, "eval_steps_per_second": 0.4, "step": 65500 }, { "epoch": 14.35, "learning_rate": 1.4207948040510789e-05, "loss": 0.0108, "step": 65550 }, { "epoch": 14.36, "learning_rate": 1.4180427124614709e-05, "loss": 0.0137, "step": 65600 }, { "epoch": 14.37, "learning_rate": 1.4153456627036548e-05, "loss": 0.0121, "step": 65650 }, { "epoch": 14.39, "learning_rate": 1.4125935711140467e-05, "loss": 0.0106, "step": 65700 }, { "epoch": 14.4, "learning_rate": 1.4098414795244386e-05, "loss": 0.0174, "step": 65750 }, { "epoch": 14.41, "learning_rate": 1.4070893879348305e-05, "loss": 0.0121, "step": 65800 }, { "epoch": 14.42, "learning_rate": 1.4043923381770147e-05, "loss": 0.0117, "step": 65850 }, { "epoch": 14.43, "learning_rate": 1.4016402465874064e-05, "loss": 0.0113, "step": 65900 }, { "epoch": 14.44, "learning_rate": 1.3988881549977983e-05, "loss": 0.0112, "step": 65950 }, { "epoch": 14.45, "learning_rate": 1.3961360634081902e-05, "loss": 0.0139, "step": 66000 }, { "epoch": 14.45, "eval_acc": 0.6544461777620324, "eval_cer": 0.03642796345420691, "eval_loss": 0.2679358124732971, "eval_runtime": 2423.2201, "eval_samples_per_second": 3.174, "eval_steps_per_second": 0.397, "step": 66000 }, { "epoch": 14.46, "learning_rate": 1.3933839718185823e-05, "loss": 0.014, "step": 66050 }, { "epoch": 14.47, "learning_rate": 1.3906318802289742e-05, "loss": 0.015, "step": 66100 }, { "epoch": 14.48, "learning_rate": 1.387879788639366e-05, "loss": 0.0131, "step": 66150 }, { "epoch": 14.49, "learning_rate": 1.385127697049758e-05, "loss": 0.012, "step": 66200 }, { "epoch": 14.51, "learning_rate": 1.3823756054601496e-05, "loss": 0.0132, "step": 66250 }, { "epoch": 14.52, "learning_rate": 1.3796235138705415e-05, "loss": 0.0108, "step": 66300 }, { "epoch": 14.53, "learning_rate": 1.3768714222809338e-05, "loss": 0.0103, "step": 66350 }, { "epoch": 14.54, "learning_rate": 1.3741193306913255e-05, "loss": 0.0136, "step": 66400 }, { "epoch": 14.55, "learning_rate": 1.3713672391017174e-05, "loss": 0.0123, "step": 66450 }, { "epoch": 14.56, "learning_rate": 1.3686151475121092e-05, "loss": 0.0125, "step": 66500 }, { "epoch": 14.56, "eval_acc": 0.650286011355917, "eval_cer": 0.03602951322405151, "eval_loss": 0.26753196120262146, "eval_runtime": 2416.4508, "eval_samples_per_second": 3.183, "eval_steps_per_second": 0.398, "step": 66500 }, { "epoch": 14.57, "learning_rate": 1.3658630559225011e-05, "loss": 0.0124, "step": 66550 }, { "epoch": 14.58, "learning_rate": 1.3631109643328928e-05, "loss": 0.0129, "step": 66600 }, { "epoch": 14.59, "learning_rate": 1.360358872743285e-05, "loss": 0.0118, "step": 66650 }, { "epoch": 14.6, "learning_rate": 1.357606781153677e-05, "loss": 0.0118, "step": 66700 }, { "epoch": 14.61, "learning_rate": 1.3548546895640687e-05, "loss": 0.0099, "step": 66750 }, { "epoch": 14.63, "learning_rate": 1.3521025979744606e-05, "loss": 0.0118, "step": 66800 }, { "epoch": 14.64, "learning_rate": 1.3493505063848524e-05, "loss": 0.0113, "step": 66850 }, { "epoch": 14.65, "learning_rate": 1.3465984147952445e-05, "loss": 0.0108, "step": 66900 }, { "epoch": 14.66, "learning_rate": 1.3438463232056364e-05, "loss": 0.0113, "step": 66950 }, { "epoch": 14.67, "learning_rate": 1.3410942316160283e-05, "loss": 0.0153, "step": 67000 }, { "epoch": 14.67, "eval_acc": 0.6463858553501838, "eval_cer": 0.03614424208409626, "eval_loss": 0.2684445381164551, "eval_runtime": 2411.1808, "eval_samples_per_second": 3.19, "eval_steps_per_second": 0.399, "step": 67000 }, { "epoch": 14.68, "learning_rate": 1.3383421400264202e-05, "loss": 0.0128, "step": 67050 }, { "epoch": 14.69, "learning_rate": 1.3355900484368119e-05, "loss": 0.0101, "step": 67100 }, { "epoch": 14.7, "learning_rate": 1.3328379568472038e-05, "loss": 0.0115, "step": 67150 }, { "epoch": 14.71, "learning_rate": 1.330085865257596e-05, "loss": 0.0113, "step": 67200 }, { "epoch": 14.72, "learning_rate": 1.3273337736679877e-05, "loss": 0.0129, "step": 67250 }, { "epoch": 14.74, "learning_rate": 1.3245816820783796e-05, "loss": 0.0133, "step": 67300 }, { "epoch": 14.75, "learning_rate": 1.3218295904887715e-05, "loss": 0.0134, "step": 67350 }, { "epoch": 14.76, "learning_rate": 1.3190774988991634e-05, "loss": 0.0116, "step": 67400 }, { "epoch": 14.77, "learning_rate": 1.3163254073095554e-05, "loss": 0.0101, "step": 67450 }, { "epoch": 14.78, "learning_rate": 1.3135733157199473e-05, "loss": 0.0126, "step": 67500 }, { "epoch": 14.78, "eval_acc": 0.6523660945589748, "eval_cer": 0.03534889201378607, "eval_loss": 0.26629167795181274, "eval_runtime": 2409.1288, "eval_samples_per_second": 3.193, "eval_steps_per_second": 0.399, "step": 67500 }, { "epoch": 14.79, "learning_rate": 1.3108212241303392e-05, "loss": 0.0127, "step": 67550 }, { "epoch": 14.8, "learning_rate": 1.3080691325407309e-05, "loss": 0.0104, "step": 67600 }, { "epoch": 14.81, "learning_rate": 1.3053170409511228e-05, "loss": 0.0119, "step": 67650 }, { "epoch": 14.82, "learning_rate": 1.3025649493615147e-05, "loss": 0.0142, "step": 67700 }, { "epoch": 14.83, "learning_rate": 1.2998128577719067e-05, "loss": 0.0123, "step": 67750 }, { "epoch": 14.84, "learning_rate": 1.2970607661822986e-05, "loss": 0.0124, "step": 67800 }, { "epoch": 14.86, "learning_rate": 1.2943086745926905e-05, "loss": 0.0118, "step": 67850 }, { "epoch": 14.87, "learning_rate": 1.2915565830030824e-05, "loss": 0.0127, "step": 67900 }, { "epoch": 14.88, "learning_rate": 1.2888044914134743e-05, "loss": 0.0141, "step": 67950 }, { "epoch": 14.89, "learning_rate": 1.2860523998238663e-05, "loss": 0.0123, "step": 68000 }, { "epoch": 14.89, "eval_acc": 0.6497659905551526, "eval_cer": 0.035796954723960814, "eval_loss": 0.2682942748069763, "eval_runtime": 2405.0061, "eval_samples_per_second": 3.198, "eval_steps_per_second": 0.4, "step": 68000 }, { "epoch": 14.9, "learning_rate": 1.2833003082342582e-05, "loss": 0.0138, "step": 68050 }, { "epoch": 14.91, "learning_rate": 1.28054821664465e-05, "loss": 0.0124, "step": 68100 }, { "epoch": 14.92, "learning_rate": 1.2777961250550418e-05, "loss": 0.0113, "step": 68150 }, { "epoch": 14.93, "learning_rate": 1.2750440334654337e-05, "loss": 0.0113, "step": 68200 }, { "epoch": 14.94, "learning_rate": 1.2722919418758256e-05, "loss": 0.015, "step": 68250 }, { "epoch": 14.95, "learning_rate": 1.2695398502862177e-05, "loss": 0.0124, "step": 68300 }, { "epoch": 14.97, "learning_rate": 1.2667877586966095e-05, "loss": 0.0132, "step": 68350 }, { "epoch": 14.98, "learning_rate": 1.2640356671070014e-05, "loss": 0.0118, "step": 68400 }, { "epoch": 14.99, "learning_rate": 1.2612835755173933e-05, "loss": 0.0132, "step": 68450 }, { "epoch": 15.0, "learning_rate": 1.258531483927785e-05, "loss": 0.0119, "step": 68500 }, { "epoch": 15.0, "eval_acc": 0.6535361413606947, "eval_cer": 0.035972148794029137, "eval_loss": 0.26940032839775085, "eval_runtime": 2401.4191, "eval_samples_per_second": 3.203, "eval_steps_per_second": 0.401, "step": 68500 }, { "epoch": 15.01, "learning_rate": 1.2557793923381773e-05, "loss": 0.0098, "step": 68550 }, { "epoch": 15.02, "learning_rate": 1.2530273007485691e-05, "loss": 0.0087, "step": 68600 }, { "epoch": 15.03, "learning_rate": 1.2502752091589609e-05, "loss": 0.0081, "step": 68650 }, { "epoch": 15.04, "learning_rate": 1.2475231175693527e-05, "loss": 0.0081, "step": 68700 }, { "epoch": 15.05, "learning_rate": 1.2447710259797446e-05, "loss": 0.0081, "step": 68750 }, { "epoch": 15.06, "learning_rate": 1.2420189343901365e-05, "loss": 0.0087, "step": 68800 }, { "epoch": 15.07, "learning_rate": 1.2392668428005284e-05, "loss": 0.0093, "step": 68850 }, { "epoch": 15.09, "learning_rate": 1.2365147512109205e-05, "loss": 0.0082, "step": 68900 }, { "epoch": 15.1, "learning_rate": 1.2337626596213123e-05, "loss": 0.0089, "step": 68950 }, { "epoch": 15.11, "learning_rate": 1.231010568031704e-05, "loss": 0.0077, "step": 69000 }, { "epoch": 15.11, "eval_acc": 0.6544461777620324, "eval_cer": 0.035460520093829606, "eval_loss": 0.2669413983821869, "eval_runtime": 2412.9546, "eval_samples_per_second": 3.188, "eval_steps_per_second": 0.399, "step": 69000 }, { "epoch": 15.12, "learning_rate": 1.2282584764420961e-05, "loss": 0.0081, "step": 69050 }, { "epoch": 15.13, "learning_rate": 1.225506384852488e-05, "loss": 0.007, "step": 69100 }, { "epoch": 15.14, "learning_rate": 1.2227542932628797e-05, "loss": 0.0088, "step": 69150 }, { "epoch": 15.15, "learning_rate": 1.2200022016732718e-05, "loss": 0.0091, "step": 69200 }, { "epoch": 15.16, "learning_rate": 1.2172501100836637e-05, "loss": 0.0089, "step": 69250 }, { "epoch": 15.17, "learning_rate": 1.2144980184940555e-05, "loss": 0.0072, "step": 69300 }, { "epoch": 15.18, "learning_rate": 1.2117459269044474e-05, "loss": 0.0072, "step": 69350 }, { "epoch": 15.2, "learning_rate": 1.2089938353148393e-05, "loss": 0.0067, "step": 69400 }, { "epoch": 15.21, "learning_rate": 1.2062417437252314e-05, "loss": 0.0068, "step": 69450 }, { "epoch": 15.22, "learning_rate": 1.2034896521356231e-05, "loss": 0.0086, "step": 69500 }, { "epoch": 15.22, "eval_acc": 0.6547061881624147, "eval_cer": 0.03550083023384532, "eval_loss": 0.26964884996414185, "eval_runtime": 2402.8785, "eval_samples_per_second": 3.201, "eval_steps_per_second": 0.4, "step": 69500 }, { "epoch": 15.23, "learning_rate": 1.200737560546015e-05, "loss": 0.0091, "step": 69550 }, { "epoch": 15.24, "learning_rate": 1.197985468956407e-05, "loss": 0.0077, "step": 69600 }, { "epoch": 15.25, "learning_rate": 1.1952333773667987e-05, "loss": 0.0082, "step": 69650 }, { "epoch": 15.26, "learning_rate": 1.1924812857771906e-05, "loss": 0.0069, "step": 69700 }, { "epoch": 15.27, "learning_rate": 1.1897291941875827e-05, "loss": 0.0078, "step": 69750 }, { "epoch": 15.28, "learning_rate": 1.1869771025979746e-05, "loss": 0.0089, "step": 69800 }, { "epoch": 15.29, "learning_rate": 1.1842800528401587e-05, "loss": 0.0079, "step": 69850 }, { "epoch": 15.3, "learning_rate": 1.1815279612505505e-05, "loss": 0.0092, "step": 69900 }, { "epoch": 15.32, "learning_rate": 1.1787758696609423e-05, "loss": 0.0077, "step": 69950 }, { "epoch": 15.33, "learning_rate": 1.1760237780713343e-05, "loss": 0.0072, "step": 70000 }, { "epoch": 15.33, "eval_acc": 0.6570462817658546, "eval_cer": 0.03543881463382114, "eval_loss": 0.2687400281429291, "eval_runtime": 2406.8006, "eval_samples_per_second": 3.196, "eval_steps_per_second": 0.4, "step": 70000 }, { "epoch": 15.34, "learning_rate": 1.1732716864817262e-05, "loss": 0.0069, "step": 70050 }, { "epoch": 15.35, "learning_rate": 1.170519594892118e-05, "loss": 0.0075, "step": 70100 }, { "epoch": 15.36, "learning_rate": 1.16776750330251e-05, "loss": 0.0074, "step": 70150 }, { "epoch": 15.37, "learning_rate": 1.1650154117129019e-05, "loss": 0.0072, "step": 70200 }, { "epoch": 15.38, "learning_rate": 1.1622633201232937e-05, "loss": 0.0073, "step": 70250 }, { "epoch": 15.39, "learning_rate": 1.1595112285336856e-05, "loss": 0.007, "step": 70300 }, { "epoch": 15.4, "learning_rate": 1.1567591369440775e-05, "loss": 0.0081, "step": 70350 }, { "epoch": 15.41, "learning_rate": 1.1540070453544696e-05, "loss": 0.0086, "step": 70400 }, { "epoch": 15.43, "learning_rate": 1.1512549537648613e-05, "loss": 0.0066, "step": 70450 }, { "epoch": 15.44, "learning_rate": 1.1485028621752532e-05, "loss": 0.0087, "step": 70500 }, { "epoch": 15.44, "eval_acc": 0.659906396170059, "eval_cer": 0.034970596853638534, "eval_loss": 0.2686842978000641, "eval_runtime": 2416.3582, "eval_samples_per_second": 3.183, "eval_steps_per_second": 0.398, "step": 70500 }, { "epoch": 15.45, "learning_rate": 1.1457507705856452e-05, "loss": 0.0068, "step": 70550 }, { "epoch": 15.46, "learning_rate": 1.142998678996037e-05, "loss": 0.0077, "step": 70600 }, { "epoch": 15.47, "learning_rate": 1.140246587406429e-05, "loss": 0.0089, "step": 70650 }, { "epoch": 15.48, "learning_rate": 1.1374944958168209e-05, "loss": 0.0098, "step": 70700 }, { "epoch": 15.49, "learning_rate": 1.1347424042272128e-05, "loss": 0.0088, "step": 70750 }, { "epoch": 15.5, "learning_rate": 1.1319903126376047e-05, "loss": 0.0071, "step": 70800 }, { "epoch": 15.51, "learning_rate": 1.1292382210479965e-05, "loss": 0.0068, "step": 70850 }, { "epoch": 15.52, "learning_rate": 1.1264861294583884e-05, "loss": 0.0078, "step": 70900 }, { "epoch": 15.53, "learning_rate": 1.1237340378687803e-05, "loss": 0.0083, "step": 70950 }, { "epoch": 15.55, "learning_rate": 1.1209819462791722e-05, "loss": 0.0082, "step": 71000 }, { "epoch": 15.55, "eval_acc": 0.6561362453645169, "eval_cer": 0.035308581873770346, "eval_loss": 0.2689681351184845, "eval_runtime": 2406.9298, "eval_samples_per_second": 3.196, "eval_steps_per_second": 0.4, "step": 71000 }, { "epoch": 15.56, "learning_rate": 1.1182298546895641e-05, "loss": 0.0091, "step": 71050 }, { "epoch": 15.57, "learning_rate": 1.115477763099956e-05, "loss": 0.0084, "step": 71100 }, { "epoch": 15.58, "learning_rate": 1.1127256715103479e-05, "loss": 0.0082, "step": 71150 }, { "epoch": 15.59, "learning_rate": 1.1099735799207397e-05, "loss": 0.0082, "step": 71200 }, { "epoch": 15.6, "learning_rate": 1.1072214883311318e-05, "loss": 0.0065, "step": 71250 }, { "epoch": 15.61, "learning_rate": 1.1044693967415235e-05, "loss": 0.0078, "step": 71300 }, { "epoch": 15.62, "learning_rate": 1.1017173051519156e-05, "loss": 0.008, "step": 71350 }, { "epoch": 15.63, "learning_rate": 1.0989652135623075e-05, "loss": 0.0085, "step": 71400 }, { "epoch": 15.64, "learning_rate": 1.0962131219726993e-05, "loss": 0.008, "step": 71450 }, { "epoch": 15.65, "learning_rate": 1.0934610303830912e-05, "loss": 0.008, "step": 71500 }, { "epoch": 15.65, "eval_acc": 0.659906396170059, "eval_cer": 0.03512098468369718, "eval_loss": 0.26970669627189636, "eval_runtime": 2418.3397, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.398, "step": 71500 }, { "epoch": 15.67, "learning_rate": 1.0907089387934831e-05, "loss": 0.008, "step": 71550 }, { "epoch": 15.68, "learning_rate": 1.087956847203875e-05, "loss": 0.0079, "step": 71600 }, { "epoch": 15.69, "learning_rate": 1.0852047556142669e-05, "loss": 0.0073, "step": 71650 }, { "epoch": 15.7, "learning_rate": 1.0824526640246588e-05, "loss": 0.0105, "step": 71700 }, { "epoch": 15.71, "learning_rate": 1.0797005724350507e-05, "loss": 0.0089, "step": 71750 }, { "epoch": 15.72, "learning_rate": 1.0769484808454425e-05, "loss": 0.0086, "step": 71800 }, { "epoch": 15.73, "learning_rate": 1.0741963892558344e-05, "loss": 0.0078, "step": 71850 }, { "epoch": 15.74, "learning_rate": 1.0714442976662265e-05, "loss": 0.0075, "step": 71900 }, { "epoch": 15.75, "learning_rate": 1.0686922060766184e-05, "loss": 0.0079, "step": 71950 }, { "epoch": 15.76, "learning_rate": 1.0659401144870101e-05, "loss": 0.0161, "step": 72000 }, { "epoch": 15.76, "eval_acc": 0.6601664065704411, "eval_cer": 0.03435199124339728, "eval_loss": 0.2675269842147827, "eval_runtime": 2410.8647, "eval_samples_per_second": 3.191, "eval_steps_per_second": 0.399, "step": 72000 }, { "epoch": 15.78, "learning_rate": 1.0631880228974021e-05, "loss": 0.009, "step": 72050 }, { "epoch": 15.79, "learning_rate": 1.060435931307794e-05, "loss": 0.0082, "step": 72100 }, { "epoch": 15.8, "learning_rate": 1.0576838397181857e-05, "loss": 0.0076, "step": 72150 }, { "epoch": 15.81, "learning_rate": 1.0549317481285778e-05, "loss": 0.0078, "step": 72200 }, { "epoch": 15.82, "learning_rate": 1.0521796565389697e-05, "loss": 0.0081, "step": 72250 }, { "epoch": 15.83, "learning_rate": 1.0494275649493616e-05, "loss": 0.0089, "step": 72300 }, { "epoch": 15.84, "learning_rate": 1.0466754733597535e-05, "loss": 0.0077, "step": 72350 }, { "epoch": 15.85, "learning_rate": 1.0439233817701453e-05, "loss": 0.0099, "step": 72400 }, { "epoch": 15.86, "learning_rate": 1.0411712901805372e-05, "loss": 0.0078, "step": 72450 }, { "epoch": 15.87, "learning_rate": 1.0384191985909291e-05, "loss": 0.0092, "step": 72500 }, { "epoch": 15.87, "eval_acc": 0.6631565261748367, "eval_cer": 0.03487912384360286, "eval_loss": 0.2682670056819916, "eval_runtime": 2406.0992, "eval_samples_per_second": 3.197, "eval_steps_per_second": 0.4, "step": 72500 }, { "epoch": 15.88, "learning_rate": 1.035667107001321e-05, "loss": 0.0075, "step": 72550 }, { "epoch": 15.9, "learning_rate": 1.032915015411713e-05, "loss": 0.0071, "step": 72600 }, { "epoch": 15.91, "learning_rate": 1.0301629238221048e-05, "loss": 0.0083, "step": 72650 }, { "epoch": 15.92, "learning_rate": 1.0274108322324967e-05, "loss": 0.0104, "step": 72700 }, { "epoch": 15.93, "learning_rate": 1.0246587406428887e-05, "loss": 0.0069, "step": 72750 }, { "epoch": 15.94, "learning_rate": 1.0219066490532806e-05, "loss": 0.0078, "step": 72800 }, { "epoch": 15.95, "learning_rate": 1.0191545574636723e-05, "loss": 0.008, "step": 72850 }, { "epoch": 15.96, "learning_rate": 1.0164024658740644e-05, "loss": 0.0076, "step": 72900 }, { "epoch": 15.97, "learning_rate": 1.0136503742844563e-05, "loss": 0.0098, "step": 72950 }, { "epoch": 15.98, "learning_rate": 1.0108982826948482e-05, "loss": 0.0083, "step": 73000 }, { "epoch": 15.98, "eval_acc": 0.6601664065704411, "eval_cer": 0.035355093573788486, "eval_loss": 0.26840099692344666, "eval_runtime": 2406.8352, "eval_samples_per_second": 3.196, "eval_steps_per_second": 0.4, "step": 73000 }, { "epoch": 15.99, "learning_rate": 1.00814619110524e-05, "loss": 0.0075, "step": 73050 }, { "epoch": 16.01, "learning_rate": 1.005394099515632e-05, "loss": 0.0073, "step": 73100 }, { "epoch": 16.02, "learning_rate": 1.0026420079260238e-05, "loss": 0.0059, "step": 73150 }, { "epoch": 16.03, "learning_rate": 9.998899163364157e-06, "loss": 0.005, "step": 73200 }, { "epoch": 16.04, "learning_rate": 9.971378247468076e-06, "loss": 0.0049, "step": 73250 }, { "epoch": 16.05, "learning_rate": 9.943857331571996e-06, "loss": 0.007, "step": 73300 }, { "epoch": 16.06, "learning_rate": 9.916336415675914e-06, "loss": 0.0125, "step": 73350 }, { "epoch": 16.07, "learning_rate": 9.888815499779832e-06, "loss": 0.0051, "step": 73400 }, { "epoch": 16.08, "learning_rate": 9.861294583883753e-06, "loss": 0.0048, "step": 73450 }, { "epoch": 16.09, "learning_rate": 9.833773667987672e-06, "loss": 0.0066, "step": 73500 }, { "epoch": 16.09, "eval_acc": 0.6582163285675745, "eval_cer": 0.03471323211353816, "eval_loss": 0.2682417035102844, "eval_runtime": 2398.8589, "eval_samples_per_second": 3.207, "eval_steps_per_second": 0.401, "step": 73500 }, { "epoch": 16.1, "learning_rate": 9.806803170409513e-06, "loss": 0.0059, "step": 73550 }, { "epoch": 16.11, "learning_rate": 9.77928225451343e-06, "loss": 0.0056, "step": 73600 }, { "epoch": 16.13, "learning_rate": 9.751761338617349e-06, "loss": 0.0053, "step": 73650 }, { "epoch": 16.14, "learning_rate": 9.72424042272127e-06, "loss": 0.0055, "step": 73700 }, { "epoch": 16.15, "learning_rate": 9.696719506825188e-06, "loss": 0.006, "step": 73750 }, { "epoch": 16.16, "learning_rate": 9.669198590929107e-06, "loss": 0.0063, "step": 73800 }, { "epoch": 16.17, "learning_rate": 9.641677675033026e-06, "loss": 0.0059, "step": 73850 }, { "epoch": 16.18, "learning_rate": 9.614156759136945e-06, "loss": 0.0051, "step": 73900 }, { "epoch": 16.19, "learning_rate": 9.586635843240863e-06, "loss": 0.0048, "step": 73950 }, { "epoch": 16.2, "learning_rate": 9.559114927344782e-06, "loss": 0.0052, "step": 74000 }, { "epoch": 16.2, "eval_acc": 0.6578263129670012, "eval_cer": 0.03493338749362402, "eval_loss": 0.2687562108039856, "eval_runtime": 2401.8685, "eval_samples_per_second": 3.203, "eval_steps_per_second": 0.401, "step": 74000 }, { "epoch": 16.21, "learning_rate": 9.531594011448701e-06, "loss": 0.0052, "step": 74050 }, { "epoch": 16.22, "learning_rate": 9.50407309555262e-06, "loss": 0.0056, "step": 74100 }, { "epoch": 16.24, "learning_rate": 9.476552179656539e-06, "loss": 0.0052, "step": 74150 }, { "epoch": 16.25, "learning_rate": 9.449031263760458e-06, "loss": 0.0052, "step": 74200 }, { "epoch": 16.26, "learning_rate": 9.421510347864378e-06, "loss": 0.0052, "step": 74250 }, { "epoch": 16.27, "learning_rate": 9.393989431968296e-06, "loss": 0.0051, "step": 74300 }, { "epoch": 16.28, "learning_rate": 9.366468516072216e-06, "loss": 0.0055, "step": 74350 }, { "epoch": 16.29, "learning_rate": 9.338947600176135e-06, "loss": 0.0051, "step": 74400 }, { "epoch": 16.3, "learning_rate": 9.311426684280052e-06, "loss": 0.005, "step": 74450 }, { "epoch": 16.31, "learning_rate": 9.283905768383973e-06, "loss": 0.0048, "step": 74500 }, { "epoch": 16.31, "eval_acc": 0.6605564221710145, "eval_cer": 0.03437834787340756, "eval_loss": 0.26815786957740784, "eval_runtime": 2399.2403, "eval_samples_per_second": 3.206, "eval_steps_per_second": 0.401, "step": 74500 }, { "epoch": 16.32, "learning_rate": 9.256384852487892e-06, "loss": 0.0048, "step": 74550 }, { "epoch": 16.33, "learning_rate": 9.22886393659181e-06, "loss": 0.0054, "step": 74600 }, { "epoch": 16.34, "learning_rate": 9.20134302069573e-06, "loss": 0.0048, "step": 74650 }, { "epoch": 16.36, "learning_rate": 9.173822104799648e-06, "loss": 0.005, "step": 74700 }, { "epoch": 16.37, "learning_rate": 9.146301188903567e-06, "loss": 0.0058, "step": 74750 }, { "epoch": 16.38, "learning_rate": 9.118780273007486e-06, "loss": 0.0051, "step": 74800 }, { "epoch": 16.39, "learning_rate": 9.091259357111405e-06, "loss": 0.0053, "step": 74850 }, { "epoch": 16.4, "learning_rate": 9.063738441215324e-06, "loss": 0.0062, "step": 74900 }, { "epoch": 16.41, "learning_rate": 9.036217525319244e-06, "loss": 0.0046, "step": 74950 }, { "epoch": 16.42, "learning_rate": 9.008696609423161e-06, "loss": 0.0048, "step": 75000 }, { "epoch": 16.42, "eval_acc": 0.6596463857696767, "eval_cer": 0.03411323118330416, "eval_loss": 0.2690982222557068, "eval_runtime": 2398.8209, "eval_samples_per_second": 3.207, "eval_steps_per_second": 0.401, "step": 75000 }, { "epoch": 16.43, "learning_rate": 8.981175693527082e-06, "loss": 0.0052, "step": 75050 }, { "epoch": 16.44, "learning_rate": 8.953654777631e-06, "loss": 0.0055, "step": 75100 }, { "epoch": 16.45, "learning_rate": 8.926133861734918e-06, "loss": 0.0062, "step": 75150 }, { "epoch": 16.47, "learning_rate": 8.898612945838838e-06, "loss": 0.0051, "step": 75200 }, { "epoch": 16.48, "learning_rate": 8.871092029942757e-06, "loss": 0.0058, "step": 75250 }, { "epoch": 16.49, "learning_rate": 8.843571114046676e-06, "loss": 0.0056, "step": 75300 }, { "epoch": 16.5, "learning_rate": 8.816050198150595e-06, "loss": 0.0047, "step": 75350 }, { "epoch": 16.51, "learning_rate": 8.788529282254514e-06, "loss": 0.0061, "step": 75400 }, { "epoch": 16.52, "learning_rate": 8.761008366358433e-06, "loss": 0.0056, "step": 75450 }, { "epoch": 16.53, "learning_rate": 8.733487450462352e-06, "loss": 0.0056, "step": 75500 }, { "epoch": 16.53, "eval_acc": 0.6636765469756011, "eval_cer": 0.03454578999347286, "eval_loss": 0.2687157988548279, "eval_runtime": 2411.9433, "eval_samples_per_second": 3.189, "eval_steps_per_second": 0.399, "step": 75500 }, { "epoch": 16.54, "learning_rate": 8.70596653456627e-06, "loss": 0.0053, "step": 75550 }, { "epoch": 16.55, "learning_rate": 8.678445618670191e-06, "loss": 0.0051, "step": 75600 }, { "epoch": 16.56, "learning_rate": 8.650924702774108e-06, "loss": 0.0047, "step": 75650 }, { "epoch": 16.57, "learning_rate": 8.623403786878027e-06, "loss": 0.0056, "step": 75700 }, { "epoch": 16.59, "learning_rate": 8.595882870981948e-06, "loss": 0.0059, "step": 75750 }, { "epoch": 16.6, "learning_rate": 8.568361955085866e-06, "loss": 0.0057, "step": 75800 }, { "epoch": 16.61, "learning_rate": 8.540841039189784e-06, "loss": 0.0061, "step": 75850 }, { "epoch": 16.62, "learning_rate": 8.513320123293704e-06, "loss": 0.0047, "step": 75900 }, { "epoch": 16.63, "learning_rate": 8.485799207397623e-06, "loss": 0.0048, "step": 75950 }, { "epoch": 16.64, "learning_rate": 8.45827829150154e-06, "loss": 0.0058, "step": 76000 }, { "epoch": 16.64, "eval_acc": 0.6631565261748367, "eval_cer": 0.034531836483467415, "eval_loss": 0.2691940367221832, "eval_runtime": 2404.9494, "eval_samples_per_second": 3.198, "eval_steps_per_second": 0.4, "step": 76000 }, { "epoch": 16.65, "learning_rate": 8.43075737560546e-06, "loss": 0.0049, "step": 76050 }, { "epoch": 16.66, "learning_rate": 8.40323645970938e-06, "loss": 0.0049, "step": 76100 }, { "epoch": 16.67, "learning_rate": 8.375715543813298e-06, "loss": 0.0053, "step": 76150 }, { "epoch": 16.68, "learning_rate": 8.348194627917217e-06, "loss": 0.0057, "step": 76200 }, { "epoch": 16.69, "learning_rate": 8.320673712021136e-06, "loss": 0.0046, "step": 76250 }, { "epoch": 16.71, "learning_rate": 8.293152796125057e-06, "loss": 0.0048, "step": 76300 }, { "epoch": 16.72, "learning_rate": 8.265631880228974e-06, "loss": 0.0044, "step": 76350 }, { "epoch": 16.73, "learning_rate": 8.238110964332893e-06, "loss": 0.0056, "step": 76400 }, { "epoch": 16.74, "learning_rate": 8.210590048436813e-06, "loss": 0.0047, "step": 76450 }, { "epoch": 16.75, "learning_rate": 8.18306913254073e-06, "loss": 0.0052, "step": 76500 }, { "epoch": 16.75, "eval_acc": 0.6653666145780854, "eval_cer": 0.033903928533222535, "eval_loss": 0.26831379532814026, "eval_runtime": 2408.609, "eval_samples_per_second": 3.194, "eval_steps_per_second": 0.399, "step": 76500 }, { "epoch": 16.76, "learning_rate": 8.15554821664465e-06, "loss": 0.0053, "step": 76550 }, { "epoch": 16.77, "learning_rate": 8.12802730074857e-06, "loss": 0.0047, "step": 76600 }, { "epoch": 16.78, "learning_rate": 8.100506384852489e-06, "loss": 0.0055, "step": 76650 }, { "epoch": 16.79, "learning_rate": 8.072985468956406e-06, "loss": 0.0045, "step": 76700 }, { "epoch": 16.8, "learning_rate": 8.045464553060326e-06, "loss": 0.0068, "step": 76750 }, { "epoch": 16.82, "learning_rate": 8.017943637164245e-06, "loss": 0.0069, "step": 76800 }, { "epoch": 16.83, "learning_rate": 7.990422721268164e-06, "loss": 0.0057, "step": 76850 }, { "epoch": 16.84, "learning_rate": 7.962901805372083e-06, "loss": 0.0059, "step": 76900 }, { "epoch": 16.85, "learning_rate": 7.935380889476002e-06, "loss": 0.0056, "step": 76950 }, { "epoch": 16.86, "learning_rate": 7.907859973579922e-06, "loss": 0.0046, "step": 77000 }, { "epoch": 16.86, "eval_acc": 0.6649765989775122, "eval_cer": 0.034423309183425094, "eval_loss": 0.2681449055671692, "eval_runtime": 2406.5322, "eval_samples_per_second": 3.196, "eval_steps_per_second": 0.4, "step": 77000 }, { "epoch": 16.87, "learning_rate": 7.88033905768384e-06, "loss": 0.0053, "step": 77050 }, { "epoch": 16.88, "learning_rate": 7.852818141787758e-06, "loss": 0.0048, "step": 77100 }, { "epoch": 16.89, "learning_rate": 7.825297225891679e-06, "loss": 0.0061, "step": 77150 }, { "epoch": 16.9, "learning_rate": 7.797776309995596e-06, "loss": 0.0051, "step": 77200 }, { "epoch": 16.91, "learning_rate": 7.770255394099515e-06, "loss": 0.0047, "step": 77250 }, { "epoch": 16.92, "learning_rate": 7.742734478203436e-06, "loss": 0.006, "step": 77300 }, { "epoch": 16.94, "learning_rate": 7.715213562307354e-06, "loss": 0.0049, "step": 77350 }, { "epoch": 16.95, "learning_rate": 7.687692646411273e-06, "loss": 0.0055, "step": 77400 }, { "epoch": 16.96, "learning_rate": 7.660171730515192e-06, "loss": 0.0053, "step": 77450 }, { "epoch": 16.97, "learning_rate": 7.632650814619111e-06, "loss": 0.0046, "step": 77500 }, { "epoch": 16.97, "eval_acc": 0.6622464897734989, "eval_cer": 0.034120983133307185, "eval_loss": 0.26827627420425415, "eval_runtime": 2408.0188, "eval_samples_per_second": 3.194, "eval_steps_per_second": 0.399, "step": 77500 }, { "epoch": 16.98, "learning_rate": 7.605129898723031e-06, "loss": 0.0051, "step": 77550 }, { "epoch": 16.99, "learning_rate": 7.577608982826949e-06, "loss": 0.0067, "step": 77600 }, { "epoch": 17.0, "learning_rate": 7.550088066930868e-06, "loss": 0.0043, "step": 77650 }, { "epoch": 17.01, "learning_rate": 7.522567151034787e-06, "loss": 0.0042, "step": 77700 }, { "epoch": 17.02, "learning_rate": 7.495046235138705e-06, "loss": 0.0042, "step": 77750 }, { "epoch": 17.03, "learning_rate": 7.467525319242624e-06, "loss": 0.0039, "step": 77800 }, { "epoch": 17.05, "learning_rate": 7.440004403346544e-06, "loss": 0.0038, "step": 77850 }, { "epoch": 17.06, "learning_rate": 7.412483487450463e-06, "loss": 0.0041, "step": 77900 }, { "epoch": 17.07, "learning_rate": 7.384962571554381e-06, "loss": 0.0036, "step": 77950 }, { "epoch": 17.08, "learning_rate": 7.3574416556583005e-06, "loss": 0.0037, "step": 78000 }, { "epoch": 17.08, "eval_acc": 0.6625065001738811, "eval_cer": 0.03422951043334951, "eval_loss": 0.26783066987991333, "eval_runtime": 2413.0451, "eval_samples_per_second": 3.188, "eval_steps_per_second": 0.399, "step": 78000 }, { "epoch": 17.09, "learning_rate": 7.329920739762219e-06, "loss": 0.0043, "step": 78050 }, { "epoch": 17.1, "learning_rate": 7.302399823866139e-06, "loss": 0.0033, "step": 78100 }, { "epoch": 17.11, "learning_rate": 7.274878907970058e-06, "loss": 0.0029, "step": 78150 }, { "epoch": 17.12, "learning_rate": 7.247357992073976e-06, "loss": 0.0036, "step": 78200 }, { "epoch": 17.13, "learning_rate": 7.219837076177896e-06, "loss": 0.0033, "step": 78250 }, { "epoch": 17.14, "learning_rate": 7.1923161602818145e-06, "loss": 0.0037, "step": 78300 }, { "epoch": 17.15, "learning_rate": 7.164795244385733e-06, "loss": 0.0034, "step": 78350 }, { "epoch": 17.17, "learning_rate": 7.137274328489653e-06, "loss": 0.0031, "step": 78400 }, { "epoch": 17.18, "learning_rate": 7.109753412593571e-06, "loss": 0.0034, "step": 78450 }, { "epoch": 17.19, "learning_rate": 7.08223249669749e-06, "loss": 0.0038, "step": 78500 }, { "epoch": 17.19, "eval_acc": 0.6656266249784677, "eval_cer": 0.03411323118330416, "eval_loss": 0.2689207196235657, "eval_runtime": 2411.1236, "eval_samples_per_second": 3.19, "eval_steps_per_second": 0.399, "step": 78500 }, { "epoch": 17.2, "learning_rate": 7.05471158080141e-06, "loss": 0.0038, "step": 78550 }, { "epoch": 17.21, "learning_rate": 7.0271906649053286e-06, "loss": 0.0037, "step": 78600 }, { "epoch": 17.22, "learning_rate": 6.999669749009248e-06, "loss": 0.004, "step": 78650 }, { "epoch": 17.23, "learning_rate": 6.972148833113166e-06, "loss": 0.0038, "step": 78700 }, { "epoch": 17.24, "learning_rate": 6.944627917217085e-06, "loss": 0.0032, "step": 78750 }, { "epoch": 17.25, "learning_rate": 6.917107001321005e-06, "loss": 0.004, "step": 78800 }, { "epoch": 17.26, "learning_rate": 6.889586085424924e-06, "loss": 0.003, "step": 78850 }, { "epoch": 17.28, "learning_rate": 6.862065169528842e-06, "loss": 0.004, "step": 78900 }, { "epoch": 17.29, "learning_rate": 6.8345442536327614e-06, "loss": 0.0035, "step": 78950 }, { "epoch": 17.3, "learning_rate": 6.80702333773668e-06, "loss": 0.004, "step": 79000 }, { "epoch": 17.3, "eval_acc": 0.6641965677763655, "eval_cer": 0.033806253963184436, "eval_loss": 0.26872220635414124, "eval_runtime": 2412.878, "eval_samples_per_second": 3.188, "eval_steps_per_second": 0.399, "step": 79000 }, { "epoch": 17.31, "learning_rate": 6.779502421840598e-06, "loss": 0.0033, "step": 79050 }, { "epoch": 17.32, "learning_rate": 6.751981505944519e-06, "loss": 0.0033, "step": 79100 }, { "epoch": 17.33, "learning_rate": 6.724460590048437e-06, "loss": 0.0038, "step": 79150 }, { "epoch": 17.34, "learning_rate": 6.696939674152356e-06, "loss": 0.004, "step": 79200 }, { "epoch": 17.35, "learning_rate": 6.6694187582562754e-06, "loss": 0.004, "step": 79250 }, { "epoch": 17.36, "learning_rate": 6.6418978423601935e-06, "loss": 0.0039, "step": 79300 }, { "epoch": 17.37, "learning_rate": 6.614376926464114e-06, "loss": 0.0043, "step": 79350 }, { "epoch": 17.38, "learning_rate": 6.587406428885953e-06, "loss": 0.0041, "step": 79400 }, { "epoch": 17.4, "learning_rate": 6.559885512989873e-06, "loss": 0.0035, "step": 79450 }, { "epoch": 17.41, "learning_rate": 6.532364597093792e-06, "loss": 0.004, "step": 79500 }, { "epoch": 17.41, "eval_acc": 0.664846593777321, "eval_cer": 0.03358609858309858, "eval_loss": 0.26849421858787537, "eval_runtime": 2409.5547, "eval_samples_per_second": 3.192, "eval_steps_per_second": 0.399, "step": 79500 }, { "epoch": 17.42, "learning_rate": 6.50484368119771e-06, "loss": 0.0034, "step": 79550 }, { "epoch": 17.43, "learning_rate": 6.47732276530163e-06, "loss": 0.0036, "step": 79600 }, { "epoch": 17.44, "learning_rate": 6.449801849405548e-06, "loss": 0.0037, "step": 79650 }, { "epoch": 17.45, "learning_rate": 6.422280933509467e-06, "loss": 0.0031, "step": 79700 }, { "epoch": 17.46, "learning_rate": 6.394760017613387e-06, "loss": 0.0035, "step": 79750 }, { "epoch": 17.47, "learning_rate": 6.367239101717305e-06, "loss": 0.0039, "step": 79800 }, { "epoch": 17.48, "learning_rate": 6.339718185821225e-06, "loss": 0.0036, "step": 79850 }, { "epoch": 17.49, "learning_rate": 6.312197269925143e-06, "loss": 0.004, "step": 79900 }, { "epoch": 17.51, "learning_rate": 6.284676354029062e-06, "loss": 0.0037, "step": 79950 }, { "epoch": 17.52, "learning_rate": 6.257155438132982e-06, "loss": 0.0037, "step": 80000 }, { "epoch": 17.52, "eval_acc": 0.6618564741729256, "eval_cer": 0.03385431605320318, "eval_loss": 0.26905256509780884, "eval_runtime": 2409.5418, "eval_samples_per_second": 3.192, "eval_steps_per_second": 0.399, "step": 80000 }, { "epoch": 17.53, "learning_rate": 6.229634522236901e-06, "loss": 0.0041, "step": 80050 }, { "epoch": 17.54, "learning_rate": 6.20211360634082e-06, "loss": 0.0036, "step": 80100 }, { "epoch": 17.55, "learning_rate": 6.174592690444738e-06, "loss": 0.0035, "step": 80150 }, { "epoch": 17.56, "learning_rate": 6.147071774548657e-06, "loss": 0.0041, "step": 80200 }, { "epoch": 17.57, "learning_rate": 6.119550858652576e-06, "loss": 0.0037, "step": 80250 }, { "epoch": 17.58, "learning_rate": 6.092029942756495e-06, "loss": 0.0029, "step": 80300 }, { "epoch": 17.59, "learning_rate": 6.064509026860414e-06, "loss": 0.0034, "step": 80350 }, { "epoch": 17.6, "learning_rate": 6.036988110964333e-06, "loss": 0.0034, "step": 80400 }, { "epoch": 17.61, "learning_rate": 6.0094671950682526e-06, "loss": 0.0039, "step": 80450 }, { "epoch": 17.63, "learning_rate": 5.981946279172171e-06, "loss": 0.0036, "step": 80500 }, { "epoch": 17.63, "eval_acc": 0.6644565781767476, "eval_cer": 0.03345586582304779, "eval_loss": 0.26846277713775635, "eval_runtime": 2410.0084, "eval_samples_per_second": 3.192, "eval_steps_per_second": 0.399, "step": 80500 }, { "epoch": 17.64, "learning_rate": 5.95442536327609e-06, "loss": 0.0041, "step": 80550 }, { "epoch": 17.65, "learning_rate": 5.926904447380009e-06, "loss": 0.0033, "step": 80600 }, { "epoch": 17.66, "learning_rate": 5.899383531483928e-06, "loss": 0.0029, "step": 80650 }, { "epoch": 17.67, "learning_rate": 5.871862615587847e-06, "loss": 0.0047, "step": 80700 }, { "epoch": 17.68, "learning_rate": 5.844341699691766e-06, "loss": 0.0035, "step": 80750 }, { "epoch": 17.69, "learning_rate": 5.8168207837956854e-06, "loss": 0.0035, "step": 80800 }, { "epoch": 17.7, "learning_rate": 5.7892998678996035e-06, "loss": 0.0034, "step": 80850 }, { "epoch": 17.71, "learning_rate": 5.761778952003523e-06, "loss": 0.0045, "step": 80900 }, { "epoch": 17.72, "learning_rate": 5.734258036107442e-06, "loss": 0.0033, "step": 80950 }, { "epoch": 17.73, "learning_rate": 5.706737120211361e-06, "loss": 0.0041, "step": 81000 }, { "epoch": 17.73, "eval_acc": 0.666016640579041, "eval_cer": 0.03374578875316086, "eval_loss": 0.26800793409347534, "eval_runtime": 2410.4526, "eval_samples_per_second": 3.191, "eval_steps_per_second": 0.399, "step": 81000 }, { "epoch": 17.75, "learning_rate": 5.67921620431528e-06, "loss": 0.0033, "step": 81050 }, { "epoch": 17.76, "learning_rate": 5.651695288419199e-06, "loss": 0.0044, "step": 81100 }, { "epoch": 17.77, "learning_rate": 5.624174372523118e-06, "loss": 0.0035, "step": 81150 }, { "epoch": 17.78, "learning_rate": 5.596653456627036e-06, "loss": 0.0031, "step": 81200 }, { "epoch": 17.79, "learning_rate": 5.569132540730956e-06, "loss": 0.0039, "step": 81250 }, { "epoch": 17.8, "learning_rate": 5.541611624834875e-06, "loss": 0.0043, "step": 81300 }, { "epoch": 17.81, "learning_rate": 5.514090708938794e-06, "loss": 0.0035, "step": 81350 }, { "epoch": 17.82, "learning_rate": 5.486569793042713e-06, "loss": 0.0036, "step": 81400 }, { "epoch": 17.83, "learning_rate": 5.4590488771466315e-06, "loss": 0.004, "step": 81450 }, { "epoch": 17.84, "learning_rate": 5.431527961250551e-06, "loss": 0.0033, "step": 81500 }, { "epoch": 17.84, "eval_acc": 0.6654966197782766, "eval_cer": 0.03366361808312881, "eval_loss": 0.2681046426296234, "eval_runtime": 2411.6457, "eval_samples_per_second": 3.19, "eval_steps_per_second": 0.399, "step": 81500 }, { "epoch": 17.86, "learning_rate": 5.40400704535447e-06, "loss": 0.0043, "step": 81550 }, { "epoch": 17.87, "learning_rate": 5.376486129458388e-06, "loss": 0.004, "step": 81600 }, { "epoch": 17.88, "learning_rate": 5.348965213562308e-06, "loss": 0.0039, "step": 81650 }, { "epoch": 17.89, "learning_rate": 5.321444297666227e-06, "loss": 0.0036, "step": 81700 }, { "epoch": 17.9, "learning_rate": 5.2939233817701455e-06, "loss": 0.0031, "step": 81750 }, { "epoch": 17.91, "learning_rate": 5.266402465874064e-06, "loss": 0.0033, "step": 81800 }, { "epoch": 17.92, "learning_rate": 5.238881549977984e-06, "loss": 0.0036, "step": 81850 }, { "epoch": 17.93, "learning_rate": 5.211360634081903e-06, "loss": 0.0036, "step": 81900 }, { "epoch": 17.94, "learning_rate": 5.183839718185821e-06, "loss": 0.0044, "step": 81950 }, { "epoch": 17.95, "learning_rate": 5.156318802289741e-06, "loss": 0.0031, "step": 82000 }, { "epoch": 17.95, "eval_acc": 0.6632865313750277, "eval_cer": 0.03349772635306411, "eval_loss": 0.2679232358932495, "eval_runtime": 2407.004, "eval_samples_per_second": 3.196, "eval_steps_per_second": 0.4, "step": 82000 }, { "epoch": 17.96, "learning_rate": 5.1287978863936595e-06, "loss": 0.0033, "step": 82050 }, { "epoch": 17.98, "learning_rate": 5.101276970497578e-06, "loss": 0.009, "step": 82100 }, { "epoch": 17.99, "learning_rate": 5.073756054601497e-06, "loss": 0.0048, "step": 82150 }, { "epoch": 18.0, "learning_rate": 5.046235138705416e-06, "loss": 0.0032, "step": 82200 }, { "epoch": 18.01, "learning_rate": 5.018714222809336e-06, "loss": 0.0031, "step": 82250 }, { "epoch": 18.02, "learning_rate": 4.991193306913254e-06, "loss": 0.0027, "step": 82300 }, { "epoch": 18.03, "learning_rate": 4.9636723910171735e-06, "loss": 0.0028, "step": 82350 }, { "epoch": 18.04, "learning_rate": 4.936151475121092e-06, "loss": 0.0027, "step": 82400 }, { "epoch": 18.05, "learning_rate": 4.908630559225011e-06, "loss": 0.0028, "step": 82450 }, { "epoch": 18.06, "learning_rate": 4.88110964332893e-06, "loss": 0.0025, "step": 82500 }, { "epoch": 18.06, "eval_acc": 0.6674466977811432, "eval_cer": 0.03362175755311249, "eval_loss": 0.26769447326660156, "eval_runtime": 2413.9918, "eval_samples_per_second": 3.186, "eval_steps_per_second": 0.399, "step": 82500 }, { "epoch": 18.07, "learning_rate": 4.853588727432849e-06, "loss": 0.0026, "step": 82550 }, { "epoch": 18.09, "learning_rate": 4.826067811536769e-06, "loss": 0.0023, "step": 82600 }, { "epoch": 18.1, "learning_rate": 4.798546895640687e-06, "loss": 0.0025, "step": 82650 }, { "epoch": 18.11, "learning_rate": 4.771025979744606e-06, "loss": 0.0024, "step": 82700 }, { "epoch": 18.12, "learning_rate": 4.743505063848525e-06, "loss": 0.0024, "step": 82750 }, { "epoch": 18.13, "learning_rate": 4.715984147952444e-06, "loss": 0.0024, "step": 82800 }, { "epoch": 18.14, "learning_rate": 4.688463232056363e-06, "loss": 0.0026, "step": 82850 }, { "epoch": 18.15, "learning_rate": 4.660942316160282e-06, "loss": 0.0025, "step": 82900 }, { "epoch": 18.16, "learning_rate": 4.6334214002642015e-06, "loss": 0.0023, "step": 82950 }, { "epoch": 18.17, "learning_rate": 4.6059004843681195e-06, "loss": 0.003, "step": 83000 }, { "epoch": 18.17, "eval_acc": 0.6671866873807609, "eval_cer": 0.03333338501300002, "eval_loss": 0.2686313986778259, "eval_runtime": 2407.7179, "eval_samples_per_second": 3.195, "eval_steps_per_second": 0.4, "step": 83000 }, { "epoch": 18.18, "learning_rate": 4.578379568472039e-06, "loss": 0.0028, "step": 83050 }, { "epoch": 18.19, "learning_rate": 4.550858652575958e-06, "loss": 0.0027, "step": 83100 }, { "epoch": 18.21, "learning_rate": 4.523337736679877e-06, "loss": 0.0072, "step": 83150 }, { "epoch": 18.22, "learning_rate": 4.496367239101718e-06, "loss": 0.0022, "step": 83200 }, { "epoch": 18.23, "learning_rate": 4.468846323205637e-06, "loss": 0.0026, "step": 83250 }, { "epoch": 18.24, "learning_rate": 4.4413254073095555e-06, "loss": 0.0024, "step": 83300 }, { "epoch": 18.25, "learning_rate": 4.413804491413474e-06, "loss": 0.0024, "step": 83350 }, { "epoch": 18.26, "learning_rate": 4.386283575517393e-06, "loss": 0.0028, "step": 83400 }, { "epoch": 18.27, "learning_rate": 4.358762659621313e-06, "loss": 0.0026, "step": 83450 }, { "epoch": 18.28, "learning_rate": 4.331241743725231e-06, "loss": 0.0025, "step": 83500 }, { "epoch": 18.28, "eval_acc": 0.667316692580952, "eval_cer": 0.03348532323305928, "eval_loss": 0.26833412051200867, "eval_runtime": 2412.3066, "eval_samples_per_second": 3.189, "eval_steps_per_second": 0.399, "step": 83500 }, { "epoch": 18.29, "learning_rate": 4.303720827829151e-06, "loss": 0.0025, "step": 83550 }, { "epoch": 18.3, "learning_rate": 4.2761999119330695e-06, "loss": 0.0029, "step": 83600 }, { "epoch": 18.32, "learning_rate": 4.248678996036988e-06, "loss": 0.0025, "step": 83650 }, { "epoch": 18.33, "learning_rate": 4.221158080140907e-06, "loss": 0.0029, "step": 83700 }, { "epoch": 18.34, "learning_rate": 4.193637164244826e-06, "loss": 0.0026, "step": 83750 }, { "epoch": 18.35, "learning_rate": 4.166116248348746e-06, "loss": 0.0028, "step": 83800 }, { "epoch": 18.36, "learning_rate": 4.138595332452664e-06, "loss": 0.0028, "step": 83850 }, { "epoch": 18.37, "learning_rate": 4.1110744165565835e-06, "loss": 0.0023, "step": 83900 }, { "epoch": 18.38, "learning_rate": 4.083553500660502e-06, "loss": 0.0027, "step": 83950 }, { "epoch": 18.39, "learning_rate": 4.056032584764421e-06, "loss": 0.0028, "step": 84000 }, { "epoch": 18.39, "eval_acc": 0.6664066561796143, "eval_cer": 0.03336904398301393, "eval_loss": 0.26911476254463196, "eval_runtime": 2403.7066, "eval_samples_per_second": 3.2, "eval_steps_per_second": 0.4, "step": 84000 }, { "epoch": 18.4, "learning_rate": 4.02851166886834e-06, "loss": 0.0032, "step": 84050 }, { "epoch": 18.41, "learning_rate": 4.000990752972259e-06, "loss": 0.0026, "step": 84100 }, { "epoch": 18.42, "learning_rate": 3.973469837076179e-06, "loss": 0.0029, "step": 84150 }, { "epoch": 18.44, "learning_rate": 3.945948921180097e-06, "loss": 0.0028, "step": 84200 }, { "epoch": 18.45, "learning_rate": 3.918428005284016e-06, "loss": 0.0023, "step": 84250 }, { "epoch": 18.46, "learning_rate": 3.890907089387935e-06, "loss": 0.0021, "step": 84300 }, { "epoch": 18.47, "learning_rate": 3.863386173491854e-06, "loss": 0.0028, "step": 84350 }, { "epoch": 18.48, "learning_rate": 3.835865257595773e-06, "loss": 0.0025, "step": 84400 }, { "epoch": 18.49, "learning_rate": 3.808344341699692e-06, "loss": 0.0027, "step": 84450 }, { "epoch": 18.5, "learning_rate": 3.780823425803611e-06, "loss": 0.0024, "step": 84500 }, { "epoch": 18.5, "eval_acc": 0.6657566301786587, "eval_cer": 0.03334578813300486, "eval_loss": 0.2689560651779175, "eval_runtime": 2404.1767, "eval_samples_per_second": 3.199, "eval_steps_per_second": 0.4, "step": 84500 }, { "epoch": 18.51, "learning_rate": 3.7533025099075295e-06, "loss": 0.0024, "step": 84550 }, { "epoch": 18.52, "learning_rate": 3.725781594011449e-06, "loss": 0.003, "step": 84600 }, { "epoch": 18.53, "learning_rate": 3.698260678115368e-06, "loss": 0.0031, "step": 84650 }, { "epoch": 18.55, "learning_rate": 3.670739762219287e-06, "loss": 0.0024, "step": 84700 }, { "epoch": 18.56, "learning_rate": 3.6432188463232054e-06, "loss": 0.0025, "step": 84750 }, { "epoch": 18.57, "learning_rate": 3.6156979304271247e-06, "loss": 0.0023, "step": 84800 }, { "epoch": 18.58, "learning_rate": 3.588177014531044e-06, "loss": 0.0027, "step": 84850 }, { "epoch": 18.59, "learning_rate": 3.5606560986349624e-06, "loss": 0.0022, "step": 84900 }, { "epoch": 18.6, "learning_rate": 3.5331351827388817e-06, "loss": 0.0029, "step": 84950 }, { "epoch": 18.61, "learning_rate": 3.5056142668428006e-06, "loss": 0.0027, "step": 85000 }, { "epoch": 18.61, "eval_acc": 0.6675767029813342, "eval_cer": 0.033201601862948626, "eval_loss": 0.2690303921699524, "eval_runtime": 2408.5181, "eval_samples_per_second": 3.194, "eval_steps_per_second": 0.399, "step": 85000 }, { "epoch": 18.62, "learning_rate": 3.47809335094672e-06, "loss": 0.0028, "step": 85050 }, { "epoch": 18.63, "learning_rate": 3.4505724350506383e-06, "loss": 0.0031, "step": 85100 }, { "epoch": 18.64, "learning_rate": 3.4230515191545576e-06, "loss": 0.0026, "step": 85150 }, { "epoch": 18.65, "learning_rate": 3.395530603258477e-06, "loss": 0.0028, "step": 85200 }, { "epoch": 18.67, "learning_rate": 3.3680096873623957e-06, "loss": 0.0029, "step": 85250 }, { "epoch": 18.68, "learning_rate": 3.3404887714663146e-06, "loss": 0.0028, "step": 85300 }, { "epoch": 18.69, "learning_rate": 3.3129678555702334e-06, "loss": 0.0026, "step": 85350 }, { "epoch": 18.7, "learning_rate": 3.2854469396741527e-06, "loss": 0.0025, "step": 85400 }, { "epoch": 18.71, "learning_rate": 3.257926023778071e-06, "loss": 0.0028, "step": 85450 }, { "epoch": 18.72, "learning_rate": 3.2304051078819904e-06, "loss": 0.003, "step": 85500 }, { "epoch": 18.72, "eval_acc": 0.6682267289822897, "eval_cer": 0.033145787822926855, "eval_loss": 0.26845839619636536, "eval_runtime": 2403.9703, "eval_samples_per_second": 3.2, "eval_steps_per_second": 0.4, "step": 85500 }, { "epoch": 18.73, "learning_rate": 3.2028841919859097e-06, "loss": 0.0025, "step": 85550 }, { "epoch": 18.74, "learning_rate": 3.1753632760898286e-06, "loss": 0.0029, "step": 85600 }, { "epoch": 18.75, "learning_rate": 3.147842360193747e-06, "loss": 0.0032, "step": 85650 }, { "epoch": 18.76, "learning_rate": 3.1203214442976663e-06, "loss": 0.0023, "step": 85700 }, { "epoch": 18.77, "learning_rate": 3.092800528401585e-06, "loss": 0.0028, "step": 85750 }, { "epoch": 18.79, "learning_rate": 3.0652796125055045e-06, "loss": 0.0028, "step": 85800 }, { "epoch": 18.8, "learning_rate": 3.0377586966094233e-06, "loss": 0.0028, "step": 85850 }, { "epoch": 18.81, "learning_rate": 3.010237780713342e-06, "loss": 0.0027, "step": 85900 }, { "epoch": 18.82, "learning_rate": 2.982716864817261e-06, "loss": 0.0027, "step": 85950 }, { "epoch": 18.83, "learning_rate": 2.9551959489211803e-06, "loss": 0.0023, "step": 86000 }, { "epoch": 18.83, "eval_acc": 0.6674466977811432, "eval_cer": 0.033398501393025415, "eval_loss": 0.2689669132232666, "eval_runtime": 2404.0292, "eval_samples_per_second": 3.2, "eval_steps_per_second": 0.4, "step": 86000 }, { "epoch": 18.84, "learning_rate": 2.927675033025099e-06, "loss": 0.003, "step": 86050 }, { "epoch": 18.85, "learning_rate": 2.9001541171290185e-06, "loss": 0.0025, "step": 86100 }, { "epoch": 18.86, "learning_rate": 2.8726332012329373e-06, "loss": 0.0024, "step": 86150 }, { "epoch": 18.87, "learning_rate": 2.845112285336856e-06, "loss": 0.0022, "step": 86200 }, { "epoch": 18.88, "learning_rate": 2.817591369440775e-06, "loss": 0.0028, "step": 86250 }, { "epoch": 18.9, "learning_rate": 2.790070453544694e-06, "loss": 0.0033, "step": 86300 }, { "epoch": 18.91, "learning_rate": 2.762549537648613e-06, "loss": 0.0027, "step": 86350 }, { "epoch": 18.92, "learning_rate": 2.735579040070454e-06, "loss": 0.0025, "step": 86400 }, { "epoch": 18.93, "learning_rate": 2.708058124174373e-06, "loss": 0.0025, "step": 86450 }, { "epoch": 18.94, "learning_rate": 2.6805372082782913e-06, "loss": 0.0032, "step": 86500 }, { "epoch": 18.94, "eval_acc": 0.6691367653836275, "eval_cer": 0.03319850108294742, "eval_loss": 0.2692432105541229, "eval_runtime": 2405.0684, "eval_samples_per_second": 3.198, "eval_steps_per_second": 0.4, "step": 86500 }, { "epoch": 18.95, "learning_rate": 2.6530162923822106e-06, "loss": 0.0023, "step": 86550 }, { "epoch": 18.96, "learning_rate": 2.6254953764861294e-06, "loss": 0.0025, "step": 86600 }, { "epoch": 18.97, "learning_rate": 2.5979744605900487e-06, "loss": 0.0025, "step": 86650 }, { "epoch": 18.98, "learning_rate": 2.5704535446939676e-06, "loss": 0.0028, "step": 86700 }, { "epoch": 18.99, "learning_rate": 2.5429326287978864e-06, "loss": 0.0023, "step": 86750 }, { "epoch": 19.0, "learning_rate": 2.5154117129018053e-06, "loss": 0.0028, "step": 86800 }, { "epoch": 19.02, "learning_rate": 2.4878907970057246e-06, "loss": 0.0019, "step": 86850 }, { "epoch": 19.03, "learning_rate": 2.4603698811096434e-06, "loss": 0.0019, "step": 86900 }, { "epoch": 19.04, "learning_rate": 2.4328489652135623e-06, "loss": 0.0025, "step": 86950 }, { "epoch": 19.05, "learning_rate": 2.4053280493174816e-06, "loss": 0.0016, "step": 87000 }, { "epoch": 19.05, "eval_acc": 0.6690067601834364, "eval_cer": 0.03287291918282044, "eval_loss": 0.2694932222366333, "eval_runtime": 2407.3344, "eval_samples_per_second": 3.195, "eval_steps_per_second": 0.4, "step": 87000 }, { "epoch": 19.06, "learning_rate": 2.3778071334214004e-06, "loss": 0.0018, "step": 87050 }, { "epoch": 19.07, "learning_rate": 2.3502862175253193e-06, "loss": 0.0017, "step": 87100 }, { "epoch": 19.08, "learning_rate": 2.322765301629238e-06, "loss": 0.0021, "step": 87150 }, { "epoch": 19.09, "learning_rate": 2.2952443857331574e-06, "loss": 0.0021, "step": 87200 }, { "epoch": 19.1, "learning_rate": 2.2677234698370763e-06, "loss": 0.0022, "step": 87250 }, { "epoch": 19.11, "learning_rate": 2.2402025539409956e-06, "loss": 0.0021, "step": 87300 }, { "epoch": 19.13, "learning_rate": 2.2126816380449144e-06, "loss": 0.0023, "step": 87350 }, { "epoch": 19.14, "learning_rate": 2.185160722148833e-06, "loss": 0.0018, "step": 87400 }, { "epoch": 19.15, "learning_rate": 2.157639806252752e-06, "loss": 0.0017, "step": 87450 }, { "epoch": 19.16, "learning_rate": 2.130118890356671e-06, "loss": 0.0022, "step": 87500 }, { "epoch": 19.16, "eval_acc": 0.6675767029813342, "eval_cer": 0.03311012885291295, "eval_loss": 0.26990965008735657, "eval_runtime": 2408.5917, "eval_samples_per_second": 3.194, "eval_steps_per_second": 0.399, "step": 87500 }, { "epoch": 19.17, "learning_rate": 2.1025979744605903e-06, "loss": 0.0021, "step": 87550 }, { "epoch": 19.18, "learning_rate": 2.075077058564509e-06, "loss": 0.0023, "step": 87600 }, { "epoch": 19.19, "learning_rate": 2.047556142668428e-06, "loss": 0.0021, "step": 87650 }, { "epoch": 19.2, "learning_rate": 2.020035226772347e-06, "loss": 0.0022, "step": 87700 }, { "epoch": 19.21, "learning_rate": 1.9925143108762658e-06, "loss": 0.0022, "step": 87750 }, { "epoch": 19.22, "learning_rate": 1.964993394980185e-06, "loss": 0.0022, "step": 87800 }, { "epoch": 19.23, "learning_rate": 1.937472479084104e-06, "loss": 0.0022, "step": 87850 }, { "epoch": 19.25, "learning_rate": 1.909951563188023e-06, "loss": 0.0019, "step": 87900 }, { "epoch": 19.26, "learning_rate": 1.8824306472919418e-06, "loss": 0.002, "step": 87950 }, { "epoch": 19.27, "learning_rate": 1.8549097313958611e-06, "loss": 0.002, "step": 88000 }, { "epoch": 19.27, "eval_acc": 0.6703068121853475, "eval_cer": 0.03311322963291416, "eval_loss": 0.2691878378391266, "eval_runtime": 2407.6924, "eval_samples_per_second": 3.195, "eval_steps_per_second": 0.4, "step": 88000 }, { "epoch": 19.28, "learning_rate": 1.8273888154997798e-06, "loss": 0.0023, "step": 88050 }, { "epoch": 19.29, "learning_rate": 1.799867899603699e-06, "loss": 0.0023, "step": 88100 }, { "epoch": 19.3, "learning_rate": 1.772346983707618e-06, "loss": 0.0022, "step": 88150 }, { "epoch": 19.31, "learning_rate": 1.7448260678115368e-06, "loss": 0.0019, "step": 88200 }, { "epoch": 19.32, "learning_rate": 1.7173051519154558e-06, "loss": 0.0021, "step": 88250 }, { "epoch": 19.33, "learning_rate": 1.6897842360193747e-06, "loss": 0.0019, "step": 88300 }, { "epoch": 19.34, "learning_rate": 1.6622633201232938e-06, "loss": 0.0022, "step": 88350 }, { "epoch": 19.36, "learning_rate": 1.6347424042272126e-06, "loss": 0.0019, "step": 88400 }, { "epoch": 19.37, "learning_rate": 1.607221488331132e-06, "loss": 0.0023, "step": 88450 }, { "epoch": 19.38, "learning_rate": 1.5797005724350506e-06, "loss": 0.0024, "step": 88500 }, { "epoch": 19.38, "eval_acc": 0.6691367653836275, "eval_cer": 0.033103927292910534, "eval_loss": 0.26934462785720825, "eval_runtime": 2406.3405, "eval_samples_per_second": 3.197, "eval_steps_per_second": 0.4, "step": 88500 }, { "epoch": 19.39, "learning_rate": 1.5521796565389696e-06, "loss": 0.002, "step": 88550 }, { "epoch": 19.4, "learning_rate": 1.5246587406428887e-06, "loss": 0.0019, "step": 88600 }, { "epoch": 19.41, "learning_rate": 1.4971378247468076e-06, "loss": 0.0021, "step": 88650 }, { "epoch": 19.42, "learning_rate": 1.4696169088507267e-06, "loss": 0.0017, "step": 88700 }, { "epoch": 19.43, "learning_rate": 1.4420959929546457e-06, "loss": 0.0019, "step": 88750 }, { "epoch": 19.44, "learning_rate": 1.415125495376486e-06, "loss": 0.0025, "step": 88800 }, { "epoch": 19.45, "learning_rate": 1.3876045794804052e-06, "loss": 0.0018, "step": 88850 }, { "epoch": 19.46, "learning_rate": 1.3600836635843242e-06, "loss": 0.0018, "step": 88900 }, { "epoch": 19.48, "learning_rate": 1.332562747688243e-06, "loss": 0.0016, "step": 88950 }, { "epoch": 19.49, "learning_rate": 1.3050418317921622e-06, "loss": 0.0024, "step": 89000 }, { "epoch": 19.49, "eval_acc": 0.6687467497830543, "eval_cer": 0.032975244922860344, "eval_loss": 0.2692321538925171, "eval_runtime": 2407.041, "eval_samples_per_second": 3.196, "eval_steps_per_second": 0.4, "step": 89000 }, { "epoch": 19.5, "learning_rate": 1.277520915896081e-06, "loss": 0.0018, "step": 89050 }, { "epoch": 19.51, "learning_rate": 1.25e-06, "loss": 0.0021, "step": 89100 }, { "epoch": 19.52, "learning_rate": 1.222479084103919e-06, "loss": 0.002, "step": 89150 }, { "epoch": 19.53, "learning_rate": 1.194958168207838e-06, "loss": 0.0021, "step": 89200 }, { "epoch": 19.54, "learning_rate": 1.167437252311757e-06, "loss": 0.0017, "step": 89250 }, { "epoch": 19.55, "learning_rate": 1.139916336415676e-06, "loss": 0.0019, "step": 89300 }, { "epoch": 19.56, "learning_rate": 1.112395420519595e-06, "loss": 0.0021, "step": 89350 }, { "epoch": 19.57, "learning_rate": 1.084874504623514e-06, "loss": 0.002, "step": 89400 }, { "epoch": 19.59, "learning_rate": 1.057353588727433e-06, "loss": 0.0024, "step": 89450 }, { "epoch": 19.6, "learning_rate": 1.0298326728313518e-06, "loss": 0.0022, "step": 89500 }, { "epoch": 19.6, "eval_acc": 0.6693967757840098, "eval_cer": 0.032951989072851275, "eval_loss": 0.2692181169986725, "eval_runtime": 2408.7909, "eval_samples_per_second": 3.193, "eval_steps_per_second": 0.399, "step": 89500 }, { "epoch": 19.61, "learning_rate": 1.002311756935271e-06, "loss": 0.0021, "step": 89550 }, { "epoch": 19.62, "learning_rate": 9.747908410391898e-07, "loss": 0.0021, "step": 89600 }, { "epoch": 19.63, "learning_rate": 9.472699251431087e-07, "loss": 0.002, "step": 89650 }, { "epoch": 19.64, "learning_rate": 9.197490092470277e-07, "loss": 0.0024, "step": 89700 }, { "epoch": 19.65, "learning_rate": 8.922280933509468e-07, "loss": 0.0018, "step": 89750 }, { "epoch": 19.66, "learning_rate": 8.647071774548657e-07, "loss": 0.0021, "step": 89800 }, { "epoch": 19.67, "learning_rate": 8.371862615587847e-07, "loss": 0.002, "step": 89850 }, { "epoch": 19.68, "learning_rate": 8.096653456627038e-07, "loss": 0.0022, "step": 89900 }, { "epoch": 19.69, "learning_rate": 7.821444297666227e-07, "loss": 0.0022, "step": 89950 }, { "epoch": 19.71, "learning_rate": 7.546235138705416e-07, "loss": 0.002, "step": 90000 }, { "epoch": 19.71, "eval_acc": 0.6679667185819076, "eval_cer": 0.03304036130288574, "eval_loss": 0.2694588303565979, "eval_runtime": 2407.5846, "eval_samples_per_second": 3.195, "eval_steps_per_second": 0.4, "step": 90000 }, { "epoch": 19.72, "learning_rate": 7.271025979744607e-07, "loss": 0.002, "step": 90050 }, { "epoch": 19.73, "learning_rate": 6.995816820783795e-07, "loss": 0.0026, "step": 90100 }, { "epoch": 19.74, "learning_rate": 6.720607661822985e-07, "loss": 0.0021, "step": 90150 }, { "epoch": 19.75, "learning_rate": 6.450902686041392e-07, "loss": 0.0055, "step": 90200 }, { "epoch": 19.76, "learning_rate": 6.175693527080582e-07, "loss": 0.0018, "step": 90250 }, { "epoch": 19.77, "learning_rate": 5.900484368119771e-07, "loss": 0.002, "step": 90300 }, { "epoch": 19.78, "learning_rate": 5.625275209158961e-07, "loss": 0.0019, "step": 90350 }, { "epoch": 19.79, "learning_rate": 5.350066050198152e-07, "loss": 0.0024, "step": 90400 }, { "epoch": 19.8, "learning_rate": 5.07485689123734e-07, "loss": 0.0024, "step": 90450 }, { "epoch": 19.81, "learning_rate": 4.79964773227653e-07, "loss": 0.0025, "step": 90500 }, { "epoch": 19.81, "eval_acc": 0.6691367653836275, "eval_cer": 0.03304036130288574, "eval_loss": 0.26927751302719116, "eval_runtime": 2407.5852, "eval_samples_per_second": 3.195, "eval_steps_per_second": 0.4, "step": 90500 }, { "epoch": 19.83, "learning_rate": 4.52443857331572e-07, "loss": 0.0025, "step": 90550 }, { "epoch": 19.84, "learning_rate": 4.2492294143549103e-07, "loss": 0.0022, "step": 90600 }, { "epoch": 19.85, "learning_rate": 3.9740202553941e-07, "loss": 0.0022, "step": 90650 }, { "epoch": 19.86, "learning_rate": 3.6988110964332896e-07, "loss": 0.0019, "step": 90700 }, { "epoch": 19.87, "learning_rate": 3.4236019374724793e-07, "loss": 0.0018, "step": 90750 }, { "epoch": 19.88, "learning_rate": 3.148392778511669e-07, "loss": 0.0026, "step": 90800 }, { "epoch": 19.89, "learning_rate": 2.8731836195508586e-07, "loss": 0.0019, "step": 90850 }, { "epoch": 19.9, "learning_rate": 2.597974460590049e-07, "loss": 0.002, "step": 90900 }, { "epoch": 19.91, "learning_rate": 2.3227653016292382e-07, "loss": 0.0019, "step": 90950 }, { "epoch": 19.92, "learning_rate": 2.0475561426684281e-07, "loss": 0.0021, "step": 91000 }, { "epoch": 19.92, "eval_acc": 0.669656786184392, "eval_cer": 0.03293958595284644, "eval_loss": 0.26924172043800354, "eval_runtime": 2406.6768, "eval_samples_per_second": 3.196, "eval_steps_per_second": 0.4, "step": 91000 } ], "logging_steps": 50, "max_steps": 91340, "num_train_epochs": 20, "save_steps": 500, "total_flos": 2.178949606505743e+21, "trial_name": null, "trial_params": null }