{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.930420896543858, "eval_steps": 300, "global_step": 24600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.37, "learning_rate": 9.900000000000002e-06, "loss": 19.7382, "step": 300 }, { "epoch": 0.37, "eval_cer": 1.0, "eval_loss": 6.821648120880127, "eval_runtime": 52.3193, "eval_samples_per_second": 42.451, "eval_steps_per_second": 5.314, "step": 300 }, { "epoch": 0.73, "learning_rate": 1.9900000000000003e-05, "loss": 9.8181, "step": 600 }, { "epoch": 0.73, "eval_cer": 1.0, "eval_loss": 6.651111602783203, "eval_runtime": 43.577, "eval_samples_per_second": 50.967, "eval_steps_per_second": 6.38, "step": 600 }, { "epoch": 1.1, "learning_rate": 2.9900000000000002e-05, "loss": 9.5191, "step": 900 }, { "epoch": 1.1, "eval_cer": 0.9806448402826152, "eval_loss": 6.58424186706543, "eval_runtime": 43.1655, "eval_samples_per_second": 51.453, "eval_steps_per_second": 6.44, "step": 900 }, { "epoch": 1.46, "learning_rate": 3.99e-05, "loss": 8.6238, "step": 1200 }, { "epoch": 1.46, "eval_cer": 0.9216837496268285, "eval_loss": 6.142301082611084, "eval_runtime": 42.9764, "eval_samples_per_second": 51.68, "eval_steps_per_second": 6.469, "step": 1200 }, { "epoch": 1.83, "learning_rate": 4.99e-05, "loss": 6.883, "step": 1500 }, { "epoch": 1.83, "eval_cer": 0.850830928450592, "eval_loss": 3.596842050552368, "eval_runtime": 42.8348, "eval_samples_per_second": 51.85, "eval_steps_per_second": 6.49, "step": 1500 }, { "epoch": 2.19, "learning_rate": 4.93579766536965e-05, "loss": 4.0838, "step": 1800 }, { "epoch": 2.19, "eval_cer": 0.48343118718280426, "eval_loss": 2.5516390800476074, "eval_runtime": 42.9354, "eval_samples_per_second": 51.729, "eval_steps_per_second": 6.475, "step": 1800 }, { "epoch": 2.56, "learning_rate": 4.87094682230869e-05, "loss": 3.167, "step": 2100 }, { "epoch": 2.56, "eval_cer": 0.4450691611105583, "eval_loss": 2.2739391326904297, "eval_runtime": 42.8894, "eval_samples_per_second": 51.784, "eval_steps_per_second": 6.482, "step": 2100 }, { "epoch": 2.92, "learning_rate": 4.806312148724601e-05, "loss": 2.826, "step": 2400 }, { "epoch": 2.92, "eval_cer": 0.4178525226390686, "eval_loss": 2.0223917961120605, "eval_runtime": 42.9677, "eval_samples_per_second": 51.69, "eval_steps_per_second": 6.47, "step": 2400 }, { "epoch": 3.29, "learning_rate": 4.7414613056636405e-05, "loss": 2.6955, "step": 2700 }, { "epoch": 3.29, "eval_cer": 0.4174544730818987, "eval_loss": 1.9600275754928589, "eval_runtime": 42.8412, "eval_samples_per_second": 51.843, "eval_steps_per_second": 6.489, "step": 2700 }, { "epoch": 3.65, "learning_rate": 4.676610462602681e-05, "loss": 2.5812, "step": 3000 }, { "epoch": 3.65, "eval_cer": 0.40994128769031746, "eval_loss": 1.769142985343933, "eval_runtime": 42.9361, "eval_samples_per_second": 51.728, "eval_steps_per_second": 6.475, "step": 3000 }, { "epoch": 4.02, "learning_rate": 4.611975789018591e-05, "loss": 2.4952, "step": 3300 }, { "epoch": 4.02, "eval_cer": 0.4013832222111653, "eval_loss": 1.8323670625686646, "eval_runtime": 42.7115, "eval_samples_per_second": 52.0, "eval_steps_per_second": 6.509, "step": 3300 }, { "epoch": 4.38, "learning_rate": 4.547124945957631e-05, "loss": 2.3938, "step": 3600 }, { "epoch": 4.38, "eval_cer": 0.37799781072743555, "eval_loss": 1.7351980209350586, "eval_runtime": 42.7436, "eval_samples_per_second": 51.961, "eval_steps_per_second": 6.504, "step": 3600 }, { "epoch": 4.75, "learning_rate": 4.482490272373541e-05, "loss": 2.3584, "step": 3900 }, { "epoch": 4.75, "eval_cer": 0.3678475470196039, "eval_loss": 1.64540696144104, "eval_runtime": 42.7445, "eval_samples_per_second": 51.96, "eval_steps_per_second": 6.504, "step": 3900 }, { "epoch": 5.11, "learning_rate": 4.417639429312581e-05, "loss": 2.325, "step": 4200 }, { "epoch": 5.11, "eval_cer": 0.38352074833316746, "eval_loss": 1.6946874856948853, "eval_runtime": 42.4882, "eval_samples_per_second": 52.273, "eval_steps_per_second": 6.543, "step": 4200 }, { "epoch": 5.48, "learning_rate": 4.3527885862516214e-05, "loss": 2.2454, "step": 4500 }, { "epoch": 5.48, "eval_cer": 0.34078017713205294, "eval_loss": 1.5765234231948853, "eval_runtime": 42.1243, "eval_samples_per_second": 52.725, "eval_steps_per_second": 6.6, "step": 4500 }, { "epoch": 5.84, "learning_rate": 4.287937743190661e-05, "loss": 2.1954, "step": 4800 }, { "epoch": 5.84, "eval_cer": 0.37093243108767043, "eval_loss": 1.603211760520935, "eval_runtime": 42.6116, "eval_samples_per_second": 52.122, "eval_steps_per_second": 6.524, "step": 4800 }, { "epoch": 6.21, "learning_rate": 4.223086900129702e-05, "loss": 2.1492, "step": 5100 }, { "epoch": 6.21, "eval_cer": 0.3447606727037516, "eval_loss": 1.6078611612319946, "eval_runtime": 42.9188, "eval_samples_per_second": 51.749, "eval_steps_per_second": 6.477, "step": 5100 }, { "epoch": 6.57, "learning_rate": 4.1582360570687426e-05, "loss": 2.1655, "step": 5400 }, { "epoch": 6.57, "eval_cer": 0.33555577669419845, "eval_loss": 1.4955742359161377, "eval_runtime": 42.6136, "eval_samples_per_second": 52.12, "eval_steps_per_second": 6.524, "step": 5400 }, { "epoch": 6.94, "learning_rate": 4.093385214007782e-05, "loss": 2.1393, "step": 5700 }, { "epoch": 6.94, "eval_cer": 0.331625037317146, "eval_loss": 1.4772567749023438, "eval_runtime": 42.6929, "eval_samples_per_second": 52.023, "eval_steps_per_second": 6.512, "step": 5700 }, { "epoch": 7.3, "learning_rate": 4.028534370946823e-05, "loss": 2.1027, "step": 6000 }, { "epoch": 7.3, "eval_cer": 0.341427007662454, "eval_loss": 1.5089548826217651, "eval_runtime": 42.7699, "eval_samples_per_second": 51.929, "eval_steps_per_second": 6.5, "step": 6000 }, { "epoch": 7.67, "learning_rate": 3.9636835278858624e-05, "loss": 2.0824, "step": 6300 }, { "epoch": 7.67, "eval_cer": 0.34575579659667627, "eval_loss": 1.5948169231414795, "eval_runtime": 42.6031, "eval_samples_per_second": 52.132, "eval_steps_per_second": 6.525, "step": 6300 }, { "epoch": 8.03, "learning_rate": 3.899048854301773e-05, "loss": 2.061, "step": 6600 }, { "epoch": 8.03, "eval_cer": 0.35058214747736094, "eval_loss": 1.4923882484436035, "eval_runtime": 42.6516, "eval_samples_per_second": 52.073, "eval_steps_per_second": 6.518, "step": 6600 }, { "epoch": 8.4, "learning_rate": 3.8341980112408135e-05, "loss": 2.0212, "step": 6900 }, { "epoch": 8.4, "eval_cer": 0.33247089262613194, "eval_loss": 1.4590569734573364, "eval_runtime": 42.5489, "eval_samples_per_second": 52.199, "eval_steps_per_second": 6.534, "step": 6900 }, { "epoch": 8.76, "learning_rate": 3.769347168179853e-05, "loss": 2.0504, "step": 7200 }, { "epoch": 8.76, "eval_cer": 0.3344611404119813, "eval_loss": 1.4551000595092773, "eval_runtime": 42.7689, "eval_samples_per_second": 51.93, "eval_steps_per_second": 6.5, "step": 7200 }, { "epoch": 9.13, "learning_rate": 3.7044963251188936e-05, "loss": 2.0113, "step": 7500 }, { "epoch": 9.13, "eval_cer": 0.3344113842173351, "eval_loss": 1.4068984985351562, "eval_runtime": 42.6741, "eval_samples_per_second": 52.046, "eval_steps_per_second": 6.514, "step": 7500 }, { "epoch": 9.49, "learning_rate": 3.639645482057933e-05, "loss": 2.0057, "step": 7800 }, { "epoch": 9.49, "eval_cer": 0.3230669718379938, "eval_loss": 1.443265676498413, "eval_runtime": 42.7855, "eval_samples_per_second": 51.91, "eval_steps_per_second": 6.498, "step": 7800 }, { "epoch": 9.86, "learning_rate": 3.574794638996974e-05, "loss": 1.9741, "step": 8100 }, { "epoch": 9.86, "eval_cer": 0.3216240421932531, "eval_loss": 1.426885724067688, "eval_runtime": 42.8301, "eval_samples_per_second": 51.856, "eval_steps_per_second": 6.491, "step": 8100 }, { "epoch": 10.22, "learning_rate": 3.509943795936014e-05, "loss": 1.936, "step": 8400 }, { "epoch": 10.22, "eval_cer": 0.3227186784754702, "eval_loss": 1.3612221479415894, "eval_runtime": 43.0542, "eval_samples_per_second": 51.586, "eval_steps_per_second": 6.457, "step": 8400 }, { "epoch": 10.59, "learning_rate": 3.4450929528750544e-05, "loss": 1.9416, "step": 8700 }, { "epoch": 10.59, "eval_cer": 0.3027664444223306, "eval_loss": 1.363100290298462, "eval_runtime": 43.1271, "eval_samples_per_second": 51.499, "eval_steps_per_second": 6.446, "step": 8700 }, { "epoch": 10.95, "learning_rate": 3.380242109814095e-05, "loss": 1.9425, "step": 9000 }, { "epoch": 10.95, "eval_cer": 0.3038610807045477, "eval_loss": 1.3716000318527222, "eval_runtime": 43.286, "eval_samples_per_second": 51.31, "eval_steps_per_second": 6.422, "step": 9000 }, { "epoch": 11.32, "learning_rate": 3.3153912667531345e-05, "loss": 1.9351, "step": 9300 }, { "epoch": 11.32, "eval_cer": 0.31490695591601153, "eval_loss": 1.3932286500930786, "eval_runtime": 43.3029, "eval_samples_per_second": 51.29, "eval_steps_per_second": 6.42, "step": 9300 }, { "epoch": 11.68, "learning_rate": 3.250540423692175e-05, "loss": 1.9046, "step": 9600 }, { "epoch": 11.68, "eval_cer": 0.3329684545725943, "eval_loss": 1.4470584392547607, "eval_runtime": 42.9923, "eval_samples_per_second": 51.66, "eval_steps_per_second": 6.466, "step": 9600 }, { "epoch": 12.05, "learning_rate": 3.185905750108085e-05, "loss": 1.8587, "step": 9900 }, { "epoch": 12.05, "eval_cer": 0.3056523037118121, "eval_loss": 1.3519924879074097, "eval_runtime": 42.7004, "eval_samples_per_second": 52.014, "eval_steps_per_second": 6.51, "step": 9900 }, { "epoch": 12.41, "learning_rate": 3.1210549070471253e-05, "loss": 1.8699, "step": 10200 }, { "epoch": 12.41, "eval_cer": 0.3289879590008956, "eval_loss": 1.4434651136398315, "eval_runtime": 42.904, "eval_samples_per_second": 51.767, "eval_steps_per_second": 6.48, "step": 10200 }, { "epoch": 12.78, "learning_rate": 3.056204063986166e-05, "loss": 1.8328, "step": 10500 }, { "epoch": 12.78, "eval_cer": 0.31356353866056325, "eval_loss": 1.361649751663208, "eval_runtime": 42.7673, "eval_samples_per_second": 51.932, "eval_steps_per_second": 6.5, "step": 10500 }, { "epoch": 13.14, "learning_rate": 2.9913532209252054e-05, "loss": 1.8136, "step": 10800 }, { "epoch": 13.14, "eval_cer": 0.2943078913324709, "eval_loss": 1.3512203693389893, "eval_runtime": 42.5723, "eval_samples_per_second": 52.17, "eval_steps_per_second": 6.53, "step": 10800 }, { "epoch": 13.51, "learning_rate": 2.9265023778642458e-05, "loss": 1.8099, "step": 11100 }, { "epoch": 13.51, "eval_cer": 0.2956513085879192, "eval_loss": 1.3534834384918213, "eval_runtime": 42.854, "eval_samples_per_second": 51.827, "eval_steps_per_second": 6.487, "step": 11100 }, { "epoch": 13.87, "learning_rate": 2.861651534803286e-05, "loss": 1.8021, "step": 11400 }, { "epoch": 13.87, "eval_cer": 0.2981888745148771, "eval_loss": 1.3732918500900269, "eval_runtime": 42.5792, "eval_samples_per_second": 52.162, "eval_steps_per_second": 6.529, "step": 11400 }, { "epoch": 14.24, "learning_rate": 2.7968006917423263e-05, "loss": 1.7809, "step": 11700 }, { "epoch": 14.24, "eval_cer": 0.30804060105483133, "eval_loss": 1.3088232278823853, "eval_runtime": 42.9072, "eval_samples_per_second": 51.763, "eval_steps_per_second": 6.479, "step": 11700 }, { "epoch": 14.6, "learning_rate": 2.731949848681366e-05, "loss": 1.7734, "step": 12000 }, { "epoch": 14.6, "eval_cer": 0.28858592894815405, "eval_loss": 1.320089340209961, "eval_runtime": 42.6671, "eval_samples_per_second": 52.054, "eval_steps_per_second": 6.516, "step": 12000 }, { "epoch": 14.97, "learning_rate": 2.6670990056204063e-05, "loss": 1.7646, "step": 12300 }, { "epoch": 14.97, "eval_cer": 0.3268981988257538, "eval_loss": 1.3471167087554932, "eval_runtime": 42.7924, "eval_samples_per_second": 51.902, "eval_steps_per_second": 6.496, "step": 12300 }, { "epoch": 15.33, "learning_rate": 2.602248162559447e-05, "loss": 1.733, "step": 12600 }, { "epoch": 15.33, "eval_cer": 0.30321425017414666, "eval_loss": 1.3437916040420532, "eval_runtime": 42.7066, "eval_samples_per_second": 52.006, "eval_steps_per_second": 6.51, "step": 12600 }, { "epoch": 15.7, "learning_rate": 2.5373973194984868e-05, "loss": 1.7182, "step": 12900 }, { "epoch": 15.7, "eval_cer": 0.2999800975221415, "eval_loss": 1.3310909271240234, "eval_runtime": 42.787, "eval_samples_per_second": 51.908, "eval_steps_per_second": 6.497, "step": 12900 }, { "epoch": 16.06, "learning_rate": 2.472546476437527e-05, "loss": 1.7071, "step": 13200 }, { "epoch": 16.06, "eval_cer": 0.3073937705244303, "eval_loss": 1.2641910314559937, "eval_runtime": 42.6973, "eval_samples_per_second": 52.017, "eval_steps_per_second": 6.511, "step": 13200 }, { "epoch": 16.43, "learning_rate": 2.4076956333765675e-05, "loss": 1.7196, "step": 13500 }, { "epoch": 16.43, "eval_cer": 0.2859488506319037, "eval_loss": 1.2662409543991089, "eval_runtime": 42.6819, "eval_samples_per_second": 52.036, "eval_steps_per_second": 6.513, "step": 13500 }, { "epoch": 16.79, "learning_rate": 2.3428447903156076e-05, "loss": 1.7264, "step": 13800 }, { "epoch": 16.79, "eval_cer": 0.2878893422231068, "eval_loss": 1.2460156679153442, "eval_runtime": 42.7771, "eval_samples_per_second": 51.92, "eval_steps_per_second": 6.499, "step": 13800 }, { "epoch": 17.16, "learning_rate": 2.2782101167315176e-05, "loss": 1.6875, "step": 14100 }, { "epoch": 17.16, "eval_cer": 0.2931137426609613, "eval_loss": 1.3022774457931519, "eval_runtime": 42.5345, "eval_samples_per_second": 52.216, "eval_steps_per_second": 6.536, "step": 14100 }, { "epoch": 17.52, "learning_rate": 2.2133592736705577e-05, "loss": 1.6659, "step": 14400 }, { "epoch": 17.52, "eval_cer": 0.2927654492984377, "eval_loss": 1.32107675075531, "eval_runtime": 42.821, "eval_samples_per_second": 51.867, "eval_steps_per_second": 6.492, "step": 14400 }, { "epoch": 17.89, "learning_rate": 2.148508430609598e-05, "loss": 1.6694, "step": 14700 }, { "epoch": 17.89, "eval_cer": 0.2882873917802766, "eval_loss": 1.3291140794754028, "eval_runtime": 42.7715, "eval_samples_per_second": 51.927, "eval_steps_per_second": 6.5, "step": 14700 }, { "epoch": 18.25, "learning_rate": 2.0836575875486384e-05, "loss": 1.643, "step": 15000 }, { "epoch": 18.25, "eval_cer": 0.294755697084287, "eval_loss": 1.2615532875061035, "eval_runtime": 42.8646, "eval_samples_per_second": 51.814, "eval_steps_per_second": 6.486, "step": 15000 }, { "epoch": 18.62, "learning_rate": 2.0188067444876785e-05, "loss": 1.676, "step": 15300 }, { "epoch": 18.62, "eval_cer": 0.2835107970942382, "eval_loss": 1.2185758352279663, "eval_runtime": 42.7823, "eval_samples_per_second": 51.914, "eval_steps_per_second": 6.498, "step": 15300 }, { "epoch": 18.98, "learning_rate": 1.9539559014267185e-05, "loss": 1.6397, "step": 15600 }, { "epoch": 18.98, "eval_cer": 0.2810727435565728, "eval_loss": 1.3059513568878174, "eval_runtime": 42.9668, "eval_samples_per_second": 51.691, "eval_steps_per_second": 6.47, "step": 15600 }, { "epoch": 19.35, "learning_rate": 1.8893212278426286e-05, "loss": 1.6347, "step": 15900 }, { "epoch": 19.35, "eval_cer": 0.28838690416956914, "eval_loss": 1.2377227544784546, "eval_runtime": 42.8094, "eval_samples_per_second": 51.881, "eval_steps_per_second": 6.494, "step": 15900 }, { "epoch": 19.71, "learning_rate": 1.824470384781669e-05, "loss": 1.6328, "step": 16200 }, { "epoch": 19.71, "eval_cer": 0.27599761170265696, "eval_loss": 1.2721112966537476, "eval_runtime": 42.6795, "eval_samples_per_second": 52.039, "eval_steps_per_second": 6.514, "step": 16200 }, { "epoch": 20.08, "learning_rate": 1.7596195417207094e-05, "loss": 1.6092, "step": 16500 }, { "epoch": 20.08, "eval_cer": 0.28231664842272863, "eval_loss": 1.2696741819381714, "eval_runtime": 42.6768, "eval_samples_per_second": 52.042, "eval_steps_per_second": 6.514, "step": 16500 }, { "epoch": 20.44, "learning_rate": 1.6947686986597494e-05, "loss": 1.5737, "step": 16800 }, { "epoch": 20.44, "eval_cer": 0.28306299134242213, "eval_loss": 1.2230887413024902, "eval_runtime": 42.9425, "eval_samples_per_second": 51.72, "eval_steps_per_second": 6.474, "step": 16800 }, { "epoch": 20.81, "learning_rate": 1.6299178555987894e-05, "loss": 1.6166, "step": 17100 }, { "epoch": 20.81, "eval_cer": 0.2663449099412877, "eval_loss": 1.2277541160583496, "eval_runtime": 42.813, "eval_samples_per_second": 51.877, "eval_steps_per_second": 6.493, "step": 17100 }, { "epoch": 21.17, "learning_rate": 1.56506701253783e-05, "loss": 1.5964, "step": 17400 }, { "epoch": 21.17, "eval_cer": 0.27355955816499156, "eval_loss": 1.2313120365142822, "eval_runtime": 42.7309, "eval_samples_per_second": 51.976, "eval_steps_per_second": 6.506, "step": 17400 }, { "epoch": 21.54, "learning_rate": 1.5002161694768699e-05, "loss": 1.5237, "step": 17700 }, { "epoch": 21.54, "eval_cer": 0.27863469001890734, "eval_loss": 1.2411593198776245, "eval_runtime": 42.9368, "eval_samples_per_second": 51.727, "eval_steps_per_second": 6.475, "step": 17700 }, { "epoch": 21.9, "learning_rate": 1.4353653264159101e-05, "loss": 1.5419, "step": 18000 }, { "epoch": 21.9, "eval_cer": 0.28634690018907355, "eval_loss": 1.2718561887741089, "eval_runtime": 42.5781, "eval_samples_per_second": 52.163, "eval_steps_per_second": 6.529, "step": 18000 }, { "epoch": 22.27, "learning_rate": 1.3705144833549505e-05, "loss": 1.5654, "step": 18300 }, { "epoch": 22.27, "eval_cer": 0.26734003383421234, "eval_loss": 1.2373576164245605, "eval_runtime": 42.5574, "eval_samples_per_second": 52.188, "eval_steps_per_second": 6.532, "step": 18300 }, { "epoch": 22.63, "learning_rate": 1.3060959792477304e-05, "loss": 1.5331, "step": 18600 }, { "epoch": 22.63, "eval_cer": 0.2708727236540949, "eval_loss": 1.197614073753357, "eval_runtime": 42.6921, "eval_samples_per_second": 52.024, "eval_steps_per_second": 6.512, "step": 18600 }, { "epoch": 23.0, "learning_rate": 1.2412451361867706e-05, "loss": 1.5378, "step": 18900 }, { "epoch": 23.0, "eval_cer": 0.26838491392178326, "eval_loss": 1.1672557592391968, "eval_runtime": 42.9497, "eval_samples_per_second": 51.712, "eval_steps_per_second": 6.473, "step": 18900 }, { "epoch": 23.36, "learning_rate": 1.1763942931258106e-05, "loss": 1.4972, "step": 19200 }, { "epoch": 23.36, "eval_cer": 0.26938003781470793, "eval_loss": 1.1548832654953003, "eval_runtime": 42.5425, "eval_samples_per_second": 52.207, "eval_steps_per_second": 6.535, "step": 19200 }, { "epoch": 23.73, "learning_rate": 1.1115434500648508e-05, "loss": 1.5112, "step": 19500 }, { "epoch": 23.73, "eval_cer": 0.2684844263110757, "eval_loss": 1.2580962181091309, "eval_runtime": 43.0178, "eval_samples_per_second": 51.63, "eval_steps_per_second": 6.462, "step": 19500 }, { "epoch": 24.09, "learning_rate": 1.046692607003891e-05, "loss": 1.5026, "step": 19800 }, { "epoch": 24.09, "eval_cer": 0.26957906259329284, "eval_loss": 1.2475780248641968, "eval_runtime": 42.8521, "eval_samples_per_second": 51.829, "eval_steps_per_second": 6.487, "step": 19800 }, { "epoch": 24.46, "learning_rate": 9.818417639429313e-06, "loss": 1.5062, "step": 20100 }, { "epoch": 24.46, "eval_cer": 0.2755995621454871, "eval_loss": 1.2111254930496216, "eval_runtime": 42.9059, "eval_samples_per_second": 51.764, "eval_steps_per_second": 6.479, "step": 20100 }, { "epoch": 24.82, "learning_rate": 9.169909208819715e-06, "loss": 1.4816, "step": 20400 }, { "epoch": 24.82, "eval_cer": 0.26345905065180614, "eval_loss": 1.2007070779800415, "eval_runtime": 42.6871, "eval_samples_per_second": 52.03, "eval_steps_per_second": 6.513, "step": 20400 }, { "epoch": 25.19, "learning_rate": 8.521400778210117e-06, "loss": 1.4836, "step": 20700 }, { "epoch": 25.19, "eval_cer": 0.27435565727933126, "eval_loss": 1.2548900842666626, "eval_runtime": 42.6925, "eval_samples_per_second": 52.023, "eval_steps_per_second": 6.512, "step": 20700 }, { "epoch": 25.55, "learning_rate": 7.87289234760052e-06, "loss": 1.479, "step": 21000 }, { "epoch": 25.55, "eval_cer": 0.26987759976117026, "eval_loss": 1.1535056829452515, "eval_runtime": 42.5922, "eval_samples_per_second": 52.146, "eval_steps_per_second": 6.527, "step": 21000 }, { "epoch": 25.92, "learning_rate": 7.2243839169909205e-06, "loss": 1.493, "step": 21300 }, { "epoch": 25.92, "eval_cer": 0.26972833117723155, "eval_loss": 1.198728084564209, "eval_runtime": 42.6034, "eval_samples_per_second": 52.132, "eval_steps_per_second": 6.525, "step": 21300 }, { "epoch": 26.28, "learning_rate": 6.5758754863813235e-06, "loss": 1.4524, "step": 21600 }, { "epoch": 26.28, "eval_cer": 0.27470395064185493, "eval_loss": 1.2245545387268066, "eval_runtime": 42.9242, "eval_samples_per_second": 51.742, "eval_steps_per_second": 6.477, "step": 21600 }, { "epoch": 26.65, "learning_rate": 5.927367055771725e-06, "loss": 1.4569, "step": 21900 }, { "epoch": 26.65, "eval_cer": 0.2605234351676784, "eval_loss": 1.1879122257232666, "eval_runtime": 42.634, "eval_samples_per_second": 52.095, "eval_steps_per_second": 6.521, "step": 21900 }, { "epoch": 27.01, "learning_rate": 5.278858625162128e-06, "loss": 1.4535, "step": 22200 }, { "epoch": 27.01, "eval_cer": 0.27281321524529806, "eval_loss": 1.2265853881835938, "eval_runtime": 42.7816, "eval_samples_per_second": 51.915, "eval_steps_per_second": 6.498, "step": 22200 }, { "epoch": 27.38, "learning_rate": 4.63035019455253e-06, "loss": 1.4452, "step": 22500 }, { "epoch": 27.38, "eval_cer": 0.2566424519852722, "eval_loss": 1.1812487840652466, "eval_runtime": 42.5285, "eval_samples_per_second": 52.224, "eval_steps_per_second": 6.537, "step": 22500 }, { "epoch": 27.74, "learning_rate": 3.981841763942931e-06, "loss": 1.4513, "step": 22800 }, { "epoch": 27.74, "eval_cer": 0.26286197631605135, "eval_loss": 1.1672886610031128, "eval_runtime": 43.0029, "eval_samples_per_second": 51.648, "eval_steps_per_second": 6.465, "step": 22800 }, { "epoch": 28.11, "learning_rate": 3.3333333333333333e-06, "loss": 1.4561, "step": 23100 }, { "epoch": 28.11, "eval_cer": 0.26181709622848043, "eval_loss": 1.1963270902633667, "eval_runtime": 42.7473, "eval_samples_per_second": 51.957, "eval_steps_per_second": 6.503, "step": 23100 }, { "epoch": 28.47, "learning_rate": 2.6848249027237355e-06, "loss": 1.4357, "step": 23400 }, { "epoch": 28.47, "eval_cer": 0.26913125684147676, "eval_loss": 1.201293706893921, "eval_runtime": 42.6759, "eval_samples_per_second": 52.043, "eval_steps_per_second": 6.514, "step": 23400 }, { "epoch": 28.84, "learning_rate": 2.0363164721141376e-06, "loss": 1.4427, "step": 23700 }, { "epoch": 28.84, "eval_cer": 0.2726141904667131, "eval_loss": 1.2448346614837646, "eval_runtime": 42.4761, "eval_samples_per_second": 52.288, "eval_steps_per_second": 6.545, "step": 23700 }, { "epoch": 29.2, "learning_rate": 1.3899697362732382e-06, "loss": 1.4171, "step": 24000 }, { "epoch": 29.2, "eval_cer": 0.26684247188775, "eval_loss": 1.2063277959823608, "eval_runtime": 42.7033, "eval_samples_per_second": 52.01, "eval_steps_per_second": 6.51, "step": 24000 }, { "epoch": 29.57, "learning_rate": 7.414613056636403e-07, "loss": 1.4639, "step": 24300 }, { "epoch": 29.57, "eval_cer": 0.26694198427704247, "eval_loss": 1.2228556871414185, "eval_runtime": 42.5785, "eval_samples_per_second": 52.162, "eval_steps_per_second": 6.529, "step": 24300 }, { "epoch": 29.93, "learning_rate": 9.295287505404236e-08, "loss": 1.4234, "step": 24600 }, { "epoch": 29.93, "eval_cer": 0.2594785550801075, "eval_loss": 1.1955249309539795, "eval_runtime": 43.02, "eval_samples_per_second": 51.627, "eval_steps_per_second": 6.462, "step": 24600 } ], "logging_steps": 300, "max_steps": 24630, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 600, "total_flos": 3.9184197928838064e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null }