{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9809064397204748, "global_step": 27000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 6.999815761247666e-05, "loss": 4.5119, "step": 100 }, { "epoch": 0.01, "learning_rate": 6.999169171843241e-05, "loss": 2.6638, "step": 200 }, { "epoch": 0.02, "learning_rate": 6.998057503298084e-05, "loss": 1.183, "step": 300 }, { "epoch": 0.03, "learning_rate": 6.996480903365516e-05, "loss": 0.7325, "step": 400 }, { "epoch": 0.04, "learning_rate": 6.99443958159349e-05, "loss": 0.5966, "step": 500 }, { "epoch": 0.04, "learning_rate": 6.991933809296747e-05, "loss": 0.5263, "step": 600 }, { "epoch": 0.05, "learning_rate": 6.988963919520753e-05, "loss": 0.5068, "step": 700 }, { "epoch": 0.06, "learning_rate": 6.985530306997431e-05, "loss": 0.4704, "step": 800 }, { "epoch": 0.07, "learning_rate": 6.981633428092705e-05, "loss": 0.4521, "step": 900 }, { "epoch": 0.07, "learning_rate": 6.977273800745834e-05, "loss": 0.4439, "step": 1000 }, { "epoch": 0.08, "learning_rate": 6.972452004400577e-05, "loss": 0.421, "step": 1100 }, { "epoch": 0.09, "learning_rate": 6.96716867992818e-05, "loss": 0.4105, "step": 1200 }, { "epoch": 0.1, "learning_rate": 6.961424529542192e-05, "loss": 0.3998, "step": 1300 }, { "epoch": 0.1, "learning_rate": 6.955220316705135e-05, "loss": 0.3971, "step": 1400 }, { "epoch": 0.11, "learning_rate": 6.948556866027035e-05, "loss": 0.3958, "step": 1500 }, { "epoch": 0.11, "eval_loss": 0.26585060358047485, "eval_runtime": 1119.616, "eval_samples_per_second": 14.297, "eval_steps_per_second": 1.787, "eval_wer": 0.2537555992570742, "step": 1500 }, { "epoch": 0.12, "learning_rate": 6.941435063155818e-05, "loss": 0.3828, "step": 1600 }, { "epoch": 0.12, "learning_rate": 6.933855854659593e-05, "loss": 0.3852, "step": 1700 }, { "epoch": 0.13, "learning_rate": 6.925820247900854e-05, "loss": 0.3749, "step": 1800 }, { "epoch": 0.14, "learning_rate": 6.917329310902582e-05, "loss": 0.3853, "step": 1900 }, { "epoch": 0.15, "learning_rate": 6.90838417220629e-05, "loss": 0.3692, "step": 2000 }, { "epoch": 0.15, "learning_rate": 6.898986020722038e-05, "loss": 0.3676, "step": 2100 }, { "epoch": 0.16, "learning_rate": 6.889136105570403e-05, "loss": 0.3697, "step": 2200 }, { "epoch": 0.17, "learning_rate": 6.878835735916458e-05, "loss": 0.3608, "step": 2300 }, { "epoch": 0.18, "learning_rate": 6.868086280795778e-05, "loss": 0.3515, "step": 2400 }, { "epoch": 0.18, "learning_rate": 6.85688916893247e-05, "loss": 0.3383, "step": 2500 }, { "epoch": 0.19, "learning_rate": 6.845245888549281e-05, "loss": 0.3421, "step": 2600 }, { "epoch": 0.2, "learning_rate": 6.833157987169802e-05, "loss": 0.3454, "step": 2700 }, { "epoch": 0.21, "learning_rate": 6.820627071412778e-05, "loss": 0.3499, "step": 2800 }, { "epoch": 0.21, "learning_rate": 6.807654806778575e-05, "loss": 0.3411, "step": 2900 }, { "epoch": 0.22, "learning_rate": 6.794242917427811e-05, "loss": 0.3422, "step": 3000 }, { "epoch": 0.22, "eval_loss": 0.21751175820827484, "eval_runtime": 1050.6432, "eval_samples_per_second": 15.235, "eval_steps_per_second": 1.905, "eval_wer": 0.23247159401289194, "step": 3000 }, { "epoch": 0.23, "learning_rate": 6.780393185952203e-05, "loss": 0.3378, "step": 3100 }, { "epoch": 0.23, "learning_rate": 6.766107453137634e-05, "loss": 0.3354, "step": 3200 }, { "epoch": 0.24, "learning_rate": 6.751387617719493e-05, "loss": 0.3332, "step": 3300 }, { "epoch": 0.25, "learning_rate": 6.736235636130315e-05, "loss": 0.3313, "step": 3400 }, { "epoch": 0.26, "learning_rate": 6.720653522239741e-05, "loss": 0.3291, "step": 3500 }, { "epoch": 0.26, "learning_rate": 6.704643347086866e-05, "loss": 0.3225, "step": 3600 }, { "epoch": 0.27, "learning_rate": 6.688207238604962e-05, "loss": 0.3314, "step": 3700 }, { "epoch": 0.28, "learning_rate": 6.671347381338648e-05, "loss": 0.3279, "step": 3800 }, { "epoch": 0.29, "learning_rate": 6.654066016153562e-05, "loss": 0.3248, "step": 3900 }, { "epoch": 0.29, "learning_rate": 6.636365439938497e-05, "loss": 0.3184, "step": 4000 }, { "epoch": 0.3, "learning_rate": 6.618248005300135e-05, "loss": 0.3312, "step": 4100 }, { "epoch": 0.31, "learning_rate": 6.599716120250359e-05, "loss": 0.3156, "step": 4200 }, { "epoch": 0.32, "learning_rate": 6.58077224788619e-05, "loss": 0.32, "step": 4300 }, { "epoch": 0.32, "learning_rate": 6.561418906062424e-05, "loss": 0.3143, "step": 4400 }, { "epoch": 0.33, "learning_rate": 6.541658667056979e-05, "loss": 0.3151, "step": 4500 }, { "epoch": 0.33, "eval_loss": 0.2139398157596588, "eval_runtime": 1046.9527, "eval_samples_per_second": 15.289, "eval_steps_per_second": 1.911, "eval_wer": 0.19961351469463565, "step": 4500 }, { "epoch": 0.34, "learning_rate": 6.521494157229007e-05, "loss": 0.3228, "step": 4600 }, { "epoch": 0.34, "learning_rate": 6.50092805666982e-05, "loss": 0.3223, "step": 4700 }, { "epoch": 0.35, "learning_rate": 6.47996309884668e-05, "loss": 0.3186, "step": 4800 }, { "epoch": 0.36, "learning_rate": 6.45860207023949e-05, "loss": 0.3069, "step": 4900 }, { "epoch": 0.37, "learning_rate": 6.436847809970438e-05, "loss": 0.3149, "step": 5000 }, { "epoch": 0.37, "learning_rate": 6.41470320942664e-05, "loss": 0.3152, "step": 5100 }, { "epoch": 0.38, "learning_rate": 6.392171211875852e-05, "loss": 0.3054, "step": 5200 }, { "epoch": 0.39, "learning_rate": 6.369485868905532e-05, "loss": 0.3196, "step": 5300 }, { "epoch": 0.4, "learning_rate": 6.346191911024053e-05, "loss": 0.3011, "step": 5400 }, { "epoch": 0.4, "learning_rate": 6.322519662061658e-05, "loss": 0.3103, "step": 5500 }, { "epoch": 0.41, "learning_rate": 6.298472268327846e-05, "loss": 0.305, "step": 5600 }, { "epoch": 0.42, "learning_rate": 6.274052925993097e-05, "loss": 0.3043, "step": 5700 }, { "epoch": 0.43, "learning_rate": 6.249264880664065e-05, "loss": 0.3031, "step": 5800 }, { "epoch": 0.43, "learning_rate": 6.224111426952202e-05, "loss": 0.2988, "step": 5900 }, { "epoch": 0.44, "learning_rate": 6.198595908035864e-05, "loss": 0.3, "step": 6000 }, { "epoch": 0.44, "eval_loss": 0.20627757906913757, "eval_runtime": 1055.8743, "eval_samples_per_second": 15.16, "eval_steps_per_second": 1.895, "eval_wer": 0.20541079427510106, "step": 6000 }, { "epoch": 0.45, "learning_rate": 6.172721715215964e-05, "loss": 0.2983, "step": 6100 }, { "epoch": 0.45, "learning_rate": 6.146492287465236e-05, "loss": 0.3012, "step": 6200 }, { "epoch": 0.46, "learning_rate": 6.119911110971146e-05, "loss": 0.2989, "step": 6300 }, { "epoch": 0.47, "learning_rate": 6.092981718672549e-05, "loss": 0.2909, "step": 6400 }, { "epoch": 0.48, "learning_rate": 6.065707689790118e-05, "loss": 0.3063, "step": 6500 }, { "epoch": 0.48, "learning_rate": 6.03809264935062e-05, "loss": 0.2908, "step": 6600 }, { "epoch": 0.49, "learning_rate": 6.0101402677051154e-05, "loss": 0.309, "step": 6700 }, { "epoch": 0.5, "learning_rate": 5.981854260041124e-05, "loss": 0.2891, "step": 6800 }, { "epoch": 0.51, "learning_rate": 5.9532383858888345e-05, "loss": 0.2902, "step": 6900 }, { "epoch": 0.51, "learning_rate": 5.924296448621422e-05, "loss": 0.289, "step": 7000 }, { "epoch": 0.52, "learning_rate": 5.8950322949495356e-05, "loss": 0.2951, "step": 7100 }, { "epoch": 0.53, "learning_rate": 5.8654498144100274e-05, "loss": 0.2923, "step": 7200 }, { "epoch": 0.54, "learning_rate": 5.835552938848987e-05, "loss": 0.2967, "step": 7300 }, { "epoch": 0.54, "learning_rate": 5.805345641899159e-05, "loss": 0.2863, "step": 7400 }, { "epoch": 0.55, "learning_rate": 5.774831938451798e-05, "loss": 0.2824, "step": 7500 }, { "epoch": 0.55, "eval_loss": 0.18351121246814728, "eval_runtime": 1075.697, "eval_samples_per_second": 14.881, "eval_steps_per_second": 1.86, "eval_wer": 0.18133398885611274, "step": 7500 }, { "epoch": 0.56, "learning_rate": 5.744325527887681e-05, "loss": 0.2833, "step": 7600 }, { "epoch": 0.56, "learning_rate": 5.713214180625491e-05, "loss": 0.2909, "step": 7700 }, { "epoch": 0.57, "learning_rate": 5.68180867217851e-05, "loss": 0.286, "step": 7800 }, { "epoch": 0.58, "learning_rate": 5.650113176693846e-05, "loss": 0.2778, "step": 7900 }, { "epoch": 0.59, "learning_rate": 5.618131906861165e-05, "loss": 0.2777, "step": 8000 }, { "epoch": 0.59, "learning_rate": 5.5858691133527713e-05, "loss": 0.2829, "step": 8100 }, { "epoch": 0.6, "learning_rate": 5.553329084258652e-05, "loss": 0.2748, "step": 8200 }, { "epoch": 0.61, "learning_rate": 5.5205161445165346e-05, "loss": 0.2827, "step": 8300 }, { "epoch": 0.62, "learning_rate": 5.4874346553370585e-05, "loss": 0.2712, "step": 8400 }, { "epoch": 0.62, "learning_rate": 5.4540890136241195e-05, "loss": 0.2792, "step": 8500 }, { "epoch": 0.63, "learning_rate": 5.420483651390469e-05, "loss": 0.2815, "step": 8600 }, { "epoch": 0.64, "learning_rate": 5.386623035168656e-05, "loss": 0.2766, "step": 8700 }, { "epoch": 0.65, "learning_rate": 5.3525116654173646e-05, "loss": 0.2704, "step": 8800 }, { "epoch": 0.65, "learning_rate": 5.318154075923263e-05, "loss": 0.2695, "step": 8900 }, { "epoch": 0.66, "learning_rate": 5.283554833198404e-05, "loss": 0.2771, "step": 9000 }, { "epoch": 0.66, "eval_loss": 0.18808312714099884, "eval_runtime": 1061.3581, "eval_samples_per_second": 15.082, "eval_steps_per_second": 1.885, "eval_wer": 0.17624685895334863, "step": 9000 }, { "epoch": 0.67, "learning_rate": 5.2487185358732866e-05, "loss": 0.276, "step": 9100 }, { "epoch": 0.67, "learning_rate": 5.213649814085646e-05, "loss": 0.2647, "step": 9200 }, { "epoch": 0.68, "learning_rate": 5.178353328865057e-05, "loss": 0.2901, "step": 9300 }, { "epoch": 0.69, "learning_rate": 5.142833771513431e-05, "loss": 0.2771, "step": 9400 }, { "epoch": 0.7, "learning_rate": 5.107095862981481e-05, "loss": 0.2706, "step": 9500 }, { "epoch": 0.7, "learning_rate": 5.071144353241269e-05, "loss": 0.2753, "step": 9600 }, { "epoch": 0.71, "learning_rate": 5.0353466418956284e-05, "loss": 0.2777, "step": 9700 }, { "epoch": 0.72, "learning_rate": 4.9989843088674705e-05, "loss": 0.2621, "step": 9800 }, { "epoch": 0.73, "learning_rate": 4.962422743878782e-05, "loss": 0.2628, "step": 9900 }, { "epoch": 0.73, "learning_rate": 4.9256668063748734e-05, "loss": 0.272, "step": 10000 }, { "epoch": 0.74, "learning_rate": 4.8887213816353655e-05, "loss": 0.2683, "step": 10100 }, { "epoch": 0.75, "learning_rate": 4.851591380124868e-05, "loss": 0.2627, "step": 10200 }, { "epoch": 0.76, "learning_rate": 4.814281736840332e-05, "loss": 0.2565, "step": 10300 }, { "epoch": 0.76, "learning_rate": 4.776797410655135e-05, "loss": 0.2619, "step": 10400 }, { "epoch": 0.77, "learning_rate": 4.739143383659982e-05, "loss": 0.2616, "step": 10500 }, { "epoch": 0.77, "eval_loss": 0.18062810599803925, "eval_runtime": 1044.5923, "eval_samples_per_second": 15.324, "eval_steps_per_second": 1.916, "eval_wer": 0.17676581448705342, "step": 10500 }, { "epoch": 0.78, "learning_rate": 4.701324660500736e-05, "loss": 0.2575, "step": 10600 }, { "epoch": 0.79, "learning_rate": 4.663346267713244e-05, "loss": 0.2642, "step": 10700 }, { "epoch": 0.79, "learning_rate": 4.625213253055248e-05, "loss": 0.2561, "step": 10800 }, { "epoch": 0.8, "learning_rate": 4.586930684835486e-05, "loss": 0.2553, "step": 10900 }, { "epoch": 0.81, "learning_rate": 4.5485036512400575e-05, "loss": 0.2533, "step": 11000 }, { "epoch": 0.81, "learning_rate": 4.509937259656139e-05, "loss": 0.2528, "step": 11100 }, { "epoch": 0.82, "learning_rate": 4.471236635993164e-05, "loss": 0.2542, "step": 11200 }, { "epoch": 0.83, "learning_rate": 4.432406924001522e-05, "loss": 0.2506, "step": 11300 }, { "epoch": 0.84, "learning_rate": 4.393453284588905e-05, "loss": 0.2587, "step": 11400 }, { "epoch": 0.84, "learning_rate": 4.3543808951343574e-05, "loss": 0.2503, "step": 11500 }, { "epoch": 0.85, "learning_rate": 4.3151949488001475e-05, "loss": 0.2535, "step": 11600 }, { "epoch": 0.86, "learning_rate": 4.275900653841536e-05, "loss": 0.2526, "step": 11700 }, { "epoch": 0.87, "learning_rate": 4.236503232914543e-05, "loss": 0.253, "step": 11800 }, { "epoch": 0.87, "learning_rate": 4.197007922381793e-05, "loss": 0.2523, "step": 11900 }, { "epoch": 0.88, "learning_rate": 4.157419971616547e-05, "loss": 0.2446, "step": 12000 }, { "epoch": 0.88, "eval_loss": 0.1757470816373825, "eval_runtime": 1062.3089, "eval_samples_per_second": 15.068, "eval_steps_per_second": 1.884, "eval_wer": 0.1589711023708074, "step": 12000 }, { "epoch": 0.89, "learning_rate": 4.1177446423050005e-05, "loss": 0.2473, "step": 12100 }, { "epoch": 0.9, "learning_rate": 4.077987207746943e-05, "loss": 0.2438, "step": 12200 }, { "epoch": 0.9, "learning_rate": 4.0381529521548834e-05, "loss": 0.2468, "step": 12300 }, { "epoch": 0.91, "learning_rate": 3.998247169951711e-05, "loss": 0.2524, "step": 12400 }, { "epoch": 0.92, "learning_rate": 3.958275165067014e-05, "loss": 0.2372, "step": 12500 }, { "epoch": 0.92, "learning_rate": 3.91824225023212e-05, "loss": 0.2324, "step": 12600 }, { "epoch": 0.93, "learning_rate": 3.8785548889903e-05, "loss": 0.2436, "step": 12700 }, { "epoch": 0.94, "learning_rate": 3.8384166003361756e-05, "loss": 0.2405, "step": 12800 }, { "epoch": 0.95, "learning_rate": 3.798635370086602e-05, "loss": 0.2349, "step": 12900 }, { "epoch": 0.95, "learning_rate": 3.7584128333900755e-05, "loss": 0.2424, "step": 13000 }, { "epoch": 0.96, "learning_rate": 3.7181559507066575e-05, "loss": 0.2366, "step": 13100 }, { "epoch": 0.97, "learning_rate": 3.677870072631157e-05, "loss": 0.2435, "step": 13200 }, { "epoch": 0.98, "learning_rate": 3.637560553612199e-05, "loss": 0.2377, "step": 13300 }, { "epoch": 0.98, "learning_rate": 3.597232751240556e-05, "loss": 0.2302, "step": 13400 }, { "epoch": 0.99, "learning_rate": 3.556892025537066e-05, "loss": 0.2377, "step": 13500 }, { "epoch": 0.99, "eval_loss": 0.1588028222322464, "eval_runtime": 1044.908, "eval_samples_per_second": 15.319, "eval_steps_per_second": 1.915, "eval_wer": 0.15275046432863543, "step": 13500 }, { "epoch": 1.0, "learning_rate": 3.516543738240223e-05, "loss": 0.2313, "step": 13600 }, { "epoch": 1.01, "learning_rate": 3.476193252093543e-05, "loss": 0.22, "step": 13700 }, { "epoch": 1.01, "learning_rate": 3.4358459301327927e-05, "loss": 0.214, "step": 13800 }, { "epoch": 1.02, "learning_rate": 3.395507134973183e-05, "loss": 0.2257, "step": 13900 }, { "epoch": 1.03, "learning_rate": 3.355182228096618e-05, "loss": 0.2308, "step": 14000 }, { "epoch": 1.03, "learning_rate": 3.314876569139091e-05, "loss": 0.2244, "step": 14100 }, { "epoch": 1.04, "learning_rate": 3.274595515178329e-05, "loss": 0.2176, "step": 14200 }, { "epoch": 1.05, "learning_rate": 3.234344420021777e-05, "loss": 0.2238, "step": 14300 }, { "epoch": 1.06, "learning_rate": 3.194128633495017e-05, "loss": 0.219, "step": 14400 }, { "epoch": 1.06, "learning_rate": 3.153953500730713e-05, "loss": 0.2265, "step": 14500 }, { "epoch": 1.07, "learning_rate": 3.113824361458186e-05, "loss": 0.2218, "step": 14600 }, { "epoch": 1.08, "learning_rate": 3.073746549293703e-05, "loss": 0.2129, "step": 14700 }, { "epoch": 1.09, "learning_rate": 3.0337253910315748e-05, "loss": 0.2126, "step": 14800 }, { "epoch": 1.09, "learning_rate": 2.993766205936171e-05, "loss": 0.2047, "step": 14900 }, { "epoch": 1.1, "learning_rate": 2.9538743050349254e-05, "loss": 0.2141, "step": 15000 }, { "epoch": 1.1, "eval_loss": 0.14499780535697937, "eval_runtime": 1046.3411, "eval_samples_per_second": 15.298, "eval_steps_per_second": 1.912, "eval_wer": 0.14962307440183548, "step": 15000 }, { "epoch": 1.11, "learning_rate": 2.9140549904124422e-05, "loss": 0.2066, "step": 15100 }, { "epoch": 1.12, "learning_rate": 2.8743135545057887e-05, "loss": 0.2124, "step": 15200 }, { "epoch": 1.12, "learning_rate": 2.8346552794010703e-05, "loss": 0.2089, "step": 15300 }, { "epoch": 1.13, "learning_rate": 2.7950854361313814e-05, "loss": 0.2121, "step": 15400 }, { "epoch": 1.14, "learning_rate": 2.755609283976226e-05, "loss": 0.209, "step": 15500 }, { "epoch": 1.14, "learning_rate": 2.7162320697625e-05, "loss": 0.2052, "step": 15600 }, { "epoch": 1.15, "learning_rate": 2.676959027167128e-05, "loss": 0.209, "step": 15700 }, { "epoch": 1.16, "learning_rate": 2.6377953760214495e-05, "loss": 0.2089, "step": 15800 }, { "epoch": 1.17, "learning_rate": 2.598746321617443e-05, "loss": 0.2021, "step": 15900 }, { "epoch": 1.17, "learning_rate": 2.5598170540158846e-05, "loss": 0.2047, "step": 16000 }, { "epoch": 1.18, "learning_rate": 2.5210127473565314e-05, "loss": 0.1979, "step": 16100 }, { "epoch": 1.19, "learning_rate": 2.482338559170417e-05, "loss": 0.2044, "step": 16200 }, { "epoch": 1.2, "learning_rate": 2.4437996296943596e-05, "loss": 0.2082, "step": 16300 }, { "epoch": 1.2, "learning_rate": 2.4057843550135512e-05, "loss": 0.2004, "step": 16400 }, { "epoch": 1.21, "learning_rate": 2.3675298110320073e-05, "loss": 0.1953, "step": 16500 }, { "epoch": 1.21, "eval_loss": 0.13918258249759674, "eval_runtime": 1060.8068, "eval_samples_per_second": 15.089, "eval_steps_per_second": 1.886, "eval_wer": 0.13435485633125752, "step": 16500 }, { "epoch": 1.22, "learning_rate": 2.3294257851410495e-05, "loss": 0.1984, "step": 16600 }, { "epoch": 1.23, "learning_rate": 2.2914773417964826e-05, "loss": 0.1972, "step": 16700 }, { "epoch": 1.23, "learning_rate": 2.2536895247754305e-05, "loss": 0.194, "step": 16800 }, { "epoch": 1.24, "learning_rate": 2.2160673565059625e-05, "loss": 0.2016, "step": 16900 }, { "epoch": 1.25, "learning_rate": 2.1786158373995577e-05, "loss": 0.1973, "step": 17000 }, { "epoch": 1.25, "learning_rate": 2.1413399451864916e-05, "loss": 0.1872, "step": 17100 }, { "epoch": 1.26, "learning_rate": 2.1042446342542387e-05, "loss": 0.2035, "step": 17200 }, { "epoch": 1.27, "learning_rate": 2.0673348349889817e-05, "loss": 0.1937, "step": 17300 }, { "epoch": 1.28, "learning_rate": 2.0306154531203048e-05, "loss": 0.1938, "step": 17400 }, { "epoch": 1.28, "learning_rate": 1.994091369069168e-05, "loss": 0.1953, "step": 17500 }, { "epoch": 1.29, "learning_rate": 1.957767437299243e-05, "loss": 0.1887, "step": 17600 }, { "epoch": 1.3, "learning_rate": 1.9216484856717008e-05, "loss": 0.1921, "step": 17700 }, { "epoch": 1.31, "learning_rate": 1.8857393148035336e-05, "loss": 0.1917, "step": 17800 }, { "epoch": 1.31, "learning_rate": 1.8500446974295e-05, "loss": 0.1836, "step": 17900 }, { "epoch": 1.32, "learning_rate": 1.8145693777677743e-05, "loss": 0.1923, "step": 18000 }, { "epoch": 1.32, "eval_loss": 0.1327279508113861, "eval_runtime": 1039.577, "eval_samples_per_second": 15.398, "eval_steps_per_second": 1.925, "eval_wer": 0.13173959357587675, "step": 18000 }, { "epoch": 1.33, "learning_rate": 1.779669459685722e-05, "loss": 0.1927, "step": 18100 }, { "epoch": 1.34, "learning_rate": 1.7446445408141307e-05, "loss": 0.1959, "step": 18200 }, { "epoch": 1.34, "learning_rate": 1.7098529285272e-05, "loss": 0.1872, "step": 18300 }, { "epoch": 1.35, "learning_rate": 1.6752992470235188e-05, "loss": 0.1848, "step": 18400 }, { "epoch": 1.36, "learning_rate": 1.640988088877985e-05, "loss": 0.184, "step": 18500 }, { "epoch": 1.36, "learning_rate": 1.6069240144314012e-05, "loss": 0.1919, "step": 18600 }, { "epoch": 1.37, "learning_rate": 1.5731115511843525e-05, "loss": 0.176, "step": 18700 }, { "epoch": 1.38, "learning_rate": 1.5395551931954524e-05, "loss": 0.1789, "step": 18800 }, { "epoch": 1.39, "learning_rate": 1.5062594004840269e-05, "loss": 0.1937, "step": 18900 }, { "epoch": 1.39, "learning_rate": 1.4732285984373345e-05, "loss": 0.1861, "step": 19000 }, { "epoch": 1.4, "learning_rate": 1.440467177222377e-05, "loss": 0.1757, "step": 19100 }, { "epoch": 1.41, "learning_rate": 1.4079794912023988e-05, "loss": 0.1839, "step": 19200 }, { "epoch": 1.42, "learning_rate": 1.3757698583581431e-05, "loss": 0.1844, "step": 19300 }, { "epoch": 1.42, "learning_rate": 1.3438425597139414e-05, "loss": 0.1855, "step": 19400 }, { "epoch": 1.43, "learning_rate": 1.3122018387687183e-05, "loss": 0.1804, "step": 19500 }, { "epoch": 1.43, "eval_loss": 0.12711018323898315, "eval_runtime": 1058.0447, "eval_samples_per_second": 15.129, "eval_steps_per_second": 1.891, "eval_wer": 0.12372992461488037, "step": 19500 }, { "epoch": 1.44, "learning_rate": 1.280851900931984e-05, "loss": 0.1833, "step": 19600 }, { "epoch": 1.45, "learning_rate": 1.2497969129648841e-05, "loss": 0.181, "step": 19700 }, { "epoch": 1.45, "learning_rate": 1.2190410024263938e-05, "loss": 0.1719, "step": 19800 }, { "epoch": 1.46, "learning_rate": 1.1885882571247166e-05, "loss": 0.1758, "step": 19900 }, { "epoch": 1.47, "learning_rate": 1.1584427245739682e-05, "loss": 0.1792, "step": 20000 }, { "epoch": 1.47, "learning_rate": 1.1286084114562175e-05, "loss": 0.1774, "step": 20100 }, { "epoch": 1.48, "learning_rate": 1.0990892830889517e-05, "loss": 0.1796, "step": 20200 }, { "epoch": 1.49, "learning_rate": 1.0698892628980422e-05, "loss": 0.1816, "step": 20300 }, { "epoch": 1.5, "learning_rate": 1.041012231896276e-05, "loss": 0.174, "step": 20400 }, { "epoch": 1.5, "learning_rate": 1.012462028167525e-05, "loss": 0.1717, "step": 20500 }, { "epoch": 1.51, "learning_rate": 9.842424463566227e-06, "loss": 0.1793, "step": 20600 }, { "epoch": 1.52, "learning_rate": 9.563572371650113e-06, "loss": 0.1699, "step": 20700 }, { "epoch": 1.53, "learning_rate": 9.288101068522322e-06, "loss": 0.1726, "step": 20800 }, { "epoch": 1.53, "learning_rate": 9.016047167433221e-06, "loss": 0.1734, "step": 20900 }, { "epoch": 1.54, "learning_rate": 8.747446827421805e-06, "loss": 0.1776, "step": 21000 }, { "epoch": 1.54, "eval_loss": 0.12307832390069962, "eval_runtime": 1040.1826, "eval_samples_per_second": 15.389, "eval_steps_per_second": 1.924, "eval_wer": 0.1186018245384027, "step": 21000 }, { "epoch": 1.55, "learning_rate": 8.482335748509769e-06, "loss": 0.1755, "step": 21100 }, { "epoch": 1.56, "learning_rate": 8.220749166956552e-06, "loss": 0.1717, "step": 21200 }, { "epoch": 1.56, "learning_rate": 7.962721850576054e-06, "loss": 0.167, "step": 21300 }, { "epoch": 1.57, "learning_rate": 7.708288094115607e-06, "loss": 0.1698, "step": 21400 }, { "epoch": 1.58, "learning_rate": 7.457481714697784e-06, "loss": 0.1709, "step": 21500 }, { "epoch": 1.58, "learning_rate": 7.210336047325761e-06, "loss": 0.1748, "step": 21600 }, { "epoch": 1.59, "learning_rate": 6.9668839404526865e-06, "loss": 0.1776, "step": 21700 }, { "epoch": 1.6, "learning_rate": 6.727157751615771e-06, "loss": 0.1664, "step": 21800 }, { "epoch": 1.61, "learning_rate": 6.491189343135589e-06, "loss": 0.1754, "step": 21900 }, { "epoch": 1.61, "learning_rate": 6.2590100778812376e-06, "loss": 0.1766, "step": 22000 }, { "epoch": 1.62, "learning_rate": 6.030650815101828e-06, "loss": 0.171, "step": 22100 }, { "epoch": 1.63, "learning_rate": 5.808367837755271e-06, "loss": 0.1703, "step": 22200 }, { "epoch": 1.64, "learning_rate": 5.5877001747984834e-06, "loss": 0.164, "step": 22300 }, { "epoch": 1.64, "learning_rate": 5.3709417389918604e-06, "loss": 0.1664, "step": 22400 }, { "epoch": 1.65, "learning_rate": 5.158121339981953e-06, "loss": 0.1671, "step": 22500 }, { "epoch": 1.65, "eval_loss": 0.11993325501680374, "eval_runtime": 1045.6599, "eval_samples_per_second": 15.308, "eval_steps_per_second": 1.914, "eval_wer": 0.11479159838304381, "step": 22500 }, { "epoch": 1.66, "learning_rate": 4.949267264005701e-06, "loss": 0.1576, "step": 22600 }, { "epoch": 1.67, "learning_rate": 4.7444072701308795e-06, "loss": 0.1583, "step": 22700 }, { "epoch": 1.67, "learning_rate": 4.543568586566601e-06, "loss": 0.1678, "step": 22800 }, { "epoch": 1.68, "learning_rate": 4.346777907044375e-06, "loss": 0.1687, "step": 22900 }, { "epoch": 1.69, "learning_rate": 4.154061387270205e-06, "loss": 0.1671, "step": 23000 }, { "epoch": 1.69, "learning_rate": 3.965444641448219e-06, "loss": 0.1656, "step": 23100 }, { "epoch": 1.7, "learning_rate": 3.780952738876231e-06, "loss": 0.169, "step": 23200 }, { "epoch": 1.71, "learning_rate": 3.600610200613753e-06, "loss": 0.1619, "step": 23300 }, { "epoch": 1.72, "learning_rate": 3.4244409962228724e-06, "loss": 0.1702, "step": 23400 }, { "epoch": 1.72, "learning_rate": 3.252468540582438e-06, "loss": 0.1654, "step": 23500 }, { "epoch": 1.73, "learning_rate": 3.0847156907759337e-06, "loss": 0.1593, "step": 23600 }, { "epoch": 1.74, "learning_rate": 2.92120474305353e-06, "loss": 0.1622, "step": 23700 }, { "epoch": 1.75, "learning_rate": 2.7619574298686577e-06, "loss": 0.1653, "step": 23800 }, { "epoch": 1.75, "learning_rate": 2.6069949169895127e-06, "loss": 0.1637, "step": 23900 }, { "epoch": 1.76, "learning_rate": 2.4578229939112028e-06, "loss": 0.1597, "step": 24000 }, { "epoch": 1.76, "eval_loss": 0.11753135174512863, "eval_runtime": 1040.817, "eval_samples_per_second": 15.379, "eval_steps_per_second": 1.923, "eval_wer": 0.11268846279908226, "step": 24000 }, { "epoch": 1.77, "learning_rate": 2.311447946777479e-06, "loss": 0.1599, "step": 24100 }, { "epoch": 1.78, "learning_rate": 2.1694175777527574e-06, "loss": 0.1628, "step": 24200 }, { "epoch": 1.78, "learning_rate": 2.0317507642787156e-06, "loss": 0.1638, "step": 24300 }, { "epoch": 1.79, "learning_rate": 1.898465803831184e-06, "loss": 0.1651, "step": 24400 }, { "epoch": 1.8, "learning_rate": 1.7695804114881745e-06, "loss": 0.1629, "step": 24500 }, { "epoch": 1.8, "learning_rate": 1.6451117175753708e-06, "loss": 0.1699, "step": 24600 }, { "epoch": 1.81, "learning_rate": 1.5250762653892972e-06, "loss": 0.1578, "step": 24700 }, { "epoch": 1.82, "learning_rate": 1.4094900089985423e-06, "loss": 0.1648, "step": 24800 }, { "epoch": 1.83, "learning_rate": 1.2983683111232683e-06, "loss": 0.1607, "step": 24900 }, { "epoch": 1.83, "learning_rate": 1.1917259410933516e-06, "loss": 0.1593, "step": 25000 }, { "epoch": 1.84, "learning_rate": 1.0895770728853425e-06, "loss": 0.159, "step": 25100 }, { "epoch": 1.85, "learning_rate": 9.919352832386174e-07, "loss": 0.1608, "step": 25200 }, { "epoch": 1.86, "learning_rate": 8.988135498508481e-07, "loss": 0.1619, "step": 25300 }, { "epoch": 1.86, "learning_rate": 8.102242496531358e-07, "loss": 0.163, "step": 25400 }, { "epoch": 1.87, "learning_rate": 7.261791571649655e-07, "loss": 0.1619, "step": 25500 }, { "epoch": 1.87, "eval_loss": 0.11700794845819473, "eval_runtime": 1036.7542, "eval_samples_per_second": 15.44, "eval_steps_per_second": 1.93, "eval_wer": 0.11198514148366656, "step": 25500 }, { "epoch": 1.88, "learning_rate": 6.466894429292585e-07, "loss": 0.1627, "step": 25600 }, { "epoch": 1.89, "learning_rate": 5.717656720276581e-07, "loss": 0.165, "step": 25700 }, { "epoch": 1.89, "learning_rate": 5.014178026763216e-07, "loss": 0.1685, "step": 25800 }, { "epoch": 1.9, "learning_rate": 4.356551849023648e-07, "loss": 0.1632, "step": 25900 }, { "epoch": 1.91, "learning_rate": 3.7448655930113146e-07, "loss": 0.164, "step": 26000 }, { "epoch": 1.91, "learning_rate": 3.179200558744649e-07, "loss": 0.1649, "step": 26100 }, { "epoch": 1.92, "learning_rate": 2.6596319295015436e-07, "loss": 0.1583, "step": 26200 }, { "epoch": 1.93, "learning_rate": 2.1862287618264806e-07, "loss": 0.1629, "step": 26300 }, { "epoch": 1.94, "learning_rate": 1.7630967021918575e-07, "loss": 0.1608, "step": 26400 }, { "epoch": 1.94, "learning_rate": 1.3817439628416527e-07, "loss": 0.1629, "step": 26500 }, { "epoch": 1.95, "learning_rate": 1.0467265308166828e-07, "loss": 0.1656, "step": 26600 }, { "epoch": 1.96, "learning_rate": 7.58088933720985e-08, "loss": 0.1638, "step": 26700 }, { "epoch": 1.97, "learning_rate": 5.158695347542152e-08, "loss": 0.164, "step": 26800 }, { "epoch": 1.97, "learning_rate": 3.2010052761280434e-08, "loss": 0.1632, "step": 26900 }, { "epoch": 1.98, "learning_rate": 1.708079322109368e-08, "loss": 0.1664, "step": 27000 }, { "epoch": 1.98, "eval_loss": 0.11697087436914444, "eval_runtime": 1037.0691, "eval_samples_per_second": 15.435, "eval_steps_per_second": 1.929, "eval_wer": 0.11169152190538621, "step": 27000 } ], "max_steps": 27260, "num_train_epochs": 2, "total_flos": 4.0022174178965815e+20, "trial_name": null, "trial_params": null }