{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.5, "global_step": 34075, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.675e-06, "loss": 8.685, "step": 100 }, { "epoch": 0.01, "learning_rate": 7.425e-06, "loss": 3.0395, "step": 200 }, { "epoch": 0.02, "learning_rate": 1.1174999999999999e-05, "loss": 2.9522, "step": 300 }, { "epoch": 0.03, "learning_rate": 1.4925e-05, "loss": 2.4455, "step": 400 }, { "epoch": 0.04, "learning_rate": 1.8675e-05, "loss": 1.7543, "step": 500 }, { "epoch": 0.04, "learning_rate": 2.2424999999999996e-05, "loss": 1.4833, "step": 600 }, { "epoch": 0.05, "learning_rate": 2.6174999999999996e-05, "loss": 1.2163, "step": 700 }, { "epoch": 0.06, "learning_rate": 2.9925e-05, "loss": 1.1407, "step": 800 }, { "epoch": 0.07, "learning_rate": 3.3675e-05, "loss": 1.1119, "step": 900 }, { "epoch": 0.07, "learning_rate": 3.7424999999999995e-05, "loss": 1.0826, "step": 1000 }, { "epoch": 0.07, "eval_loss": 0.46367982029914856, "eval_runtime": 1042.4054, "eval_samples_per_second": 15.344, "eval_steps_per_second": 1.919, "eval_wer": 0.4653905513746807, "step": 1000 }, { "epoch": 0.08, "learning_rate": 4.1175e-05, "loss": 1.0901, "step": 1100 }, { "epoch": 0.09, "learning_rate": 4.4924999999999994e-05, "loss": 1.0794, "step": 1200 }, { "epoch": 0.1, "learning_rate": 4.8675e-05, "loss": 1.0774, "step": 1300 }, { "epoch": 0.1, "learning_rate": 5.2424999999999994e-05, "loss": 1.0604, "step": 1400 }, { "epoch": 0.11, "learning_rate": 5.6175e-05, "loss": 1.084, "step": 1500 }, { "epoch": 0.12, "learning_rate": 5.9925e-05, "loss": 1.0959, "step": 1600 }, { "epoch": 0.12, "learning_rate": 6.367499999999999e-05, "loss": 1.0873, "step": 1700 }, { "epoch": 0.13, "learning_rate": 6.7425e-05, "loss": 1.0938, "step": 1800 }, { "epoch": 0.14, "learning_rate": 7.1175e-05, "loss": 1.1218, "step": 1900 }, { "epoch": 0.15, "learning_rate": 7.492499999999999e-05, "loss": 1.118, "step": 2000 }, { "epoch": 0.15, "eval_loss": 0.25947731733322144, "eval_runtime": 1033.4206, "eval_samples_per_second": 15.478, "eval_steps_per_second": 1.935, "eval_wer": 0.2686807708592267, "step": 2000 }, { "epoch": 0.15, "learning_rate": 7.477084957131722e-05, "loss": 1.1204, "step": 2100 }, { "epoch": 0.16, "learning_rate": 7.453702260327356e-05, "loss": 1.1267, "step": 2200 }, { "epoch": 0.17, "learning_rate": 7.430319563522992e-05, "loss": 1.1174, "step": 2300 }, { "epoch": 0.18, "learning_rate": 7.406936866718628e-05, "loss": 1.1229, "step": 2400 }, { "epoch": 0.18, "learning_rate": 7.383554169914263e-05, "loss": 1.1171, "step": 2500 }, { "epoch": 0.19, "learning_rate": 7.360171473109897e-05, "loss": 1.1209, "step": 2600 }, { "epoch": 0.2, "learning_rate": 7.336788776305533e-05, "loss": 1.1378, "step": 2700 }, { "epoch": 0.21, "learning_rate": 7.313406079501168e-05, "loss": 1.1187, "step": 2800 }, { "epoch": 0.21, "learning_rate": 7.290023382696804e-05, "loss": 1.1289, "step": 2900 }, { "epoch": 0.22, "learning_rate": 7.26664068589244e-05, "loss": 1.1268, "step": 3000 }, { "epoch": 0.22, "eval_loss": 0.26353907585144043, "eval_runtime": 1021.5774, "eval_samples_per_second": 15.657, "eval_steps_per_second": 1.958, "eval_wer": 0.26611306117431743, "step": 3000 }, { "epoch": 0.23, "learning_rate": 7.243257989088074e-05, "loss": 1.1198, "step": 3100 }, { "epoch": 0.23, "learning_rate": 7.219875292283709e-05, "loss": 1.1013, "step": 3200 }, { "epoch": 0.24, "learning_rate": 7.196492595479345e-05, "loss": 1.1215, "step": 3300 }, { "epoch": 0.25, "learning_rate": 7.17310989867498e-05, "loss": 1.1323, "step": 3400 }, { "epoch": 0.26, "learning_rate": 7.149727201870615e-05, "loss": 1.1317, "step": 3500 }, { "epoch": 0.26, "learning_rate": 7.126344505066251e-05, "loss": 1.1058, "step": 3600 }, { "epoch": 0.27, "learning_rate": 7.102961808261886e-05, "loss": 1.0934, "step": 3700 }, { "epoch": 0.28, "learning_rate": 7.07957911145752e-05, "loss": 1.1386, "step": 3800 }, { "epoch": 0.29, "learning_rate": 7.056196414653156e-05, "loss": 1.0968, "step": 3900 }, { "epoch": 0.29, "learning_rate": 7.032813717848791e-05, "loss": 1.0919, "step": 4000 }, { "epoch": 0.29, "eval_loss": 0.24173684418201447, "eval_runtime": 1025.0912, "eval_samples_per_second": 15.603, "eval_steps_per_second": 1.951, "eval_wer": 0.256627559173416, "step": 4000 }, { "epoch": 0.3, "learning_rate": 7.009431021044427e-05, "loss": 1.1112, "step": 4100 }, { "epoch": 0.31, "learning_rate": 6.986048324240062e-05, "loss": 1.1122, "step": 4200 }, { "epoch": 0.32, "learning_rate": 6.962665627435696e-05, "loss": 1.1185, "step": 4300 }, { "epoch": 0.32, "learning_rate": 6.939282930631332e-05, "loss": 1.1078, "step": 4400 }, { "epoch": 0.33, "learning_rate": 6.915900233826968e-05, "loss": 1.0842, "step": 4500 }, { "epoch": 0.34, "learning_rate": 6.892517537022603e-05, "loss": 1.0929, "step": 4600 }, { "epoch": 0.34, "learning_rate": 6.869368667186282e-05, "loss": 1.1068, "step": 4700 }, { "epoch": 0.35, "learning_rate": 6.845985970381916e-05, "loss": 1.1078, "step": 4800 }, { "epoch": 0.36, "learning_rate": 6.822603273577552e-05, "loss": 1.1208, "step": 4900 }, { "epoch": 0.37, "learning_rate": 6.799220576773187e-05, "loss": 1.1013, "step": 5000 }, { "epoch": 0.37, "eval_loss": 0.24144533276557922, "eval_runtime": 1024.2346, "eval_samples_per_second": 15.617, "eval_steps_per_second": 1.953, "eval_wer": 0.2567436524304465, "step": 5000 }, { "epoch": 0.37, "learning_rate": 6.775837879968823e-05, "loss": 1.0729, "step": 5100 }, { "epoch": 0.38, "learning_rate": 6.752455183164457e-05, "loss": 1.0884, "step": 5200 }, { "epoch": 0.39, "learning_rate": 6.729072486360093e-05, "loss": 1.0744, "step": 5300 }, { "epoch": 0.4, "learning_rate": 6.705689789555728e-05, "loss": 1.0939, "step": 5400 }, { "epoch": 0.4, "learning_rate": 6.682307092751364e-05, "loss": 1.0989, "step": 5500 }, { "epoch": 0.41, "learning_rate": 6.658924395946998e-05, "loss": 1.0922, "step": 5600 }, { "epoch": 0.42, "learning_rate": 6.635541699142633e-05, "loss": 1.0885, "step": 5700 }, { "epoch": 0.43, "learning_rate": 6.612159002338269e-05, "loss": 1.0909, "step": 5800 }, { "epoch": 0.43, "learning_rate": 6.588776305533905e-05, "loss": 1.0773, "step": 5900 }, { "epoch": 0.44, "learning_rate": 6.56539360872954e-05, "loss": 1.0898, "step": 6000 }, { "epoch": 0.44, "eval_loss": 0.25459304451942444, "eval_runtime": 1029.5775, "eval_samples_per_second": 15.535, "eval_steps_per_second": 1.943, "eval_wer": 0.27307182758102627, "step": 6000 }, { "epoch": 0.45, "learning_rate": 6.542010911925175e-05, "loss": 1.069, "step": 6100 }, { "epoch": 0.45, "learning_rate": 6.51862821512081e-05, "loss": 1.0899, "step": 6200 }, { "epoch": 0.46, "learning_rate": 6.495245518316445e-05, "loss": 1.0907, "step": 6300 }, { "epoch": 0.47, "learning_rate": 6.47186282151208e-05, "loss": 1.0875, "step": 6400 }, { "epoch": 0.48, "learning_rate": 6.448480124707717e-05, "loss": 1.0729, "step": 6500 }, { "epoch": 0.48, "learning_rate": 6.425097427903351e-05, "loss": 1.0823, "step": 6600 }, { "epoch": 0.49, "learning_rate": 6.401714731098986e-05, "loss": 1.0558, "step": 6700 }, { "epoch": 0.5, "learning_rate": 6.378332034294622e-05, "loss": 1.0742, "step": 6800 }, { "epoch": 0.51, "learning_rate": 6.354949337490256e-05, "loss": 1.0748, "step": 6900 }, { "epoch": 0.51, "learning_rate": 6.331566640685892e-05, "loss": 1.0808, "step": 7000 }, { "epoch": 0.51, "eval_loss": 0.23990128934383392, "eval_runtime": 1020.4541, "eval_samples_per_second": 15.674, "eval_steps_per_second": 1.96, "eval_wer": 0.25348621221847384, "step": 7000 }, { "epoch": 0.52, "learning_rate": 6.308183943881527e-05, "loss": 1.0688, "step": 7100 }, { "epoch": 0.53, "learning_rate": 6.284801247077163e-05, "loss": 1.065, "step": 7200 }, { "epoch": 0.54, "learning_rate": 6.261418550272797e-05, "loss": 1.0603, "step": 7300 }, { "epoch": 0.54, "learning_rate": 6.238035853468433e-05, "loss": 1.072, "step": 7400 }, { "epoch": 0.55, "learning_rate": 6.214653156664068e-05, "loss": 1.0736, "step": 7500 }, { "epoch": 0.56, "learning_rate": 6.191270459859704e-05, "loss": 1.066, "step": 7600 }, { "epoch": 0.56, "learning_rate": 6.167887763055338e-05, "loss": 1.0761, "step": 7700 }, { "epoch": 0.57, "learning_rate": 6.144505066250973e-05, "loss": 1.0824, "step": 7800 }, { "epoch": 0.58, "learning_rate": 6.121122369446609e-05, "loss": 1.0571, "step": 7900 }, { "epoch": 0.59, "learning_rate": 6.097973499610288e-05, "loss": 1.0719, "step": 8000 }, { "epoch": 0.59, "eval_loss": 0.23534023761749268, "eval_runtime": 1024.9025, "eval_samples_per_second": 15.606, "eval_steps_per_second": 1.951, "eval_wer": 0.2527691656309327, "step": 8000 }, { "epoch": 0.59, "learning_rate": 6.074590802805923e-05, "loss": 1.0739, "step": 8100 }, { "epoch": 0.6, "learning_rate": 6.051208106001558e-05, "loss": 1.0628, "step": 8200 }, { "epoch": 0.61, "learning_rate": 6.027825409197193e-05, "loss": 1.0791, "step": 8300 }, { "epoch": 0.62, "learning_rate": 6.004442712392829e-05, "loss": 1.0769, "step": 8400 }, { "epoch": 0.62, "learning_rate": 5.981060015588464e-05, "loss": 1.0729, "step": 8500 }, { "epoch": 0.63, "learning_rate": 5.9576773187840996e-05, "loss": 1.0848, "step": 8600 }, { "epoch": 0.64, "learning_rate": 5.934294621979734e-05, "loss": 1.0636, "step": 8700 }, { "epoch": 0.65, "learning_rate": 5.9109119251753694e-05, "loss": 1.0537, "step": 8800 }, { "epoch": 0.65, "learning_rate": 5.887529228371005e-05, "loss": 1.0623, "step": 8900 }, { "epoch": 0.66, "learning_rate": 5.86414653156664e-05, "loss": 1.0446, "step": 9000 }, { "epoch": 0.66, "eval_loss": 0.24269308149814606, "eval_runtime": 1017.8128, "eval_samples_per_second": 15.715, "eval_steps_per_second": 1.965, "eval_wer": 0.25453788054686755, "step": 9000 }, { "epoch": 0.67, "learning_rate": 5.840763834762276e-05, "loss": 1.0692, "step": 9100 }, { "epoch": 0.67, "learning_rate": 5.817614964925954e-05, "loss": 1.042, "step": 9200 }, { "epoch": 0.68, "learning_rate": 5.7942322681215896e-05, "loss": 1.059, "step": 9300 }, { "epoch": 0.69, "learning_rate": 5.770849571317225e-05, "loss": 1.0443, "step": 9400 }, { "epoch": 0.7, "learning_rate": 5.74746687451286e-05, "loss": 1.0586, "step": 9500 }, { "epoch": 0.7, "learning_rate": 5.7240841777084954e-05, "loss": 1.0584, "step": 9600 }, { "epoch": 0.71, "learning_rate": 5.70070148090413e-05, "loss": 1.0414, "step": 9700 }, { "epoch": 0.72, "learning_rate": 5.677318784099765e-05, "loss": 1.0428, "step": 9800 }, { "epoch": 0.73, "learning_rate": 5.653936087295401e-05, "loss": 1.0408, "step": 9900 }, { "epoch": 0.73, "learning_rate": 5.6305533904910365e-05, "loss": 1.0347, "step": 10000 }, { "epoch": 0.73, "eval_loss": 0.22658555209636688, "eval_runtime": 1019.9931, "eval_samples_per_second": 15.681, "eval_steps_per_second": 1.961, "eval_wer": 0.24018329076580575, "step": 10000 }, { "epoch": 0.74, "learning_rate": 5.607170693686672e-05, "loss": 1.0356, "step": 10100 }, { "epoch": 0.75, "learning_rate": 5.583787996882306e-05, "loss": 1.0553, "step": 10200 }, { "epoch": 0.76, "learning_rate": 5.5604053000779416e-05, "loss": 1.0464, "step": 10300 }, { "epoch": 0.76, "learning_rate": 5.537022603273577e-05, "loss": 1.0545, "step": 10400 }, { "epoch": 0.77, "learning_rate": 5.513639906469212e-05, "loss": 1.0457, "step": 10500 }, { "epoch": 0.78, "learning_rate": 5.490257209664848e-05, "loss": 1.0446, "step": 10600 }, { "epoch": 0.79, "learning_rate": 5.4668745128604826e-05, "loss": 1.051, "step": 10700 }, { "epoch": 0.79, "learning_rate": 5.443491816056118e-05, "loss": 1.0533, "step": 10800 }, { "epoch": 0.8, "learning_rate": 5.420109119251753e-05, "loss": 1.0414, "step": 10900 }, { "epoch": 0.81, "learning_rate": 5.3967264224473884e-05, "loss": 1.0457, "step": 11000 }, { "epoch": 0.81, "eval_loss": 0.22899799048900604, "eval_runtime": 1038.9889, "eval_samples_per_second": 15.395, "eval_steps_per_second": 1.925, "eval_wer": 0.24484067907726348, "step": 11000 }, { "epoch": 0.81, "learning_rate": 5.373343725643024e-05, "loss": 1.0395, "step": 11100 }, { "epoch": 0.82, "learning_rate": 5.349961028838658e-05, "loss": 1.0475, "step": 11200 }, { "epoch": 0.83, "learning_rate": 5.326578332034294e-05, "loss": 1.0339, "step": 11300 }, { "epoch": 0.84, "learning_rate": 5.3031956352299295e-05, "loss": 1.0444, "step": 11400 }, { "epoch": 0.84, "learning_rate": 5.279812938425565e-05, "loss": 1.0359, "step": 11500 }, { "epoch": 0.85, "learning_rate": 5.256664068589244e-05, "loss": 1.0287, "step": 11600 }, { "epoch": 0.86, "learning_rate": 5.2332813717848785e-05, "loss": 1.0094, "step": 11700 }, { "epoch": 0.87, "learning_rate": 5.209898674980514e-05, "loss": 1.0234, "step": 11800 }, { "epoch": 0.87, "learning_rate": 5.186515978176149e-05, "loss": 1.0261, "step": 11900 }, { "epoch": 0.88, "learning_rate": 5.163133281371784e-05, "loss": 1.0124, "step": 12000 }, { "epoch": 0.88, "eval_loss": 0.22948846220970154, "eval_runtime": 1027.4606, "eval_samples_per_second": 15.568, "eval_steps_per_second": 1.947, "eval_wer": 0.2447928759714274, "step": 12000 }, { "epoch": 0.89, "learning_rate": 5.13975058456742e-05, "loss": 1.0341, "step": 12100 }, { "epoch": 0.9, "learning_rate": 5.116367887763055e-05, "loss": 1.0335, "step": 12200 }, { "epoch": 0.9, "learning_rate": 5.09298519095869e-05, "loss": 1.0088, "step": 12300 }, { "epoch": 0.91, "learning_rate": 5.069602494154325e-05, "loss": 1.0151, "step": 12400 }, { "epoch": 0.92, "learning_rate": 5.0462197973499606e-05, "loss": 1.0037, "step": 12500 }, { "epoch": 0.92, "learning_rate": 5.023070927513639e-05, "loss": 1.0189, "step": 12600 }, { "epoch": 0.93, "learning_rate": 4.9996882307092743e-05, "loss": 0.9913, "step": 12700 }, { "epoch": 0.94, "learning_rate": 4.9763055339049096e-05, "loss": 0.9999, "step": 12800 }, { "epoch": 0.95, "learning_rate": 4.9529228371005455e-05, "loss": 1.0152, "step": 12900 }, { "epoch": 0.95, "learning_rate": 4.929540140296181e-05, "loss": 1.025, "step": 13000 }, { "epoch": 0.95, "eval_loss": 0.21379277110099792, "eval_runtime": 1021.0623, "eval_samples_per_second": 15.665, "eval_steps_per_second": 1.959, "eval_wer": 0.2345493532922682, "step": 13000 }, { "epoch": 0.96, "learning_rate": 4.9061574434918154e-05, "loss": 0.9976, "step": 13100 }, { "epoch": 0.97, "learning_rate": 4.8827747466874507e-05, "loss": 1.0144, "step": 13200 }, { "epoch": 0.98, "learning_rate": 4.859392049883086e-05, "loss": 1.0086, "step": 13300 }, { "epoch": 0.98, "learning_rate": 4.836009353078721e-05, "loss": 0.9987, "step": 13400 }, { "epoch": 0.99, "learning_rate": 4.812626656274357e-05, "loss": 1.0206, "step": 13500 }, { "epoch": 1.0, "learning_rate": 4.7892439594699924e-05, "loss": 1.01, "step": 13600 }, { "epoch": 1.01, "learning_rate": 4.765861262665627e-05, "loss": 0.9801, "step": 13700 }, { "epoch": 1.01, "learning_rate": 4.742478565861262e-05, "loss": 1.0058, "step": 13800 }, { "epoch": 1.02, "learning_rate": 4.7190958690568975e-05, "loss": 0.999, "step": 13900 }, { "epoch": 1.03, "learning_rate": 4.695713172252533e-05, "loss": 1.0107, "step": 14000 }, { "epoch": 1.03, "eval_loss": 0.21082927286624908, "eval_runtime": 1032.6325, "eval_samples_per_second": 15.49, "eval_steps_per_second": 1.937, "eval_wer": 0.22944124998292748, "step": 14000 }, { "epoch": 1.03, "learning_rate": 4.672330475448168e-05, "loss": 0.9762, "step": 14100 }, { "epoch": 1.04, "learning_rate": 4.6489477786438026e-05, "loss": 1.0014, "step": 14200 }, { "epoch": 1.05, "learning_rate": 4.6255650818394385e-05, "loss": 1.0038, "step": 14300 }, { "epoch": 1.06, "learning_rate": 4.602182385035074e-05, "loss": 0.9838, "step": 14400 }, { "epoch": 1.06, "learning_rate": 4.578799688230709e-05, "loss": 0.9931, "step": 14500 }, { "epoch": 1.07, "learning_rate": 4.555416991426344e-05, "loss": 0.9834, "step": 14600 }, { "epoch": 1.08, "learning_rate": 4.532034294621979e-05, "loss": 0.9867, "step": 14700 }, { "epoch": 1.09, "learning_rate": 4.508651597817614e-05, "loss": 1.0056, "step": 14800 }, { "epoch": 1.09, "learning_rate": 4.4852689010132494e-05, "loss": 0.9834, "step": 14900 }, { "epoch": 1.1, "learning_rate": 4.4618862042088854e-05, "loss": 0.9758, "step": 15000 }, { "epoch": 1.1, "eval_loss": 0.20192867517471313, "eval_runtime": 1027.9664, "eval_samples_per_second": 15.56, "eval_steps_per_second": 1.946, "eval_wer": 0.2203996339647896, "step": 15000 }, { "epoch": 1.11, "learning_rate": 4.4385035074045206e-05, "loss": 0.9737, "step": 15100 }, { "epoch": 1.12, "learning_rate": 4.415120810600155e-05, "loss": 0.9755, "step": 15200 }, { "epoch": 1.12, "learning_rate": 4.3917381137957905e-05, "loss": 0.9793, "step": 15300 }, { "epoch": 1.13, "learning_rate": 4.368355416991426e-05, "loss": 0.9818, "step": 15400 }, { "epoch": 1.14, "learning_rate": 4.344972720187061e-05, "loss": 0.9867, "step": 15500 }, { "epoch": 1.14, "learning_rate": 4.321590023382697e-05, "loss": 0.9802, "step": 15600 }, { "epoch": 1.15, "learning_rate": 4.298207326578332e-05, "loss": 0.9823, "step": 15700 }, { "epoch": 1.16, "learning_rate": 4.274824629773967e-05, "loss": 0.9669, "step": 15800 }, { "epoch": 1.17, "learning_rate": 4.251441932969602e-05, "loss": 0.9626, "step": 15900 }, { "epoch": 1.17, "learning_rate": 4.228059236165237e-05, "loss": 0.9547, "step": 16000 }, { "epoch": 1.17, "eval_loss": 0.19999034702777863, "eval_runtime": 1020.7566, "eval_samples_per_second": 15.67, "eval_steps_per_second": 1.959, "eval_wer": 0.2178250952647609, "step": 16000 }, { "epoch": 1.18, "learning_rate": 4.2046765393608726e-05, "loss": 0.9711, "step": 16100 }, { "epoch": 1.19, "learning_rate": 4.181293842556508e-05, "loss": 0.9871, "step": 16200 }, { "epoch": 1.2, "learning_rate": 4.1579111457521424e-05, "loss": 0.9638, "step": 16300 }, { "epoch": 1.2, "learning_rate": 4.1345284489477784e-05, "loss": 0.9632, "step": 16400 }, { "epoch": 1.21, "learning_rate": 4.1111457521434136e-05, "loss": 0.9732, "step": 16500 }, { "epoch": 1.22, "learning_rate": 4.087763055339049e-05, "loss": 0.9506, "step": 16600 }, { "epoch": 1.23, "learning_rate": 4.064380358534684e-05, "loss": 0.9718, "step": 16700 }, { "epoch": 1.23, "learning_rate": 4.040997661730319e-05, "loss": 0.9656, "step": 16800 }, { "epoch": 1.24, "learning_rate": 4.017614964925954e-05, "loss": 0.9722, "step": 16900 }, { "epoch": 1.25, "learning_rate": 3.99423226812159e-05, "loss": 0.986, "step": 17000 }, { "epoch": 1.25, "eval_loss": 0.20177510380744934, "eval_runtime": 1018.9329, "eval_samples_per_second": 15.698, "eval_steps_per_second": 1.963, "eval_wer": 0.21997623502738434, "step": 17000 }, { "epoch": 1.25, "learning_rate": 3.970849571317225e-05, "loss": 0.9662, "step": 17100 }, { "epoch": 1.26, "learning_rate": 3.9474668745128605e-05, "loss": 0.969, "step": 17200 }, { "epoch": 1.27, "learning_rate": 3.924084177708495e-05, "loss": 0.9641, "step": 17300 }, { "epoch": 1.28, "learning_rate": 3.90070148090413e-05, "loss": 0.9647, "step": 17400 }, { "epoch": 1.28, "learning_rate": 3.8773187840997656e-05, "loss": 0.9658, "step": 17500 }, { "epoch": 1.29, "learning_rate": 3.853936087295401e-05, "loss": 0.9615, "step": 17600 }, { "epoch": 1.3, "learning_rate": 3.830553390491037e-05, "loss": 0.9639, "step": 17700 }, { "epoch": 1.31, "learning_rate": 3.807170693686672e-05, "loss": 0.9543, "step": 17800 }, { "epoch": 1.31, "learning_rate": 3.7837879968823066e-05, "loss": 0.9446, "step": 17900 }, { "epoch": 1.32, "learning_rate": 3.760405300077942e-05, "loss": 0.9588, "step": 18000 }, { "epoch": 1.32, "eval_loss": 0.1992081105709076, "eval_runtime": 1010.377, "eval_samples_per_second": 15.831, "eval_steps_per_second": 1.979, "eval_wer": 0.21378914732917217, "step": 18000 }, { "epoch": 1.33, "learning_rate": 3.737256430241621e-05, "loss": 0.9683, "step": 18100 }, { "epoch": 1.34, "learning_rate": 3.713873733437256e-05, "loss": 0.958, "step": 18200 }, { "epoch": 1.34, "learning_rate": 3.6904910366328916e-05, "loss": 0.961, "step": 18300 }, { "epoch": 1.35, "learning_rate": 3.667108339828526e-05, "loss": 0.9441, "step": 18400 }, { "epoch": 1.36, "learning_rate": 3.643725643024162e-05, "loss": 0.9401, "step": 18500 }, { "epoch": 1.36, "learning_rate": 3.6203429462197974e-05, "loss": 0.939, "step": 18600 }, { "epoch": 1.37, "learning_rate": 3.596960249415432e-05, "loss": 0.9637, "step": 18700 }, { "epoch": 1.38, "learning_rate": 3.573577552611068e-05, "loss": 0.9412, "step": 18800 }, { "epoch": 1.39, "learning_rate": 3.5501948558067025e-05, "loss": 0.9399, "step": 18900 }, { "epoch": 1.39, "learning_rate": 3.526812159002338e-05, "loss": 0.9413, "step": 19000 }, { "epoch": 1.39, "eval_loss": 0.18979620933532715, "eval_runtime": 1016.8182, "eval_samples_per_second": 15.73, "eval_steps_per_second": 1.967, "eval_wer": 0.20486362456806478, "step": 19000 }, { "epoch": 1.4, "learning_rate": 3.503429462197973e-05, "loss": 0.9497, "step": 19100 }, { "epoch": 1.41, "learning_rate": 3.480280592361652e-05, "loss": 0.9462, "step": 19200 }, { "epoch": 1.42, "learning_rate": 3.4568978955572874e-05, "loss": 0.9427, "step": 19300 }, { "epoch": 1.42, "learning_rate": 3.433515198752923e-05, "loss": 0.9466, "step": 19400 }, { "epoch": 1.43, "learning_rate": 3.410132501948558e-05, "loss": 0.9284, "step": 19500 }, { "epoch": 1.44, "learning_rate": 3.3867498051441925e-05, "loss": 0.9438, "step": 19600 }, { "epoch": 1.45, "learning_rate": 3.3633671083398285e-05, "loss": 0.9281, "step": 19700 }, { "epoch": 1.45, "learning_rate": 3.339984411535464e-05, "loss": 0.9291, "step": 19800 }, { "epoch": 1.46, "learning_rate": 3.316601714731098e-05, "loss": 0.9467, "step": 19900 }, { "epoch": 1.47, "learning_rate": 3.293219017926734e-05, "loss": 0.9339, "step": 20000 }, { "epoch": 1.47, "eval_loss": 0.1874116212129593, "eval_runtime": 1013.6201, "eval_samples_per_second": 15.78, "eval_steps_per_second": 1.973, "eval_wer": 0.2056421322916809, "step": 20000 }, { "epoch": 1.47, "learning_rate": 3.269836321122369e-05, "loss": 0.9356, "step": 20100 }, { "epoch": 1.48, "learning_rate": 3.246453624318004e-05, "loss": 0.9206, "step": 20200 }, { "epoch": 1.49, "learning_rate": 3.22307092751364e-05, "loss": 0.9282, "step": 20300 }, { "epoch": 1.5, "learning_rate": 3.1996882307092746e-05, "loss": 0.927, "step": 20400 }, { "epoch": 1.5, "learning_rate": 3.17630553390491e-05, "loss": 0.9318, "step": 20500 }, { "epoch": 1.51, "learning_rate": 3.152922837100545e-05, "loss": 0.9394, "step": 20600 }, { "epoch": 1.52, "learning_rate": 3.1295401402961804e-05, "loss": 0.9227, "step": 20700 }, { "epoch": 1.53, "learning_rate": 3.106157443491816e-05, "loss": 0.9197, "step": 20800 }, { "epoch": 1.53, "learning_rate": 3.082774746687451e-05, "loss": 0.9176, "step": 20900 }, { "epoch": 1.54, "learning_rate": 3.059392049883086e-05, "loss": 0.9268, "step": 21000 }, { "epoch": 1.54, "eval_loss": 0.1797132045030594, "eval_runtime": 1006.5438, "eval_samples_per_second": 15.891, "eval_steps_per_second": 1.987, "eval_wer": 0.19759072346586176, "step": 21000 }, { "epoch": 1.55, "learning_rate": 3.0360093530787215e-05, "loss": 0.93, "step": 21100 }, { "epoch": 1.56, "learning_rate": 3.0126266562743567e-05, "loss": 0.9069, "step": 21200 }, { "epoch": 1.56, "learning_rate": 2.989243959469992e-05, "loss": 0.9359, "step": 21300 }, { "epoch": 1.57, "learning_rate": 2.966095089633671e-05, "loss": 0.9289, "step": 21400 }, { "epoch": 1.58, "learning_rate": 2.942712392829306e-05, "loss": 0.9092, "step": 21500 }, { "epoch": 1.58, "learning_rate": 2.919329696024941e-05, "loss": 0.9282, "step": 21600 }, { "epoch": 1.59, "learning_rate": 2.8959469992205766e-05, "loss": 0.9108, "step": 21700 }, { "epoch": 1.6, "learning_rate": 2.872564302416212e-05, "loss": 0.9193, "step": 21800 }, { "epoch": 1.61, "learning_rate": 2.8491816056118468e-05, "loss": 0.898, "step": 21900 }, { "epoch": 1.61, "learning_rate": 2.8257989088074824e-05, "loss": 0.9194, "step": 22000 }, { "epoch": 1.61, "eval_loss": 0.17433622479438782, "eval_runtime": 1020.4225, "eval_samples_per_second": 15.675, "eval_steps_per_second": 1.96, "eval_wer": 0.19052952183236133, "step": 22000 }, { "epoch": 1.62, "learning_rate": 2.8024162120031173e-05, "loss": 0.9215, "step": 22100 }, { "epoch": 1.63, "learning_rate": 2.7790335151987526e-05, "loss": 0.915, "step": 22200 }, { "epoch": 1.64, "learning_rate": 2.7556508183943882e-05, "loss": 0.901, "step": 22300 }, { "epoch": 1.64, "learning_rate": 2.732268121590023e-05, "loss": 0.913, "step": 22400 }, { "epoch": 1.65, "learning_rate": 2.7088854247856584e-05, "loss": 0.9155, "step": 22500 }, { "epoch": 1.66, "learning_rate": 2.6855027279812936e-05, "loss": 0.9, "step": 22600 }, { "epoch": 1.67, "learning_rate": 2.662120031176929e-05, "loss": 0.8909, "step": 22700 }, { "epoch": 1.67, "learning_rate": 2.638737334372564e-05, "loss": 0.9014, "step": 22800 }, { "epoch": 1.68, "learning_rate": 2.615354637568199e-05, "loss": 0.9036, "step": 22900 }, { "epoch": 1.69, "learning_rate": 2.5919719407638347e-05, "loss": 0.8987, "step": 23000 }, { "epoch": 1.69, "eval_loss": 0.1737690269947052, "eval_runtime": 1004.9747, "eval_samples_per_second": 15.916, "eval_steps_per_second": 1.99, "eval_wer": 0.19324064083477882, "step": 23000 }, { "epoch": 1.69, "learning_rate": 2.56858924395947e-05, "loss": 0.9074, "step": 23100 }, { "epoch": 1.7, "learning_rate": 2.545206547155105e-05, "loss": 0.8877, "step": 23200 }, { "epoch": 1.71, "learning_rate": 2.52182385035074e-05, "loss": 0.8995, "step": 23300 }, { "epoch": 1.72, "learning_rate": 2.4984411535463757e-05, "loss": 0.9006, "step": 23400 }, { "epoch": 1.72, "learning_rate": 2.4750584567420107e-05, "loss": 0.9072, "step": 23500 }, { "epoch": 1.73, "learning_rate": 2.451675759937646e-05, "loss": 0.891, "step": 23600 }, { "epoch": 1.74, "learning_rate": 2.428293063133281e-05, "loss": 0.886, "step": 23700 }, { "epoch": 1.75, "learning_rate": 2.4049103663289164e-05, "loss": 0.9053, "step": 23800 }, { "epoch": 1.75, "learning_rate": 2.3815276695245517e-05, "loss": 0.9086, "step": 23900 }, { "epoch": 1.76, "learning_rate": 2.3581449727201866e-05, "loss": 0.8884, "step": 24000 }, { "epoch": 1.76, "eval_loss": 0.17026115953922272, "eval_runtime": 1004.0446, "eval_samples_per_second": 15.931, "eval_steps_per_second": 1.992, "eval_wer": 0.18726525260526927, "step": 24000 }, { "epoch": 1.77, "learning_rate": 2.3347622759158222e-05, "loss": 0.8968, "step": 24100 }, { "epoch": 1.78, "learning_rate": 2.311613406079501e-05, "loss": 0.8838, "step": 24200 }, { "epoch": 1.78, "learning_rate": 2.2882307092751363e-05, "loss": 0.8787, "step": 24300 }, { "epoch": 1.79, "learning_rate": 2.2648480124707713e-05, "loss": 0.8904, "step": 24400 }, { "epoch": 1.8, "learning_rate": 2.241465315666407e-05, "loss": 0.8854, "step": 24500 }, { "epoch": 1.8, "learning_rate": 2.218082618862042e-05, "loss": 0.8795, "step": 24600 }, { "epoch": 1.81, "learning_rate": 2.194699922057677e-05, "loss": 0.8897, "step": 24700 }, { "epoch": 1.82, "learning_rate": 2.1713172252533123e-05, "loss": 0.8888, "step": 24800 }, { "epoch": 1.83, "learning_rate": 2.1479345284489476e-05, "loss": 0.8817, "step": 24900 }, { "epoch": 1.83, "learning_rate": 2.1245518316445828e-05, "loss": 0.8939, "step": 25000 }, { "epoch": 1.83, "eval_loss": 0.16330334544181824, "eval_runtime": 1011.258, "eval_samples_per_second": 15.817, "eval_steps_per_second": 1.978, "eval_wer": 0.18310638239753063, "step": 25000 }, { "epoch": 1.84, "learning_rate": 2.101169134840218e-05, "loss": 0.8837, "step": 25100 }, { "epoch": 1.85, "learning_rate": 2.077786438035853e-05, "loss": 0.8832, "step": 25200 }, { "epoch": 1.86, "learning_rate": 2.0544037412314886e-05, "loss": 0.869, "step": 25300 }, { "epoch": 1.86, "learning_rate": 2.0310210444271235e-05, "loss": 0.8638, "step": 25400 }, { "epoch": 1.87, "learning_rate": 2.0076383476227588e-05, "loss": 0.8785, "step": 25500 }, { "epoch": 1.88, "learning_rate": 1.9842556508183944e-05, "loss": 0.8666, "step": 25600 }, { "epoch": 1.89, "learning_rate": 1.9608729540140293e-05, "loss": 0.8725, "step": 25700 }, { "epoch": 1.89, "learning_rate": 1.9374902572096646e-05, "loss": 0.8757, "step": 25800 }, { "epoch": 1.9, "learning_rate": 1.9141075604053002e-05, "loss": 0.8617, "step": 25900 }, { "epoch": 1.91, "learning_rate": 1.890724863600935e-05, "loss": 0.8629, "step": 26000 }, { "epoch": 1.91, "eval_loss": 0.154932901263237, "eval_runtime": 1005.7394, "eval_samples_per_second": 15.904, "eval_steps_per_second": 1.989, "eval_wer": 0.17495936736003934, "step": 26000 }, { "epoch": 1.91, "learning_rate": 1.8673421667965704e-05, "loss": 0.8748, "step": 26100 }, { "epoch": 1.92, "learning_rate": 1.8439594699922056e-05, "loss": 0.8659, "step": 26200 }, { "epoch": 1.93, "learning_rate": 1.820576773187841e-05, "loss": 0.8594, "step": 26300 }, { "epoch": 1.94, "learning_rate": 1.7974279033515197e-05, "loss": 0.8568, "step": 26400 }, { "epoch": 1.94, "learning_rate": 1.774045206547155e-05, "loss": 0.8694, "step": 26500 }, { "epoch": 1.95, "learning_rate": 1.7506625097427903e-05, "loss": 0.8652, "step": 26600 }, { "epoch": 1.96, "learning_rate": 1.7272798129384255e-05, "loss": 0.8565, "step": 26700 }, { "epoch": 1.97, "learning_rate": 1.7038971161340608e-05, "loss": 0.8608, "step": 26800 }, { "epoch": 1.97, "learning_rate": 1.680514419329696e-05, "loss": 0.8554, "step": 26900 }, { "epoch": 1.98, "learning_rate": 1.657131722525331e-05, "loss": 0.8607, "step": 27000 }, { "epoch": 1.98, "eval_loss": 0.15500280261039734, "eval_runtime": 1016.9779, "eval_samples_per_second": 15.728, "eval_steps_per_second": 1.967, "eval_wer": 0.17375746069901798, "step": 27000 }, { "epoch": 1.99, "learning_rate": 1.6339828526890098e-05, "loss": 0.8551, "step": 27100 }, { "epoch": 2.0, "learning_rate": 1.6106001558846454e-05, "loss": 0.8588, "step": 27200 }, { "epoch": 2.0, "learning_rate": 1.5872174590802803e-05, "loss": 0.8518, "step": 27300 }, { "epoch": 2.01, "learning_rate": 1.5638347622759156e-05, "loss": 0.8359, "step": 27400 }, { "epoch": 2.02, "learning_rate": 1.540452065471551e-05, "loss": 0.8404, "step": 27500 }, { "epoch": 2.02, "learning_rate": 1.5170693686671863e-05, "loss": 0.8596, "step": 27600 }, { "epoch": 2.03, "learning_rate": 1.4936866718628214e-05, "loss": 0.8499, "step": 27700 }, { "epoch": 2.04, "learning_rate": 1.4703039750584566e-05, "loss": 0.8425, "step": 27800 }, { "epoch": 2.05, "learning_rate": 1.4469212782540917e-05, "loss": 0.8391, "step": 27900 }, { "epoch": 2.05, "learning_rate": 1.4235385814497272e-05, "loss": 0.8316, "step": 28000 }, { "epoch": 2.05, "eval_loss": 0.15122003853321075, "eval_runtime": 1007.5325, "eval_samples_per_second": 15.875, "eval_steps_per_second": 1.985, "eval_wer": 0.17086878730349508, "step": 28000 }, { "epoch": 2.06, "learning_rate": 1.4001558846453624e-05, "loss": 0.8325, "step": 28100 }, { "epoch": 2.07, "learning_rate": 1.3767731878409975e-05, "loss": 0.8311, "step": 28200 }, { "epoch": 2.08, "learning_rate": 1.3533904910366328e-05, "loss": 0.8336, "step": 28300 }, { "epoch": 2.08, "learning_rate": 1.330007794232268e-05, "loss": 0.8248, "step": 28400 }, { "epoch": 2.09, "learning_rate": 1.3066250974279033e-05, "loss": 0.8316, "step": 28500 }, { "epoch": 2.1, "learning_rate": 1.2832424006235384e-05, "loss": 0.8313, "step": 28600 }, { "epoch": 2.11, "learning_rate": 1.2598597038191737e-05, "loss": 0.8284, "step": 28700 }, { "epoch": 2.11, "learning_rate": 1.236477007014809e-05, "loss": 0.8309, "step": 28800 }, { "epoch": 2.12, "learning_rate": 1.2130943102104442e-05, "loss": 0.8287, "step": 28900 }, { "epoch": 2.13, "learning_rate": 1.189945440374123e-05, "loss": 0.8321, "step": 29000 }, { "epoch": 2.13, "eval_loss": 0.14810478687286377, "eval_runtime": 999.3435, "eval_samples_per_second": 16.006, "eval_steps_per_second": 2.001, "eval_wer": 0.16567873581272108, "step": 29000 }, { "epoch": 2.13, "learning_rate": 1.1665627435697581e-05, "loss": 0.8264, "step": 29100 }, { "epoch": 2.14, "learning_rate": 1.1431800467653935e-05, "loss": 0.8186, "step": 29200 }, { "epoch": 2.15, "learning_rate": 1.1197973499610288e-05, "loss": 0.8264, "step": 29300 }, { "epoch": 2.16, "learning_rate": 1.0964146531566639e-05, "loss": 0.8285, "step": 29400 }, { "epoch": 2.16, "learning_rate": 1.0730319563522992e-05, "loss": 0.8321, "step": 29500 }, { "epoch": 2.17, "learning_rate": 1.0496492595479346e-05, "loss": 0.8261, "step": 29600 }, { "epoch": 2.18, "learning_rate": 1.0262665627435697e-05, "loss": 0.836, "step": 29700 }, { "epoch": 2.19, "learning_rate": 1.002883865939205e-05, "loss": 0.8178, "step": 29800 }, { "epoch": 2.19, "learning_rate": 9.7950116913484e-06, "loss": 0.8258, "step": 29900 }, { "epoch": 2.2, "learning_rate": 9.561184723304755e-06, "loss": 0.825, "step": 30000 }, { "epoch": 2.2, "eval_loss": 0.14461292326450348, "eval_runtime": 1002.0806, "eval_samples_per_second": 15.962, "eval_steps_per_second": 1.996, "eval_wer": 0.16271494325088437, "step": 30000 }, { "epoch": 2.21, "learning_rate": 9.327357755261106e-06, "loss": 0.8247, "step": 30100 }, { "epoch": 2.22, "learning_rate": 9.093530787217458e-06, "loss": 0.8223, "step": 30200 }, { "epoch": 2.22, "learning_rate": 8.85970381917381e-06, "loss": 0.8144, "step": 30300 }, { "epoch": 2.23, "learning_rate": 8.625876851130163e-06, "loss": 0.821, "step": 30400 }, { "epoch": 2.24, "learning_rate": 8.392049883086516e-06, "loss": 0.8069, "step": 30500 }, { "epoch": 2.25, "learning_rate": 8.158222915042867e-06, "loss": 0.8122, "step": 30600 }, { "epoch": 2.25, "learning_rate": 7.92439594699922e-06, "loss": 0.8042, "step": 30700 }, { "epoch": 2.26, "learning_rate": 7.690568978955572e-06, "loss": 0.8159, "step": 30800 }, { "epoch": 2.27, "learning_rate": 7.456742010911924e-06, "loss": 0.8125, "step": 30900 }, { "epoch": 2.27, "learning_rate": 7.222915042868277e-06, "loss": 0.8115, "step": 31000 }, { "epoch": 2.27, "eval_loss": 0.13957646489143372, "eval_runtime": 1001.1222, "eval_samples_per_second": 15.977, "eval_steps_per_second": 1.998, "eval_wer": 0.15831705751396533, "step": 31000 }, { "epoch": 2.28, "learning_rate": 6.989088074824628e-06, "loss": 0.7972, "step": 31100 }, { "epoch": 2.29, "learning_rate": 6.755261106780982e-06, "loss": 0.8031, "step": 31200 }, { "epoch": 2.3, "learning_rate": 6.521434138737334e-06, "loss": 0.8017, "step": 31300 }, { "epoch": 2.3, "learning_rate": 6.287607170693686e-06, "loss": 0.8036, "step": 31400 }, { "epoch": 2.31, "learning_rate": 6.053780202650038e-06, "loss": 0.8001, "step": 31500 }, { "epoch": 2.32, "learning_rate": 5.819953234606391e-06, "loss": 0.8133, "step": 31600 }, { "epoch": 2.33, "learning_rate": 5.586126266562743e-06, "loss": 0.8105, "step": 31700 }, { "epoch": 2.33, "learning_rate": 5.352299298519096e-06, "loss": 0.8104, "step": 31800 }, { "epoch": 2.34, "learning_rate": 5.118472330475448e-06, "loss": 0.7934, "step": 31900 }, { "epoch": 2.35, "learning_rate": 4.8846453624318e-06, "loss": 0.7959, "step": 32000 }, { "epoch": 2.35, "eval_loss": 0.1389056146144867, "eval_runtime": 1015.5862, "eval_samples_per_second": 15.75, "eval_steps_per_second": 1.969, "eval_wer": 0.15688296433888305, "step": 32000 }, { "epoch": 2.36, "learning_rate": 4.650818394388152e-06, "loss": 0.8036, "step": 32100 }, { "epoch": 2.36, "learning_rate": 4.416991426344505e-06, "loss": 0.7954, "step": 32200 }, { "epoch": 2.37, "learning_rate": 4.183164458300857e-06, "loss": 0.7958, "step": 32300 }, { "epoch": 2.38, "learning_rate": 3.949337490257209e-06, "loss": 0.7973, "step": 32400 }, { "epoch": 2.38, "learning_rate": 3.7155105222135617e-06, "loss": 0.7989, "step": 32500 }, { "epoch": 2.39, "learning_rate": 3.481683554169914e-06, "loss": 0.7959, "step": 32600 }, { "epoch": 2.4, "learning_rate": 3.2478565861262665e-06, "loss": 0.7945, "step": 32700 }, { "epoch": 2.41, "learning_rate": 3.0140296180826187e-06, "loss": 0.8044, "step": 32800 }, { "epoch": 2.41, "learning_rate": 2.780202650038971e-06, "loss": 0.7979, "step": 32900 }, { "epoch": 2.42, "learning_rate": 2.5463756819953235e-06, "loss": 0.7835, "step": 33000 }, { "epoch": 2.42, "eval_loss": 0.1362341344356537, "eval_runtime": 1006.8998, "eval_samples_per_second": 15.885, "eval_steps_per_second": 1.986, "eval_wer": 0.15451329609243755, "step": 33000 }, { "epoch": 2.43, "learning_rate": 2.3125487139516753e-06, "loss": 0.7871, "step": 33100 }, { "epoch": 2.44, "learning_rate": 2.078721745908028e-06, "loss": 0.7973, "step": 33200 }, { "epoch": 2.44, "learning_rate": 1.84489477786438e-06, "loss": 0.7855, "step": 33300 }, { "epoch": 2.45, "learning_rate": 1.6110678098207325e-06, "loss": 0.7884, "step": 33400 }, { "epoch": 2.46, "learning_rate": 1.377240841777085e-06, "loss": 0.7848, "step": 33500 }, { "epoch": 2.47, "learning_rate": 1.1434138737334373e-06, "loss": 0.7843, "step": 33600 }, { "epoch": 2.47, "learning_rate": 9.095869056897894e-07, "loss": 0.7996, "step": 33700 }, { "epoch": 2.48, "learning_rate": 6.757599376461418e-07, "loss": 0.7862, "step": 33800 }, { "epoch": 2.49, "learning_rate": 4.419329696024941e-07, "loss": 0.7888, "step": 33900 }, { "epoch": 2.49, "learning_rate": 2.0810600155884645e-07, "loss": 0.7959, "step": 34000 }, { "epoch": 2.49, "eval_loss": 0.13550546765327454, "eval_runtime": 1005.5212, "eval_samples_per_second": 15.907, "eval_steps_per_second": 1.989, "eval_wer": 0.15314749306854966, "step": 34000 }, { "epoch": 2.5, "step": 34075, "total_flos": 5.050048908581704e+20, "train_loss": 1.0034960915109994, "train_runtime": 150795.9745, "train_samples_per_second": 7.231, "train_steps_per_second": 0.226 } ], "max_steps": 34075, "num_train_epochs": 3, "total_flos": 5.050048908581704e+20, "trial_name": null, "trial_params": null }