{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.99749373433583, "global_step": 19900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5, "learning_rate": 3.6375e-06, "loss": 136.5086, "step": 100 }, { "epoch": 1.01, "learning_rate": 7.3875e-06, "loss": 98.7644, "step": 200 }, { "epoch": 1.51, "learning_rate": 1.1137499999999998e-05, "loss": 81.5207, "step": 300 }, { "epoch": 2.01, "learning_rate": 1.48875e-05, "loss": 72.7656, "step": 400 }, { "epoch": 2.51, "learning_rate": 1.86375e-05, "loss": 64.6189, "step": 500 }, { "epoch": 2.51, "eval_cer": 1.0, "eval_loss": 63.80772399902344, "eval_runtime": 134.6445, "eval_samples_per_second": 21.969, "eval_steps_per_second": 2.748, "eval_wer": 1.0, "step": 500 }, { "epoch": 3.02, "learning_rate": 2.23875e-05, "loss": 53.9323, "step": 600 }, { "epoch": 3.52, "learning_rate": 2.6137499999999995e-05, "loss": 41.1416, "step": 700 }, { "epoch": 4.02, "learning_rate": 2.9887499999999998e-05, "loss": 27.1675, "step": 800 }, { "epoch": 4.52, "learning_rate": 3.36375e-05, "loss": 14.6933, "step": 900 }, { "epoch": 5.03, "learning_rate": 3.7387499999999994e-05, "loss": 8.0561, "step": 1000 }, { "epoch": 5.03, "eval_cer": 1.0, "eval_loss": 6.8014140129089355, "eval_runtime": 123.7155, "eval_samples_per_second": 23.91, "eval_steps_per_second": 2.991, "eval_wer": 1.0, "step": 1000 }, { "epoch": 5.53, "learning_rate": 4.11375e-05, "loss": 6.5122, "step": 1100 }, { "epoch": 6.03, "learning_rate": 4.48875e-05, "loss": 6.1973, "step": 1200 }, { "epoch": 6.53, "learning_rate": 4.8637499999999996e-05, "loss": 6.0687, "step": 1300 }, { "epoch": 7.04, "learning_rate": 5.23875e-05, "loss": 6.0963, "step": 1400 }, { "epoch": 7.54, "learning_rate": 5.61375e-05, "loss": 6.0427, "step": 1500 }, { "epoch": 7.54, "eval_cer": 1.0, "eval_loss": 6.074538230895996, "eval_runtime": 122.7663, "eval_samples_per_second": 24.095, "eval_steps_per_second": 3.014, "eval_wer": 1.0, "step": 1500 }, { "epoch": 8.04, "learning_rate": 5.988749999999999e-05, "loss": 6.0461, "step": 1600 }, { "epoch": 8.54, "learning_rate": 6.36375e-05, "loss": 5.9929, "step": 1700 }, { "epoch": 9.05, "learning_rate": 6.738749999999999e-05, "loss": 6.0191, "step": 1800 }, { "epoch": 9.55, "learning_rate": 7.11375e-05, "loss": 5.938, "step": 1900 }, { "epoch": 10.05, "learning_rate": 7.48875e-05, "loss": 5.9357, "step": 2000 }, { "epoch": 10.05, "eval_cer": 1.0, "eval_loss": 5.868249416351318, "eval_runtime": 121.8577, "eval_samples_per_second": 24.274, "eval_steps_per_second": 3.036, "eval_wer": 1.0, "step": 2000 }, { "epoch": 10.55, "learning_rate": 7.45935754189944e-05, "loss": 5.8143, "step": 2100 }, { "epoch": 11.06, "learning_rate": 7.417458100558659e-05, "loss": 5.7549, "step": 2200 }, { "epoch": 11.56, "learning_rate": 7.375558659217876e-05, "loss": 5.5454, "step": 2300 }, { "epoch": 12.06, "learning_rate": 7.333659217877095e-05, "loss": 5.3124, "step": 2400 }, { "epoch": 12.56, "learning_rate": 7.291759776536312e-05, "loss": 5.0489, "step": 2500 }, { "epoch": 12.56, "eval_cer": 0.7749867254829882, "eval_loss": 4.4031829833984375, "eval_runtime": 123.6444, "eval_samples_per_second": 23.923, "eval_steps_per_second": 2.992, "eval_wer": 0.9989858012170385, "step": 2500 }, { "epoch": 13.07, "learning_rate": 7.249860335195531e-05, "loss": 5.0055, "step": 2600 }, { "epoch": 13.57, "learning_rate": 7.207960893854748e-05, "loss": 4.8755, "step": 2700 }, { "epoch": 14.07, "learning_rate": 7.166480446927373e-05, "loss": 4.7729, "step": 2800 }, { "epoch": 14.57, "learning_rate": 7.124581005586592e-05, "loss": 4.7114, "step": 2900 }, { "epoch": 15.08, "learning_rate": 7.083100558659217e-05, "loss": 4.6184, "step": 3000 }, { "epoch": 15.08, "eval_cer": 0.6767552995956377, "eval_loss": 3.83825421333313, "eval_runtime": 124.4317, "eval_samples_per_second": 23.772, "eval_steps_per_second": 2.974, "eval_wer": 0.9983096686950642, "step": 3000 }, { "epoch": 15.58, "learning_rate": 7.041201117318434e-05, "loss": 4.5657, "step": 3100 }, { "epoch": 16.08, "learning_rate": 6.999301675977653e-05, "loss": 4.5613, "step": 3200 }, { "epoch": 16.58, "learning_rate": 6.95782122905028e-05, "loss": 4.4679, "step": 3300 }, { "epoch": 17.09, "learning_rate": 6.915921787709497e-05, "loss": 4.4288, "step": 3400 }, { "epoch": 17.59, "learning_rate": 6.874022346368715e-05, "loss": 4.365, "step": 3500 }, { "epoch": 17.59, "eval_cer": 0.62994731037863, "eval_loss": 3.4632537364959717, "eval_runtime": 122.6998, "eval_samples_per_second": 24.108, "eval_steps_per_second": 3.015, "eval_wer": 0.9959432048681541, "step": 3500 }, { "epoch": 18.09, "learning_rate": 6.832122905027933e-05, "loss": 4.3607, "step": 3600 }, { "epoch": 18.59, "learning_rate": 6.790223463687151e-05, "loss": 4.2659, "step": 3700 }, { "epoch": 19.1, "learning_rate": 6.748324022346368e-05, "loss": 4.2805, "step": 3800 }, { "epoch": 19.6, "learning_rate": 6.706424581005587e-05, "loss": 4.1783, "step": 3900 }, { "epoch": 20.1, "learning_rate": 6.664525139664804e-05, "loss": 4.1026, "step": 4000 }, { "epoch": 20.1, "eval_cer": 0.5813830004492914, "eval_loss": 3.073154926300049, "eval_runtime": 122.4373, "eval_samples_per_second": 24.159, "eval_steps_per_second": 3.022, "eval_wer": 0.9901960784313726, "step": 4000 }, { "epoch": 20.6, "learning_rate": 6.622625698324022e-05, "loss": 4.0481, "step": 4100 }, { "epoch": 21.11, "learning_rate": 6.58072625698324e-05, "loss": 4.005, "step": 4200 }, { "epoch": 21.61, "learning_rate": 6.538826815642457e-05, "loss": 3.966, "step": 4300 }, { "epoch": 22.11, "learning_rate": 6.496927374301676e-05, "loss": 3.9327, "step": 4400 }, { "epoch": 22.61, "learning_rate": 6.455027932960893e-05, "loss": 3.8655, "step": 4500 }, { "epoch": 22.61, "eval_cer": 0.5465016542090431, "eval_loss": 2.7638278007507324, "eval_runtime": 121.1871, "eval_samples_per_second": 24.409, "eval_steps_per_second": 3.053, "eval_wer": 0.986815415821501, "step": 4500 }, { "epoch": 23.12, "learning_rate": 6.413128491620112e-05, "loss": 3.8466, "step": 4600 }, { "epoch": 23.62, "learning_rate": 6.371229050279329e-05, "loss": 3.7785, "step": 4700 }, { "epoch": 24.12, "learning_rate": 6.329329608938548e-05, "loss": 3.7262, "step": 4800 }, { "epoch": 24.62, "learning_rate": 6.287430167597765e-05, "loss": 3.6768, "step": 4900 }, { "epoch": 25.13, "learning_rate": 6.245530726256982e-05, "loss": 3.6991, "step": 5000 }, { "epoch": 25.13, "eval_cer": 0.5088020258955194, "eval_loss": 2.475937604904175, "eval_runtime": 122.6664, "eval_samples_per_second": 24.114, "eval_steps_per_second": 3.016, "eval_wer": 0.9810682893847193, "step": 5000 }, { "epoch": 25.63, "learning_rate": 6.203631284916201e-05, "loss": 3.5971, "step": 5100 }, { "epoch": 26.13, "learning_rate": 6.161731843575418e-05, "loss": 3.6243, "step": 5200 }, { "epoch": 26.63, "learning_rate": 6.119832402234637e-05, "loss": 3.5803, "step": 5300 }, { "epoch": 27.14, "learning_rate": 6.077932960893854e-05, "loss": 3.5697, "step": 5400 }, { "epoch": 27.64, "learning_rate": 6.03645251396648e-05, "loss": 3.4894, "step": 5500 }, { "epoch": 27.64, "eval_cer": 0.4851529632806437, "eval_loss": 2.2937276363372803, "eval_runtime": 122.0039, "eval_samples_per_second": 24.245, "eval_steps_per_second": 3.033, "eval_wer": 0.9746450304259635, "step": 5500 }, { "epoch": 28.14, "learning_rate": 5.994553072625698e-05, "loss": 3.5363, "step": 5600 }, { "epoch": 28.64, "learning_rate": 5.952653631284916e-05, "loss": 3.4932, "step": 5700 }, { "epoch": 29.15, "learning_rate": 5.910754189944134e-05, "loss": 3.436, "step": 5800 }, { "epoch": 29.65, "learning_rate": 5.868854748603351e-05, "loss": 3.4209, "step": 5900 }, { "epoch": 30.15, "learning_rate": 5.8269553072625696e-05, "loss": 3.3983, "step": 6000 }, { "epoch": 30.15, "eval_cer": 0.4673855328186905, "eval_loss": 2.168361186981201, "eval_runtime": 121.2096, "eval_samples_per_second": 24.404, "eval_steps_per_second": 3.053, "eval_wer": 0.9732927653820149, "step": 6000 }, { "epoch": 30.65, "learning_rate": 5.785055865921787e-05, "loss": 3.3573, "step": 6100 }, { "epoch": 31.16, "learning_rate": 5.743156424581005e-05, "loss": 3.3821, "step": 6200 }, { "epoch": 31.66, "learning_rate": 5.701256983240223e-05, "loss": 3.3188, "step": 6300 }, { "epoch": 32.16, "learning_rate": 5.659357541899441e-05, "loss": 3.3519, "step": 6400 }, { "epoch": 32.66, "learning_rate": 5.6174581005586586e-05, "loss": 3.2736, "step": 6500 }, { "epoch": 32.66, "eval_cer": 0.44581954825797493, "eval_loss": 2.037248134613037, "eval_runtime": 123.5198, "eval_samples_per_second": 23.948, "eval_steps_per_second": 2.995, "eval_wer": 0.9658553076402975, "step": 6500 }, { "epoch": 33.17, "learning_rate": 5.5755586592178765e-05, "loss": 3.3093, "step": 6600 }, { "epoch": 33.67, "learning_rate": 5.5336592178770945e-05, "loss": 3.2599, "step": 6700 }, { "epoch": 34.17, "learning_rate": 5.4917597765363124e-05, "loss": 3.2115, "step": 6800 }, { "epoch": 34.67, "learning_rate": 5.4498603351955304e-05, "loss": 3.1972, "step": 6900 }, { "epoch": 35.18, "learning_rate": 5.407960893854748e-05, "loss": 3.1884, "step": 7000 }, { "epoch": 35.18, "eval_cer": 0.4329126332557285, "eval_loss": 1.9266635179519653, "eval_runtime": 122.7815, "eval_samples_per_second": 24.092, "eval_steps_per_second": 3.013, "eval_wer": 0.964841108857336, "step": 7000 }, { "epoch": 35.68, "learning_rate": 5.366061452513966e-05, "loss": 3.1908, "step": 7100 }, { "epoch": 36.18, "learning_rate": 5.324162011173184e-05, "loss": 3.1505, "step": 7200 }, { "epoch": 36.68, "learning_rate": 5.2822625698324014e-05, "loss": 3.134, "step": 7300 }, { "epoch": 37.19, "learning_rate": 5.24036312849162e-05, "loss": 3.101, "step": 7400 }, { "epoch": 37.69, "learning_rate": 5.198882681564245e-05, "loss": 3.1248, "step": 7500 }, { "epoch": 37.69, "eval_cer": 0.4217211942980844, "eval_loss": 1.8408104181289673, "eval_runtime": 122.5199, "eval_samples_per_second": 24.143, "eval_steps_per_second": 3.02, "eval_wer": 0.9590939824205544, "step": 7500 }, { "epoch": 38.19, "learning_rate": 5.156983240223463e-05, "loss": 3.0958, "step": 7600 }, { "epoch": 38.69, "learning_rate": 5.115083798882681e-05, "loss": 3.0627, "step": 7700 }, { "epoch": 39.2, "learning_rate": 5.073184357541899e-05, "loss": 3.0716, "step": 7800 }, { "epoch": 39.7, "learning_rate": 5.031284916201117e-05, "loss": 3.0455, "step": 7900 }, { "epoch": 40.2, "learning_rate": 4.989804469273743e-05, "loss": 3.0381, "step": 8000 }, { "epoch": 40.2, "eval_cer": 0.40742556059306456, "eval_loss": 1.7530696392059326, "eval_runtime": 125.5769, "eval_samples_per_second": 23.555, "eval_steps_per_second": 2.946, "eval_wer": 0.9503042596348884, "step": 8000 }, { "epoch": 40.7, "learning_rate": 4.94790502793296e-05, "loss": 2.9917, "step": 8100 }, { "epoch": 41.21, "learning_rate": 4.906005586592179e-05, "loss": 3.0649, "step": 8200 }, { "epoch": 41.71, "learning_rate": 4.864106145251396e-05, "loss": 2.9746, "step": 8300 }, { "epoch": 42.21, "learning_rate": 4.8222067039106146e-05, "loss": 2.9459, "step": 8400 }, { "epoch": 42.71, "learning_rate": 4.780307262569832e-05, "loss": 2.9515, "step": 8500 }, { "epoch": 42.71, "eval_cer": 0.39672425764816405, "eval_loss": 1.6879578828811646, "eval_runtime": 121.7673, "eval_samples_per_second": 24.292, "eval_steps_per_second": 3.039, "eval_wer": 0.9459093982420554, "step": 8500 }, { "epoch": 43.22, "learning_rate": 4.7384078212290505e-05, "loss": 2.9478, "step": 8600 }, { "epoch": 43.72, "learning_rate": 4.696508379888268e-05, "loss": 2.9046, "step": 8700 }, { "epoch": 44.22, "learning_rate": 4.654608938547485e-05, "loss": 2.9345, "step": 8800 }, { "epoch": 44.72, "learning_rate": 4.6127094972067036e-05, "loss": 2.8722, "step": 8900 }, { "epoch": 45.23, "learning_rate": 4.570810055865921e-05, "loss": 2.8704, "step": 9000 }, { "epoch": 45.23, "eval_cer": 0.3884327900992525, "eval_loss": 1.626428484916687, "eval_runtime": 124.9171, "eval_samples_per_second": 23.68, "eval_steps_per_second": 2.962, "eval_wer": 0.9377958079783637, "step": 9000 }, { "epoch": 45.73, "learning_rate": 4.5289106145251395e-05, "loss": 2.8807, "step": 9100 }, { "epoch": 46.23, "learning_rate": 4.487011173184357e-05, "loss": 2.8631, "step": 9200 }, { "epoch": 46.73, "learning_rate": 4.4451117318435753e-05, "loss": 2.8743, "step": 9300 }, { "epoch": 47.24, "learning_rate": 4.4032122905027926e-05, "loss": 2.8364, "step": 9400 }, { "epoch": 47.74, "learning_rate": 4.361312849162011e-05, "loss": 2.8128, "step": 9500 }, { "epoch": 47.74, "eval_cer": 0.37818077849936693, "eval_loss": 1.5620697736740112, "eval_runtime": 128.811, "eval_samples_per_second": 22.964, "eval_steps_per_second": 2.872, "eval_wer": 0.934077079107505, "step": 9500 }, { "epoch": 48.24, "learning_rate": 4.3194134078212285e-05, "loss": 2.855, "step": 9600 }, { "epoch": 48.74, "learning_rate": 4.2775139664804464e-05, "loss": 2.7587, "step": 9700 }, { "epoch": 49.25, "learning_rate": 4.2356145251396644e-05, "loss": 2.7794, "step": 9800 }, { "epoch": 49.75, "learning_rate": 4.193715083798882e-05, "loss": 2.7664, "step": 9900 }, { "epoch": 50.25, "learning_rate": 4.1518156424581e-05, "loss": 2.7386, "step": 10000 }, { "epoch": 50.25, "eval_cer": 0.3663766695257934, "eval_loss": 1.5010998249053955, "eval_runtime": 124.8062, "eval_samples_per_second": 23.701, "eval_steps_per_second": 2.965, "eval_wer": 0.9242731575388776, "step": 10000 }, { "epoch": 50.75, "learning_rate": 4.109916201117318e-05, "loss": 2.7295, "step": 10100 }, { "epoch": 51.26, "learning_rate": 4.0684357541899433e-05, "loss": 2.7372, "step": 10200 }, { "epoch": 51.76, "learning_rate": 4.026536312849162e-05, "loss": 2.7116, "step": 10300 }, { "epoch": 52.26, "learning_rate": 3.984636871508379e-05, "loss": 2.6882, "step": 10400 }, { "epoch": 52.76, "learning_rate": 3.942737430167598e-05, "loss": 2.6646, "step": 10500 }, { "epoch": 52.76, "eval_cer": 0.3574725319609525, "eval_loss": 1.460774540901184, "eval_runtime": 125.7182, "eval_samples_per_second": 23.529, "eval_steps_per_second": 2.943, "eval_wer": 0.9192021636240704, "step": 10500 }, { "epoch": 53.27, "learning_rate": 3.900837988826815e-05, "loss": 2.6904, "step": 10600 }, { "epoch": 53.77, "learning_rate": 3.858938547486034e-05, "loss": 2.6589, "step": 10700 }, { "epoch": 54.27, "learning_rate": 3.817039106145251e-05, "loss": 2.6618, "step": 10800 }, { "epoch": 54.77, "learning_rate": 3.7751396648044696e-05, "loss": 2.6187, "step": 10900 }, { "epoch": 55.28, "learning_rate": 3.733240223463687e-05, "loss": 2.6072, "step": 11000 }, { "epoch": 55.28, "eval_cer": 0.3501204917697995, "eval_loss": 1.4251333475112915, "eval_runtime": 122.3324, "eval_samples_per_second": 24.18, "eval_steps_per_second": 3.025, "eval_wer": 0.9148073022312373, "step": 11000 }, { "epoch": 55.78, "learning_rate": 3.691340782122905e-05, "loss": 2.6307, "step": 11100 }, { "epoch": 56.28, "learning_rate": 3.649441340782123e-05, "loss": 2.5884, "step": 11200 }, { "epoch": 56.78, "learning_rate": 3.6075418994413407e-05, "loss": 2.5678, "step": 11300 }, { "epoch": 57.29, "learning_rate": 3.5656424581005586e-05, "loss": 2.5722, "step": 11400 }, { "epoch": 57.79, "learning_rate": 3.523743016759776e-05, "loss": 2.569, "step": 11500 }, { "epoch": 57.79, "eval_cer": 0.34615855900012255, "eval_loss": 1.3836983442306519, "eval_runtime": 125.4562, "eval_samples_per_second": 23.578, "eval_steps_per_second": 2.949, "eval_wer": 0.9060175794455714, "step": 11500 }, { "epoch": 58.29, "learning_rate": 3.481843575418994e-05, "loss": 2.5487, "step": 11600 }, { "epoch": 58.79, "learning_rate": 3.439944134078212e-05, "loss": 2.5536, "step": 11700 }, { "epoch": 59.3, "learning_rate": 3.39804469273743e-05, "loss": 2.5402, "step": 11800 }, { "epoch": 59.8, "learning_rate": 3.3561452513966476e-05, "loss": 2.5001, "step": 11900 }, { "epoch": 60.3, "learning_rate": 3.3142458100558655e-05, "loss": 2.5091, "step": 12000 }, { "epoch": 60.3, "eval_cer": 0.3392149654862558, "eval_loss": 1.3589394092559814, "eval_runtime": 123.5116, "eval_samples_per_second": 23.949, "eval_steps_per_second": 2.996, "eval_wer": 0.9070317782285328, "step": 12000 }, { "epoch": 60.8, "learning_rate": 3.2723463687150835e-05, "loss": 2.4874, "step": 12100 }, { "epoch": 61.31, "learning_rate": 3.2304469273743014e-05, "loss": 2.4834, "step": 12200 }, { "epoch": 61.81, "learning_rate": 3.1885474860335194e-05, "loss": 2.478, "step": 12300 }, { "epoch": 62.31, "learning_rate": 3.147067039106145e-05, "loss": 2.4811, "step": 12400 }, { "epoch": 62.81, "learning_rate": 3.105167597765363e-05, "loss": 2.4588, "step": 12500 }, { "epoch": 62.81, "eval_cer": 0.3283911285381693, "eval_loss": 1.326094627380371, "eval_runtime": 126.176, "eval_samples_per_second": 23.443, "eval_steps_per_second": 2.932, "eval_wer": 0.896551724137931, "step": 12500 }, { "epoch": 63.32, "learning_rate": 3.063268156424581e-05, "loss": 2.4296, "step": 12600 }, { "epoch": 63.82, "learning_rate": 3.0213687150837987e-05, "loss": 2.4535, "step": 12700 }, { "epoch": 64.32, "learning_rate": 2.9794692737430163e-05, "loss": 2.4572, "step": 12800 }, { "epoch": 64.82, "learning_rate": 2.9375698324022342e-05, "loss": 2.3824, "step": 12900 }, { "epoch": 65.33, "learning_rate": 2.896508379888268e-05, "loss": 2.4083, "step": 13000 }, { "epoch": 65.33, "eval_cer": 0.32647142915492383, "eval_loss": 1.3052246570587158, "eval_runtime": 125.7147, "eval_samples_per_second": 23.529, "eval_steps_per_second": 2.943, "eval_wer": 0.8982420554428668, "step": 13000 }, { "epoch": 65.83, "learning_rate": 2.854608938547486e-05, "loss": 2.4015, "step": 13100 }, { "epoch": 66.33, "learning_rate": 2.812709497206704e-05, "loss": 2.3747, "step": 13200 }, { "epoch": 66.83, "learning_rate": 2.7708100558659218e-05, "loss": 2.3625, "step": 13300 }, { "epoch": 67.34, "learning_rate": 2.728910614525139e-05, "loss": 2.4027, "step": 13400 }, { "epoch": 67.84, "learning_rate": 2.687011173184357e-05, "loss": 2.3787, "step": 13500 }, { "epoch": 67.84, "eval_cer": 0.32430666176530654, "eval_loss": 1.2997361421585083, "eval_runtime": 122.085, "eval_samples_per_second": 24.229, "eval_steps_per_second": 3.031, "eval_wer": 0.8908045977011494, "step": 13500 }, { "epoch": 68.34, "learning_rate": 2.645111731843575e-05, "loss": 2.3351, "step": 13600 }, { "epoch": 68.84, "learning_rate": 2.603212290502793e-05, "loss": 2.3445, "step": 13700 }, { "epoch": 69.35, "learning_rate": 2.561312849162011e-05, "loss": 2.3649, "step": 13800 }, { "epoch": 69.85, "learning_rate": 2.5194134078212288e-05, "loss": 2.326, "step": 13900 }, { "epoch": 70.35, "learning_rate": 2.4775139664804467e-05, "loss": 2.3457, "step": 14000 }, { "epoch": 70.35, "eval_cer": 0.3187109422864845, "eval_loss": 1.2778037786483765, "eval_runtime": 128.0452, "eval_samples_per_second": 23.101, "eval_steps_per_second": 2.89, "eval_wer": 0.889790398918188, "step": 14000 }, { "epoch": 70.85, "learning_rate": 2.4356145251396646e-05, "loss": 2.2999, "step": 14100 }, { "epoch": 71.36, "learning_rate": 2.3937150837988826e-05, "loss": 2.3383, "step": 14200 }, { "epoch": 71.86, "learning_rate": 2.3518156424581005e-05, "loss": 2.312, "step": 14300 }, { "epoch": 72.36, "learning_rate": 2.309916201117318e-05, "loss": 2.296, "step": 14400 }, { "epoch": 72.86, "learning_rate": 2.268016759776536e-05, "loss": 2.3099, "step": 14500 }, { "epoch": 72.86, "eval_cer": 0.3172405342482539, "eval_loss": 1.266068935394287, "eval_runtime": 123.4423, "eval_samples_per_second": 23.963, "eval_steps_per_second": 2.997, "eval_wer": 0.8830290736984449, "step": 14500 }, { "epoch": 73.37, "learning_rate": 2.226117318435754e-05, "loss": 2.2976, "step": 14600 }, { "epoch": 73.87, "learning_rate": 2.184217877094972e-05, "loss": 2.3002, "step": 14700 }, { "epoch": 74.37, "learning_rate": 2.14231843575419e-05, "loss": 2.2536, "step": 14800 }, { "epoch": 74.87, "learning_rate": 2.1004189944134078e-05, "loss": 2.2683, "step": 14900 }, { "epoch": 75.38, "learning_rate": 2.0585195530726257e-05, "loss": 2.2559, "step": 15000 }, { "epoch": 75.38, "eval_cer": 0.3143405628395213, "eval_loss": 1.2474771738052368, "eval_runtime": 124.7835, "eval_samples_per_second": 23.705, "eval_steps_per_second": 2.965, "eval_wer": 0.8850574712643678, "step": 15000 }, { "epoch": 75.88, "learning_rate": 2.0166201117318437e-05, "loss": 2.2334, "step": 15100 }, { "epoch": 76.38, "learning_rate": 1.9747206703910616e-05, "loss": 2.2153, "step": 15200 }, { "epoch": 76.88, "learning_rate": 1.932821229050279e-05, "loss": 2.2244, "step": 15300 }, { "epoch": 77.39, "learning_rate": 1.8909217877094968e-05, "loss": 2.2327, "step": 15400 }, { "epoch": 77.89, "learning_rate": 1.8490223463687148e-05, "loss": 2.2264, "step": 15500 }, { "epoch": 77.89, "eval_cer": 0.3085406200220561, "eval_loss": 1.2318875789642334, "eval_runtime": 127.717, "eval_samples_per_second": 23.161, "eval_steps_per_second": 2.897, "eval_wer": 0.8739012846517917, "step": 15500 }, { "epoch": 78.39, "learning_rate": 1.8071229050279327e-05, "loss": 2.2722, "step": 15600 }, { "epoch": 78.89, "learning_rate": 1.7652234636871506e-05, "loss": 2.1997, "step": 15700 }, { "epoch": 79.4, "learning_rate": 1.7233240223463686e-05, "loss": 2.1979, "step": 15800 }, { "epoch": 79.9, "learning_rate": 1.6814245810055865e-05, "loss": 2.222, "step": 15900 }, { "epoch": 80.4, "learning_rate": 1.639525139664804e-05, "loss": 2.196, "step": 16000 }, { "epoch": 80.4, "eval_cer": 0.3048645999264796, "eval_loss": 1.2218027114868164, "eval_runtime": 124.9046, "eval_samples_per_second": 23.682, "eval_steps_per_second": 2.962, "eval_wer": 0.8722109533468559, "step": 16000 }, { "epoch": 80.9, "learning_rate": 1.597625698324022e-05, "loss": 2.2138, "step": 16100 }, { "epoch": 81.41, "learning_rate": 1.55572625698324e-05, "loss": 2.1853, "step": 16200 }, { "epoch": 81.91, "learning_rate": 1.5138268156424581e-05, "loss": 2.2029, "step": 16300 }, { "epoch": 82.41, "learning_rate": 1.471927374301676e-05, "loss": 2.1583, "step": 16400 }, { "epoch": 82.91, "learning_rate": 1.4300279329608936e-05, "loss": 2.1613, "step": 16500 }, { "epoch": 82.91, "eval_cer": 0.30506882326512275, "eval_loss": 1.2093260288238525, "eval_runtime": 123.6333, "eval_samples_per_second": 23.926, "eval_steps_per_second": 2.993, "eval_wer": 0.8718728870858689, "step": 16500 }, { "epoch": 83.42, "learning_rate": 1.3881284916201116e-05, "loss": 2.1637, "step": 16600 }, { "epoch": 83.92, "learning_rate": 1.3462290502793295e-05, "loss": 2.1648, "step": 16700 }, { "epoch": 84.42, "learning_rate": 1.3043296089385474e-05, "loss": 2.1639, "step": 16800 }, { "epoch": 84.92, "learning_rate": 1.2624301675977652e-05, "loss": 2.1513, "step": 16900 }, { "epoch": 85.43, "learning_rate": 1.2205307262569831e-05, "loss": 2.1455, "step": 17000 }, { "epoch": 85.43, "eval_cer": 0.30053506514724504, "eval_loss": 1.2055062055587769, "eval_runtime": 124.9017, "eval_samples_per_second": 23.683, "eval_steps_per_second": 2.962, "eval_wer": 0.8624070317782285, "step": 17000 }, { "epoch": 85.93, "learning_rate": 1.178631284916201e-05, "loss": 2.1356, "step": 17100 }, { "epoch": 86.43, "learning_rate": 1.1367318435754188e-05, "loss": 2.1369, "step": 17200 }, { "epoch": 86.93, "learning_rate": 1.0948324022346368e-05, "loss": 2.1328, "step": 17300 }, { "epoch": 87.44, "learning_rate": 1.0529329608938546e-05, "loss": 2.1225, "step": 17400 }, { "epoch": 87.94, "learning_rate": 1.0110335195530725e-05, "loss": 2.1193, "step": 17500 }, { "epoch": 87.94, "eval_cer": 0.29824776375444184, "eval_loss": 1.1974669694900513, "eval_runtime": 123.9455, "eval_samples_per_second": 23.865, "eval_steps_per_second": 2.985, "eval_wer": 0.8600405679513184, "step": 17500 }, { "epoch": 88.44, "learning_rate": 9.691340782122904e-06, "loss": 2.1388, "step": 17600 }, { "epoch": 88.94, "learning_rate": 9.276536312849161e-06, "loss": 2.0962, "step": 17700 }, { "epoch": 89.45, "learning_rate": 8.85754189944134e-06, "loss": 2.1021, "step": 17800 }, { "epoch": 89.95, "learning_rate": 8.438547486033518e-06, "loss": 2.0829, "step": 17900 }, { "epoch": 90.45, "learning_rate": 8.019553072625698e-06, "loss": 2.0911, "step": 18000 }, { "epoch": 90.45, "eval_cer": 0.30028999714087323, "eval_loss": 1.1960209608078003, "eval_runtime": 122.0182, "eval_samples_per_second": 24.242, "eval_steps_per_second": 3.032, "eval_wer": 0.8647734956051386, "step": 18000 }, { "epoch": 90.95, "learning_rate": 7.600558659217876e-06, "loss": 2.0836, "step": 18100 }, { "epoch": 91.46, "learning_rate": 7.1815642458100555e-06, "loss": 2.0527, "step": 18200 }, { "epoch": 91.96, "learning_rate": 6.762569832402233e-06, "loss": 2.0807, "step": 18300 }, { "epoch": 92.46, "learning_rate": 6.3435754189944126e-06, "loss": 2.0962, "step": 18400 }, { "epoch": 92.96, "learning_rate": 5.924581005586592e-06, "loss": 2.0884, "step": 18500 }, { "epoch": 92.96, "eval_cer": 0.2971449577257689, "eval_loss": 1.1871271133422852, "eval_runtime": 124.4024, "eval_samples_per_second": 23.778, "eval_steps_per_second": 2.974, "eval_wer": 0.8637592968221771, "step": 18500 }, { "epoch": 93.47, "learning_rate": 5.5055865921787705e-06, "loss": 2.0708, "step": 18600 }, { "epoch": 93.97, "learning_rate": 5.08659217877095e-06, "loss": 2.067, "step": 18700 }, { "epoch": 94.47, "learning_rate": 4.667597765363128e-06, "loss": 2.0985, "step": 18800 }, { "epoch": 94.97, "learning_rate": 4.248603351955307e-06, "loss": 2.0655, "step": 18900 }, { "epoch": 95.48, "learning_rate": 3.829608938547485e-06, "loss": 2.0766, "step": 19000 }, { "epoch": 95.48, "eval_cer": 0.296695666380754, "eval_loss": 1.1813979148864746, "eval_runtime": 123.6675, "eval_samples_per_second": 23.919, "eval_steps_per_second": 2.992, "eval_wer": 0.8617308992562542, "step": 19000 }, { "epoch": 95.98, "learning_rate": 3.4106145251396644e-06, "loss": 2.0485, "step": 19100 }, { "epoch": 96.48, "learning_rate": 2.9916201117318433e-06, "loss": 2.0725, "step": 19200 }, { "epoch": 96.98, "learning_rate": 2.572625698324022e-06, "loss": 2.047, "step": 19300 }, { "epoch": 97.49, "learning_rate": 2.153631284916201e-06, "loss": 2.0649, "step": 19400 }, { "epoch": 97.99, "learning_rate": 1.7346368715083795e-06, "loss": 2.0735, "step": 19500 }, { "epoch": 97.99, "eval_cer": 0.2968590450516685, "eval_loss": 1.180108904838562, "eval_runtime": 123.9462, "eval_samples_per_second": 23.865, "eval_steps_per_second": 2.985, "eval_wer": 0.8620689655172413, "step": 19500 }, { "epoch": 98.49, "learning_rate": 1.3156424581005587e-06, "loss": 2.0277, "step": 19600 }, { "epoch": 98.99, "learning_rate": 8.966480446927373e-07, "loss": 2.0545, "step": 19700 }, { "epoch": 99.5, "learning_rate": 4.776536312849162e-07, "loss": 2.0502, "step": 19800 }, { "epoch": 100.0, "learning_rate": 5.865921787709496e-08, "loss": 2.0333, "step": 19900 }, { "epoch": 100.0, "step": 19900, "total_flos": 6.621453767453566e+19, "train_loss": 5.91964619928868, "train_runtime": 36587.1788, "train_samples_per_second": 17.438, "train_steps_per_second": 0.544 } ], "max_steps": 19900, "num_train_epochs": 100, "total_flos": 6.621453767453566e+19, "trial_name": null, "trial_params": null }