{ "best_metric": 0.47799625992774963, "best_model_checkpoint": "../checkpoints/Wav2Vec/NURC-SP/final-version/train/checkpoint-5207", "epoch": 16.0, "eval_steps": 500, "global_step": 13886, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.48270344734191895, "learning_rate": 1.3837638376383764e-08, "loss": 0.4969, "step": 1 }, { "epoch": 0.12, "grad_norm": NaN, "learning_rate": 1.3699261992619927e-06, "loss": 1.0935, "step": 100 }, { "epoch": 0.23, "grad_norm": 10.196231842041016, "learning_rate": 2.753690036900369e-06, "loss": 1.0424, "step": 200 }, { "epoch": 0.35, "grad_norm": 7.084147930145264, "learning_rate": 4.137453874538745e-06, "loss": 0.9727, "step": 300 }, { "epoch": 0.46, "grad_norm": 4.268230438232422, "learning_rate": 5.521217712177122e-06, "loss": 0.9538, "step": 400 }, { "epoch": 0.58, "grad_norm": 7.1523942947387695, "learning_rate": 6.904981549815498e-06, "loss": 0.941, "step": 500 }, { "epoch": 0.69, "grad_norm": 7.721753120422363, "learning_rate": 8.288745387453875e-06, "loss": 0.9294, "step": 600 }, { "epoch": 0.81, "grad_norm": 6.676694393157959, "learning_rate": 9.67250922509225e-06, "loss": 0.9104, "step": 700 }, { "epoch": 0.92, "grad_norm": 13.917472839355469, "learning_rate": 1.1056273062730627e-05, "loss": 0.9171, "step": 800 }, { "epoch": 1.0, "eval_loss": 0.5722900629043579, "eval_runtime": 102.9565, "eval_samples_per_second": 30.45, "eval_steps_per_second": 3.807, "eval_wer": 0.25805475722281046, "step": 867 }, { "epoch": 1.04, "grad_norm": 1.1242845058441162, "learning_rate": 1.2440036900369004e-05, "loss": 0.8226, "step": 900 }, { "epoch": 1.15, "grad_norm": 1.4485478401184082, "learning_rate": 1.382380073800738e-05, "loss": 0.8966, "step": 1000 }, { "epoch": 1.27, "grad_norm": 1.5878037214279175, "learning_rate": 1.5207564575645757e-05, "loss": 0.881, "step": 1100 }, { "epoch": 1.38, "grad_norm": 0.9301995635032654, "learning_rate": 1.6591328413284134e-05, "loss": 0.8691, "step": 1200 }, { "epoch": 1.5, "grad_norm": 1.2811486721038818, "learning_rate": 1.797509225092251e-05, "loss": 0.8756, "step": 1300 }, { "epoch": 1.61, "grad_norm": 1.5193356275558472, "learning_rate": 1.9358856088560884e-05, "loss": 0.8687, "step": 1400 }, { "epoch": 1.73, "grad_norm": 1.1962685585021973, "learning_rate": 2.072878228782288e-05, "loss": 0.8588, "step": 1500 }, { "epoch": 1.84, "grad_norm": 1.906406044960022, "learning_rate": 2.2112546125461254e-05, "loss": 0.8614, "step": 1600 }, { "epoch": 1.96, "grad_norm": 2.207412004470825, "learning_rate": 2.349630996309963e-05, "loss": 0.8574, "step": 1700 }, { "epoch": 2.0, "eval_loss": 0.5288712978363037, "eval_runtime": 107.9908, "eval_samples_per_second": 29.03, "eval_steps_per_second": 3.63, "eval_wer": 0.254853022739878, "step": 1735 }, { "epoch": 2.07, "grad_norm": 1.1061471700668335, "learning_rate": 2.4880073800738008e-05, "loss": 0.8829, "step": 1800 }, { "epoch": 2.19, "grad_norm": 0.760769784450531, "learning_rate": 2.6263837638376385e-05, "loss": 0.8459, "step": 1900 }, { "epoch": 2.3, "grad_norm": 1.408493161201477, "learning_rate": 2.764760147601476e-05, "loss": 0.8378, "step": 2000 }, { "epoch": 2.42, "grad_norm": 0.9650698304176331, "learning_rate": 2.9031365313653138e-05, "loss": 0.8415, "step": 2100 }, { "epoch": 2.53, "grad_norm": 0.9561858177185059, "learning_rate": 2.9978145791850807e-05, "loss": 0.8364, "step": 2200 }, { "epoch": 2.65, "grad_norm": 0.7619096040725708, "learning_rate": 2.9905298431353505e-05, "loss": 0.8364, "step": 2300 }, { "epoch": 2.77, "grad_norm": 1.0944970846176147, "learning_rate": 2.9833179544461174e-05, "loss": 0.8256, "step": 2400 }, { "epoch": 2.88, "grad_norm": 0.7943573594093323, "learning_rate": 2.9760332183963868e-05, "loss": 0.8175, "step": 2500 }, { "epoch": 3.0, "grad_norm": 2.035665988922119, "learning_rate": 2.9688213297071538e-05, "loss": 0.8533, "step": 2600 }, { "epoch": 3.0, "eval_loss": 0.5230820178985596, "eval_runtime": 99.7653, "eval_samples_per_second": 31.424, "eval_steps_per_second": 3.929, "eval_wer": 0.2511722886098926, "step": 2603 }, { "epoch": 3.11, "grad_norm": 1.9500775337219238, "learning_rate": 2.9615365936574232e-05, "loss": 0.8022, "step": 2700 }, { "epoch": 3.23, "grad_norm": 2.2833547592163086, "learning_rate": 2.954251857607693e-05, "loss": 0.8143, "step": 2800 }, { "epoch": 3.34, "grad_norm": 1.7430369853973389, "learning_rate": 2.9469671215579623e-05, "loss": 0.8134, "step": 2900 }, { "epoch": 3.46, "grad_norm": 1.7504712343215942, "learning_rate": 2.9396823855082317e-05, "loss": 0.8026, "step": 3000 }, { "epoch": 3.57, "grad_norm": 1.6993814706802368, "learning_rate": 2.9323976494585015e-05, "loss": 0.8098, "step": 3100 }, { "epoch": 3.69, "grad_norm": 3.484405279159546, "learning_rate": 2.925112913408771e-05, "loss": 0.812, "step": 3200 }, { "epoch": 3.8, "grad_norm": 2.0759809017181396, "learning_rate": 2.917901024719538e-05, "loss": 0.8121, "step": 3300 }, { "epoch": 3.92, "grad_norm": 2.325293779373169, "learning_rate": 2.9106162886698072e-05, "loss": 0.8076, "step": 3400 }, { "epoch": 4.0, "eval_loss": 0.5064914226531982, "eval_runtime": 117.608, "eval_samples_per_second": 26.656, "eval_steps_per_second": 3.333, "eval_wer": 0.23836535067816267, "step": 3471 }, { "epoch": 4.03, "grad_norm": 1.8136922121047974, "learning_rate": 2.9033315526200766e-05, "loss": 0.7724, "step": 3500 }, { "epoch": 4.15, "grad_norm": 7.821990013122559, "learning_rate": 2.8961196639308436e-05, "loss": 0.7879, "step": 3600 }, { "epoch": 4.26, "grad_norm": 0.916205108165741, "learning_rate": 2.888834927881113e-05, "loss": 0.7907, "step": 3700 }, { "epoch": 4.38, "grad_norm": 1.157679557800293, "learning_rate": 2.8815501918313824e-05, "loss": 0.7859, "step": 3800 }, { "epoch": 4.49, "grad_norm": 1.1060450077056885, "learning_rate": 2.874265455781652e-05, "loss": 0.7934, "step": 3900 }, { "epoch": 4.61, "grad_norm": 1.0524017810821533, "learning_rate": 2.8669807197319215e-05, "loss": 0.7887, "step": 4000 }, { "epoch": 4.72, "grad_norm": 0.9493302702903748, "learning_rate": 2.8596959836821913e-05, "loss": 0.7962, "step": 4100 }, { "epoch": 4.84, "grad_norm": 1.216361403465271, "learning_rate": 2.8524840949929582e-05, "loss": 0.7902, "step": 4200 }, { "epoch": 4.95, "grad_norm": 0.858220100402832, "learning_rate": 2.8451993589432277e-05, "loss": 0.7875, "step": 4300 }, { "epoch": 5.0, "eval_loss": 0.4950461685657501, "eval_runtime": 107.5304, "eval_samples_per_second": 29.155, "eval_steps_per_second": 3.645, "eval_wer": 0.23864266626329855, "step": 4339 }, { "epoch": 5.07, "grad_norm": 0.6915091872215271, "learning_rate": 2.8379146228934974e-05, "loss": 0.8172, "step": 4400 }, { "epoch": 5.19, "grad_norm": 0.8395456671714783, "learning_rate": 2.8306298868437668e-05, "loss": 0.7719, "step": 4500 }, { "epoch": 5.3, "grad_norm": 0.5675917863845825, "learning_rate": 2.8233451507940365e-05, "loss": 0.7725, "step": 4600 }, { "epoch": 5.42, "grad_norm": 0.7588133215904236, "learning_rate": 2.816060414744306e-05, "loss": 0.7735, "step": 4700 }, { "epoch": 5.53, "grad_norm": 0.7658157348632812, "learning_rate": 2.8087756786945757e-05, "loss": 0.7713, "step": 4800 }, { "epoch": 5.65, "grad_norm": 0.6394296288490295, "learning_rate": 2.801490942644845e-05, "loss": 0.7683, "step": 4900 }, { "epoch": 5.76, "grad_norm": 1.0770230293273926, "learning_rate": 2.794206206595114e-05, "loss": 0.784, "step": 5000 }, { "epoch": 5.88, "grad_norm": 0.537818193435669, "learning_rate": 2.786921470545384e-05, "loss": 0.7705, "step": 5100 }, { "epoch": 5.99, "grad_norm": 1.1256853342056274, "learning_rate": 2.7796367344956533e-05, "loss": 0.7947, "step": 5200 }, { "epoch": 6.0, "eval_loss": 0.47799625992774963, "eval_runtime": 102.2073, "eval_samples_per_second": 30.673, "eval_steps_per_second": 3.835, "eval_wer": 0.23347451217667523, "step": 5207 }, { "epoch": 6.11, "grad_norm": 2.259537935256958, "learning_rate": 2.772351998445923e-05, "loss": 0.7386, "step": 5300 }, { "epoch": 6.22, "grad_norm": 1.8576114177703857, "learning_rate": 2.7650672623961924e-05, "loss": 0.7591, "step": 5400 }, { "epoch": 6.34, "grad_norm": 3.029550075531006, "learning_rate": 2.7578553737069594e-05, "loss": 0.765, "step": 5500 }, { "epoch": 6.45, "grad_norm": 1.3059985637664795, "learning_rate": 2.750570637657229e-05, "loss": 0.7484, "step": 5600 }, { "epoch": 6.57, "grad_norm": 2.597036600112915, "learning_rate": 2.7432859016074985e-05, "loss": 0.7696, "step": 5700 }, { "epoch": 6.68, "grad_norm": 1.8154231309890747, "learning_rate": 2.736074012918265e-05, "loss": 0.7642, "step": 5800 }, { "epoch": 6.8, "grad_norm": 1.9362813234329224, "learning_rate": 2.728789276868535e-05, "loss": 0.7703, "step": 5900 }, { "epoch": 6.91, "grad_norm": 2.5824599266052246, "learning_rate": 2.7215045408188043e-05, "loss": 0.7621, "step": 6000 }, { "epoch": 7.0, "eval_loss": 0.4984392821788788, "eval_runtime": 96.472, "eval_samples_per_second": 32.496, "eval_steps_per_second": 4.063, "eval_wer": 0.23700398326022287, "step": 6075 }, { "epoch": 7.03, "grad_norm": 1.1438063383102417, "learning_rate": 2.714219804769074e-05, "loss": 0.7505, "step": 6100 }, { "epoch": 7.14, "grad_norm": 0.8449379205703735, "learning_rate": 2.7069350687193434e-05, "loss": 0.7626, "step": 6200 }, { "epoch": 7.26, "grad_norm": 1.075359582901001, "learning_rate": 2.699650332669613e-05, "loss": 0.7503, "step": 6300 }, { "epoch": 7.37, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 14440011566940.16, "step": 6400 }, { "epoch": 7.49, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 6500 }, { "epoch": 7.6, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 6600 }, { "epoch": 7.72, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 6700 }, { "epoch": 7.84, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 6800 }, { "epoch": 7.95, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 6900 }, { "epoch": 8.0, "eval_loss": NaN, "eval_runtime": 97.0769, "eval_samples_per_second": 32.294, "eval_steps_per_second": 4.038, "eval_wer": 1.0, "step": 6943 }, { "epoch": 8.07, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 7000 }, { "epoch": 8.18, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 7100 }, { "epoch": 8.3, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 7200 }, { "epoch": 8.41, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 7300 }, { "epoch": 8.53, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 7400 }, { "epoch": 8.64, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 7500 }, { "epoch": 8.76, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 7600 }, { "epoch": 8.87, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 7700 }, { "epoch": 8.99, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 7800 }, { "epoch": 9.0, "eval_loss": NaN, "eval_runtime": 108.3909, "eval_samples_per_second": 28.923, "eval_steps_per_second": 3.617, "eval_wer": 1.0, "step": 7810 }, { "epoch": 9.1, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 7900 }, { "epoch": 9.22, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 8000 }, { "epoch": 9.33, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 8100 }, { "epoch": 9.45, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 8200 }, { "epoch": 9.56, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 8300 }, { "epoch": 9.68, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 8400 }, { "epoch": 9.79, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 8500 }, { "epoch": 9.91, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 8600 }, { "epoch": 10.0, "eval_loss": NaN, "eval_runtime": 107.8221, "eval_samples_per_second": 29.076, "eval_steps_per_second": 3.636, "eval_wer": 1.0, "step": 8678 }, { "epoch": 10.02, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 8700 }, { "epoch": 10.14, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 8800 }, { "epoch": 10.25, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 8900 }, { "epoch": 10.37, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 9000 }, { "epoch": 10.49, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 9100 }, { "epoch": 10.6, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 9200 }, { "epoch": 10.72, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 9300 }, { "epoch": 10.83, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 9400 }, { "epoch": 10.95, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 9500 }, { "epoch": 11.0, "eval_loss": NaN, "eval_runtime": 116.4634, "eval_samples_per_second": 26.918, "eval_steps_per_second": 3.366, "eval_wer": 1.0, "step": 9546 }, { "epoch": 11.06, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 9600 }, { "epoch": 11.18, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 9700 }, { "epoch": 11.29, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 9800 }, { "epoch": 11.41, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 9900 }, { "epoch": 11.52, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 10000 }, { "epoch": 11.64, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 10100 }, { "epoch": 11.75, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 10200 }, { "epoch": 11.87, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 10300 }, { "epoch": 11.98, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 10400 }, { "epoch": 12.0, "eval_loss": NaN, "eval_runtime": 114.432, "eval_samples_per_second": 27.396, "eval_steps_per_second": 3.426, "eval_wer": 1.0, "step": 10414 }, { "epoch": 12.1, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 10500 }, { "epoch": 12.21, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 10600 }, { "epoch": 12.33, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 10700 }, { "epoch": 12.44, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 10800 }, { "epoch": 12.56, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 10900 }, { "epoch": 12.67, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 11000 }, { "epoch": 12.79, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 11100 }, { "epoch": 12.91, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 11200 }, { "epoch": 13.0, "eval_loss": NaN, "eval_runtime": 107.4788, "eval_samples_per_second": 29.169, "eval_steps_per_second": 3.647, "eval_wer": 1.0, "step": 11282 }, { "epoch": 13.02, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 11300 }, { "epoch": 13.14, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 11400 }, { "epoch": 13.25, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 11500 }, { "epoch": 13.37, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 11600 }, { "epoch": 13.48, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 11700 }, { "epoch": 13.6, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 11800 }, { "epoch": 13.71, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 11900 }, { "epoch": 13.83, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 12000 }, { "epoch": 13.94, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 12100 }, { "epoch": 14.0, "eval_loss": NaN, "eval_runtime": 106.6864, "eval_samples_per_second": 29.385, "eval_steps_per_second": 3.674, "eval_wer": 1.0, "step": 12150 }, { "epoch": 14.06, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 12200 }, { "epoch": 14.17, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 12300 }, { "epoch": 14.29, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 12400 }, { "epoch": 14.4, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 12500 }, { "epoch": 14.52, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 12600 }, { "epoch": 14.63, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 12700 }, { "epoch": 14.75, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 12800 }, { "epoch": 14.86, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 12900 }, { "epoch": 14.98, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 13000 }, { "epoch": 15.0, "eval_loss": NaN, "eval_runtime": 109.7539, "eval_samples_per_second": 28.564, "eval_steps_per_second": 3.572, "eval_wer": 1.0, "step": 13018 }, { "epoch": 15.09, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 13100 }, { "epoch": 15.21, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 13200 }, { "epoch": 15.32, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 13300 }, { "epoch": 15.44, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 13400 }, { "epoch": 15.56, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 13500 }, { "epoch": 15.67, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 13600 }, { "epoch": 15.79, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 13700 }, { "epoch": 15.9, "grad_norm": NaN, "learning_rate": 2.697756301296683e-05, "loss": 0.0, "step": 13800 }, { "epoch": 16.0, "eval_loss": NaN, "eval_runtime": 108.5954, "eval_samples_per_second": 28.869, "eval_steps_per_second": 3.61, "eval_wer": 1.0, "step": 13886 }, { "epoch": 16.0, "step": 13886, "total_flos": 6.139205851819624e+20, "train_loss": 103989713142.67763, "train_runtime": 155022.8715, "train_samples_per_second": 53.744, "train_steps_per_second": 0.28 } ], "logging_steps": 100, "max_steps": 43350, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 6.139205851819624e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }