{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 16300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.61, "learning_rate": 3.3949999999999997e-06, "loss": 21.3924, "step": 100 }, { "epoch": 1.23, "learning_rate": 6.895e-06, "loss": 10.9359, "step": 200 }, { "epoch": 1.84, "learning_rate": 1.0394999999999998e-05, "loss": 5.904, "step": 300 }, { "epoch": 2.45, "learning_rate": 1.3895e-05, "loss": 4.7637, "step": 400 }, { "epoch": 3.07, "learning_rate": 1.7395e-05, "loss": 3.9441, "step": 500 }, { "epoch": 3.68, "learning_rate": 2.0894999999999996e-05, "loss": 3.4134, "step": 600 }, { "epoch": 4.29, "learning_rate": 2.4394999999999996e-05, "loss": 3.2058, "step": 700 }, { "epoch": 4.91, "learning_rate": 2.7895e-05, "loss": 3.1361, "step": 800 }, { "epoch": 5.52, "learning_rate": 3.1395e-05, "loss": 3.0826, "step": 900 }, { "epoch": 6.13, "learning_rate": 3.4895e-05, "loss": 3.0202, "step": 1000 }, { "epoch": 6.75, "learning_rate": 3.8394999999999994e-05, "loss": 2.9628, "step": 1100 }, { "epoch": 7.36, "learning_rate": 4.1895e-05, "loss": 2.9213, "step": 1200 }, { "epoch": 7.98, "learning_rate": 4.5394999999999995e-05, "loss": 2.6817, "step": 1300 }, { "epoch": 8.59, "learning_rate": 4.8895e-05, "loss": 2.166, "step": 1400 }, { "epoch": 9.2, "learning_rate": 5.2395e-05, "loss": 1.7136, "step": 1500 }, { "epoch": 9.82, "learning_rate": 5.589499999999999e-05, "loss": 1.5318, "step": 1600 }, { "epoch": 10.43, "learning_rate": 5.9394999999999996e-05, "loss": 1.4485, "step": 1700 }, { "epoch": 11.04, "learning_rate": 6.289499999999999e-05, "loss": 1.3967, "step": 1800 }, { "epoch": 11.66, "learning_rate": 6.639499999999999e-05, "loss": 1.3518, "step": 1900 }, { "epoch": 12.27, "learning_rate": 6.9895e-05, "loss": 1.3161, "step": 2000 }, { "epoch": 12.27, "eval_loss": 0.419926255941391, "eval_runtime": 134.9817, "eval_samples_per_second": 17.743, "eval_steps_per_second": 17.743, "eval_wer": 0.4796722736018869, "step": 2000 }, { "epoch": 12.88, "learning_rate": 6.952517482517481e-05, "loss": 1.3043, "step": 2100 }, { "epoch": 13.5, "learning_rate": 6.903566433566433e-05, "loss": 1.2899, "step": 2200 }, { "epoch": 14.11, "learning_rate": 6.854615384615384e-05, "loss": 1.2446, "step": 2300 }, { "epoch": 14.72, "learning_rate": 6.805664335664335e-05, "loss": 1.243, "step": 2400 }, { "epoch": 15.34, "learning_rate": 6.756713286713286e-05, "loss": 1.2123, "step": 2500 }, { "epoch": 15.95, "learning_rate": 6.707762237762238e-05, "loss": 1.2192, "step": 2600 }, { "epoch": 16.56, "learning_rate": 6.658811188811187e-05, "loss": 1.1792, "step": 2700 }, { "epoch": 17.18, "learning_rate": 6.609860139860139e-05, "loss": 1.1835, "step": 2800 }, { "epoch": 17.79, "learning_rate": 6.56090909090909e-05, "loss": 1.146, "step": 2900 }, { "epoch": 18.4, "learning_rate": 6.511958041958041e-05, "loss": 1.1756, "step": 3000 }, { "epoch": 19.02, "learning_rate": 6.463006993006992e-05, "loss": 1.1423, "step": 3100 }, { "epoch": 19.63, "learning_rate": 6.414055944055944e-05, "loss": 1.1392, "step": 3200 }, { "epoch": 20.25, "learning_rate": 6.365104895104895e-05, "loss": 1.1176, "step": 3300 }, { "epoch": 20.86, "learning_rate": 6.316153846153845e-05, "loss": 1.1073, "step": 3400 }, { "epoch": 21.47, "learning_rate": 6.267202797202796e-05, "loss": 1.096, "step": 3500 }, { "epoch": 22.09, "learning_rate": 6.218251748251747e-05, "loss": 1.0931, "step": 3600 }, { "epoch": 22.7, "learning_rate": 6.169300699300698e-05, "loss": 1.0872, "step": 3700 }, { "epoch": 23.31, "learning_rate": 6.12034965034965e-05, "loss": 1.0687, "step": 3800 }, { "epoch": 23.93, "learning_rate": 6.071398601398601e-05, "loss": 1.0871, "step": 3900 }, { "epoch": 24.54, "learning_rate": 6.022447552447552e-05, "loss": 1.0643, "step": 4000 }, { "epoch": 24.54, "eval_loss": 0.29822662472724915, "eval_runtime": 132.5892, "eval_samples_per_second": 18.063, "eval_steps_per_second": 18.063, "eval_wer": 0.3721060145242381, "step": 4000 }, { "epoch": 25.15, "learning_rate": 5.973496503496503e-05, "loss": 1.0613, "step": 4100 }, { "epoch": 25.77, "learning_rate": 5.924545454545454e-05, "loss": 1.059, "step": 4200 }, { "epoch": 26.38, "learning_rate": 5.876083916083915e-05, "loss": 1.0477, "step": 4300 }, { "epoch": 26.99, "learning_rate": 5.827132867132866e-05, "loss": 1.0558, "step": 4400 }, { "epoch": 27.61, "learning_rate": 5.778671328671328e-05, "loss": 1.0401, "step": 4500 }, { "epoch": 28.22, "learning_rate": 5.729720279720279e-05, "loss": 1.0417, "step": 4600 }, { "epoch": 28.83, "learning_rate": 5.68076923076923e-05, "loss": 1.0258, "step": 4700 }, { "epoch": 29.45, "learning_rate": 5.631818181818181e-05, "loss": 1.0307, "step": 4800 }, { "epoch": 30.06, "learning_rate": 5.5828671328671325e-05, "loss": 1.0112, "step": 4900 }, { "epoch": 30.67, "learning_rate": 5.5344055944055944e-05, "loss": 1.0008, "step": 5000 }, { "epoch": 31.29, "learning_rate": 5.485454545454545e-05, "loss": 1.0034, "step": 5100 }, { "epoch": 31.9, "learning_rate": 5.4365034965034955e-05, "loss": 1.0028, "step": 5200 }, { "epoch": 32.52, "learning_rate": 5.387552447552447e-05, "loss": 0.9832, "step": 5300 }, { "epoch": 33.13, "learning_rate": 5.338601398601398e-05, "loss": 0.9872, "step": 5400 }, { "epoch": 33.74, "learning_rate": 5.289650349650349e-05, "loss": 0.998, "step": 5500 }, { "epoch": 34.36, "learning_rate": 5.2406993006993005e-05, "loss": 0.9826, "step": 5600 }, { "epoch": 34.97, "learning_rate": 5.191748251748252e-05, "loss": 0.9927, "step": 5700 }, { "epoch": 35.58, "learning_rate": 5.142797202797203e-05, "loss": 0.9891, "step": 5800 }, { "epoch": 36.2, "learning_rate": 5.093846153846153e-05, "loss": 0.9817, "step": 5900 }, { "epoch": 36.81, "learning_rate": 5.044895104895104e-05, "loss": 0.9718, "step": 6000 }, { "epoch": 36.81, "eval_loss": 0.27623239159584045, "eval_runtime": 136.6558, "eval_samples_per_second": 17.526, "eval_steps_per_second": 17.526, "eval_wer": 0.3333126435354727, "step": 6000 }, { "epoch": 37.42, "learning_rate": 4.995944055944055e-05, "loss": 0.9633, "step": 6100 }, { "epoch": 38.04, "learning_rate": 4.9469930069930065e-05, "loss": 0.958, "step": 6200 }, { "epoch": 38.65, "learning_rate": 4.898041958041958e-05, "loss": 0.9586, "step": 6300 }, { "epoch": 39.26, "learning_rate": 4.849580419580419e-05, "loss": 0.9523, "step": 6400 }, { "epoch": 39.88, "learning_rate": 4.80062937062937e-05, "loss": 0.9393, "step": 6500 }, { "epoch": 40.49, "learning_rate": 4.7516783216783215e-05, "loss": 0.9456, "step": 6600 }, { "epoch": 41.1, "learning_rate": 4.702727272727273e-05, "loss": 0.9357, "step": 6700 }, { "epoch": 41.72, "learning_rate": 4.653776223776223e-05, "loss": 0.9249, "step": 6800 }, { "epoch": 42.33, "learning_rate": 4.6048251748251745e-05, "loss": 0.9207, "step": 6900 }, { "epoch": 42.94, "learning_rate": 4.555874125874125e-05, "loss": 0.9177, "step": 7000 }, { "epoch": 43.56, "learning_rate": 4.506923076923076e-05, "loss": 0.8962, "step": 7100 }, { "epoch": 44.17, "learning_rate": 4.4579720279720275e-05, "loss": 0.9111, "step": 7200 }, { "epoch": 44.79, "learning_rate": 4.409020979020979e-05, "loss": 0.918, "step": 7300 }, { "epoch": 45.4, "learning_rate": 4.36006993006993e-05, "loss": 0.9098, "step": 7400 }, { "epoch": 46.01, "learning_rate": 4.3111188811188805e-05, "loss": 0.8899, "step": 7500 }, { "epoch": 46.63, "learning_rate": 4.262167832167832e-05, "loss": 0.8959, "step": 7600 }, { "epoch": 47.24, "learning_rate": 4.213216783216783e-05, "loss": 0.8981, "step": 7700 }, { "epoch": 47.85, "learning_rate": 4.1642657342657336e-05, "loss": 0.9062, "step": 7800 }, { "epoch": 48.47, "learning_rate": 4.115314685314685e-05, "loss": 0.8875, "step": 7900 }, { "epoch": 49.08, "learning_rate": 4.066363636363636e-05, "loss": 0.8772, "step": 8000 }, { "epoch": 49.08, "eval_loss": 0.258562296628952, "eval_runtime": 132.8917, "eval_samples_per_second": 18.022, "eval_steps_per_second": 18.022, "eval_wer": 0.30507106945565143, "step": 8000 }, { "epoch": 49.69, "learning_rate": 4.017412587412587e-05, "loss": 0.8811, "step": 8100 }, { "epoch": 50.31, "learning_rate": 3.9684615384615385e-05, "loss": 0.8742, "step": 8200 }, { "epoch": 50.92, "learning_rate": 3.919510489510489e-05, "loss": 0.8676, "step": 8300 }, { "epoch": 51.53, "learning_rate": 3.87055944055944e-05, "loss": 0.849, "step": 8400 }, { "epoch": 52.15, "learning_rate": 3.821608391608391e-05, "loss": 0.8752, "step": 8500 }, { "epoch": 52.76, "learning_rate": 3.772657342657342e-05, "loss": 0.8643, "step": 8600 }, { "epoch": 53.37, "learning_rate": 3.723706293706293e-05, "loss": 0.8682, "step": 8700 }, { "epoch": 53.99, "learning_rate": 3.6747552447552445e-05, "loss": 0.8516, "step": 8800 }, { "epoch": 54.6, "learning_rate": 3.6262937062937065e-05, "loss": 0.8413, "step": 8900 }, { "epoch": 55.21, "learning_rate": 3.577342657342657e-05, "loss": 0.8539, "step": 9000 }, { "epoch": 55.83, "learning_rate": 3.5283916083916076e-05, "loss": 0.8325, "step": 9100 }, { "epoch": 56.44, "learning_rate": 3.4794405594405595e-05, "loss": 0.84, "step": 9200 }, { "epoch": 57.06, "learning_rate": 3.43048951048951e-05, "loss": 0.8442, "step": 9300 }, { "epoch": 57.67, "learning_rate": 3.381538461538461e-05, "loss": 0.8271, "step": 9400 }, { "epoch": 58.28, "learning_rate": 3.3325874125874125e-05, "loss": 0.8277, "step": 9500 }, { "epoch": 58.9, "learning_rate": 3.283636363636364e-05, "loss": 0.8482, "step": 9600 }, { "epoch": 59.51, "learning_rate": 3.234685314685314e-05, "loss": 0.8216, "step": 9700 }, { "epoch": 60.12, "learning_rate": 3.1857342657342655e-05, "loss": 0.8225, "step": 9800 }, { "epoch": 60.74, "learning_rate": 3.136783216783217e-05, "loss": 0.8121, "step": 9900 }, { "epoch": 61.35, "learning_rate": 3.087832167832168e-05, "loss": 0.8236, "step": 10000 }, { "epoch": 61.35, "eval_loss": 0.25750261545181274, "eval_runtime": 138.9946, "eval_samples_per_second": 17.231, "eval_steps_per_second": 17.231, "eval_wer": 0.286450251381044, "step": 10000 }, { "epoch": 61.96, "learning_rate": 3.0388811188811185e-05, "loss": 0.8146, "step": 10100 }, { "epoch": 62.58, "learning_rate": 2.9899300699300698e-05, "loss": 0.8365, "step": 10200 }, { "epoch": 63.19, "learning_rate": 2.9409790209790207e-05, "loss": 0.8226, "step": 10300 }, { "epoch": 63.8, "learning_rate": 2.8920279720279716e-05, "loss": 0.7952, "step": 10400 }, { "epoch": 64.42, "learning_rate": 2.8430769230769228e-05, "loss": 0.816, "step": 10500 }, { "epoch": 65.03, "learning_rate": 2.794125874125874e-05, "loss": 0.8067, "step": 10600 }, { "epoch": 65.64, "learning_rate": 2.745174825174825e-05, "loss": 0.8081, "step": 10700 }, { "epoch": 66.26, "learning_rate": 2.6962237762237758e-05, "loss": 0.8084, "step": 10800 }, { "epoch": 66.87, "learning_rate": 2.647272727272727e-05, "loss": 0.7976, "step": 10900 }, { "epoch": 67.48, "learning_rate": 2.5983216783216783e-05, "loss": 0.7904, "step": 11000 }, { "epoch": 68.1, "learning_rate": 2.5493706293706292e-05, "loss": 0.809, "step": 11100 }, { "epoch": 68.71, "learning_rate": 2.50041958041958e-05, "loss": 0.7866, "step": 11200 }, { "epoch": 69.33, "learning_rate": 2.4514685314685313e-05, "loss": 0.795, "step": 11300 }, { "epoch": 69.94, "learning_rate": 2.4025174825174825e-05, "loss": 0.786, "step": 11400 }, { "epoch": 70.55, "learning_rate": 2.3535664335664334e-05, "loss": 0.7829, "step": 11500 }, { "epoch": 71.17, "learning_rate": 2.3046153846153843e-05, "loss": 0.7913, "step": 11600 }, { "epoch": 71.78, "learning_rate": 2.2556643356643356e-05, "loss": 0.7659, "step": 11700 }, { "epoch": 72.39, "learning_rate": 2.2067132867132865e-05, "loss": 0.7859, "step": 11800 }, { "epoch": 73.01, "learning_rate": 2.1577622377622374e-05, "loss": 0.7646, "step": 11900 }, { "epoch": 73.62, "learning_rate": 2.1088111888111886e-05, "loss": 0.7745, "step": 12000 }, { "epoch": 73.62, "eval_loss": 0.2602725625038147, "eval_runtime": 136.3746, "eval_samples_per_second": 17.562, "eval_steps_per_second": 17.562, "eval_wer": 0.2816088386816461, "step": 12000 }, { "epoch": 74.23, "learning_rate": 2.0598601398601398e-05, "loss": 0.7653, "step": 12100 }, { "epoch": 74.85, "learning_rate": 2.0109090909090907e-05, "loss": 0.7789, "step": 12200 }, { "epoch": 75.46, "learning_rate": 1.9619580419580416e-05, "loss": 0.7703, "step": 12300 }, { "epoch": 76.07, "learning_rate": 1.9134965034965032e-05, "loss": 0.7523, "step": 12400 }, { "epoch": 76.69, "learning_rate": 1.8645454545454544e-05, "loss": 0.7685, "step": 12500 }, { "epoch": 77.3, "learning_rate": 1.8155944055944057e-05, "loss": 0.7612, "step": 12600 }, { "epoch": 77.91, "learning_rate": 1.7666433566433566e-05, "loss": 0.7412, "step": 12700 }, { "epoch": 78.53, "learning_rate": 1.7176923076923075e-05, "loss": 0.7564, "step": 12800 }, { "epoch": 79.14, "learning_rate": 1.6687412587412587e-05, "loss": 0.7558, "step": 12900 }, { "epoch": 79.75, "learning_rate": 1.6197902097902096e-05, "loss": 0.7379, "step": 13000 }, { "epoch": 80.37, "learning_rate": 1.5708391608391608e-05, "loss": 0.7478, "step": 13100 }, { "epoch": 80.98, "learning_rate": 1.5218881118881117e-05, "loss": 0.7369, "step": 13200 }, { "epoch": 81.6, "learning_rate": 1.4729370629370628e-05, "loss": 0.7487, "step": 13300 }, { "epoch": 82.21, "learning_rate": 1.4239860139860138e-05, "loss": 0.7312, "step": 13400 }, { "epoch": 82.82, "learning_rate": 1.3750349650349649e-05, "loss": 0.7499, "step": 13500 }, { "epoch": 83.44, "learning_rate": 1.326083916083916e-05, "loss": 0.7332, "step": 13600 }, { "epoch": 84.05, "learning_rate": 1.277132867132867e-05, "loss": 0.7389, "step": 13700 }, { "epoch": 84.66, "learning_rate": 1.2281818181818181e-05, "loss": 0.7348, "step": 13800 }, { "epoch": 85.28, "learning_rate": 1.1792307692307692e-05, "loss": 0.7241, "step": 13900 }, { "epoch": 85.89, "learning_rate": 1.13027972027972e-05, "loss": 0.7297, "step": 14000 }, { "epoch": 85.89, "eval_loss": 0.25392723083496094, "eval_runtime": 135.7135, "eval_samples_per_second": 17.647, "eval_steps_per_second": 17.647, "eval_wer": 0.2727329153994165, "step": 14000 }, { "epoch": 86.5, "learning_rate": 1.0813286713286713e-05, "loss": 0.727, "step": 14100 }, { "epoch": 87.12, "learning_rate": 1.0323776223776222e-05, "loss": 0.7309, "step": 14200 }, { "epoch": 87.73, "learning_rate": 9.834265734265734e-06, "loss": 0.7133, "step": 14300 }, { "epoch": 88.34, "learning_rate": 9.344755244755243e-06, "loss": 0.7271, "step": 14400 }, { "epoch": 88.96, "learning_rate": 8.855244755244755e-06, "loss": 0.7264, "step": 14500 }, { "epoch": 89.57, "learning_rate": 8.365734265734264e-06, "loss": 0.71, "step": 14600 }, { "epoch": 90.18, "learning_rate": 7.876223776223775e-06, "loss": 0.7277, "step": 14700 }, { "epoch": 90.8, "learning_rate": 7.3867132867132865e-06, "loss": 0.7109, "step": 14800 }, { "epoch": 91.41, "learning_rate": 6.897202797202796e-06, "loss": 0.7203, "step": 14900 }, { "epoch": 92.02, "learning_rate": 6.407692307692307e-06, "loss": 0.6922, "step": 15000 }, { "epoch": 92.64, "learning_rate": 5.918181818181818e-06, "loss": 0.7343, "step": 15100 }, { "epoch": 93.25, "learning_rate": 5.428671328671328e-06, "loss": 0.71, "step": 15200 }, { "epoch": 93.87, "learning_rate": 4.939160839160839e-06, "loss": 0.7136, "step": 15300 }, { "epoch": 94.48, "learning_rate": 4.4496503496503495e-06, "loss": 0.7185, "step": 15400 }, { "epoch": 95.09, "learning_rate": 3.96013986013986e-06, "loss": 0.7178, "step": 15500 }, { "epoch": 95.71, "learning_rate": 3.4706293706293703e-06, "loss": 0.7125, "step": 15600 }, { "epoch": 96.32, "learning_rate": 2.981118881118881e-06, "loss": 0.7081, "step": 15700 }, { "epoch": 96.93, "learning_rate": 2.4916083916083916e-06, "loss": 0.7087, "step": 15800 }, { "epoch": 97.55, "learning_rate": 2.002097902097902e-06, "loss": 0.7061, "step": 15900 }, { "epoch": 98.16, "learning_rate": 1.5174825174825173e-06, "loss": 0.7079, "step": 16000 }, { "epoch": 98.16, "eval_loss": 0.25544750690460205, "eval_runtime": 137.2236, "eval_samples_per_second": 17.453, "eval_steps_per_second": 17.453, "eval_wer": 0.2680777108807647, "step": 16000 }, { "epoch": 98.77, "learning_rate": 1.027972027972028e-06, "loss": 0.7069, "step": 16100 }, { "epoch": 99.39, "learning_rate": 5.384615384615384e-07, "loss": 0.7058, "step": 16200 }, { "epoch": 100.0, "learning_rate": 4.8951048951048945e-08, "loss": 0.7044, "step": 16300 }, { "epoch": 100.0, "step": 16300, "total_flos": 7.338876659637128e+19, "train_loss": 1.2786970950635663, "train_runtime": 28590.9558, "train_samples_per_second": 18.244, "train_steps_per_second": 0.57 } ], "max_steps": 16300, "num_train_epochs": 100, "total_flos": 7.338876659637128e+19, "trial_name": null, "trial_params": null }